Source code for seqme.models.molecule_descriptors
import os
import numpy as np
from .exceptions import OptionalDependencyError
[docs]
class SAScore:
"""Synthetic Accessibility Score (SA Score) for SMILES sequences.
Installation: ``pip install "seqme[molecule_descriptors]"``
Reference:
P. Ertl and A. Schuffenhauer, "Estimation of synthetic accessibility score of drug-like molecules based on molecular complexity and fragment contributions"
(http://www.jcheminf.com/content/1/1/8)
"""
[docs]
def __init__(self):
try:
import importlib.util
from rdkit.Chem import RDConfig
except ModuleNotFoundError:
raise OptionalDependencyError("molecule_descriptors") from None
path = os.path.join(RDConfig.RDContribDir, "SA_Score", "sascorer.py")
assert os.path.isfile(path), "sascorer.py does not exist."
spec = importlib.util.spec_from_file_location("sascorer", path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
self._sascorer = module
assert hasattr(self._sascorer, "calculateScore"), "Missing 'calculateScore' function in sascorer module."
[docs]
def __call__(self, sequences: list[str]) -> np.ndarray:
"""Compute SA score for a list of SMILES sequences.
Args:
sequences: List of SMILES strings.
Returns:
SA-score for each sequence.
"""
try:
from rdkit import Chem
except ModuleNotFoundError:
raise OptionalDependencyError("molecule_descriptors") from None
return np.array([self._sascorer.calculateScore(Chem.MolFromSmiles(sequence)) for sequence in sequences])
[docs]
class QED:
"""Quantitative Estimate of Drug-likeness for SMILES sequences.
Installation: ``pip install "seqme[molecule_descriptors]"``
Reference:
G. Richard Bickerton et al., "Quantifying the chemical beauty of drugs"
(https://www.nature.com/articles/nchem.1243)
"""
[docs]
def __call__(self, sequences: list[str]) -> np.ndarray:
"""Compute QED of SMILES sequence.
Args:
sequences: SMILES sequences.
Returns:
QED for each sequence.
"""
try:
from rdkit import Chem
from rdkit.Chem.QED import qed
except ModuleNotFoundError:
raise OptionalDependencyError("molecule_descriptors") from None
return np.array([qed(Chem.MolFromSmiles(sequence)) for sequence in sequences])
[docs]
class LogP:
"""Lipophilicity for SMILES sequences.
Installation: ``pip install "seqme[molecule_descriptors]"``
Reference:
S. A. Wildman and Gordon M. Crippen, "Prediction of Physicochemical Parameters by Atomic Contributions"
(https://pubs.acs.org/doi/10.1021/ci990307l)
"""
[docs]
def __call__(self, sequences: list[str]) -> np.ndarray:
"""Compute lipophilicity of SMILES sequence.
Args:
sequences: SMILES sequences.
Returns:
Log-P for each sequence.
"""
try:
from rdkit import Chem
from rdkit.Chem.Descriptors import MolLogP # type: ignore
except ModuleNotFoundError:
raise OptionalDependencyError("molecule_descriptors") from None
return np.array([MolLogP(Chem.MolFromSmiles(sequence)) for sequence in sequences])