Module ccsfp.informatics.molecules_and_images
Expand source code
#!/usr/bin/env python
# Copyright IBM Corporation 2022.
# SPDX-License-Identifier: MIT
# https://www.rdkit.org/docs/GettingStartedInPython.html
# creative commons sa 4.0 tutorial used to learn rdkit methods
# https://creativecommons.org/licenses/by-sa/4.0/
# (C) 2007-2021 by Greg Landrum
# RDKit
from __future__ import annotations
import logging
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdCIPLabeler
# Logging
def smiles_to_molecule(
    s: str,
    addH: bool = True,
    canonicalize: bool = True,
    threed: bool = True,
    add_stereo: bool = False,
    remove_stereo: bool = False,
    random_seed: int = 10459,
    verbose: bool = False,
    test: bool = False,
) -> rdkit.Chem.rdchem.Mol:
    """
    A function to build a RDKit molecule from a smiles string

    The molecule is parsed with ``get_mol_from_smiles``, cleaned up and
    sanitized, and then optionally stripped of stereo information, given
    explicit hydrogens, CIP labels and an embedded 3D conformer.

    :param s: str - smiles string
    :param addH: bool - Add Hydrogens or not
    :param canonicalize: bool - canonicalize molecule rep
    :param threed: bool - get 3D coordinates of the molecule from smiles
    :param add_stereo: bool - set stereo chemistry for the molecule
    :param remove_stereo: bool - remove stereo chemistry for the molecule
    :param random_seed: int - make the structure generation deterministic
    :param verbose: bool - provide verbose logging out
    :param test: True/False - for unit testing; when True the total atom count
        is returned instead of the molecule
    :return: the RDKit molecule, or its atom count (int) when ``test`` is True

    >>> smiles_to_molecule("O", test=True)
    3
    """
    log = logging.getLogger(__name__)

    mol = get_mol_from_smiles(s, canonicalize=canonicalize)
    Chem.rdmolops.Cleanup(mol)
    Chem.rdmolops.SanitizeMol(mol)

    if remove_stereo is True:
        # Round-trip through a non-isomeric SMILES string to drop the stereo
        # information, then rebuild the molecule from that string.
        non_isosmiles = Chem.rdmolfiles.MolToSmiles(
            mol, isomericSmiles=False, allHsExplicit=False,
        )
        mol = get_mol_from_smiles(non_isosmiles, canonicalize=canonicalize)
        Chem.rdmolops.Cleanup(mol)
        Chem.rdmolops.SanitizeMol(mol)

        if verbose is True:
            for atom in mol.GetAtoms():
                log.info(
                    "Atom {} {} in molecule from smiles {} tag will be cleared. "
                    "Set properties {}.".format(
                        atom.GetIdx(),
                        atom.GetSymbol(),
                        s,
                        atom.GetPropsAsDict(
                            includePrivate=True, includeComputed=True),
                    ),
                )

    if addH is True:
        mol = Chem.rdmolops.AddHs(mol)

    if add_stereo is True:
        rdCIPLabeler.AssignCIPLabels(mol)

    if threed:
        # EmbedMolecule reports failure through its return value (-1) rather
        # than an exception; surface it so callers are not silently handed a
        # molecule without a conformer.
        conformer_id = AllChem.EmbedMolecule(mol, randomSeed=random_seed)
        if conformer_id < 0:
            log.warning(
                "3D embedding failed for molecule from smiles {}; "
                "no conformer was added.".format(s),
            )

    if test is True:
        return mol.GetNumAtoms()

    return mol
def get_mol_from_smiles(
    smiles: str, canonicalize: bool = True, test: bool = False,
) -> rdkit.Chem.rdchem.Mol:
    """
    Function to make a mol object based on smiles

    :param smiles: str - SMILES string
    :param canonicalize: True/False - use RDKit canonicalized smiles or the input respectively
    :param test: True/False - for unit testing; when True the heavy atom count
        is returned instead of the molecule
    :return: the sanitized RDKit molecule, or its heavy atom count (int) when
        ``test`` is True
    :raises ValueError: if RDKit cannot parse ``smiles``

    >>> get_mol_from_smiles("O", test=True)
    1
    """
    log = logging.getLogger(__name__)

    # MolFromSmiles signals a parse failure by returning None; check for it
    # here so the caller gets a clear error instead of an argument-type error
    # from a later RDKit call that receives None.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(
            "RDKit could not parse SMILES string {!r}".format(smiles),
        )

    if canonicalize is True:
        s = Chem.CanonSmiles(smiles, useChiral=1)
        # Rebuild from the canonical string so the molecule matches it.
        mol = Chem.MolFromSmiles(s)
    else:
        s = smiles

    log.debug(
        "Input smiles: {} RDKit Canonicalized smiles {} (Note RDKit does not use "
        "general canon smiles rules https://github.com/rdkit/rdkit/issues/2747)".format(
            smiles, s,
        ),
    )

    Chem.rdmolops.SanitizeMol(mol)
    Chem.rdmolops.Cleanup(mol)

    if test is True:
        return mol.GetNumHeavyAtoms()

    return mol
def inchi_to_molecule(
    inchi: str,
    addH: bool = True,
    canonicalize: bool = True,
    threed: bool = True,
    add_stereo: bool = False,
    remove_stereo: bool = False,
    random_seed: int = 10459,
    verbose: bool = False,
    test: bool = False,
) -> rdkit.Chem.rdchem.Mol:
    """
    A function to build a RDKit molecule from an InChI string

    The molecule is parsed with ``get_mol_from_inchi``, cleaned up and
    sanitized, and then optionally stripped of stereo information, given
    explicit hydrogens, CIP labels and an embedded 3D conformer.

    :param inchi: str - InChI string
    :param addH: bool - Add Hydrogens or not
    :param canonicalize: bool - canonicalize molecule rep; only used when the
        molecule is rebuilt from smiles because ``remove_stereo`` is True
    :param threed: bool - get 3D coordinates of the molecule from the InChI
    :param add_stereo: bool - set stereo chemistry for the molecule
    :param remove_stereo: bool - remove stereo chemistry for the molecule
    :param random_seed: int - make the structure generation deterministic
    :param verbose: bool - provide verbose logging out
    :param test: bool - for unit test; when True the total atom count is
        returned instead of the molecule
    :return: the RDKit molecule, or its atom count (int) when ``test`` is True

    >>> inchi_to_molecule("InChI=1S/H2O/h1H2", test=True)
    3
    """
    log = logging.getLogger(__name__)

    mol = get_mol_from_inchi(inchi)
    Chem.rdmolops.Cleanup(mol)
    Chem.rdmolops.SanitizeMol(mol)

    if remove_stereo is True:
        # Round-trip through a non-isomeric SMILES string to drop the stereo
        # information, then rebuild the molecule from that string.
        non_isosmiles = Chem.rdmolfiles.MolToSmiles(
            mol, isomericSmiles=False, allHsExplicit=False,
        )
        mol = get_mol_from_smiles(non_isosmiles, canonicalize=canonicalize)
        Chem.rdmolops.Cleanup(mol)
        Chem.rdmolops.SanitizeMol(mol)

        if verbose is True:
            for atom in mol.GetAtoms():
                log.info(
                    "Atom {} {} in molecule from InChI {} tag will be cleared. "
                    "Set properties {}.".format(
                        atom.GetIdx(),
                        atom.GetSymbol(),
                        inchi,
                        atom.GetPropsAsDict(
                            includePrivate=True, includeComputed=True),
                    ),
                )

    if addH is True:
        mol = Chem.rdmolops.AddHs(mol)

    if add_stereo is True:
        rdCIPLabeler.AssignCIPLabels(mol)

    if threed:
        # EmbedMolecule reports failure through its return value (-1) rather
        # than an exception; surface it so callers are not silently handed a
        # molecule without a conformer.
        conformer_id = AllChem.EmbedMolecule(mol, randomSeed=random_seed)
        if conformer_id < 0:
            log.warning(
                "3D embedding failed for molecule from InChI {}; "
                "no conformer was added.".format(inchi),
            )

    if test is True:
        return mol.GetNumAtoms()

    return mol
def get_mol_from_inchi(inchi: str, test: bool = False) -> rdkit.Chem.rdchem.Mol:
    """
    Function to make a mol object based on an InChI string

    :param inchi: str - InChI string
    :param test: True/False - for unit testing; when True the heavy atom count
        is returned instead of the molecule
    :return: the sanitized RDKit molecule, or its heavy atom count (int) when
        ``test`` is True
    :raises ValueError: if RDKit cannot convert ``inchi`` into a molecule

    >>> get_mol_from_inchi("InChI=1S/H2O/h1H2", test=True)
    1
    """
    log = logging.getLogger(__name__)
    log.debug("Input inchi: {}".format(inchi))

    # MolFromInchi signals a conversion failure by returning None; check for
    # it here so the caller gets a clear error instead of an argument-type
    # error from SanitizeMol receiving None.
    mol = Chem.MolFromInchi(inchi)
    if mol is None:
        raise ValueError(
            "RDKit could not parse InChI string {!r}".format(inchi),
        )

    Chem.rdmolops.SanitizeMol(mol)
    Chem.rdmolops.Cleanup(mol)

    if test is True:
        return mol.GetNumHeavyAtoms()

    return mol
# When executed as a script, run the doctest examples embedded in the
# docstrings of this module.
if __name__ == "__main__":
    from doctest import testmod

    testmod()
Functions
def get_mol_from_inchi(inchi: str, test: bool = False) ‑> rdkit.Chem.rdchem.Mol
-
Function to make a mol object based on an InChI string :param inchi: str - InChI string :param test: True/False - for unit testing
>>> get_mol_from_inchi("InChI=1S/H2O/h1H2", test=True) 1
Expand source code
def get_mol_from_inchi(inchi: str, test: bool = False) -> rdkit.Chem.rdchem.Mol:
    """
    Function to make a mol object based on an InChI string

    :param inchi: str - InChI string
    :param test: True/False - for unit testing; when True the heavy atom count
        is returned instead of the molecule
    :return: the sanitized RDKit molecule, or its heavy atom count (int) when
        ``test`` is True
    :raises ValueError: if RDKit cannot convert ``inchi`` into a molecule

    >>> get_mol_from_inchi("InChI=1S/H2O/h1H2", test=True)
    1
    """
    log = logging.getLogger(__name__)
    log.debug("Input inchi: {}".format(inchi))

    # MolFromInchi signals a conversion failure by returning None; check for
    # it here so the caller gets a clear error instead of an argument-type
    # error from SanitizeMol receiving None.
    mol = Chem.MolFromInchi(inchi)
    if mol is None:
        raise ValueError(
            "RDKit could not parse InChI string {!r}".format(inchi),
        )

    Chem.rdmolops.SanitizeMol(mol)
    Chem.rdmolops.Cleanup(mol)

    if test is True:
        return mol.GetNumHeavyAtoms()

    return mol
def get_mol_from_smiles(smiles: str, canonicalize: bool = True, test: bool = False) ‑> rdkit.Chem.rdchem.Mol
-
Function to make a mol object based on smiles :param smiles: str - SMILES string :param canonicalize: True/False - use RDKit canonicalized smiles or the input respectively :param test: True/False - for unit testing
>>> get_mol_from_smiles("O", test=True) 1
Expand source code
def get_mol_from_smiles(
    smiles: str, canonicalize: bool = True, test: bool = False,
) -> rdkit.Chem.rdchem.Mol:
    """
    Function to make a mol object based on smiles

    :param smiles: str - SMILES string
    :param canonicalize: True/False - use RDKit canonicalized smiles or the input respectively
    :param test: True/False - for unit testing; when True the heavy atom count
        is returned instead of the molecule
    :return: the sanitized RDKit molecule, or its heavy atom count (int) when
        ``test`` is True
    :raises ValueError: if RDKit cannot parse ``smiles``

    >>> get_mol_from_smiles("O", test=True)
    1
    """
    log = logging.getLogger(__name__)

    # MolFromSmiles signals a parse failure by returning None; check for it
    # here so the caller gets a clear error instead of an argument-type error
    # from a later RDKit call that receives None.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(
            "RDKit could not parse SMILES string {!r}".format(smiles),
        )

    if canonicalize is True:
        s = Chem.CanonSmiles(smiles, useChiral=1)
        # Rebuild from the canonical string so the molecule matches it.
        mol = Chem.MolFromSmiles(s)
    else:
        s = smiles

    log.debug(
        "Input smiles: {} RDKit Canonicalized smiles {} (Note RDKit does not use "
        "general canon smiles rules https://github.com/rdkit/rdkit/issues/2747)".format(
            smiles, s,
        ),
    )

    Chem.rdmolops.SanitizeMol(mol)
    Chem.rdmolops.Cleanup(mol)

    if test is True:
        return mol.GetNumHeavyAtoms()

    return mol
def inchi_to_molecule(inchi: str, addH: bool = True, canonicalize: bool = True, threed: bool = True, add_stereo: bool = False, remove_stereo: bool = False, random_seed: int = 10459, verbose: bool = False, test: bool = False) ‑> rdkit.Chem.rdchem.Mol
-
A function to build a RDKit molecule from an InChI string :param inchi: str - InChI string :param addH: bool - Add Hydrogens or not :param canonicalize: bool - canonicalize molecule rep :param threed: bool - get 3D coordinates of the molecule from smiles :param add_stereo: bool - set stereo chemistry for the molecule :param remove_stereo: bool - remove stereo chemistry for the molecule :param random_seed: int - make the structure generation deterministic :param verbose: bool - provide verbose logging out :param test: bool - for unit test
>>> inchi_to_molecule("InChI=1S/H2O/h1H2", test=True) 3
Expand source code
def inchi_to_molecule(
    inchi: str,
    addH: bool = True,
    canonicalize: bool = True,
    threed: bool = True,
    add_stereo: bool = False,
    remove_stereo: bool = False,
    random_seed: int = 10459,
    verbose: bool = False,
    test: bool = False,
) -> rdkit.Chem.rdchem.Mol:
    """
    A function to build a RDKit molecule from an InChI string

    The molecule is parsed with ``get_mol_from_inchi``, cleaned up and
    sanitized, and then optionally stripped of stereo information, given
    explicit hydrogens, CIP labels and an embedded 3D conformer.

    :param inchi: str - InChI string
    :param addH: bool - Add Hydrogens or not
    :param canonicalize: bool - canonicalize molecule rep; only used when the
        molecule is rebuilt from smiles because ``remove_stereo`` is True
    :param threed: bool - get 3D coordinates of the molecule from the InChI
    :param add_stereo: bool - set stereo chemistry for the molecule
    :param remove_stereo: bool - remove stereo chemistry for the molecule
    :param random_seed: int - make the structure generation deterministic
    :param verbose: bool - provide verbose logging out
    :param test: bool - for unit test; when True the total atom count is
        returned instead of the molecule
    :return: the RDKit molecule, or its atom count (int) when ``test`` is True

    >>> inchi_to_molecule("InChI=1S/H2O/h1H2", test=True)
    3
    """
    log = logging.getLogger(__name__)

    mol = get_mol_from_inchi(inchi)
    Chem.rdmolops.Cleanup(mol)
    Chem.rdmolops.SanitizeMol(mol)

    if remove_stereo is True:
        # Round-trip through a non-isomeric SMILES string to drop the stereo
        # information, then rebuild the molecule from that string.
        non_isosmiles = Chem.rdmolfiles.MolToSmiles(
            mol, isomericSmiles=False, allHsExplicit=False,
        )
        mol = get_mol_from_smiles(non_isosmiles, canonicalize=canonicalize)
        Chem.rdmolops.Cleanup(mol)
        Chem.rdmolops.SanitizeMol(mol)

        if verbose is True:
            for atom in mol.GetAtoms():
                log.info(
                    "Atom {} {} in molecule from InChI {} tag will be cleared. "
                    "Set properties {}.".format(
                        atom.GetIdx(),
                        atom.GetSymbol(),
                        inchi,
                        atom.GetPropsAsDict(
                            includePrivate=True, includeComputed=True),
                    ),
                )

    if addH is True:
        mol = Chem.rdmolops.AddHs(mol)

    if add_stereo is True:
        rdCIPLabeler.AssignCIPLabels(mol)

    if threed:
        # EmbedMolecule reports failure through its return value (-1) rather
        # than an exception; surface it so callers are not silently handed a
        # molecule without a conformer.
        conformer_id = AllChem.EmbedMolecule(mol, randomSeed=random_seed)
        if conformer_id < 0:
            log.warning(
                "3D embedding failed for molecule from InChI {}; "
                "no conformer was added.".format(inchi),
            )

    if test is True:
        return mol.GetNumAtoms()

    return mol
def smiles_to_molecule(s: str, addH: bool = True, canonicalize: bool = True, threed: bool = True, add_stereo: bool = False, remove_stereo: bool = False, random_seed: int = 10459, verbose: bool = False, test: bool = False) ‑> rdkit.Chem.rdchem.Mol
-
A function to build a RDKit molecule from a smiles string :param s: str - smiles string :param addH: bool - Add Hydrogens or not :param canonicalize: bool - canonicalize molecule rep :param threed: bool - get 3D coordinates of the molecule from smiles :param add_stereo: bool - set stereo chemistry for the molecule :param remove_stereo: bool - remove stereo chemistry for the molecule :param random_seed: int - make the structure generation deterministic :param verbose: bool - provide verbose logging out :param test: True/False - for unit testing
>>> smiles_to_molecule("O", test=True) 3
Expand source code
def smiles_to_molecule(
    s: str,
    addH: bool = True,
    canonicalize: bool = True,
    threed: bool = True,
    add_stereo: bool = False,
    remove_stereo: bool = False,
    random_seed: int = 10459,
    verbose: bool = False,
    test: bool = False,
) -> rdkit.Chem.rdchem.Mol:
    """
    A function to build a RDKit molecule from a smiles string

    The molecule is parsed with ``get_mol_from_smiles``, cleaned up and
    sanitized, and then optionally stripped of stereo information, given
    explicit hydrogens, CIP labels and an embedded 3D conformer.

    :param s: str - smiles string
    :param addH: bool - Add Hydrogens or not
    :param canonicalize: bool - canonicalize molecule rep
    :param threed: bool - get 3D coordinates of the molecule from smiles
    :param add_stereo: bool - set stereo chemistry for the molecule
    :param remove_stereo: bool - remove stereo chemistry for the molecule
    :param random_seed: int - make the structure generation deterministic
    :param verbose: bool - provide verbose logging out
    :param test: True/False - for unit testing; when True the total atom count
        is returned instead of the molecule
    :return: the RDKit molecule, or its atom count (int) when ``test`` is True

    >>> smiles_to_molecule("O", test=True)
    3
    """
    log = logging.getLogger(__name__)

    mol = get_mol_from_smiles(s, canonicalize=canonicalize)
    Chem.rdmolops.Cleanup(mol)
    Chem.rdmolops.SanitizeMol(mol)

    if remove_stereo is True:
        # Round-trip through a non-isomeric SMILES string to drop the stereo
        # information, then rebuild the molecule from that string.
        non_isosmiles = Chem.rdmolfiles.MolToSmiles(
            mol, isomericSmiles=False, allHsExplicit=False,
        )
        mol = get_mol_from_smiles(non_isosmiles, canonicalize=canonicalize)
        Chem.rdmolops.Cleanup(mol)
        Chem.rdmolops.SanitizeMol(mol)

        if verbose is True:
            for atom in mol.GetAtoms():
                log.info(
                    "Atom {} {} in molecule from smiles {} tag will be cleared. "
                    "Set properties {}.".format(
                        atom.GetIdx(),
                        atom.GetSymbol(),
                        s,
                        atom.GetPropsAsDict(
                            includePrivate=True, includeComputed=True),
                    ),
                )

    if addH is True:
        mol = Chem.rdmolops.AddHs(mol)

    if add_stereo is True:
        rdCIPLabeler.AssignCIPLabels(mol)

    if threed:
        # EmbedMolecule reports failure through its return value (-1) rather
        # than an exception; surface it so callers are not silently handed a
        # molecule without a conformer.
        conformer_id = AllChem.EmbedMolecule(mol, randomSeed=random_seed)
        if conformer_id < 0:
            log.warning(
                "3D embedding failed for molecule from smiles {}; "
                "no conformer was added.".format(s),
            )

    if test is True:
        return mol.GetNumAtoms()

    return mol