- skip fused/shared/multi-anchor side systems during extraction - add fragment library schema and fragment_library.csv export - make scaffold prep strict for non-spliceable positions
304 lines
9.0 KiB
Python
304 lines
9.0 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Mapping
|
|
|
|
from rdkit import Chem
|
|
|
|
|
|
@dataclass(frozen=True)
class BuiltMacrolactone:
    """Result bundle returned by the molecule builders in this module.

    The dataclass is frozen, so its attributes cannot be rebound after
    construction; the ``position_to_atom`` dict itself is still a regular
    mutable dict.
    """

    # RDKit molecule produced by the builder.
    mol: Chem.Mol
    # SMILES string generated from ``mol`` by the builder.
    smiles: str
    # Ring position number -> atom index in ``mol``. Builders that do not
    # track ring positions pass an empty dict here.
    position_to_atom: dict[int, int]
|
|
|
|
|
|
def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
    ring_atom_symbols: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a single-bonded macrolactone ring with optional substituents.

    Ring positions are numbered from 1. Position 1 is always the carbonyl
    carbon and position 2 is always the ring oxygen bonded to it; positions
    3..ring_size are carbon unless overridden through ``ring_atom_symbols``.
    The carbonyl oxygen is an extra atom outside the position numbering.

    Args:
        ring_size: Number of ring atoms; must be between 12 and 20 inclusive.
        side_chains: Optional mapping of ring position to a side-chain name
            understood by ``_add_side_chain``.
        ring_atom_symbols: Optional mapping of ring position (3..ring_size)
            to the element symbol to use at that position.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule, its SMILES
        and the ring-position -> atom-index mapping.

    Raises:
        ValueError: If ``ring_size`` is out of range, if ``ring_atom_symbols``
            names a position outside 3..ring_size, if ``side_chains`` names a
            position that is not in the ring, or if a side-chain name is not
            supported.
    """
    if not 12 <= ring_size <= 20:
        raise ValueError("ring_size must be between 12 and 20")

    side_chains = dict(side_chains or {})
    ring_atom_symbols = dict(ring_atom_symbols or {})

    # Positions 1 and 2 are fixed (carbonyl C and ring O), so only 3..ring_size
    # can take a custom element. Reject anything else instead of silently
    # dropping the request and returning an unmodified ring.
    substitutable_positions = range(3, ring_size + 1)
    for position in ring_atom_symbols:
        if position not in substitutable_positions:
            raise ValueError(
                f"ring_atom_symbols position must be between 3 and ring_size: {position}"
            )

    rwmol = Chem.RWMol()

    position_to_atom: dict[int, int] = {
        1: rwmol.AddAtom(Chem.Atom("C")),
        2: rwmol.AddAtom(Chem.Atom("O")),
    }
    for position in substitutable_positions:
        symbol = ring_atom_symbols.get(position, "C")
        position_to_atom[position] = rwmol.AddAtom(Chem.Atom(symbol))

    carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))

    # Walk the ring 1 -> 2 -> ... -> ring_size and close it back onto 1.
    rwmol.AddBond(position_to_atom[1], position_to_atom[2], Chem.BondType.SINGLE)
    for position in range(2, ring_size):
        rwmol.AddBond(
            position_to_atom[position],
            position_to_atom[position + 1],
            Chem.BondType.SINGLE,
        )
    rwmol.AddBond(position_to_atom[ring_size], position_to_atom[1], Chem.BondType.SINGLE)
    rwmol.AddBond(position_to_atom[1], carbonyl_oxygen_idx, Chem.BondType.DOUBLE)

    for position, side_chain in side_chains.items():
        if position not in position_to_atom:
            raise ValueError(f"Invalid ring position: {position}")
        _add_side_chain(rwmol, position_to_atom[position], side_chain)

    mol = rwmol.GetMol()
    Chem.SanitizeMol(mol)
    return BuiltMacrolactone(
        mol=mol,
        smiles=Chem.MolToSmiles(mol, isomericSmiles=True),
        position_to_atom=position_to_atom,
    )
|
|
|
|
|
|
def build_ambiguous_smiles() -> str:
    """Return the SMILES of a 12- and a 14-membered macrolactone combined.

    The two rings are built independently and merged with
    ``Chem.CombineMols``, so the result describes both in one molecule object.
    """
    small_ring, large_ring = (build_macrolactone(size).mol for size in (12, 14))
    merged = Chem.CombineMols(small_ring, large_ring)
    return Chem.MolToSmiles(merged, isomericSmiles=True)
|
|
|
|
|
|
def build_non_standard_ring_atom_macrolactone(
    ring_size: int = 16,
    hetero_position: int = 5,
    atom_symbol: str = "N",
) -> BuiltMacrolactone:
    """Build a macrolactone whose ring contains one non-default atom.

    Args:
        ring_size: Ring size forwarded to ``build_macrolactone``.
        hetero_position: Ring position (3..ring_size) that receives
            ``atom_symbol`` instead of the default carbon.
        atom_symbol: Element symbol placed at ``hetero_position``.

    Raises:
        ValueError: If ``hetero_position`` is below 3 or above ``ring_size``.
    """
    out_of_range = hetero_position < 3 or hetero_position > ring_size
    if out_of_range:
        raise ValueError("hetero_position must be between 3 and ring_size")

    symbol_overrides = {hetero_position: atom_symbol}
    return build_macrolactone(ring_size=ring_size, ring_atom_symbols=symbol_overrides)
|
|
|
|
|
|
def build_macrolactone_with_fused_side_ring(
    ring_size: int = 16,
    fused_positions: tuple[int, int] = (5, 6),
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone with a two-carbon chain joining two ring positions.

    Two new carbons are added and bonded as
    ``fused_positions[0] - C - C - fused_positions[1]``. With the default
    adjacent positions (5, 6) this closes a four-membered ring that shares
    the 5-6 bond with the macrocycle.

    Args:
        ring_size: Ring size forwarded to ``build_macrolactone``.
        fused_positions: The two ring positions the new chain attaches to.
        side_chains: Optional side chains forwarded to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` reusing the base ring's position mapping.
    """
    base = build_macrolactone(ring_size=ring_size, side_chains=side_chains)
    first_position, second_position = fused_positions
    editable = Chem.RWMol(Chem.Mol(base.mol))

    linker = [editable.AddAtom(Chem.Atom("C")) for _ in range(2)]
    path = [
        base.position_to_atom[first_position],
        *linker,
        base.position_to_atom[second_position],
    ]
    # Bond consecutive atoms along ring atom -> C -> C -> ring atom.
    for begin_idx, end_idx in zip(path, path[1:]):
        editable.AddBond(begin_idx, end_idx, Chem.BondType.SINGLE)

    fused = editable.GetMol()
    Chem.SanitizeMol(fused)
    fused_smiles = Chem.MolToSmiles(fused, isomericSmiles=True)
    return BuiltMacrolactone(
        mol=fused,
        smiles=fused_smiles,
        position_to_atom=base.position_to_atom,
    )
|
|
|
|
|
|
def build_macrolactone_with_bridge_side_chain(
    ring_size: int = 16,
    bridge_positions: tuple[int, int] = (5, 8),
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone with a two-carbon bridge between two ring positions.

    Two new carbons are added and bonded as
    ``bridge_positions[0] - C - C - bridge_positions[1]``; the defaults (5, 8)
    connect two ring atoms that are not neighbours in the macrocycle.

    Args:
        ring_size: Ring size forwarded to ``build_macrolactone``.
        bridge_positions: The two ring positions joined by the bridge.
        side_chains: Optional side chains forwarded to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` reusing the base ring's position mapping.
    """
    base = build_macrolactone(ring_size=ring_size, side_chains=side_chains)
    start_position, end_position = bridge_positions
    editable = Chem.RWMol(Chem.Mol(base.mol))

    bridge_near = editable.AddAtom(Chem.Atom("C"))
    bridge_far = editable.AddAtom(Chem.Atom("C"))
    bridge_bonds = (
        (base.position_to_atom[start_position], bridge_near),
        (bridge_near, bridge_far),
        (bridge_far, base.position_to_atom[end_position]),
    )
    for begin_idx, end_idx in bridge_bonds:
        editable.AddBond(begin_idx, end_idx, Chem.BondType.SINGLE)

    bridged = editable.GetMol()
    Chem.SanitizeMol(bridged)
    bridged_smiles = Chem.MolToSmiles(bridged, isomericSmiles=True)
    return BuiltMacrolactone(
        mol=bridged,
        smiles=bridged_smiles,
        position_to_atom=base.position_to_atom,
    )
|
|
|
|
|
|
def build_macrolactone_with_shared_atom_side_ring(
    ring_size: int = 16,
    position: int = 5,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone with a small ring sharing one macrocycle atom.

    Three new carbons are chained from the ring atom at ``position`` and the
    chain is bonded back to that same atom, so the ring atom belongs to both
    the macrocycle and the new four-membered ring.

    Args:
        ring_size: Ring size forwarded to ``build_macrolactone``.
        position: Ring position of the atom shared by both rings.
        side_chains: Optional side chains forwarded to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` reusing the base ring's position mapping.
    """
    base = build_macrolactone(ring_size=ring_size, side_chains=side_chains)
    editable = Chem.RWMol(Chem.Mol(base.mol))

    new_carbons = [editable.AddAtom(Chem.Atom("C")) for _ in range(3)]
    shared_idx = base.position_to_atom[position]

    # shared atom -> C -> C -> C -> back to the shared atom.
    cycle = [shared_idx, *new_carbons, shared_idx]
    for begin_idx, end_idx in zip(cycle, cycle[1:]):
        editable.AddBond(begin_idx, end_idx, Chem.BondType.SINGLE)

    result = editable.GetMol()
    Chem.SanitizeMol(result)
    result_smiles = Chem.MolToSmiles(result, isomericSmiles=True)
    return BuiltMacrolactone(
        mol=result,
        smiles=result_smiles,
        position_to_atom=base.position_to_atom,
    )
|
|
|
|
|
|
def build_macrolactone_with_single_anchor_side_ring(
    ring_size: int = 16,
    position: int = 5,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone carrying a three-carbon ring on a single bond.

    Three new carbons are bonded into a ring of their own, and only the first
    of them is bonded to the macrocycle atom at ``position``.

    Args:
        ring_size: Ring size forwarded to ``build_macrolactone``.
        position: Ring position the side ring hangs from.
        side_chains: Optional side chains forwarded to ``build_macrolactone``.

    Returns:
        A ``BuiltMacrolactone`` reusing the base ring's position mapping.
    """
    base = build_macrolactone(ring_size=ring_size, side_chains=side_chains)
    editable = Chem.RWMol(Chem.Mol(base.mol))

    attach_c, middle_c, closing_c = (
        editable.AddAtom(Chem.Atom("C")) for _ in range(3)
    )
    anchor_idx = base.position_to_atom[position]

    bond_plan = (
        (anchor_idx, attach_c),  # the only link to the macrocycle
        (attach_c, middle_c),
        (middle_c, closing_c),
        (closing_c, attach_c),  # closes the three-membered ring
    )
    for begin_idx, end_idx in bond_plan:
        editable.AddBond(begin_idx, end_idx, Chem.BondType.SINGLE)

    result = editable.GetMol()
    Chem.SanitizeMol(result)
    result_smiles = Chem.MolToSmiles(result, isomericSmiles=True)
    return BuiltMacrolactone(
        mol=result,
        smiles=result_smiles,
        position_to_atom=base.position_to_atom,
    )
|
|
|
|
|
|
def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
    """Build a molecule with two 12-membered lactone rings that overlap.

    Ring A (A1, A2, S1-S4, A5-A10) and ring B (B1, B2, S1-S4, B5-B10) share
    the four-carbon segment S1-S4. In each ring, atom 1 is a carbon
    double-bonded to an extra oxygen (AO / BO) and atom 2 is a ring oxygen
    bonded to S1.

    Returns:
        A ``BuiltMacrolactone`` with an empty ``position_to_atom`` mapping.
    """
    editable = Chem.RWMol()

    # Atoms are created in exactly this order, which fixes their indices.
    creation_order = (
        "A1", "A2",
        "S1", "S2", "S3", "S4",
        "A5", "A6", "A7", "A8", "A9", "A10",
        "B1", "B2",
        "B5", "B6", "B7", "B8", "B9", "B10",
        "AO", "BO",
    )
    oxygen_labels = {"A2", "B2", "AO", "BO"}  # every other label is carbon

    index_of: dict[str, int] = {}
    for label in creation_order:
        element = "O" if label in oxygen_labels else "C"
        index_of[label] = editable.AddAtom(Chem.Atom(element))

    # Each path is bonded atom-to-atom in sequence, in the listed order.
    single_bond_paths = (
        # Ring A, closed back onto A1.
        ("A1", "A2", "S1", "S2", "S3", "S4", "A5", "A6", "A7", "A8", "A9", "A10", "A1"),
        # Ring B joins the shared segment at S1 ...
        ("B1", "B2", "S1"),
        # ... and leaves it at S4, closing back onto B1.
        ("S4", "B5", "B6", "B7", "B8", "B9", "B10", "B1"),
    )
    for path in single_bond_paths:
        for begin_label, end_label in zip(path, path[1:]):
            editable.AddBond(index_of[begin_label], index_of[end_label], Chem.BondType.SINGLE)

    for carbon_label, oxygen_label in (("A1", "AO"), ("B1", "BO")):
        editable.AddBond(index_of[carbon_label], index_of[oxygen_label], Chem.BondType.DOUBLE)

    result = editable.GetMol()
    Chem.SanitizeMol(result)
    result_smiles = Chem.MolToSmiles(result, isomericSmiles=True)
    return BuiltMacrolactone(mol=result, smiles=result_smiles, position_to_atom={})
|
|
|
|
|
|
def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    """Return the RDKit SMILES for a SMILES string or an existing molecule.

    Args:
        smiles_or_mol: A ``Chem.Mol`` used as-is, or a SMILES string that is
            parsed with ``Chem.MolFromSmiles`` first.

    Raises:
        ValueError: If a SMILES string cannot be parsed.
    """
    mol = (
        smiles_or_mol
        if isinstance(smiles_or_mol, Chem.Mol)
        else Chem.MolFromSmiles(smiles_or_mol)
    )
    # MolFromSmiles signals a parse failure by returning None.
    if mol is None:
        raise ValueError(f"Unable to parse SMILES: {smiles_or_mol}")
    return Chem.MolToSmiles(mol, isomericSmiles=True)
|
|
|
|
|
|
def _add_side_chain(rwmol: Chem.RWMol, ring_atom_idx: int, side_chain: str) -> None:
    """Attach a named carbon side chain to ``ring_atom_idx`` in ``rwmol``.

    Supported names:
        ``"methyl"``: one carbon, single-bonded to the ring atom.
        ``"ethyl"``: two carbons in a chain, single-bonded to the ring atom.
        ``"exocyclic_alkene"``: two carbons in a chain, the first one
            double-bonded to the ring atom.

    Raises:
        ValueError: If ``side_chain`` is not one of the supported names.
    """
    if side_chain == "methyl":
        chain_length, double_to_ring = 1, False
    elif side_chain == "ethyl":
        chain_length, double_to_ring = 2, False
    elif side_chain == "exocyclic_alkene":
        chain_length, double_to_ring = 2, True
    else:
        raise ValueError(f"Unsupported side chain: {side_chain}")

    # Create every chain atom first, then bond outward from the ring atom.
    chain = [rwmol.AddAtom(Chem.Atom("C")) for _ in range(chain_length)]
    attachment_order = Chem.BondType.DOUBLE if double_to_ring else Chem.BondType.SINGLE
    rwmol.AddBond(ring_atom_idx, chain[0], attachment_order)
    for nearer_idx, farther_idx in zip(chain, chain[1:]):
        rwmol.AddBond(nearer_idx, farther_idx, Chem.BondType.SINGLE)
|