"""Helpers that assemble macrolactone-style ring molecules atom by atom with RDKit."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Mapping

from rdkit import Chem


@dataclass(frozen=True)
class BuiltMacrolactone:
    """Bundle returned by the builder functions in this module."""

    # Sanitized RDKit molecule.
    mol: Chem.Mol
    # Output of Chem.MolToSmiles(mol, isomericSmiles=True).
    smiles: str
    # 1-based ring position -> RDKit atom index. Builders that do not track
    # ring positions return an empty dict here.
    position_to_atom: dict[int, int]


def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
    ring_atom_symbols: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a single-ring lactone with ``ring_size`` ring atoms.

    Ring numbering is fixed: position 1 is the carbonyl carbon (it carries the
    exocyclic ``=O``), position 2 is the ring oxygen, and positions
    3..ring_size are carbon unless overridden through ``ring_atom_symbols``.

    Args:
        ring_size: Number of ring atoms; must be within 12..20 inclusive.
        side_chains: Optional ``{ring position: side-chain name}``. Supported
            names are the ones accepted by ``_add_side_chain``.
        ring_atom_symbols: Optional ``{ring position: element symbol}`` for
            positions 3..ring_size.

    Returns:
        The sanitized molecule, its SMILES and the position -> atom-index map.

    Raises:
        ValueError: If ``ring_size`` is out of range, a ``ring_atom_symbols``
            key is not in 3..ring_size, a side-chain position is not a ring
            position, or a side-chain name is unsupported.
    """
    if not 12 <= ring_size <= 20:
        raise ValueError("ring_size must be between 12 and 20")

    side_chains = dict(side_chains or {})
    ring_atom_symbols = dict(ring_atom_symbols or {})

    # Positions 1 and 2 are hard-wired to the lactone C and O, and anything
    # beyond ring_size does not exist. Reject such overrides explicitly rather
    # than silently returning an unsubstituted ring.
    overridable_positions = range(3, ring_size + 1)
    for position in ring_atom_symbols:
        if position not in overridable_positions:
            raise ValueError(
                "ring_atom_symbols position must be between 3 and ring_size: "
                f"{position}"
            )

    rwmol = Chem.RWMol()

    # Atom creation order (C1, O2, ring atoms 3..n, carbonyl O) determines the
    # RDKit atom indices exposed through position_to_atom.
    position_to_atom: dict[int, int] = {}
    position_to_atom[1] = rwmol.AddAtom(Chem.Atom("C"))
    position_to_atom[2] = rwmol.AddAtom(Chem.Atom("O"))
    for position in overridable_positions:
        symbol = ring_atom_symbols.get(position, "C")
        position_to_atom[position] = rwmol.AddAtom(Chem.Atom(symbol))
    carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))

    # Walk the ring 1-2, 2-3, ..., (n-1)-n, then close it with n-1.
    for position in range(1, ring_size):
        rwmol.AddBond(
            position_to_atom[position],
            position_to_atom[position + 1],
            Chem.BondType.SINGLE,
        )
    rwmol.AddBond(
        position_to_atom[ring_size],
        position_to_atom[1],
        Chem.BondType.SINGLE,
    )
    rwmol.AddBond(position_to_atom[1], carbonyl_oxygen_idx, Chem.BondType.DOUBLE)

    for position, side_chain in side_chains.items():
        if position not in position_to_atom:
            raise ValueError(f"Invalid ring position: {position}")
        _add_side_chain(rwmol, position_to_atom[position], side_chain)

    mol = rwmol.GetMol()
    Chem.SanitizeMol(mol)
    return BuiltMacrolactone(
        mol=mol,
        smiles=Chem.MolToSmiles(mol, isomericSmiles=True),
        position_to_atom=position_to_atom,
    )


def build_ambiguous_smiles() -> str:
    """Return the SMILES of a 12- and a 14-membered lactone combined into one Mol.

    The two rings are merged with ``Chem.CombineMols`` and are not bonded to
    each other.
    """
    fragments = [build_macrolactone(size).mol for size in (12, 14)]
    combined = Chem.CombineMols(fragments[0], fragments[1])
    return Chem.MolToSmiles(combined, isomericSmiles=True)


def build_non_standard_ring_atom_macrolactone(
    ring_size: int = 16,
    hetero_position: int = 5,
    atom_symbol: str = "N",
) -> BuiltMacrolactone:
    """Build a lactone ring with one ring atom replaced by ``atom_symbol``.

    Args:
        ring_size: Forwarded to ``build_macrolactone``.
        hetero_position: Ring position (3..ring_size) to replace.
        atom_symbol: Element symbol placed at ``hetero_position``.

    Raises:
        ValueError: If ``hetero_position`` is outside 3..ring_size, or for any
            reason ``build_macrolactone`` rejects the request.
    """
    if not 3 <= hetero_position <= ring_size:
        raise ValueError("hetero_position must be between 3 and ring_size")
    return build_macrolactone(
        ring_size=ring_size,
        ring_atom_symbols={hetero_position: atom_symbol},
    )


def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
    """Build a bicyclic molecule containing two lactone rings that share atoms.

    Ring A runs A1-A2-S1-S2-S3-S4-A5..A10 and ring B runs
    B1-B2-S1-S2-S3-S4-B5..B10; S1..S4 belong to both rings. A1 and B1 each
    carry an exocyclic ``=O`` (AO / BO), and A2 / B2 are the ring oxygens.
    No ring numbering is reported: ``position_to_atom`` is empty.
    """
    # Creation order fixes the RDKit atom indices, so keep it stable.
    atom_labels = (
        "A1", "A2", "S1", "S2", "S3", "S4",
        "A5", "A6", "A7", "A8", "A9", "A10",
        "B1", "B2",
        "B5", "B6", "B7", "B8", "B9", "B10",
        "AO", "BO",
    )
    oxygen_labels = {"A2", "B2", "AO", "BO"}  # every other label is carbon

    single_bonds = (
        # ring A
        ("A1", "A2"), ("A2", "S1"), ("S1", "S2"), ("S2", "S3"),
        ("S3", "S4"), ("S4", "A5"), ("A5", "A6"), ("A6", "A7"),
        ("A7", "A8"), ("A8", "A9"), ("A9", "A10"), ("A10", "A1"),
        # ring B (re-uses the S1..S4 bonds created above)
        ("B1", "B2"), ("B2", "S1"), ("S4", "B5"), ("B5", "B6"),
        ("B6", "B7"), ("B7", "B8"), ("B8", "B9"), ("B9", "B10"),
        ("B10", "B1"),
    )
    double_bonds = (("A1", "AO"), ("B1", "BO"))

    rwmol = Chem.RWMol()
    atoms: dict[str, int] = {}
    for label in atom_labels:
        symbol = "O" if label in oxygen_labels else "C"
        atoms[label] = rwmol.AddAtom(Chem.Atom(symbol))

    for begin, end in single_bonds:
        rwmol.AddBond(atoms[begin], atoms[end], Chem.BondType.SINGLE)
    for begin, end in double_bonds:
        rwmol.AddBond(atoms[begin], atoms[end], Chem.BondType.DOUBLE)

    mol = rwmol.GetMol()
    Chem.SanitizeMol(mol)
    return BuiltMacrolactone(
        mol=mol,
        smiles=Chem.MolToSmiles(mol, isomericSmiles=True),
        position_to_atom={},
    )


def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    """Return ``Chem.MolToSmiles(..., isomericSmiles=True)`` for a SMILES or Mol.

    Raises:
        ValueError: If a SMILES string cannot be parsed by RDKit.
    """
    if isinstance(smiles_or_mol, Chem.Mol):
        return Chem.MolToSmiles(smiles_or_mol, isomericSmiles=True)

    parsed = Chem.MolFromSmiles(smiles_or_mol)
    if parsed is None:
        raise ValueError(f"Unable to parse SMILES: {smiles_or_mol}")
    return Chem.MolToSmiles(parsed, isomericSmiles=True)


def _add_side_chain(rwmol: Chem.RWMol, ring_atom_idx: int, side_chain: str) -> None:
    """Attach a named carbon side chain to ``ring_atom_idx`` in place.

    Supported names:
        ``"methyl"``            ring-C
        ``"ethyl"``             ring-C-C
        ``"exocyclic_alkene"``  ring=C-C

    Raises:
        ValueError: If ``side_chain`` is not one of the names above.
    """
    single = Chem.BondType.SINGLE
    double = Chem.BondType.DOUBLE

    # One bond order per carbon appended, starting at the ring atom.
    if side_chain == "methyl":
        bond_orders = (single,)
    elif side_chain == "ethyl":
        bond_orders = (single, single)
    elif side_chain == "exocyclic_alkene":
        bond_orders = (double, single)
    else:
        raise ValueError(f"Unsupported side chain: {side_chain}")

    # Create all carbons first, then the bonds, walking outward from the ring.
    chain = [ring_atom_idx]
    chain.extend(rwmol.AddAtom(Chem.Atom("C")) for _ in bond_orders)
    for begin, end, order in zip(chain, chain[1:], bond_orders):
        rwmol.AddBond(begin, end, order)