"""Helpers for assembling simple macrolactone ring molecules with RDKit."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Mapping

from rdkit import Chem


@dataclass(frozen=True)
class BuiltMacrolactone:
    """Result bundle produced by :func:`build_macrolactone`.

    Attributes:
        mol: The sanitized RDKit molecule.
        smiles: SMILES string generated from ``mol``.
        position_to_atom: Maps each 1-based ring position to the RDKit atom
            index that was created for it.
    """

    mol: Chem.Mol
    smiles: str
    position_to_atom: dict[int, int]


def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a macrolactone ring of ``ring_size`` atoms with optional side chains.

    Ring positions are numbered from 1: position 1 is the carbon that carries
    the exocyclic double-bonded oxygen, position 2 is the ring oxygen, and
    positions 3..``ring_size`` are carbons.

    Args:
        ring_size: Number of ring atoms; must satisfy 12 <= ring_size <= 20.
        side_chains: Optional mapping of ring position to a side-chain name
            understood by ``_add_side_chain``.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule, its SMILES and
        the position-to-atom-index mapping.

    Raises:
        ValueError: If ``ring_size`` is out of range, a position is not part
            of the ring, or a side-chain name is not supported.
    """
    if not (12 <= ring_size <= 20):
        raise ValueError("ring_size must be between 12 and 20")

    requested_chains = dict(side_chains) if side_chains else {}
    editable = Chem.RWMol()

    # Create ring atoms in position order so atom indices follow positions.
    position_to_atom: dict[int, int] = {}
    for position in range(1, ring_size + 1):
        element = "O" if position == 2 else "C"
        position_to_atom[position] = editable.AddAtom(Chem.Atom(element))
    carbonyl_oxygen_idx = editable.AddAtom(Chem.Atom("O"))

    # Backbone bonds 1-2, 2-3, ..., (n-1)-n, followed by the closure n-1.
    for position in range(1, ring_size):
        editable.AddBond(
            position_to_atom[position],
            position_to_atom[position + 1],
            Chem.BondType.SINGLE,
        )
    first_atom_idx = position_to_atom[1]
    editable.AddBond(
        position_to_atom[ring_size], first_atom_idx, Chem.BondType.SINGLE
    )
    editable.AddBond(first_atom_idx, carbonyl_oxygen_idx, Chem.BondType.DOUBLE)

    for position, side_chain in requested_chains.items():
        ring_atom_idx = position_to_atom.get(position)
        if ring_atom_idx is None:
            raise ValueError(f"Invalid ring position: {position}")
        _add_side_chain(editable, ring_atom_idx, side_chain)

    mol = editable.GetMol()
    # Sanitization is left to RDKit; whatever it raises propagates to the caller.
    Chem.SanitizeMol(mol)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    return BuiltMacrolactone(
        mol=mol,
        smiles=smiles,
        position_to_atom=position_to_atom,
    )


def build_ambiguous_smiles() -> str:
    """Return the SMILES of a 12- and a 14-membered macrolactone combined.

    The two unsubstituted rings are merged into a single molecule object with
    ``Chem.CombineMols`` before the SMILES is written.
    """
    merged = Chem.CombineMols(
        build_macrolactone(12).mol,
        build_macrolactone(14).mol,
    )
    return Chem.MolToSmiles(merged, isomericSmiles=True)


def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    """Return the RDKit SMILES for a SMILES string or an existing molecule.

    Args:
        smiles_or_mol: Either a ``Chem.Mol`` (used as-is) or a SMILES string
            that is parsed first.

    Raises:
        ValueError: If the SMILES string cannot be parsed.
    """
    if isinstance(smiles_or_mol, Chem.Mol):
        return Chem.MolToSmiles(smiles_or_mol, isomericSmiles=True)

    parsed = Chem.MolFromSmiles(smiles_or_mol)
    if parsed is None:
        raise ValueError(f"Unable to parse SMILES: {smiles_or_mol}")
    return Chem.MolToSmiles(parsed, isomericSmiles=True)


def _add_side_chain(rwmol: Chem.RWMol, ring_atom_idx: int, side_chain: str) -> None:
    """Attach a named carbon side chain to ``ring_atom_idx`` in ``rwmol``.

    Supported names are ``"methyl"`` (one carbon), ``"ethyl"`` (two carbons,
    single bonds) and ``"exocyclic_alkene"`` (two carbons, the first one
    double-bonded to the ring atom).

    Raises:
        ValueError: If ``side_chain`` is not one of the supported names.
    """
    single = Chem.BondType.SINGLE
    # One bond type per chain carbon, listed outward from the ring atom.
    if side_chain == "methyl":
        bond_types = (single,)
    elif side_chain == "ethyl":
        bond_types = (single, single)
    elif side_chain == "exocyclic_alkene":
        bond_types = (Chem.BondType.DOUBLE, single)
    else:
        raise ValueError(f"Unsupported side chain: {side_chain}")

    # Add every chain atom first, then connect them outward from the ring.
    chain_atoms = [rwmol.AddAtom(Chem.Atom("C")) for _ in bond_types]
    previous_idx = ring_atom_idx
    for atom_idx, bond_type in zip(chain_atoms, bond_types):
        rwmol.AddBond(previous_idx, atom_idx, bond_type)
        previous_idx = atom_idx