Files
macro_split/tests/helpers.py
lingyuzeng 5e7b236f31 feat(toolkit): ship macro_lactone_toolkit package
Unify macrolactone detection, numbering, fragmentation, and
splicing under the installable macro_lactone_toolkit package.

- replace legacy src.* modules with the new package layout
- add analyze/number/fragment CLI entrypoints and pixi tasks
- migrate tests, README, and scripts to the new package API
2026-03-18 22:06:45 +08:00

97 lines
3.1 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from typing import Mapping
from rdkit import Chem
@dataclass(frozen=True)
class BuiltMacrolactone:
    """Immutable result bundle returned by ``build_macrolactone``."""

    # Molecule object produced by the builder.
    mol: Chem.Mol
    # SMILES string generated from ``mol``.
    smiles: str
    # Ring position number -> index of the atom occupying that position.
    position_to_atom: dict[int, int]
def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Build a saturated macrolactone ring, optionally decorated with side chains.

    Ring positions are numbered from 1: position 1 is the carbonyl carbon,
    position 2 is the ring oxygen bonded to it, and positions
    3..``ring_size`` are ring carbons. The carbonyl oxygen sits outside the
    ring and has no position number.

    Args:
        ring_size: Number of ring atoms; must be between 12 and 20 inclusive.
        side_chains: Optional mapping from ring position to a side-chain
            name accepted by ``_add_side_chain``.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule, its isomeric
        SMILES, and the ring-position -> atom-index mapping.

    Raises:
        ValueError: If ``ring_size`` is out of range, a side-chain position
            is not a ring position, a side chain targets position 1 or 2,
            or the side-chain name is unsupported.
    """
    if not 12 <= ring_size <= 20:
        raise ValueError("ring_size must be between 12 and 20")
    side_chains = dict(side_chains or {})

    # Check every requested position before any atom is created so a bad
    # request fails fast with a message that names the offending position.
    ring_positions = range(1, ring_size + 1)
    for position in side_chains:
        if position not in ring_positions:
            raise ValueError(f"Invalid ring position: {position}")
        if position in (1, 2):
            # The carbonyl carbon and the ring oxygen have no free valence;
            # without this check the request only fails later, during
            # sanitization, with a valence error that hides the cause.
            raise ValueError(
                f"Ring position {position} cannot carry a side chain "
                "(1 is the carbonyl carbon, 2 is the ring oxygen)"
            )

    rwmol = Chem.RWMol()
    # Positions 1 and 2 are created first, then the remaining ring carbons,
    # and finally the exocyclic carbonyl oxygen.
    position_to_atom: dict[int, int] = {
        1: rwmol.AddAtom(Chem.Atom("C")),
        2: rwmol.AddAtom(Chem.Atom("O")),
    }
    for position in range(3, ring_size + 1):
        position_to_atom[position] = rwmol.AddAtom(Chem.Atom("C"))
    carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))

    # Ring bonds 1-2, 2-3, ..., (ring_size - 1)-ring_size, then the closure
    # back to position 1 and the C=O double bond.
    for position in range(1, ring_size):
        rwmol.AddBond(
            position_to_atom[position],
            position_to_atom[position + 1],
            Chem.BondType.SINGLE,
        )
    rwmol.AddBond(
        position_to_atom[ring_size],
        position_to_atom[1],
        Chem.BondType.SINGLE,
    )
    rwmol.AddBond(position_to_atom[1], carbonyl_oxygen_idx, Chem.BondType.DOUBLE)

    for position, side_chain in side_chains.items():
        _add_side_chain(rwmol, position_to_atom[position], side_chain)

    mol = rwmol.GetMol()
    Chem.SanitizeMol(mol)
    return BuiltMacrolactone(
        mol=mol,
        smiles=Chem.MolToSmiles(mol, isomericSmiles=True),
        position_to_atom=position_to_atom,
    )
def build_ambiguous_smiles() -> str:
    """Return the SMILES of a 12- and a 14-membered macrolactone combined.

    Both rings are built without side chains and merged with
    ``Chem.CombineMols`` before the SMILES string is generated.
    """
    ring_12, ring_14 = (build_macrolactone(size).mol for size in (12, 14))
    return Chem.MolToSmiles(
        Chem.CombineMols(ring_12, ring_14),
        isomericSmiles=True,
    )
def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    """Return the isomeric SMILES RDKit writes for a SMILES string or molecule.

    Args:
        smiles_or_mol: Either an RDKit molecule, used as-is, or a SMILES
            string, which is parsed first.

    Returns:
        The output of ``Chem.MolToSmiles`` with ``isomericSmiles=True``.

    Raises:
        ValueError: If a SMILES string cannot be parsed.
    """
    # A molecule instance needs no parsing; go straight to the writer.
    if isinstance(smiles_or_mol, Chem.Mol):
        return Chem.MolToSmiles(smiles_or_mol, isomericSmiles=True)

    parsed = Chem.MolFromSmiles(smiles_or_mol)
    if parsed is None:
        raise ValueError(f"Unable to parse SMILES: {smiles_or_mol}")
    return Chem.MolToSmiles(parsed, isomericSmiles=True)
def _add_side_chain(rwmol: Chem.RWMol, ring_atom_idx: int, side_chain: str) -> None:
    """Attach a named carbon side chain to the atom at ``ring_atom_idx``.

    Supported names:
        * ``"methyl"``: one carbon, single-bonded to the ring atom.
        * ``"ethyl"``: a two-carbon chain, single-bonded to the ring atom.
        * ``"exocyclic_alkene"``: a two-carbon chain whose first carbon is
          double-bonded to the ring atom.

    Raises:
        ValueError: If ``side_chain`` is not one of the supported names.
    """
    # name -> (number of carbons, whether the bond to the ring is a double bond)
    layouts = {
        "methyl": (1, False),
        "ethyl": (2, False),
        "exocyclic_alkene": (2, True),
    }
    layout = layouts.get(side_chain)
    if layout is None:
        raise ValueError(f"Unsupported side chain: {side_chain}")
    carbon_count, double_to_ring = layout

    # Create all chain atoms before any bond, then bond outward from the ring.
    chain = [rwmol.AddAtom(Chem.Atom("C")) for _ in range(carbon_count)]
    anchor_bond = Chem.BondType.DOUBLE if double_to_ring else Chem.BondType.SINGLE
    rwmol.AddBond(ring_atom_idx, chain[0], anchor_bond)
    for inner, outer in zip(chain, chain[1:]):
        rwmol.AddBond(inner, outer, Chem.BondType.SINGLE)