feat(toolkit): ship macro_lactone_toolkit package
Unify macrolactone detection, numbering, fragmentation, and splicing under the installable macro_lactone_toolkit package.
- replace legacy src.* modules with the new package layout
- add analyze/number/fragment CLI entrypoints and pixi tasks
- migrate tests, README, and scripts to the new package API
This commit is contained in:
96
tests/helpers.py
Normal file
96
tests/helpers.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Mapping
|
||||
|
||||
from rdkit import Chem
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BuiltMacrolactone:
    """Immutable bundle describing a molecule produced by ``build_macrolactone``.

    The fields cannot be rebound after construction because the dataclass is
    frozen; the ``position_to_atom`` dict itself is still a regular mutable
    dict.
    """

    # The RDKit molecule object.
    mol: Chem.Mol
    # SMILES string for ``mol``.
    smiles: str
    # Ring position number -> atom index inside ``mol``.
    position_to_atom: dict[int, int]
|
||||
|
||||
|
||||
def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
    """Assemble a saturated macrocyclic lactone ring atom by atom.

    Ring positions are numbered from 1. Position 1 is the carbonyl carbon
    (it carries an extra double-bonded oxygen that is not part of the
    numbering), position 2 is the ring oxygen, and positions 3 through
    ``ring_size`` are carbons.

    Args:
        ring_size: Number of ring atoms; must lie in the range 12..20.
        side_chains: Optional mapping of ring position to a side-chain name
            understood by ``_add_side_chain``. The mapping is copied, so the
            caller's object is never modified.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule, its SMILES
        string, and the ring-position-to-atom-index mapping.

    Raises:
        ValueError: If ``ring_size`` is out of range, if a side-chain position
            is not a ring position, or if ``_add_side_chain`` rejects the
            side-chain name.

    NOTE(review): ``Chem.SanitizeMol`` may also raise for chemically invalid
    requests (for example a substituent on the ring oxygen) - confirm against
    the RDKit version in use.
    """
    if not 12 <= ring_size <= 20:
        raise ValueError("ring_size must be between 12 and 20")

    requested_chains = dict(side_chains or {})
    builder = Chem.RWMol()
    single = Chem.BondType.SINGLE

    # Atoms are created in ring-position order so atom indices follow the
    # numbering; only position 2 is an oxygen.
    position_to_atom: dict[int, int] = {
        position: builder.AddAtom(Chem.Atom("O" if position == 2 else "C"))
        for position in range(1, ring_size + 1)
    }
    carbonyl_oxygen = builder.AddAtom(Chem.Atom("O"))

    # Chain the ring atoms 1-2-3-...-ring_size, then close the ring.
    for position in range(1, ring_size):
        builder.AddBond(
            position_to_atom[position],
            position_to_atom[position + 1],
            single,
        )
    builder.AddBond(position_to_atom[ring_size], position_to_atom[1], single)

    # The exocyclic C=O on position 1 is what makes the ring a lactone.
    builder.AddBond(position_to_atom[1], carbonyl_oxygen, Chem.BondType.DOUBLE)

    for position, side_chain in requested_chains.items():
        if position not in position_to_atom:
            raise ValueError(f"Invalid ring position: {position}")
        _add_side_chain(builder, position_to_atom[position], side_chain)

    mol = builder.GetMol()
    Chem.SanitizeMol(mol)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    return BuiltMacrolactone(
        mol=mol,
        smiles=smiles,
        position_to_atom=position_to_atom,
    )
|
||||
|
||||
|
||||
def build_ambiguous_smiles() -> str:
    """Return SMILES for one molecule object holding two lactone rings.

    An unsubstituted 12-membered ring and an unsubstituted 14-membered ring
    are built with ``build_macrolactone`` and merged with
    ``Chem.CombineMols``; no bond is added between them.
    """
    ring_12, ring_14 = (build_macrolactone(size).mol for size in (12, 14))
    merged = Chem.CombineMols(ring_12, ring_14)
    return Chem.MolToSmiles(merged, isomericSmiles=True)
|
||||
|
||||
|
||||
def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    """Return the SMILES string RDKit writes for a SMILES input or a molecule.

    Args:
        smiles_or_mol: Either a ``Chem.Mol`` (used as-is) or a SMILES string
            that is parsed with ``Chem.MolFromSmiles``.

    Returns:
        The output of ``Chem.MolToSmiles`` with ``isomericSmiles=True``.

    Raises:
        ValueError: If the SMILES string cannot be parsed.
    """
    mol = (
        smiles_or_mol
        if isinstance(smiles_or_mol, Chem.Mol)
        else Chem.MolFromSmiles(smiles_or_mol)
    )
    # MolFromSmiles signals a parse failure by returning None.
    if mol is None:
        raise ValueError(f"Unable to parse SMILES: {smiles_or_mol}")
    return Chem.MolToSmiles(mol, isomericSmiles=True)
|
||||
|
||||
|
||||
def _add_side_chain(rwmol: Chem.RWMol, ring_atom_idx: int, side_chain: str) -> None:
|
||||
if side_chain == "methyl":
|
||||
carbon_idx = rwmol.AddAtom(Chem.Atom("C"))
|
||||
rwmol.AddBond(ring_atom_idx, carbon_idx, Chem.BondType.SINGLE)
|
||||
return
|
||||
|
||||
if side_chain == "ethyl":
|
||||
carbon_1_idx = rwmol.AddAtom(Chem.Atom("C"))
|
||||
carbon_2_idx = rwmol.AddAtom(Chem.Atom("C"))
|
||||
rwmol.AddBond(ring_atom_idx, carbon_1_idx, Chem.BondType.SINGLE)
|
||||
rwmol.AddBond(carbon_1_idx, carbon_2_idx, Chem.BondType.SINGLE)
|
||||
return
|
||||
|
||||
if side_chain == "exocyclic_alkene":
|
||||
carbon_1_idx = rwmol.AddAtom(Chem.Atom("C"))
|
||||
carbon_2_idx = rwmol.AddAtom(Chem.Atom("C"))
|
||||
rwmol.AddBond(ring_atom_idx, carbon_1_idx, Chem.BondType.DOUBLE)
|
||||
rwmol.AddBond(carbon_1_idx, carbon_2_idx, Chem.BondType.SINGLE)
|
||||
return
|
||||
|
||||
raise ValueError(f"Unsupported side chain: {side_chain}")
|
||||
Reference in New Issue
Block a user