feat(toolkit): add classification and migration

Implement the standard/non-standard/not-macrolactone classification layer
and integrate it into analyzer, fragmenter, and CLI outputs.

Port the remaining legacy package capabilities into new visualization and
workflow modules, restore batch/statistics/SDF scripts on top of the flat
CSV workflow, and update active docs to the new package API.
This commit is contained in:
2026-03-18 23:56:41 +08:00
parent 9ccbcfcd04
commit c0ead42384
24 changed files with 1497 additions and 313 deletions

View File

@@ -16,11 +16,13 @@ class BuiltMacrolactone:
def build_macrolactone(
ring_size: int,
side_chains: Mapping[int, str] | None = None,
ring_atom_symbols: Mapping[int, str] | None = None,
) -> BuiltMacrolactone:
if not 12 <= ring_size <= 20:
raise ValueError("ring_size must be between 12 and 20")
side_chains = dict(side_chains or {})
ring_atom_symbols = dict(ring_atom_symbols or {})
rwmol = Chem.RWMol()
position_to_atom: dict[int, int] = {
@@ -28,7 +30,7 @@ def build_macrolactone(
2: rwmol.AddAtom(Chem.Atom("O")),
}
for position in range(3, ring_size + 1):
position_to_atom[position] = rwmol.AddAtom(Chem.Atom("C"))
position_to_atom[position] = rwmol.AddAtom(Chem.Atom(ring_atom_symbols.get(position, "C")))
carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))
@@ -63,6 +65,109 @@ def build_ambiguous_smiles() -> str:
return Chem.MolToSmiles(combined, isomericSmiles=True)
def build_non_standard_ring_atom_macrolactone(
    ring_size: int = 16,
    hetero_position: int = 5,
    atom_symbol: str = "N",
) -> BuiltMacrolactone:
    """Build a macrolactone with one ring position set to a chosen element.

    This is a thin wrapper around ``build_macrolactone``: it passes a
    single-entry ``ring_atom_symbols`` mapping so that the atom at
    ``hetero_position`` is created with ``atom_symbol``.

    Args:
        ring_size: Ring size forwarded unchanged to ``build_macrolactone``,
            which performs its own range validation.
        hetero_position: Ring position to substitute; must lie in
            ``3..ring_size`` inclusive.
        atom_symbol: Element symbol placed at ``hetero_position``.

    Returns:
        Whatever ``build_macrolactone`` returns for the substituted ring.

    Raises:
        ValueError: If ``hetero_position`` is below 3 or above ``ring_size``.
    """
    # Checked here, before delegating, so an out-of-range position is never
    # passed on as a substitution request.
    if hetero_position < 3 or ring_size < hetero_position:
        raise ValueError("hetero_position must be between 3 and ring_size")

    substitutions = {hetero_position: atom_symbol}
    return build_macrolactone(ring_size=ring_size, ring_atom_symbols=substitutions)
def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
    """Build a fixture molecule containing two overlapping lactone rings.

    The bond list closes two 12-atom cycles, labelled "A" and "B", that
    share the four-carbon chain ``S1-S2-S3-S4``:

    * cycle A: ``A1, A2, S1, S2, S3, S4, A5 .. A10`` (closed by ``A10-A1``)
    * cycle B: ``B1, B2, S1, S2, S3, S4, B5 .. B10`` (closed by ``B10-B1``)

    ``A2`` and ``B2`` are ring oxygens, both attached to ``S1``. ``A1`` and
    ``B1`` each carry a double-bonded exocyclic oxygen (``AO`` / ``BO``).
    NOTE(review): presumably this exercises the case where more than one
    ring could be picked as the macrolactone candidate - confirm against
    the tests that use it.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule and its
        isomeric SMILES. ``position_to_atom`` is left empty.
    """
    # Labels are listed in the exact order the atoms are inserted, so the
    # resulting RDKit atom indices stay stable.
    ring_a_labels = (
        "A1", "A2", "S1", "S2", "S3", "S4",
        "A5", "A6", "A7", "A8", "A9", "A10",
    )
    ring_b_only_labels = ("B1", "B2", "B5", "B6", "B7", "B8", "B9", "B10")
    exocyclic_labels = ("AO", "BO")
    oxygen_labels = {"A2", "B2", "AO", "BO"}

    builder = Chem.RWMol()
    index_of: dict[str, int] = {}
    for label in (*ring_a_labels, *ring_b_only_labels, *exocyclic_labels):
        element = "O" if label in oxygen_labels else "C"
        index_of[label] = builder.AddAtom(Chem.Atom(element))

    # Single bonds are produced by walking three label paths; consecutive
    # labels on a path are bonded, in path order.
    ring_a_path = (*ring_a_labels, "A1")  # full cycle A, closed back onto A1
    ring_b_head = ("B1", "B2", "S1")  # cycle B entering the shared chain
    ring_b_tail = ("S4", "B5", "B6", "B7", "B8", "B9", "B10", "B1")  # and leaving it
    for path in (ring_a_path, ring_b_head, ring_b_tail):
        for start, end in zip(path, path[1:]):
            builder.AddBond(index_of[start], index_of[end], Chem.BondType.SINGLE)

    # Carbonyl oxygens on A1 and B1.
    for carbon, oxygen in (("A1", "AO"), ("B1", "BO")):
        builder.AddBond(index_of[carbon], index_of[oxygen], Chem.BondType.DOUBLE)

    molecule = builder.GetMol()
    Chem.SanitizeMol(molecule)
    smiles = Chem.MolToSmiles(molecule, isomericSmiles=True)
    return BuiltMacrolactone(mol=molecule, smiles=smiles, position_to_atom={})
def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
if isinstance(smiles_or_mol, Chem.Mol):
mol = smiles_or_mol