feat(toolkit): add classification and migration

Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
2026-03-18 23:56:41 +08:00
parent 9ccbcfcd04
commit c0ead42384
24 changed files with 1497 additions and 313 deletions
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -16,11 +16,13 @@ class BuiltMacrolactone:
 def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
+    ring_atom_symbols: Mapping[int, str] | None = None,
 ) -> BuiltMacrolactone:
    if not 12 <= ring_size <= 20:
        raise ValueError("ring_size must be between 12 and 20")

    side_chains = dict(side_chains or {})
+    ring_atom_symbols = dict(ring_atom_symbols or {})
    rwmol = Chem.RWMol()

    position_to_atom: dict[int, int] = {
@@ -28,7 +30,7 @@ def build_macrolactone(
        2: rwmol.AddAtom(Chem.Atom("O")),
    }
    for position in range(3, ring_size + 1):
-        position_to_atom[position] = rwmol.AddAtom(Chem.Atom("C"))
+        position_to_atom[position] = rwmol.AddAtom(Chem.Atom(ring_atom_symbols.get(position, "C")))

    carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))

@@ -63,6 +65,109 @@ def build_ambiguous_smiles() -> str:
    return Chem.MolToSmiles(combined, isomericSmiles=True)


+def build_non_standard_ring_atom_macrolactone(
+    ring_size: int = 16,
+    hetero_position: int = 5,
+    atom_symbol: str = "N",
+) -> BuiltMacrolactone:
+    """Build a macrolactone whose ring atom at ``hetero_position`` is ``atom_symbol``."""
+    if hetero_position > ring_size or hetero_position < 3:
+        # build_macrolactone only reads ring_atom_symbols for positions 3..ring_size.
+        raise ValueError("hetero_position must be between 3 and ring_size")
+    substitutions = {hetero_position: atom_symbol}
+    return build_macrolactone(ring_size=ring_size, ring_atom_symbols=substitutions)
+
+
+def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
+    """Build a molecule with two 12-membered lactone rings sharing four atoms.
+
+    Ring A runs A1-A2-S1-S2-S3-S4-A5..A10 and ring B runs
+    B1-B2-S1-S2-S3-S4-B5..B10, so both pass through the S1-S4 chain. A1 and
+    B1 each carry a double-bonded oxygen (AO, BO); A2 and B2 are ring oxygens.
+
+    Returns:
+        A BuiltMacrolactone holding the sanitized molecule, its isomeric
+        SMILES, and an empty ``position_to_atom`` mapping.
+    """
+    # Insertion order determines the atom indices, so keep the entries in this order.
+    element_by_label = {
+        # Ring A ester carbon and ring oxygen.
+        "A1": "C",
+        "A2": "O",
+        # Chain shared by both rings.
+        "S1": "C",
+        "S2": "C",
+        "S3": "C",
+        "S4": "C",
+        # Remainder of ring A.
+        "A5": "C",
+        "A6": "C",
+        "A7": "C",
+        "A8": "C",
+        "A9": "C",
+        "A10": "C",
+        # Ring B ester carbon and ring oxygen.
+        "B1": "C",
+        "B2": "O",
+        # Remainder of ring B.
+        "B5": "C",
+        "B6": "C",
+        "B7": "C",
+        "B8": "C",
+        "B9": "C",
+        "B10": "C",
+        # Carbonyl oxygens.
+        "AO": "O",
+        "BO": "O",
+    }
+    builder = Chem.RWMol()
+    index_of = {
+        label: builder.AddAtom(Chem.Atom(element))
+        for label, element in element_by_label.items()
+    }
+
+    single_bonds = (
+        # Ring A closure.
+        ("A1", "A2"),
+        ("A2", "S1"),
+        ("S1", "S2"),
+        ("S2", "S3"),
+        ("S3", "S4"),
+        ("S4", "A5"),
+        ("A5", "A6"),
+        ("A6", "A7"),
+        ("A7", "A8"),
+        ("A8", "A9"),
+        ("A9", "A10"),
+        ("A10", "A1"),
+        # Ring B, reusing S1-S4.
+        ("B1", "B2"),
+        ("B2", "S1"),
+        ("S4", "B5"),
+        ("B5", "B6"),
+        ("B6", "B7"),
+        ("B7", "B8"),
+        ("B8", "B9"),
+        ("B9", "B10"),
+        ("B10", "B1"),
+    )
+    for first, second in single_bonds:
+        builder.AddBond(index_of[first], index_of[second], Chem.BondType.SINGLE)
+
+    # Carbonyl double bonds on each ester carbon.
+    for carbon, oxygen in (("A1", "AO"), ("B1", "BO")):
+        builder.AddBond(index_of[carbon], index_of[oxygen], Chem.BondType.DOUBLE)
+
+    built = builder.GetMol()
+    Chem.SanitizeMol(built)
+    smiles = Chem.MolToSmiles(built, isomericSmiles=True)
+    return BuiltMacrolactone(
+        mol=built,
+        smiles=smiles,
+        position_to_atom={},
+    )
+
+
 def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    if isinstance(smiles_or_mol, Chem.Mol):
        mol = smiles_or_mol