feat(toolkit): add classification and migration
Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
This commit is contained in:
107
tests/helpers.py
107
tests/helpers.py
@@ -16,11 +16,13 @@ class BuiltMacrolactone:
|
||||
def build_macrolactone(
|
||||
ring_size: int,
|
||||
side_chains: Mapping[int, str] | None = None,
|
||||
ring_atom_symbols: Mapping[int, str] | None = None,
|
||||
) -> BuiltMacrolactone:
|
||||
if not 12 <= ring_size <= 20:
|
||||
raise ValueError("ring_size must be between 12 and 20")
|
||||
|
||||
side_chains = dict(side_chains or {})
|
||||
ring_atom_symbols = dict(ring_atom_symbols or {})
|
||||
rwmol = Chem.RWMol()
|
||||
|
||||
position_to_atom: dict[int, int] = {
|
||||
@@ -28,7 +30,7 @@ def build_macrolactone(
|
||||
2: rwmol.AddAtom(Chem.Atom("O")),
|
||||
}
|
||||
for position in range(3, ring_size + 1):
|
||||
position_to_atom[position] = rwmol.AddAtom(Chem.Atom("C"))
|
||||
position_to_atom[position] = rwmol.AddAtom(Chem.Atom(ring_atom_symbols.get(position, "C")))
|
||||
|
||||
carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))
|
||||
|
||||
@@ -63,6 +65,109 @@ def build_ambiguous_smiles() -> str:
|
||||
return Chem.MolToSmiles(combined, isomericSmiles=True)
|
||||
|
||||
|
||||
def build_non_standard_ring_atom_macrolactone(
    ring_size: int = 16,
    hetero_position: int = 5,
    atom_symbol: str = "N",
) -> BuiltMacrolactone:
    """Build a macrolactone with one ring position overridden by another element.

    Thin wrapper around ``build_macrolactone`` that passes a single-entry
    ``ring_atom_symbols`` mapping, so the atom created at ``hetero_position``
    uses ``atom_symbol`` instead of the default carbon.

    Args:
        ring_size: Ring size forwarded unchanged to ``build_macrolactone``.
        hetero_position: Ring position whose element is overridden; must lie
            in ``3..ring_size`` inclusive.
        atom_symbol: Element symbol to place at ``hetero_position``.

    Returns:
        Whatever ``build_macrolactone`` returns for the overridden ring.

    Raises:
        ValueError: If ``hetero_position`` is below 3 or above ``ring_size``.
    """
    # Only positions from 3 upward are configurable through ring_atom_symbols.
    if hetero_position < 3 or ring_size < hetero_position:
        raise ValueError("hetero_position must be between 3 and ring_size")

    overrides = {hetero_position: atom_symbol}
    return build_macrolactone(ring_size=ring_size, ring_atom_symbols=overrides)
|
||||
|
||||
|
||||
def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
    """Build a molecule with two lactone rings that share a four-carbon segment.

    Ring A runs A1-A2-S1-S2-S3-S4-A5-...-A10 and ring B runs
    B1-B2-S1-S2-S3-S4-B5-...-B10, so each ring has 12 members and both pass
    through the shared carbons S1..S4.  A1 and B1 each carry a double-bonded
    oxygen (AO / BO) and are single-bonded to the ring oxygens A2 / B2.

    Returns:
        A ``BuiltMacrolactone`` holding the sanitized molecule, its isomeric
        SMILES, and an empty ``position_to_atom`` mapping.
    """
    # Atoms are added in exactly this order, which fixes their atom indices.
    labels = (
        "A1", "A2",
        "S1", "S2", "S3", "S4",
        "A5", "A6", "A7", "A8", "A9", "A10",
        "B1", "B2",
        "B5", "B6", "B7", "B8", "B9", "B10",
        "AO", "BO",
    )
    # Every label not listed here is a carbon.
    oxygen_labels = {"A2", "B2", "AO", "BO"}

    builder = Chem.RWMol()
    index_of = {
        label: builder.AddAtom(Chem.Atom("O" if label in oxygen_labels else "C"))
        for label in labels
    }

    # Each path contributes single bonds between consecutive labels; the paths
    # are ordered so the bonds are created in the same sequence as a flat
    # pair-by-pair listing (ring A first, then the two ring-B arms).
    single_bond_paths = (
        ("A1", "A2", "S1", "S2", "S3", "S4", "A5", "A6", "A7", "A8", "A9", "A10", "A1"),
        ("B1", "B2", "S1"),
        ("S4", "B5", "B6", "B7", "B8", "B9", "B10", "B1"),
    )
    for path in single_bond_paths:
        for begin_label, end_label in zip(path, path[1:]):
            builder.AddBond(
                index_of[begin_label],
                index_of[end_label],
                Chem.BondType.SINGLE,
            )

    # Carbonyl oxygens on the two lactone carbons.
    for carbon_label, oxygen_label in (("A1", "AO"), ("B1", "BO")):
        builder.AddBond(
            index_of[carbon_label],
            index_of[oxygen_label],
            Chem.BondType.DOUBLE,
        )

    mol = builder.GetMol()
    Chem.SanitizeMol(mol)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    return BuiltMacrolactone(mol=mol, smiles=smiles, position_to_atom={})
|
||||
|
||||
|
||||
def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
|
||||
if isinstance(smiles_or_mol, Chem.Mol):
|
||||
mol = smiles_or_mol
|
||||
|
||||
Reference in New Issue
Block a user