Files
macrolactone-toolkit/tests/test_fragmentation.py
lingyuzeng 46a438dd36 feat(validation): enforce single-anchor fragments
- skip fused/shared/multi-anchor side systems during extraction
- add fragment library schema and fragment_library.csv export
- make scaffold prep strict for non-spliceable positions
2026-03-19 14:20:32 +08:00

80 lines
3.2 KiB
Python

from rdkit import Chem
from macro_lactone_toolkit import MacrolactoneFragmenter
from .helpers import (
build_macrolactone,
build_macrolactone_with_fused_side_ring,
build_macrolactone_with_shared_atom_side_ring,
build_macrolactone_with_single_anchor_side_ring,
)
def test_fragmentation_returns_empty_list_without_sidechains():
    """A macrolactone built without side chains must produce no fragments."""
    bare_ring = build_macrolactone(12)
    fragmenter = MacrolactoneFragmenter()
    outcome = fragmenter.fragment_molecule(bare_ring.smiles, parent_id="plain")

    # Compared against a literal empty list, exactly as the API returns it.
    assert outcome.fragments == []
def test_fragmentation_emits_labeled_and_plain_smiles_round_trip():
    """Both SMILES variants of every fragment must parse and carry a dummy atom.

    The labeled SMILES has to contain an atomic-number-0 atom whose isotope
    equals the fragment's cleavage position; the plain SMILES has to contain
    an atomic-number-0 atom with isotope 0.
    """

    def contains_dummy_with_isotope(mol, isotope):
        # A dummy atom is identified here by atomic number 0.
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() == 0 and atom.GetIsotope() == isotope:
                return True
        return False

    macrolactone = build_macrolactone(16, {5: "ethyl", 8: "methyl"})
    fragmenter = MacrolactoneFragmenter()
    outcome = fragmenter.fragment_molecule(macrolactone.smiles, parent_id="mol_001")

    assert outcome.parent_id == "mol_001"
    assert outcome.ring_size == 16

    cleavage_positions = {piece.cleavage_position for piece in outcome.fragments}
    assert cleavage_positions == {5, 8}

    for piece in outcome.fragments:
        labeled_mol = Chem.MolFromSmiles(piece.fragment_smiles_labeled)
        plain_mol = Chem.MolFromSmiles(piece.fragment_smiles_plain)
        assert labeled_mol is not None
        assert plain_mol is not None

        # Writing the parsed molecules back out must give a non-empty string.
        assert Chem.MolToSmiles(labeled_mol, isomericSmiles=True)
        assert Chem.MolToSmiles(plain_mol, isomericSmiles=True)

        assert contains_dummy_with_isotope(labeled_mol, piece.cleavage_position)
        assert contains_dummy_with_isotope(plain_mol, 0)
def test_fragmentation_preserves_attachment_bond_type():
    """The dummy atom of an exocyclic-alkene fragment must keep a double bond.

    Fragments a 16-membered macrolactone carrying an "exocyclic_alkene"
    substituent at position 6, then checks, for both the labeled and the plain
    fragment SMILES, that the bond between the dummy atom (atomic number 0)
    and its first neighbour is a double bond.

    Every lookup that could come back empty is guarded by an explicit
    assertion so that a regression is reported as a readable test failure
    rather than a StopIteration, AttributeError or IndexError.
    """
    built = build_macrolactone(16, {6: "exocyclic_alkene"})
    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="bond_type")

    fragment = next(
        (candidate for candidate in result.fragments if candidate.cleavage_position == 6),
        None,
    )
    assert fragment is not None, "no fragment was emitted for cleavage position 6"

    smiles_by_variant = (
        ("labeled", fragment.fragment_smiles_labeled),
        ("plain", fragment.fragment_smiles_plain),
    )
    for variant, smiles in smiles_by_variant:
        mol = Chem.MolFromSmiles(smiles)
        # MolFromSmiles signals a parse failure by returning None.
        assert mol is not None, f"{variant} fragment SMILES failed to parse: {smiles!r}"

        dummy_atom = next(
            (atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 0),
            None,
        )
        assert dummy_atom is not None, f"{variant} fragment has no dummy atom: {smiles!r}"

        neighbors = dummy_atom.GetNeighbors()
        assert neighbors, f"{variant} fragment dummy atom has no neighbor: {smiles!r}"

        neighbor = neighbors[0]
        bond = mol.GetBondBetweenAtoms(dummy_atom.GetIdx(), neighbor.GetIdx())
        assert bond.GetBondType() == Chem.BondType.DOUBLE
def test_fragmentation_skips_fused_side_ring_but_keeps_single_anchor_sidechains():
    """With a fused side ring present, only the position-10 side chain is emitted."""
    fused_molecule = build_macrolactone_with_fused_side_ring(side_chains={10: "methyl"})
    fragmenter = MacrolactoneFragmenter()
    outcome = fragmenter.fragment_molecule(fused_molecule.smiles, parent_id="fused")

    cleavage_positions = {piece.cleavage_position for piece in outcome.fragments}
    assert cleavage_positions == {10}
def test_fragmentation_skips_shared_atom_multi_anchor_component():
    """With a shared-atom side ring present, only the position-11 side chain is emitted."""
    shared_atom_molecule = build_macrolactone_with_shared_atom_side_ring(
        side_chains={11: "ethyl"}
    )
    fragmenter = MacrolactoneFragmenter()
    outcome = fragmenter.fragment_molecule(
        shared_atom_molecule.smiles, parent_id="shared_atom"
    )

    cleavage_positions = {piece.cleavage_position for piece in outcome.fragments}
    assert cleavage_positions == {11}
def test_fragmentation_allows_single_anchor_side_ring():
    """A single-anchor side ring is emitted as one fragment position, 5."""
    ring_molecule = build_macrolactone_with_single_anchor_side_ring()
    fragmenter = MacrolactoneFragmenter()
    outcome = fragmenter.fragment_molecule(
        ring_molecule.smiles, parent_id="single_anchor_ring"
    )

    cleavage_positions = {piece.cleavage_position for piece in outcome.fragments}
    assert cleavage_positions == {5}