feat(toolkit): add classification and migration

Implement the standard/non-standard/not-macrolactone classification layer
and integrate it into analyzer, fragmenter, and CLI outputs.

Port the remaining legacy package capabilities into new visualization and
workflow modules, restore batch/statistics/SDF scripts on top of the flat
CSV workflow, and update active docs to the new package API.
This commit is contained in:
2026-03-18 23:56:41 +08:00
parent 9ccbcfcd04
commit c0ead42384
24 changed files with 1497 additions and 313 deletions

View File

@@ -8,7 +8,12 @@ from macro_lactone_toolkit import (
MacrolactoneFragmenter,
)
from .helpers import build_ambiguous_smiles, build_macrolactone
from .helpers import (
build_ambiguous_smiles,
build_macrolactone,
build_non_standard_ring_atom_macrolactone,
build_overlapping_candidate_macrolactone,
)
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
@@ -25,6 +30,77 @@ def test_analyzer_rejects_non_lactone_macrocycle():
assert analyzer.get_valid_ring_sizes("C1CCCCCCCCCCC1") == []
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_classifies_supported_ring_sizes(ring_size: int):
    """Check that each parametrized ring size classifies as a standard macrolactone.

    The molecule comes from ``build_macrolactone(ring_size)``; the
    classification result must report that same ring size, carry no reason
    codes or messages, and list the ring size as the only candidate.
    """
    molecule = build_macrolactone(ring_size)

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "standard_macrolactone"
    assert verdict.ring_size == ring_size
    # A standard hit must not carry any diagnostic reason.
    assert verdict.primary_reason_code is None
    assert verdict.primary_reason_message is None
    assert verdict.all_reason_codes == []
    assert verdict.all_reason_messages == []
    assert verdict.candidate_ring_sizes == [ring_size]
def test_analyzer_classifies_ring_heteroatom_as_non_standard():
    """Check the classification of the non-standard-ring-atom fixture.

    The molecule from ``build_non_standard_ring_atom_macrolactone()`` must be
    reported as a 16-membered non-standard macrocycle whose only reason is
    the non-carbon-ring-atom code.
    """
    reason_code = "contains_non_carbon_ring_atoms_outside_positions_1_2"
    molecule = build_non_standard_ring_atom_macrolactone()

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 16
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "Ring positions 3..N contain non-carbon atoms."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == [16]
def test_analyzer_classifies_overlapping_candidates_as_non_standard():
    """Check the classification of the overlapping-candidate fixture.

    The molecule from ``build_overlapping_candidate_macrolactone()`` must be
    reported as a 12-membered non-standard macrocycle whose only reason is
    the overlapping-candidates code.
    """
    reason_code = "multiple_overlapping_macrocycle_candidates"
    molecule = build_overlapping_candidate_macrolactone()

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 12
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "Overlapping macrolactone candidate rings were detected."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == [12]
def test_analyzer_classifies_non_lactone_macrocycle():
    """Check that the SMILES ``C1CCCCCCCCCCC1`` is classified as not a macrolactone.

    The result must have no ring size, no candidate ring sizes, and the
    "no lactone ring in 12 to 20 range" reason as its only reason.
    """
    reason_code = "no_lactone_ring_in_12_to_20_range"

    verdict = MacroLactoneAnalyzer().classify_macrocycle("C1CCCCCCCCCCC1")

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "No 12-20 membered lactone ring was detected."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == []
def test_analyzer_explicit_ring_size_miss_returns_requested_ring_not_found():
    """Check the result when the requested ring size differs from the built one.

    A molecule built with ``build_macrolactone(12)`` is classified with
    ``ring_size=16``; the result must be ``not_macrolactone`` with the
    "requested ring size not found" reason and no candidates.
    """
    reason_code = "requested_ring_size_not_found"
    molecule = build_macrolactone(12)

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles, ring_size=16)

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "The requested ring size was not detected as a lactone ring."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == []
def test_fragmenter_auto_numbers_ring_with_expected_positions():
built = build_macrolactone(16, {5: "methyl"})
result = MacrolactoneFragmenter().number_molecule(built.mol)
@@ -55,10 +131,35 @@ def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():
def test_fragmenter_raises_for_missing_macrolactone():
    """Check that numbering ``"CCO"`` raises a detection error with its reason.

    The error message must contain the classification and primary reason
    code, which ``pytest.raises(match=...)`` checks as a regex search.

    Only one ``pytest.raises`` context is used. An additional bare
    ``pytest.raises(MacrolactoneDetectionError)`` wrapped around it would
    always fail with "DID NOT RAISE", because the inner context already
    consumes the exception.
    """
    with pytest.raises(
        MacrolactoneDetectionError,
        match="classification=not_macrolactone primary_reason_code=no_lactone_ring_in_12_to_20_range",
    ):
        MacrolactoneFragmenter().number_molecule("CCO")
def test_fragmenter_rejects_non_standard_macrocycle_with_reason_code():
    """Check that numbering the non-standard-ring-atom fixture raises with its reason.

    ``number_molecule`` is called on the SMILES from
    ``build_non_standard_ring_atom_macrolactone()`` and must raise
    ``MacrolactoneDetectionError`` whose message matches the classification
    and primary reason code below.
    """
    molecule = build_non_standard_ring_atom_macrolactone()
    expected_message = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=contains_non_carbon_ring_atoms_outside_positions_1_2"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_message):
        MacrolactoneFragmenter().number_molecule(molecule.smiles)
def test_fragmenter_rejects_non_standard_macrocycle_during_fragmentation():
    """Check that fragmenting the overlapping-candidate fixture raises with its reason.

    ``fragment_molecule`` is called on the SMILES from
    ``build_overlapping_candidate_macrolactone()`` and must raise
    ``MacrolactoneDetectionError`` whose message matches the classification
    and primary reason code below.
    """
    molecule = build_overlapping_candidate_macrolactone()
    expected_message = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=multiple_overlapping_macrocycle_candidates"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_message):
        MacrolactoneFragmenter().fragment_molecule(molecule.smiles)
def test_explicit_ring_size_selects_requested_ring():
built = build_macrolactone(14)
result = MacrolactoneFragmenter(ring_size=14).number_molecule(built.smiles)