feat(toolkit): add classification and migration
Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
This commit is contained in:
@@ -8,7 +8,12 @@ from macro_lactone_toolkit import (
|
||||
MacrolactoneFragmenter,
|
||||
)
|
||||
|
||||
from .helpers import build_ambiguous_smiles, build_macrolactone
|
||||
from .helpers import (
|
||||
build_ambiguous_smiles,
|
||||
build_macrolactone,
|
||||
build_non_standard_ring_atom_macrolactone,
|
||||
build_overlapping_candidate_macrolactone,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
|
||||
@@ -25,6 +30,77 @@ def test_analyzer_rejects_non_lactone_macrocycle():
|
||||
assert analyzer.get_valid_ring_sizes("C1CCCCCCCCCCC1") == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_classifies_supported_ring_sizes(ring_size: int):
    """Check the classification returned for a helper-built macrolactone.

    For each parametrized ring size the result must be labelled
    ``standard_macrolactone``, report that ring size as the only candidate,
    and carry no reason codes or messages.
    """
    molecule = build_macrolactone(ring_size)

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    # Identity of the detected ring.
    assert verdict.classification == "standard_macrolactone"
    assert verdict.ring_size == ring_size

    # No rejection reason of any kind is attached.
    assert verdict.primary_reason_code is None
    assert verdict.primary_reason_message is None
    assert verdict.all_reason_codes == []
    assert verdict.all_reason_messages == []

    assert verdict.candidate_ring_sizes == [ring_size]
|
||||
|
||||
|
||||
def test_analyzer_classifies_ring_heteroatom_as_non_standard():
    """Check the classification of the non-standard-ring-atom fixture.

    The result must be ``non_standard_macrocycle`` for a 16-membered ring,
    with the non-carbon-ring-atom reason as the primary and only reason code.
    """
    expected_code = "contains_non_carbon_ring_atoms_outside_positions_1_2"
    expected_message = "Ring positions 3..N contain non-carbon atoms."
    molecule = build_non_standard_ring_atom_macrolactone()

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 16
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == expected_message
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == [16]
|
||||
|
||||
|
||||
def test_analyzer_classifies_overlapping_candidates_as_non_standard():
    """Check the classification of the overlapping-candidate fixture.

    The result must be ``non_standard_macrocycle`` for a 12-membered ring,
    with the overlapping-candidates reason as the primary and only reason code.
    """
    expected_code = "multiple_overlapping_macrocycle_candidates"
    expected_message = "Overlapping macrolactone candidate rings were detected."
    molecule = build_overlapping_candidate_macrolactone()

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 12
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == expected_message
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == [12]
|
||||
|
||||
|
||||
def test_analyzer_classifies_non_lactone_macrocycle():
    """Check the classification of a 12-carbon ring with no lactone group.

    The result must be ``not_macrolactone`` with no ring size, no candidate
    ring sizes, and the no-lactone-ring reason as the only reason code.
    """
    carbocycle_smiles = "C1CCCCCCCCCCC1"
    expected_code = "no_lactone_ring_in_12_to_20_range"
    expected_message = "No 12-20 membered lactone ring was detected."

    verdict = MacroLactoneAnalyzer().classify_macrocycle(carbocycle_smiles)

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == expected_message
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == []
|
||||
|
||||
|
||||
def test_analyzer_explicit_ring_size_miss_returns_requested_ring_not_found():
    """Check the result when the requested ring size differs from the built one.

    A molecule built with ring size 12 is classified with ``ring_size=16``.
    The result must be ``not_macrolactone`` with no ring size, no candidate
    ring sizes, and the requested-size-not-found reason as the only reason code.
    """
    expected_code = "requested_ring_size_not_found"
    expected_message = "The requested ring size was not detected as a lactone ring."
    molecule = build_macrolactone(12)

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles, ring_size=16)

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == expected_message
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == []
|
||||
|
||||
|
||||
def test_fragmenter_auto_numbers_ring_with_expected_positions():
|
||||
built = build_macrolactone(16, {5: "methyl"})
|
||||
result = MacrolactoneFragmenter().number_molecule(built.mol)
|
||||
@@ -55,10 +131,35 @@ def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():
|
||||
|
||||
|
||||
def test_fragmenter_raises_for_missing_macrolactone():
    """Check that numbering ``"CCO"`` raises a detection error with its reason.

    The raised ``MacrolactoneDetectionError`` must mention both the
    ``not_macrolactone`` classification and the no-lactone-ring reason code.
    """
    # Only the match-checked context is kept. Nesting it inside the older bare
    # ``pytest.raises(MacrolactoneDetectionError)`` would make the outer
    # context fail, because the inner one already consumes the exception.
    expected_error = (
        "classification=not_macrolactone "
        "primary_reason_code=no_lactone_ring_in_12_to_20_range"
    )
    with pytest.raises(MacrolactoneDetectionError, match=expected_error):
        MacrolactoneFragmenter().number_molecule("CCO")
|
||||
|
||||
|
||||
def test_fragmenter_rejects_non_standard_macrocycle_with_reason_code():
    """Check that numbering the non-standard-ring-atom fixture is rejected.

    The raised ``MacrolactoneDetectionError`` must mention the
    ``non_standard_macrocycle`` classification and the non-carbon-ring-atom
    reason code.
    """
    molecule = build_non_standard_ring_atom_macrolactone()
    expected_error = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=contains_non_carbon_ring_atoms_outside_positions_1_2"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_error):
        MacrolactoneFragmenter().number_molecule(molecule.smiles)
|
||||
|
||||
|
||||
def test_fragmenter_rejects_non_standard_macrocycle_during_fragmentation():
    """Check that fragmenting the overlapping-candidate fixture is rejected.

    The raised ``MacrolactoneDetectionError`` must mention the
    ``non_standard_macrocycle`` classification and the overlapping-candidates
    reason code.
    """
    molecule = build_overlapping_candidate_macrolactone()
    expected_error = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=multiple_overlapping_macrocycle_candidates"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_error):
        MacrolactoneFragmenter().fragment_molecule(molecule.smiles)
|
||||
|
||||
|
||||
def test_explicit_ring_size_selects_requested_ring():
|
||||
built = build_macrolactone(14)
|
||||
result = MacrolactoneFragmenter(ring_size=14).number_molecule(built.smiles)
|
||||
|
||||
Reference in New Issue
Block a user