Files
macrolactone-toolkit/tests/test_detection_and_numbering.py
lingyuzeng c0ead42384 feat(toolkit): add classification and migration
Implement the standard/non-standard/not-macrolactone classification layer
and integrate it into analyzer, fragmenter, and CLI outputs.

Port the remaining legacy package capabilities into new visualization and
workflow modules, restore batch/statistics/SDF scripts on top of the flat
CSV workflow, and update active docs to the new package API.
2026-03-18 23:56:41 +08:00

175 lines
6.3 KiB
Python

import pytest
from rdkit import Chem
from macro_lactone_toolkit import (
AmbiguousMacrolactoneError,
MacroLactoneAnalyzer,
MacrolactoneDetectionError,
MacrolactoneFragmenter,
)
from .helpers import (
build_ambiguous_smiles,
build_macrolactone,
build_non_standard_ring_atom_macrolactone,
build_overlapping_candidate_macrolactone,
)
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_detects_supported_ring_sizes(ring_size: int):
    """Check that the analyzer reports exactly the requested ring size.

    The molecule comes from ``build_macrolactone(ring_size)``; its SMILES
    must make ``get_valid_ring_sizes`` return ``[ring_size]``.
    """
    sample = build_macrolactone(ring_size)
    detected_sizes = MacroLactoneAnalyzer().get_valid_ring_sizes(sample.smiles)

    assert detected_sizes == [ring_size]
def test_analyzer_rejects_non_lactone_macrocycle():
    """Check that an all-carbon ring SMILES yields no valid ring sizes."""
    carbocycle_smiles = "C1CCCCCCCCCCC1"
    detected_sizes = MacroLactoneAnalyzer().get_valid_ring_sizes(carbocycle_smiles)

    assert detected_sizes == []
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_classifies_supported_ring_sizes(ring_size: int):
    """Check the classification result for each parametrized ring size.

    The result for ``build_macrolactone(ring_size)`` must be
    ``"standard_macrolactone"`` with the requested ring size, no reason
    codes or messages, and a single candidate ring size.
    """
    sample = build_macrolactone(ring_size)
    verdict = MacroLactoneAnalyzer().classify_macrocycle(sample.smiles)

    assert verdict.classification == "standard_macrolactone"
    assert verdict.ring_size == ring_size

    # A standard classification is expected to carry no reasons at all.
    assert verdict.primary_reason_code is None
    assert verdict.primary_reason_message is None
    assert verdict.all_reason_codes == []
    assert verdict.all_reason_messages == []

    assert verdict.candidate_ring_sizes == [ring_size]
def test_analyzer_classifies_ring_heteroatom_as_non_standard():
    """Check the non-standard classification for the ring-atom fixture.

    Uses the molecule from ``build_non_standard_ring_atom_macrolactone()``
    and pins the expected classification, ring size (16), reason code,
    reason message, and candidate ring sizes.
    """
    expected_code = "contains_non_carbon_ring_atoms_outside_positions_1_2"

    sample = build_non_standard_ring_atom_macrolactone()
    verdict = MacroLactoneAnalyzer().classify_macrocycle(sample.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 16
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == "Ring positions 3..N contain non-carbon atoms."
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == [16]
def test_analyzer_classifies_overlapping_candidates_as_non_standard():
    """Check the non-standard classification for the overlapping-candidate fixture.

    Uses the molecule from ``build_overlapping_candidate_macrolactone()``
    and pins the expected classification, ring size (12), reason code,
    reason message, and candidate ring sizes.
    """
    expected_code = "multiple_overlapping_macrocycle_candidates"

    sample = build_overlapping_candidate_macrolactone()
    verdict = MacroLactoneAnalyzer().classify_macrocycle(sample.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 12
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == "Overlapping macrolactone candidate rings were detected."
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == [12]
def test_analyzer_classifies_non_lactone_macrocycle():
    """Check that an all-carbon ring SMILES is classified as ``not_macrolactone``.

    The result must carry no ring size, the
    ``no_lactone_ring_in_12_to_20_range`` reason, and no candidate sizes.
    """
    expected_code = "no_lactone_ring_in_12_to_20_range"
    carbocycle_smiles = "C1CCCCCCCCCCC1"

    verdict = MacroLactoneAnalyzer().classify_macrocycle(carbocycle_smiles)

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == "No 12-20 membered lactone ring was detected."
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == []
def test_analyzer_explicit_ring_size_miss_returns_requested_ring_not_found():
    """Check the result when the requested ring size does not match the molecule.

    The molecule is built with ``build_macrolactone(12)`` but classified
    with ``ring_size=16``; the result must be ``not_macrolactone`` with the
    ``requested_ring_size_not_found`` reason and no candidate sizes.
    """
    expected_code = "requested_ring_size_not_found"

    sample = build_macrolactone(12)
    verdict = MacroLactoneAnalyzer().classify_macrocycle(sample.smiles, ring_size=16)

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == expected_code
    assert verdict.primary_reason_message == "The requested ring size was not detected as a lactone ring."
    assert verdict.all_reason_codes == [expected_code]
    assert verdict.candidate_ring_sizes == []
def test_fragmenter_auto_numbers_ring_with_expected_positions():
    """Check ring numbering against the fixture's own position map.

    The molecule comes from ``build_macrolactone(16, {5: "methyl"})``.
    The numbering result must reproduce the fixture's
    ``position_to_atom`` map, cover positions 1..16, expose the inverse
    map as ``atom_to_position``, and place a carbon with a double-bonded
    oxygen at position 1 and an oxygen at position 2.
    """
    sample = build_macrolactone(16, {5: "methyl"})
    numbering = MacrolactoneFragmenter().number_molecule(sample.mol)

    assert numbering.ring_size == 16
    assert numbering.position_to_atom == sample.position_to_atom
    assert set(numbering.position_to_atom) == set(range(1, 17))

    # atom_to_position must be the exact inverse of the fixture's map.
    expected_inverse = {
        atom_idx: position
        for position, atom_idx in sample.position_to_atom.items()
    }
    assert numbering.atom_to_position == expected_inverse

    # Position 1: a carbon carrying at least one double bond to oxygen.
    position_one = sample.mol.GetAtomWithIdx(numbering.position_to_atom[1])
    assert position_one.GetSymbol() == "C"
    double_bond_partner_symbols = [
        bond.GetOtherAtom(position_one).GetSymbol()
        for bond in position_one.GetBonds()
        if bond.GetBondType() == Chem.BondType.DOUBLE
    ]
    assert "O" in double_bond_partner_symbols

    # Position 2: an oxygen atom.
    position_two = sample.mol.GetAtomWithIdx(numbering.position_to_atom[2])
    assert position_two.GetSymbol() == "O"
def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():
    """Check that numbering the ambiguous fixture raises ``AmbiguousMacrolactoneError``.

    The fragmenter is created without a ring size and given the SMILES
    from ``build_ambiguous_smiles()``.
    """
    smiles = build_ambiguous_smiles()

    with pytest.raises(AmbiguousMacrolactoneError):
        MacrolactoneFragmenter().number_molecule(smiles)
def test_fragmenter_raises_for_missing_macrolactone():
    """Check the detection error raised when numbering the SMILES ``"CCO"``.

    The error message must contain the ``not_macrolactone`` classification
    and the ``no_lactone_ring_in_12_to_20_range`` reason code.
    """
    expected_message = (
        "classification=not_macrolactone primary_reason_code=no_lactone_ring_in_12_to_20_range"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_message):
        MacrolactoneFragmenter().number_molecule("CCO")
def test_fragmenter_rejects_non_standard_macrocycle_with_reason_code():
    """Check the detection error raised when numbering the ring-atom fixture.

    Uses the molecule from ``build_non_standard_ring_atom_macrolactone()``.
    The error message must contain the ``non_standard_macrocycle``
    classification and its reason code.
    """
    expected_message = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=contains_non_carbon_ring_atoms_outside_positions_1_2"
    )
    sample = build_non_standard_ring_atom_macrolactone()

    with pytest.raises(MacrolactoneDetectionError, match=expected_message):
        MacrolactoneFragmenter().number_molecule(sample.smiles)
def test_fragmenter_rejects_non_standard_macrocycle_during_fragmentation():
    """Check the detection error raised when fragmenting the overlapping-candidate fixture.

    Uses the molecule from ``build_overlapping_candidate_macrolactone()``
    and calls ``fragment_molecule``. The error message must contain the
    ``non_standard_macrocycle`` classification and its reason code.
    """
    expected_message = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=multiple_overlapping_macrocycle_candidates"
    )
    sample = build_overlapping_candidate_macrolactone()

    with pytest.raises(MacrolactoneDetectionError, match=expected_message):
        MacrolactoneFragmenter().fragment_molecule(sample.smiles)
def test_explicit_ring_size_selects_requested_ring():
    """Check that a fragmenter created with ``ring_size=14`` reports ring size 14.

    The molecule comes from ``build_macrolactone(14)``.
    """
    sample = build_macrolactone(14)
    fragmenter = MacrolactoneFragmenter(ring_size=14)

    numbering = fragmenter.number_molecule(sample.smiles)

    assert numbering.ring_size == 14
def test_explicit_ring_size_rejects_wrong_ring():
    """Check that a mismatched explicit ring size raises ``MacrolactoneDetectionError``.

    The molecule comes from ``build_macrolactone(12)`` and is numbered by
    a fragmenter created with ``ring_size=16``.
    """
    sample = build_macrolactone(12)

    with pytest.raises(MacrolactoneDetectionError):
        MacrolactoneFragmenter(ring_size=16).number_molecule(sample.smiles)