Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
175 lines
6.3 KiB
Python
175 lines
6.3 KiB
Python
import pytest
|
|
from rdkit import Chem
|
|
|
|
from macro_lactone_toolkit import (
|
|
AmbiguousMacrolactoneError,
|
|
MacroLactoneAnalyzer,
|
|
MacrolactoneDetectionError,
|
|
MacrolactoneFragmenter,
|
|
)
|
|
|
|
from .helpers import (
|
|
build_ambiguous_smiles,
|
|
build_macrolactone,
|
|
build_non_standard_ring_atom_macrolactone,
|
|
build_overlapping_candidate_macrolactone,
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_detects_supported_ring_sizes(ring_size: int):
    """Check ``get_valid_ring_sizes`` on a molecule built for ``ring_size``.

    The SMILES produced by ``build_macrolactone(ring_size)`` must yield a
    single-element list containing exactly that ring size.
    """
    molecule = build_macrolactone(ring_size)

    detected_sizes = MacroLactoneAnalyzer().get_valid_ring_sizes(molecule.smiles)

    assert detected_sizes == [ring_size]
|
|
|
|
|
|
def test_analyzer_rejects_non_lactone_macrocycle():
    """Check that an all-carbon ring SMILES yields no valid ring sizes."""
    carbocycle_smiles = "C1CCCCCCCCCCC1"

    detected_sizes = MacroLactoneAnalyzer().get_valid_ring_sizes(carbocycle_smiles)

    assert detected_sizes == []
|
|
|
|
|
|
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_classifies_supported_ring_sizes(ring_size: int):
    """Check ``classify_macrocycle`` on a molecule built for ``ring_size``.

    The result must be classified as ``"standard_macrolactone"``, carry the
    requested ring size, and report no reason codes or messages.
    """
    molecule = build_macrolactone(ring_size)

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "standard_macrolactone"
    assert verdict.ring_size == ring_size
    # A standard result carries no explanation at all.
    assert verdict.primary_reason_code is None
    assert verdict.primary_reason_message is None
    assert verdict.all_reason_codes == []
    assert verdict.all_reason_messages == []
    assert verdict.candidate_ring_sizes == [ring_size]
|
|
|
|
|
|
def test_analyzer_classifies_ring_heteroatom_as_non_standard():
    """Check classification of the ``build_non_standard_ring_atom_macrolactone`` fixture.

    The result must be ``"non_standard_macrocycle"`` with ring size 16 and the
    non-carbon-ring-atom reason code as its only reason.
    """
    reason_code = "contains_non_carbon_ring_atoms_outside_positions_1_2"
    molecule = build_non_standard_ring_atom_macrolactone()

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 16
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "Ring positions 3..N contain non-carbon atoms."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == [16]
|
|
|
|
|
|
def test_analyzer_classifies_overlapping_candidates_as_non_standard():
    """Check classification of the ``build_overlapping_candidate_macrolactone`` fixture.

    The result must be ``"non_standard_macrocycle"`` with ring size 12 and the
    overlapping-candidates reason code as its only reason.
    """
    reason_code = "multiple_overlapping_macrocycle_candidates"
    molecule = build_overlapping_candidate_macrolactone()

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles)

    assert verdict.classification == "non_standard_macrocycle"
    assert verdict.ring_size == 12
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "Overlapping macrolactone candidate rings were detected."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == [12]
|
|
|
|
|
|
def test_analyzer_classifies_non_lactone_macrocycle():
    """Check classification of an all-carbon ring SMILES.

    The result must be ``"not_macrolactone"`` with no ring size, no candidate
    ring sizes, and the no-lactone-ring reason code as its only reason.
    """
    reason_code = "no_lactone_ring_in_12_to_20_range"
    carbocycle_smiles = "C1CCCCCCCCCCC1"

    verdict = MacroLactoneAnalyzer().classify_macrocycle(carbocycle_smiles)

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "No 12-20 membered lactone ring was detected."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == []
|
|
|
|
|
|
def test_analyzer_explicit_ring_size_miss_returns_requested_ring_not_found():
    """Check classification when the requested ring size differs from the built one.

    A molecule built with ``build_macrolactone(12)`` is classified with
    ``ring_size=16``; the result must be ``"not_macrolactone"`` with the
    requested-ring-size-not-found reason code as its only reason.
    """
    reason_code = "requested_ring_size_not_found"
    molecule = build_macrolactone(12)

    verdict = MacroLactoneAnalyzer().classify_macrocycle(molecule.smiles, ring_size=16)

    assert verdict.classification == "not_macrolactone"
    assert verdict.ring_size is None
    assert verdict.primary_reason_code == reason_code
    assert verdict.primary_reason_message == "The requested ring size was not detected as a lactone ring."
    assert verdict.all_reason_codes == [reason_code]
    assert verdict.candidate_ring_sizes == []
|
|
|
|
|
|
def test_fragmenter_auto_numbers_ring_with_expected_positions():
    """Check ``number_molecule`` against the numbering recorded by the builder.

    The molecule comes from ``build_macrolactone(16, {5: "methyl"})``. The
    numbering result must match the builder's ``position_to_atom`` map, cover
    positions 1..16, and expose the inverse map as ``atom_to_position``.
    Position 1 must be a carbon with a double bond to an oxygen, and
    position 2 must be an oxygen.
    """
    molecule = build_macrolactone(16, {5: "methyl"})
    numbering = MacrolactoneFragmenter().number_molecule(molecule.mol)

    assert numbering.ring_size == 16
    assert numbering.position_to_atom == molecule.position_to_atom
    assert set(numbering.position_to_atom) == set(range(1, 17))

    # atom_to_position must be the exact inverse of the builder's map.
    inverse_map = {
        atom_index: ring_position
        for ring_position, atom_index in molecule.position_to_atom.items()
    }
    assert numbering.atom_to_position == inverse_map

    # Position 1: carbon with at least one double bond to an oxygen.
    position_one = molecule.mol.GetAtomWithIdx(numbering.position_to_atom[1])
    assert position_one.GetSymbol() == "C"
    double_bond_partners = [
        bond.GetOtherAtom(position_one).GetSymbol()
        for bond in position_one.GetBonds()
        if bond.GetBondType() == Chem.BondType.DOUBLE
    ]
    assert "O" in double_bond_partners

    # Position 2: oxygen.
    position_two = molecule.mol.GetAtomWithIdx(numbering.position_to_atom[2])
    assert position_two.GetSymbol() == "O"
|
|
|
|
|
|
def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():
    """Check that numbering the ``build_ambiguous_smiles`` fixture raises.

    A fragmenter constructed without arguments must raise
    ``AmbiguousMacrolactoneError`` for that SMILES.
    """
    smiles = build_ambiguous_smiles()

    with pytest.raises(AmbiguousMacrolactoneError):
        MacrolactoneFragmenter().number_molecule(smiles)
|
|
|
|
|
|
def test_fragmenter_raises_for_missing_macrolactone():
    """Check that numbering ``"CCO"`` raises ``MacrolactoneDetectionError``.

    The error text must contain the not-macrolactone classification and the
    no-lactone-ring reason code.
    """
    # pytest applies ``match`` as a regular-expression search on the error text.
    expected_detail = (
        "classification=not_macrolactone "
        "primary_reason_code=no_lactone_ring_in_12_to_20_range"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_detail):
        MacrolactoneFragmenter().number_molecule("CCO")
|
|
|
|
|
|
def test_fragmenter_rejects_non_standard_macrocycle_with_reason_code():
    """Check that numbering the non-standard ring-atom fixture raises.

    ``number_molecule`` must raise ``MacrolactoneDetectionError`` whose text
    contains the non-standard classification and the non-carbon-ring-atom
    reason code.
    """
    molecule = build_non_standard_ring_atom_macrolactone()
    # pytest applies ``match`` as a regular-expression search on the error text.
    expected_detail = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=contains_non_carbon_ring_atoms_outside_positions_1_2"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_detail):
        MacrolactoneFragmenter().number_molecule(molecule.smiles)
|
|
|
|
|
|
def test_fragmenter_rejects_non_standard_macrocycle_during_fragmentation():
    """Check that fragmenting the overlapping-candidate fixture raises.

    ``fragment_molecule`` must raise ``MacrolactoneDetectionError`` whose text
    contains the non-standard classification and the overlapping-candidates
    reason code.
    """
    molecule = build_overlapping_candidate_macrolactone()
    # pytest applies ``match`` as a regular-expression search on the error text.
    expected_detail = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=multiple_overlapping_macrocycle_candidates"
    )

    with pytest.raises(MacrolactoneDetectionError, match=expected_detail):
        MacrolactoneFragmenter().fragment_molecule(molecule.smiles)
|
|
|
|
|
|
def test_explicit_ring_size_selects_requested_ring():
    """Check that a fragmenter built with ``ring_size=14`` numbers a 14-ring.

    The molecule comes from ``build_macrolactone(14)``; the numbering result
    must report ``ring_size == 14``.
    """
    molecule = build_macrolactone(14)
    fragmenter = MacrolactoneFragmenter(ring_size=14)

    numbering = fragmenter.number_molecule(molecule.smiles)

    assert numbering.ring_size == 14
|
|
|
|
|
|
def test_explicit_ring_size_rejects_wrong_ring():
    """Check that requesting ring size 16 on a 12-ring molecule raises.

    The molecule comes from ``build_macrolactone(12)``; constructing the
    fragmenter with ``ring_size=16`` and numbering must raise
    ``MacrolactoneDetectionError``.
    """
    molecule = build_macrolactone(12)

    with pytest.raises(MacrolactoneDetectionError):
        MacrolactoneFragmenter(ring_size=16).number_molecule(molecule.smiles)
|