feat(toolkit): add classification and migration
Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
This commit is contained in:
107
tests/helpers.py
107
tests/helpers.py
@@ -16,11 +16,13 @@ class BuiltMacrolactone:
|
||||
def build_macrolactone(
|
||||
ring_size: int,
|
||||
side_chains: Mapping[int, str] | None = None,
|
||||
ring_atom_symbols: Mapping[int, str] | None = None,
|
||||
) -> BuiltMacrolactone:
|
||||
if not 12 <= ring_size <= 20:
|
||||
raise ValueError("ring_size must be between 12 and 20")
|
||||
|
||||
side_chains = dict(side_chains or {})
|
||||
ring_atom_symbols = dict(ring_atom_symbols or {})
|
||||
rwmol = Chem.RWMol()
|
||||
|
||||
position_to_atom: dict[int, int] = {
|
||||
@@ -28,7 +30,7 @@ def build_macrolactone(
|
||||
2: rwmol.AddAtom(Chem.Atom("O")),
|
||||
}
|
||||
for position in range(3, ring_size + 1):
|
||||
position_to_atom[position] = rwmol.AddAtom(Chem.Atom("C"))
|
||||
position_to_atom[position] = rwmol.AddAtom(Chem.Atom(ring_atom_symbols.get(position, "C")))
|
||||
|
||||
carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))
|
||||
|
||||
@@ -63,6 +65,109 @@ def build_ambiguous_smiles() -> str:
|
||||
return Chem.MolToSmiles(combined, isomericSmiles=True)
|
||||
|
||||
|
||||
def build_non_standard_ring_atom_macrolactone(
    ring_size: int = 16,
    hetero_position: int = 5,
    atom_symbol: str = "N",
) -> BuiltMacrolactone:
    """Build a macrolactone with one overridden ring atom.

    Delegates to ``build_macrolactone`` with a single-entry
    ``ring_atom_symbols`` mapping ``{hetero_position: atom_symbol}``.

    Raises:
        ValueError: If ``hetero_position`` is below 3 or above ``ring_size``.
    """
    # Positions below 3 are rejected here before any molecule is built.
    if hetero_position < 3 or ring_size < hetero_position:
        raise ValueError("hetero_position must be between 3 and ring_size")

    overrides = {hetero_position: atom_symbol}
    return build_macrolactone(ring_size=ring_size, ring_atom_symbols=overrides)
|
||||
|
||||
|
||||
def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
    """Build a molecule with two 12-membered lactone rings sharing atoms S1-S4.

    Ring A runs A1, A2, S1..S4, A5..A10 and ring B runs B1, B2, S1..S4,
    B5..B10. In each ring the oxygen (A2 / B2) sits next to a carbon
    (A1 / B1) that is double-bonded to an exocyclic oxygen (AO / BO).
    The returned ``position_to_atom`` mapping is left empty.
    """
    editable = Chem.RWMol()

    # (label, element) pairs; the order fixes the atom indices in the molecule.
    labelled_elements = (
        ("A1", "C"),
        ("A2", "O"),
        ("S1", "C"),
        ("S2", "C"),
        ("S3", "C"),
        ("S4", "C"),
        ("A5", "C"),
        ("A6", "C"),
        ("A7", "C"),
        ("A8", "C"),
        ("A9", "C"),
        ("A10", "C"),
        ("B1", "C"),
        ("B2", "O"),
        ("B5", "C"),
        ("B6", "C"),
        ("B7", "C"),
        ("B8", "C"),
        ("B9", "C"),
        ("B10", "C"),
        ("AO", "O"),
        ("BO", "O"),
    )
    index_of = {}
    for label, element in labelled_elements:
        index_of[label] = editable.AddAtom(Chem.Atom(element))

    # Single bonds are described as walks; consecutive labels get bonded.
    ring_a_cycle = ("A1", "A2", "S1", "S2", "S3", "S4", "A5", "A6", "A7", "A8", "A9", "A10", "A1")
    ring_b_head = ("B1", "B2", "S1")
    ring_b_tail = ("S4", "B5", "B6", "B7", "B8", "B9", "B10", "B1")
    for walk in (ring_a_cycle, ring_b_head, ring_b_tail):
        for start, end in zip(walk, walk[1:]):
            editable.AddBond(index_of[start], index_of[end], Chem.BondType.SINGLE)

    # Exocyclic carbonyl oxygens.
    for carbon, oxygen in (("A1", "AO"), ("B1", "BO")):
        editable.AddBond(index_of[carbon], index_of[oxygen], Chem.BondType.DOUBLE)

    molecule = editable.GetMol()
    Chem.SanitizeMol(molecule)
    canonical = Chem.MolToSmiles(molecule, isomericSmiles=True)
    return BuiltMacrolactone(mol=molecule, smiles=canonical, position_to_atom={})
|
||||
|
||||
|
||||
def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
|
||||
if isinstance(smiles_or_mol, Chem.Mol):
|
||||
mol = smiles_or_mol
|
||||
|
||||
@@ -6,7 +6,12 @@ import sys
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .helpers import build_ambiguous_smiles, build_macrolactone
|
||||
from .helpers import (
|
||||
build_ambiguous_smiles,
|
||||
build_macrolactone,
|
||||
build_non_standard_ring_atom_macrolactone,
|
||||
build_overlapping_candidate_macrolactone,
|
||||
)
|
||||
|
||||
|
||||
def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
|
||||
@@ -24,7 +29,10 @@ def test_cli_smoke_commands():
|
||||
analyze = run_cli("analyze", "--smiles", built.smiles)
|
||||
assert analyze.returncode == 0, analyze.stderr
|
||||
analyze_payload = json.loads(analyze.stdout)
|
||||
assert analyze_payload["valid_ring_sizes"] == [16]
|
||||
assert analyze_payload["classification"] == "standard_macrolactone"
|
||||
assert analyze_payload["ring_size"] == 16
|
||||
assert analyze_payload["primary_reason_code"] is None
|
||||
assert analyze_payload["candidate_ring_sizes"] == [16]
|
||||
|
||||
number = run_cli("number", "--smiles", built.smiles)
|
||||
assert number.returncode == 0, number.stderr
|
||||
@@ -40,6 +48,55 @@ def test_cli_smoke_commands():
|
||||
assert fragment_payload["fragments"][0]["fragment_smiles_labeled"]
|
||||
|
||||
|
||||
def test_cli_analyze_reports_non_standard_classifications():
    """Check the ``analyze`` JSON payload for two non-standard macrocycle inputs."""
    # (built molecule, expected primary_reason_code, expected ring_size)
    cases = (
        (
            build_non_standard_ring_atom_macrolactone(),
            "contains_non_carbon_ring_atoms_outside_positions_1_2",
            16,
        ),
        (
            build_overlapping_candidate_macrolactone(),
            "multiple_overlapping_macrocycle_candidates",
            12,
        ),
    )

    for built, expected_reason, expected_ring_size in cases:
        completed = run_cli("analyze", "--smiles", built.smiles)
        assert completed.returncode == 0, completed.stderr

        payload = json.loads(completed.stdout)
        assert payload["classification"] == "non_standard_macrocycle"
        assert payload["primary_reason_code"] == expected_reason
        assert payload["ring_size"] == expected_ring_size
|
||||
|
||||
|
||||
def test_cli_analyze_csv_reports_classification_fields(tmp_path):
    """Run ``analyze`` on a two-row CSV and check the classification columns it writes."""
    input_path = tmp_path / "molecules.csv"
    output_path = tmp_path / "analysis.csv"

    rows = [
        {"id": "valid_1", "smiles": build_macrolactone(14).smiles},
        {"id": "hetero_1", "smiles": build_non_standard_ring_atom_macrolactone().smiles},
    ]
    pd.DataFrame(rows).to_csv(input_path, index=False)

    cli_args = ["analyze", "--input", str(input_path), "--output", str(output_path)]
    completed = run_cli(*cli_args)
    assert completed.returncode == 0, completed.stderr

    analysis = pd.read_csv(output_path)
    assert set(analysis["parent_id"]) == {"valid_1", "hetero_1"}
    assert set(analysis["classification"]) == {"standard_macrolactone", "non_standard_macrocycle"}
    for column in ("primary_reason_code", "ring_size"):
        assert column in analysis.columns
|
||||
|
||||
|
||||
def test_cli_fragment_csv_skips_ambiguous_and_records_errors(tmp_path):
|
||||
valid = build_macrolactone(14, {4: "methyl"})
|
||||
ambiguous = build_ambiguous_smiles()
|
||||
|
||||
@@ -8,7 +8,12 @@ from macro_lactone_toolkit import (
|
||||
MacrolactoneFragmenter,
|
||||
)
|
||||
|
||||
from .helpers import build_ambiguous_smiles, build_macrolactone
|
||||
from .helpers import (
|
||||
build_ambiguous_smiles,
|
||||
build_macrolactone,
|
||||
build_non_standard_ring_atom_macrolactone,
|
||||
build_overlapping_candidate_macrolactone,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
|
||||
@@ -25,6 +30,77 @@ def test_analyzer_rejects_non_lactone_macrocycle():
|
||||
assert analyzer.get_valid_ring_sizes("C1CCCCCCCCCCC1") == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
def test_analyzer_classifies_supported_ring_sizes(ring_size: int):
    """A plain built macrolactone classifies as standard with no reason codes."""
    smiles = build_macrolactone(ring_size).smiles
    outcome = MacroLactoneAnalyzer().classify_macrocycle(smiles)

    assert outcome.classification == "standard_macrolactone"
    assert outcome.ring_size == ring_size
    assert outcome.candidate_ring_sizes == [ring_size]

    # A standard result carries no reason at all.
    assert outcome.primary_reason_code is None
    assert outcome.primary_reason_message is None
    assert outcome.all_reason_codes == []
    assert outcome.all_reason_messages == []
|
||||
|
||||
|
||||
def test_analyzer_classifies_ring_heteroatom_as_non_standard():
    """The default hetero-ring helper molecule classifies as a non-standard macrocycle."""
    reason_code = "contains_non_carbon_ring_atoms_outside_positions_1_2"
    hetero = build_non_standard_ring_atom_macrolactone()

    outcome = MacroLactoneAnalyzer().classify_macrocycle(hetero.smiles)

    assert outcome.classification == "non_standard_macrocycle"
    assert outcome.ring_size == 16
    assert outcome.candidate_ring_sizes == [16]
    assert outcome.primary_reason_code == reason_code
    assert outcome.primary_reason_message == "Ring positions 3..N contain non-carbon atoms."
    assert outcome.all_reason_codes == [reason_code]
|
||||
|
||||
|
||||
def test_analyzer_classifies_overlapping_candidates_as_non_standard():
    """The overlapping-candidate helper molecule classifies as a non-standard macrocycle."""
    reason_code = "multiple_overlapping_macrocycle_candidates"
    overlapping = build_overlapping_candidate_macrolactone()

    outcome = MacroLactoneAnalyzer().classify_macrocycle(overlapping.smiles)

    assert outcome.classification == "non_standard_macrocycle"
    assert outcome.ring_size == 12
    assert outcome.candidate_ring_sizes == [12]
    assert outcome.primary_reason_code == reason_code
    assert outcome.primary_reason_message == "Overlapping macrolactone candidate rings were detected."
    assert outcome.all_reason_codes == [reason_code]
|
||||
|
||||
|
||||
def test_analyzer_classifies_non_lactone_macrocycle():
    """An all-carbon 12-membered ring SMILES is reported as not a macrolactone."""
    reason_code = "no_lactone_ring_in_12_to_20_range"

    outcome = MacroLactoneAnalyzer().classify_macrocycle("C1CCCCCCCCCCC1")

    assert outcome.classification == "not_macrolactone"
    assert outcome.ring_size is None
    assert outcome.candidate_ring_sizes == []
    assert outcome.primary_reason_code == reason_code
    assert outcome.primary_reason_message == "No 12-20 membered lactone ring was detected."
    assert outcome.all_reason_codes == [reason_code]
|
||||
|
||||
|
||||
def test_analyzer_explicit_ring_size_miss_returns_requested_ring_not_found():
    """Requesting ring size 16 for a built 12-membered macrolactone reports a miss."""
    reason_code = "requested_ring_size_not_found"
    twelve_ring = build_macrolactone(12)

    outcome = MacroLactoneAnalyzer().classify_macrocycle(twelve_ring.smiles, ring_size=16)

    assert outcome.classification == "not_macrolactone"
    assert outcome.ring_size is None
    assert outcome.candidate_ring_sizes == []
    assert outcome.primary_reason_code == reason_code
    assert outcome.primary_reason_message == "The requested ring size was not detected as a lactone ring."
    assert outcome.all_reason_codes == [reason_code]
|
||||
|
||||
|
||||
def test_fragmenter_auto_numbers_ring_with_expected_positions():
|
||||
built = build_macrolactone(16, {5: "methyl"})
|
||||
result = MacrolactoneFragmenter().number_molecule(built.mol)
|
||||
@@ -55,10 +131,35 @@ def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():
|
||||
|
||||
|
||||
def test_fragmenter_raises_for_missing_macrolactone():
    """Numbering ``"CCO"`` raises with the not-macrolactone classification in the message."""
    # Only the match-constrained context manager is kept. Nesting it inside the
    # older bare ``pytest.raises(MacrolactoneDetectionError)`` would make the
    # outer one fail with "DID NOT RAISE", because the inner one already
    # consumes the exception.
    with pytest.raises(
        MacrolactoneDetectionError,
        match="classification=not_macrolactone primary_reason_code=no_lactone_ring_in_12_to_20_range",
    ):
        MacrolactoneFragmenter().number_molecule("CCO")
|
||||
|
||||
|
||||
def test_fragmenter_rejects_non_standard_macrocycle_with_reason_code():
    """``number_molecule`` raises for the hetero-ring helper and names the reason code."""
    hetero = build_non_standard_ring_atom_macrolactone()
    expected_message = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=contains_non_carbon_ring_atoms_outside_positions_1_2"
    )
    fragmenter = MacrolactoneFragmenter()

    with pytest.raises(MacrolactoneDetectionError, match=expected_message):
        fragmenter.number_molecule(hetero.smiles)
|
||||
|
||||
|
||||
def test_fragmenter_rejects_non_standard_macrocycle_during_fragmentation():
    """``fragment_molecule`` raises for the overlapping-candidate helper molecule."""
    overlapping = build_overlapping_candidate_macrolactone()
    expected_message = (
        "classification=non_standard_macrocycle "
        "primary_reason_code=multiple_overlapping_macrocycle_candidates"
    )
    fragmenter = MacrolactoneFragmenter()

    with pytest.raises(MacrolactoneDetectionError, match=expected_message):
        fragmenter.fragment_molecule(overlapping.smiles)
|
||||
|
||||
|
||||
def test_explicit_ring_size_selects_requested_ring():
|
||||
built = build_macrolactone(14)
|
||||
result = MacrolactoneFragmenter(ring_size=14).number_molecule(built.smiles)
|
||||
|
||||
149
tests/test_scripts_and_docs.py
Normal file
149
tests/test_scripts_and_docs.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from macro_lactone_toolkit import MacrolactoneFragmenter
|
||||
|
||||
from .helpers import build_ambiguous_smiles, build_macrolactone
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
ACTIVE_TEXT_ASSETS = [
|
||||
PROJECT_ROOT / "scripts" / "README.md",
|
||||
PROJECT_ROOT / "docs" / "SUMMARY.md",
|
||||
PROJECT_ROOT / "docs" / "project-docs" / "QUICK_COMMANDS.md",
|
||||
PROJECT_ROOT / "notebooks" / "README_analyze_ring16.md",
|
||||
]
|
||||
|
||||
|
||||
def run_script(script_name: str, *args: str) -> subprocess.CompletedProcess[str]:
    """Run ``scripts/<script_name>`` with the current interpreter and capture its output.

    The process runs with ``PROJECT_ROOT`` as its working directory. A non-zero
    exit status does not raise (``check=False``); callers inspect
    ``returncode`` / ``stderr`` on the returned object.
    """
    script_path = PROJECT_ROOT / "scripts" / script_name
    command = [sys.executable, str(script_path), *args]
    return subprocess.run(command, capture_output=True, text=True, check=False, cwd=PROJECT_ROOT)
|
||||
|
||||
|
||||
def test_batch_process_script_writes_flat_outputs_and_summary(tmp_path):
    """Run ``batch_process.py`` on one valid and one ambiguous row and check its outputs."""
    input_path = tmp_path / "molecules.csv"
    # CLI flag -> file the script is asked to write (order is the argv order).
    artifacts = {
        "--output": tmp_path / "fragments.csv",
        "--errors-output": tmp_path / "errors.csv",
        "--summary-output": tmp_path / "summary.json",
    }

    rows = [
        {"id": "valid_1", "smiles": build_macrolactone(14, {4: "methyl"}).smiles},
        {"id": "ambiguous_1", "smiles": build_ambiguous_smiles()},
    ]
    pd.DataFrame(rows).to_csv(input_path, index=False)

    cli_args = ["--input", str(input_path)]
    for flag, target in artifacts.items():
        cli_args.extend([flag, str(target)])
    completed = run_script("batch_process.py", *cli_args)

    assert completed.returncode == 0, completed.stderr
    for target in artifacts.values():
        assert target.exists()

    summary_text = artifacts["--summary-output"].read_text(encoding="utf-8")
    summary = json.loads(summary_text)
    assert summary["processed"] == 2
    assert summary["successful"] == 1
    assert summary["failed"] == 1
|
||||
|
||||
|
||||
def test_analyze_fragments_script_generates_reports_and_plot(tmp_path):
    """Feed a flat fragments CSV to ``analyze_fragments.py`` and check the report files exist."""
    built = build_macrolactone(16, {5: "methyl", 7: "ethyl"})
    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="analysis_1")

    # One flat row per fragment: parent columns first, then the fragment's own fields.
    records = []
    for fragment in result.fragments:
        record = {
            "parent_id": result.parent_id,
            "parent_smiles": result.parent_smiles,
            "ring_size": result.ring_size,
        }
        record.update(fragment.to_dict())
        records.append(record)

    input_path = tmp_path / "fragments.csv"
    output_dir = tmp_path / "analysis"
    pd.DataFrame(records).to_csv(input_path, index=False)

    cli_args = ["--input", str(input_path), "--output-dir", str(output_dir)]
    completed = run_script("analyze_fragments.py", *cli_args)

    assert completed.returncode == 0, completed.stderr
    expected_files = (
        "position_statistics.csv",
        "fragment_property_summary.csv",
        "position_frequencies.png",
        "analysis_summary.txt",
    )
    for file_name in expected_files:
        assert (output_dir / file_name).exists()
|
||||
|
||||
|
||||
def test_generate_sdf_and_statistics_script_generates_artifacts(tmp_path):
    """Feed a flat fragments CSV to ``generate_sdf_and_statistics.py`` and check its artifacts."""
    built = build_macrolactone(16, {5: "methyl"})
    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="sdf_1")

    # One flat row per fragment: parent columns first, then the fragment's own fields.
    records = []
    for fragment in result.fragments:
        record = {
            "parent_id": result.parent_id,
            "parent_smiles": result.parent_smiles,
            "ring_size": result.ring_size,
        }
        record.update(fragment.to_dict())
        records.append(record)

    input_path = tmp_path / "fragments.csv"
    output_dir = tmp_path / "sdf_output"
    pd.DataFrame(records).to_csv(input_path, index=False)

    cli_args = ["--input", str(input_path), "--output-dir", str(output_dir)]
    completed = run_script("generate_sdf_and_statistics.py", *cli_args)

    assert completed.returncode == 0, completed.stderr
    statistics_path = output_dir / "cleavage_position_statistics.json"
    sdf_path = output_dir / "sdf" / "sdf_1_3d.sdf"
    assert statistics_path.exists()
    assert sdf_path.exists()
|
||||
|
||||
|
||||
def test_active_text_assets_do_not_reference_legacy_api():
    """Every file in ``ACTIVE_TEXT_ASSETS`` must be free of the listed legacy snippets."""
    legacy_markers = (
        "from src.",
        "import src.",
        "process_csv(",
        "batch_to_dataframe(",
        "visualize_molecule(",
        "save_to_json(",
    )

    for asset in ACTIVE_TEXT_ASSETS:
        # Read each asset once, then scan it for every marker.
        contents = asset.read_text(encoding="utf-8")
        for marker in legacy_markers:
            assert marker not in contents, f"{asset} still contains legacy reference: {marker}"
|
||||
171
tests/test_visualization_and_workflows.py
Normal file
171
tests/test_visualization_and_workflows.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from macro_lactone_toolkit import MacroLactoneAnalyzer, MacrolactoneFragmenter
|
||||
|
||||
from .helpers import (
|
||||
build_ambiguous_smiles,
|
||||
build_macrolactone,
|
||||
build_non_standard_ring_atom_macrolactone,
|
||||
)
|
||||
|
||||
|
||||
def test_visualization_exports_numbered_svg_and_png(tmp_path):
    """The visualization helpers return SVG text and write non-empty PNG files."""
    from macro_lactone_toolkit.visualization import (
        numbered_molecule_svg,
        save_fragment_png,
        save_numbered_molecule_png,
    )

    def assert_written(returned_path, expected_path):
        # The saver must hand back the path it was given and leave a non-empty file there.
        assert returned_path == expected_path
        assert expected_path.exists()
        assert expected_path.stat().st_size > 0

    built = build_macrolactone(16, {5: "methyl"})
    fragmentation = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="viz_1")
    first_fragment = fragmentation.fragments[0]

    assert "<svg" in numbered_molecule_svg(built.smiles)

    numbered_path = tmp_path / "numbered.png"
    assert_written(save_numbered_molecule_png(built.smiles, numbered_path), numbered_path)

    fragment_path = tmp_path / "fragment.png"
    assert_written(save_fragment_png(first_fragment.fragment_smiles_labeled, fragment_path), fragment_path)
|
||||
|
||||
|
||||
def test_visualization_supports_allowed_ring_atom_type_filtering():
    """``allowed_ring_atom_types`` decides whether the hetero-ring molecule is drawn or rejected."""
    from macro_lactone_toolkit.visualization import numbered_molecule_svg

    hetero_smiles = build_non_standard_ring_atom_macrolactone().smiles

    # Allowing "N" alongside "C" renders the molecule.
    rendered = numbered_molecule_svg(hetero_smiles, allowed_ring_atom_types=["C", "N"])
    assert "<svg" in rendered

    # Restricting to carbon only is rejected with a ValueError.
    with pytest.raises(ValueError, match="allowed ring atom types"):
        numbered_molecule_svg(hetero_smiles, allowed_ring_atom_types=["C"])
|
||||
|
||||
|
||||
def test_fragment_csv_and_results_to_dataframe(tmp_path):
    """``fragment_csv`` results convert to a flat dataframe and serialise to JSON."""
    from macro_lactone_toolkit.workflows import fragment_csv, results_to_dataframe, write_result_json

    input_path = tmp_path / "molecules.csv"
    rows = [
        {"id": "mol_14", "smiles": build_macrolactone(14, {4: "methyl"}).smiles},
        {"id": "mol_16", "smiles": build_macrolactone(16, {6: "ethyl"}).smiles},
    ]
    pd.DataFrame(rows).to_csv(input_path, index=False)

    results = fragment_csv(str(input_path))
    dataframe = results_to_dataframe(results)

    parent_ids = {result.parent_id for result in results}
    assert parent_ids == {"mol_14", "mol_16"}

    required_columns = {
        "parent_id",
        "parent_smiles",
        "ring_size",
        "fragment_id",
        "cleavage_position",
        "attachment_atom_idx",
        "fragment_smiles_labeled",
        "fragment_smiles_plain",
        "atom_count",
        "molecular_weight",
    }
    assert required_columns.issubset(dataframe.columns)

    json_path = tmp_path / "result.json"
    assert write_result_json(results[0], json_path) == json_path

    payload = json.loads(json_path.read_text(encoding="utf-8"))
    assert payload["parent_id"] in {"mol_14", "mol_16"}
    assert payload["fragments"]
|
||||
|
||||
|
||||
def test_fragment_csv_raises_for_invalid_or_ambiguous_rows(tmp_path):
    """``fragment_csv`` raises when the input CSV contains an ambiguous molecule."""
    from macro_lactone_toolkit.workflows import fragment_csv

    input_path = tmp_path / "molecules.csv"
    rows = [
        {"id": "valid_1", "smiles": build_macrolactone(14).smiles},
        {"id": "ambiguous_1", "smiles": build_ambiguous_smiles()},
    ]
    pd.DataFrame(rows).to_csv(input_path, index=False)

    # NOTE(review): the concrete exception class is not visible from this test,
    # so the broad ``Exception`` is kept and the message pattern does the narrowing.
    expected_message = "ambiguous|Multiple valid macrolactone candidates"
    with pytest.raises(Exception, match=expected_message):
        fragment_csv(str(input_path))
|
||||
|
||||
|
||||
def test_export_numbered_macrolactone_csv_writes_status_and_images(tmp_path):
    """The numbered-export workflow writes a status CSV plus one image per input row."""
    from macro_lactone_toolkit.workflows import export_numbered_macrolactone_csv

    input_path = tmp_path / "molecules.csv"
    output_dir = tmp_path / "numbered"
    rows = [
        {"id": "valid_1", "smiles": build_macrolactone(14).smiles},
        {"id": "hetero_1", "smiles": build_non_standard_ring_atom_macrolactone().smiles},
    ]
    pd.DataFrame(rows).to_csv(input_path, index=False)

    csv_path = export_numbered_macrolactone_csv(
        str(input_path),
        output_dir=output_dir,
        allowed_ring_atom_types=["C", "N"],
    )
    exported = pd.read_csv(csv_path)

    required_columns = {
        "parent_id",
        "status",
        "image_path",
        "classification",
        "primary_reason_code",
        "ring_size",
    }
    assert required_columns.issubset(exported.columns)
    assert set(exported["parent_id"]) == {"valid_1", "hetero_1"}
    assert set(exported["status"]) == {"success"}

    # Each recorded image path is resolved against tmp_path and must exist.
    for image_path in exported["image_path"]:
        assert image_path
        assert (tmp_path / image_path).exists()
|
||||
|
||||
|
||||
def test_analyzer_bulk_helpers():
    """Exercise the analyzer's bulk, dataframe, SMARTS and property helpers on three inputs."""
    non_lactone = "C1CCCCCCCCCCC1"
    valid_smiles = build_macrolactone(12).smiles
    hetero_smiles = build_non_standard_ring_atom_macrolactone().smiles
    dataframe = pd.DataFrame(
        [
            {"id": "valid_1", "smiles": valid_smiles},
            {"id": "hetero_1", "smiles": hetero_smiles},
            {"id": "plain_1", "smiles": non_lactone},
        ]
    )
    analyzer = MacroLactoneAnalyzer()

    summary = analyzer.analyze_many([valid_smiles, hetero_smiles, non_lactone])
    ring_size_groups, rejected = analyzer.classify_dataframe(dataframe)
    smarts_match = analyzer.match_dynamic_smarts(valid_smiles, ring_size=12)
    properties = analyzer.calculate_properties(valid_smiles)

    # One molecule of each classification was supplied.
    assert summary["total"] == 3
    counts = summary["classification_counts"]
    for label in ("standard_macrolactone", "non_standard_macrocycle", "not_macrolactone"):
        assert counts[label] == 1

    assert 12 in ring_size_groups
    assert list(ring_size_groups[12]["id"]) == ["valid_1"]
    assert set(rejected["classification"]) == {"non_standard_macrocycle", "not_macrolactone"}

    assert smarts_match is not None
    assert properties is not None
    assert {"molecular_weight", "logp", "qed", "tpsa"}.issubset(properties)
|
||||
Reference in New Issue
Block a user