Files
macrolactone-toolkit/tests/test_visualization_and_workflows.py
lingyuzeng c0ead42384 feat(toolkit): add classification and migration
Implement the standard/non-standard/not-macrolactone classification layer
and integrate it into analyzer, fragmenter, and CLI outputs.

Port the remaining legacy package capabilities into new visualization and
workflow modules, restore batch/statistics/SDF scripts on top of the flat
CSV workflow, and update active docs to the new package API.
2026-03-18 23:56:41 +08:00

172 lines
5.8 KiB
Python

from __future__ import annotations
import json
import pandas as pd
import pytest
from macro_lactone_toolkit import MacroLactoneAnalyzer, MacrolactoneFragmenter
from .helpers import (
build_ambiguous_smiles,
build_macrolactone,
build_non_standard_ring_atom_macrolactone,
)
def test_visualization_exports_numbered_svg_and_png(tmp_path):
    """Render one macrolactone as numbered SVG, numbered PNG and fragment PNG.

    The SVG is checked for an ``<svg`` tag. Each PNG writer must return the
    path it was given, and that path must exist with a non-zero size.
    """
    from macro_lactone_toolkit.visualization import (
        numbered_molecule_svg,
        save_fragment_png,
        save_numbered_molecule_png,
    )

    def _assert_written(returned_path, expected_path):
        # Shared checks for both PNG writers: path echoed back, file non-empty.
        assert returned_path == expected_path
        assert expected_path.exists()
        assert expected_path.stat().st_size > 0

    molecule = build_macrolactone(16, {5: "methyl"})
    fragmentation = MacrolactoneFragmenter().fragment_molecule(
        molecule.smiles,
        parent_id="viz_1",
    )
    first_fragment = fragmentation.fragments[0]

    svg_markup = numbered_molecule_svg(molecule.smiles)
    assert "<svg" in svg_markup

    numbered_png = tmp_path / "numbered.png"
    _assert_written(
        save_numbered_molecule_png(molecule.smiles, numbered_png),
        numbered_png,
    )

    fragment_png = tmp_path / "fragment.png"
    _assert_written(
        save_fragment_png(first_fragment.fragment_smiles_labeled, fragment_png),
        fragment_png,
    )
def test_visualization_supports_allowed_ring_atom_type_filtering():
    """Check that ``allowed_ring_atom_types`` gates numbered SVG rendering.

    With ``["C", "N"]`` the non-standard ring-atom fixture renders to SVG;
    with ``["C"]`` alone the call must raise ``ValueError`` whose message
    matches ``allowed ring atom types``.
    """
    from macro_lactone_toolkit.visualization import numbered_molecule_svg

    hetero_smiles = build_non_standard_ring_atom_macrolactone().smiles

    rendered = numbered_molecule_svg(
        hetero_smiles,
        allowed_ring_atom_types=["C", "N"],
    )
    assert "<svg" in rendered

    with pytest.raises(ValueError, match="allowed ring atom types"):
        numbered_molecule_svg(hetero_smiles, allowed_ring_atom_types=["C"])
def test_fragment_csv_and_results_to_dataframe(tmp_path):
    """Fragment a two-molecule CSV, flatten the results, and dump one to JSON.

    Verifies that both parent ids come back from ``fragment_csv``, that the
    flattened dataframe carries the required columns, and that
    ``write_result_json`` returns the path it wrote and produces a payload with
    a known ``parent_id`` and a non-empty ``fragments`` entry.
    """
    from macro_lactone_toolkit.workflows import fragment_csv, results_to_dataframe, write_result_json

    required_columns = {
        "parent_id",
        "parent_smiles",
        "ring_size",
        "fragment_id",
        "cleavage_position",
        "attachment_atom_idx",
        "fragment_smiles_labeled",
        "fragment_smiles_plain",
        "atom_count",
        "molecular_weight",
    }
    known_ids = {"mol_14", "mol_16"}

    ring_14 = build_macrolactone(14, {4: "methyl"})
    ring_16 = build_macrolactone(16, {6: "ethyl"})

    csv_file = tmp_path / "molecules.csv"
    rows = [
        {"id": "mol_14", "smiles": ring_14.smiles},
        {"id": "mol_16", "smiles": ring_16.smiles},
    ]
    pd.DataFrame(rows).to_csv(csv_file, index=False)

    results = fragment_csv(str(csv_file))
    flattened = results_to_dataframe(results)

    assert {item.parent_id for item in results} == known_ids
    assert required_columns.issubset(flattened.columns)

    json_file = tmp_path / "result.json"
    written_to = write_result_json(results[0], json_file)
    assert written_to == json_file

    payload = json.loads(json_file.read_text(encoding="utf-8"))
    assert payload["parent_id"] in known_ids
    assert payload["fragments"]
def test_fragment_csv_raises_for_invalid_or_ambiguous_rows(tmp_path):
    """Expect ``fragment_csv`` to raise when the CSV contains an ambiguous row.

    The input holds one ``build_macrolactone(14)`` row and one row built from
    ``build_ambiguous_smiles()``; the raised message must match
    ``ambiguous`` or ``Multiple valid macrolactone candidates``.
    """
    from macro_lactone_toolkit.workflows import fragment_csv

    good_molecule = build_macrolactone(14)
    rows = [
        {"id": "valid_1", "smiles": good_molecule.smiles},
        {"id": "ambiguous_1", "smiles": build_ambiguous_smiles()},
    ]

    csv_file = tmp_path / "molecules.csv"
    pd.DataFrame(rows).to_csv(csv_file, index=False)

    expected_message = "ambiguous|Multiple valid macrolactone candidates"
    with pytest.raises(Exception, match=expected_message):
        fragment_csv(str(csv_file))
def test_export_numbered_macrolactone_csv_writes_status_and_images(tmp_path):
    """Export numbered images for a CSV of molecules and check the status table.

    Writes a two-row input CSV (one ``build_macrolactone(14)`` molecule and the
    non-standard ring-atom fixture), runs ``export_numbered_macrolactone_csv``
    with ``allowed_ring_atom_types=["C", "N"]``, and asserts that the exported
    CSV has the expected columns, lists both parent ids, reports ``success``
    for every row, and that every ``image_path`` points at an existing path.
    """
    from macro_lactone_toolkit.workflows import export_numbered_macrolactone_csv

    valid = build_macrolactone(14)
    hetero = build_non_standard_ring_atom_macrolactone()
    input_path = tmp_path / "molecules.csv"
    output_dir = tmp_path / "numbered"
    pd.DataFrame(
        [
            {"id": "valid_1", "smiles": valid.smiles},
            {"id": "hetero_1", "smiles": hetero.smiles},
        ]
    ).to_csv(input_path, index=False)

    csv_path = export_numbered_macrolactone_csv(
        str(input_path),
        output_dir=output_dir,
        allowed_ring_atom_types=["C", "N"],
    )
    exported = pd.read_csv(csv_path)

    assert {"parent_id", "status", "image_path", "classification", "primary_reason_code", "ring_size"}.issubset(
        exported.columns
    )
    assert set(exported["parent_id"]) == {"valid_1", "hetero_1"}
    assert set(exported["status"]) == {"success"}

    # pd.read_csv loads empty cells as NaN, and NaN is truthy, so a bare
    # ``assert image_path`` would let a missing path through and only fail
    # afterwards with a TypeError from the path join. Check it explicitly.
    assert exported["image_path"].notna().all()
    for image_path in exported["image_path"]:
        assert isinstance(image_path, str) and image_path
        # Joining onto tmp_path keeps an absolute image_path as-is and
        # resolves a relative one against the test directory.
        assert (tmp_path / image_path).exists()
def test_analyzer_bulk_helpers():
    """Exercise the bulk helper methods on ``MacroLactoneAnalyzer``.

    Three inputs are used: a ``build_macrolactone(12)`` molecule, the
    non-standard ring-atom fixture, and the SMILES ``C1CCCCCCCCCCC1``. The test
    checks the ``analyze_many`` summary counts, the ring-size grouping and
    rejected rows from ``classify_dataframe``, and that
    ``match_dynamic_smarts`` and ``calculate_properties`` return non-``None``
    values, the latter containing the expected property keys.
    """
    standard = build_macrolactone(12)
    hetero = build_non_standard_ring_atom_macrolactone()
    plain_ring = "C1CCCCCCCCCCC1"

    rows = [
        {"id": "valid_1", "smiles": standard.smiles},
        {"id": "hetero_1", "smiles": hetero.smiles},
        {"id": "plain_1", "smiles": plain_ring},
    ]
    frame = pd.DataFrame(rows)

    analyzer = MacroLactoneAnalyzer()
    summary = analyzer.analyze_many([row["smiles"] for row in rows])
    groups_by_ring_size, rejected_rows = analyzer.classify_dataframe(frame)
    smarts_hit = analyzer.match_dynamic_smarts(standard.smiles, ring_size=12)
    props = analyzer.calculate_properties(standard.smiles)

    assert summary["total"] == 3
    # Each of the three inputs is expected to land in a different class.
    for label in ("standard_macrolactone", "non_standard_macrocycle", "not_macrolactone"):
        assert summary["classification_counts"][label] == 1

    assert 12 in groups_by_ring_size
    assert list(groups_by_ring_size[12]["id"]) == ["valid_1"]
    assert set(rejected_rows["classification"]) == {"non_standard_macrocycle", "not_macrolactone"}

    assert smarts_hit is not None
    assert props is not None
    assert {"molecular_weight", "logp", "qed", "tpsa"}.issubset(props)