Implement the standard/non-standard/not-macrolactone classification layer and integrate it into analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into new visualization and workflow modules, restore batch/statistics/SDF scripts on top of the flat CSV workflow, and update active docs to the new package API.
172 lines
5.8 KiB
Python
172 lines
5.8 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from macro_lactone_toolkit import MacroLactoneAnalyzer, MacrolactoneFragmenter
|
|
|
|
from .helpers import (
|
|
build_ambiguous_smiles,
|
|
build_macrolactone,
|
|
build_non_standard_ring_atom_macrolactone,
|
|
)
|
|
|
|
|
|
def test_visualization_exports_numbered_svg_and_png(tmp_path):
    """Exercise the SVG renderer and both PNG writers of the visualization module.

    The numbered SVG must contain an ``<svg`` tag. Each PNG writer must return
    the path it was given and leave a non-empty file at that location.
    """
    from macro_lactone_toolkit.visualization import (
        numbered_molecule_svg,
        save_fragment_png,
        save_numbered_molecule_png,
    )

    molecule = build_macrolactone(16, {5: "methyl"})
    fragmentation = MacrolactoneFragmenter().fragment_molecule(molecule.smiles, parent_id="viz_1")
    first_fragment = fragmentation.fragments[0]

    rendered = numbered_molecule_svg(molecule.smiles)
    assert "<svg" in rendered

    # Numbered whole-molecule image.
    numbered_png = tmp_path / "numbered.png"
    numbered_result = save_numbered_molecule_png(molecule.smiles, numbered_png)
    assert numbered_result == numbered_png
    assert numbered_png.exists()
    assert numbered_png.stat().st_size > 0

    # Image of a single fragment, drawn from its labeled SMILES.
    fragment_png = tmp_path / "fragment.png"
    fragment_result = save_fragment_png(first_fragment.fragment_smiles_labeled, fragment_png)
    assert fragment_result == fragment_png
    assert fragment_png.exists()
    assert fragment_png.stat().st_size > 0
|
|
|
|
|
|
def test_visualization_supports_allowed_ring_atom_type_filtering():
    """Check that ``allowed_ring_atom_types`` gates numbered SVG rendering.

    With ``["C", "N"]`` the fixture molecule renders to SVG; with ``["C"]``
    alone the call must raise ``ValueError`` whose message matches
    "allowed ring atom types".
    """
    from macro_lactone_toolkit.visualization import numbered_molecule_svg

    molecule = build_non_standard_ring_atom_macrolactone()

    rendered = numbered_molecule_svg(molecule.smiles, allowed_ring_atom_types=["C", "N"])
    assert "<svg" in rendered

    with pytest.raises(ValueError, match="allowed ring atom types"):
        numbered_molecule_svg(molecule.smiles, allowed_ring_atom_types=["C"])
|
|
|
|
|
|
def test_fragment_csv_and_results_to_dataframe(tmp_path):
    """Run the CSV fragmentation workflow end to end on two molecules.

    Writes a two-row ``id``/``smiles`` CSV, fragments it, and verifies that:
    every input id comes back as a ``parent_id``, the flattened dataframe
    exposes the expected fragment columns, and a single result round-trips
    through ``write_result_json`` into a JSON payload with fragments.
    """
    from macro_lactone_toolkit.workflows import fragment_csv, results_to_dataframe, write_result_json

    expected_ids = {"mol_14", "mol_16"}
    required_columns = {
        "parent_id",
        "parent_smiles",
        "ring_size",
        "fragment_id",
        "cleavage_position",
        "attachment_atom_idx",
        "fragment_smiles_labeled",
        "fragment_smiles_plain",
        "atom_count",
        "molecular_weight",
    }

    ring_14 = build_macrolactone(14, {4: "methyl"})
    ring_16 = build_macrolactone(16, {6: "ethyl"})
    csv_path = tmp_path / "molecules.csv"

    rows = [
        {"id": "mol_14", "smiles": ring_14.smiles},
        {"id": "mol_16", "smiles": ring_16.smiles},
    ]
    pd.DataFrame(rows).to_csv(csv_path, index=False)

    results = fragment_csv(str(csv_path))
    frame = results_to_dataframe(results)

    assert {item.parent_id for item in results} == expected_ids
    assert required_columns <= set(frame.columns)

    json_path = tmp_path / "result.json"
    written_path = write_result_json(results[0], json_path)
    assert written_path == json_path

    with json_path.open(encoding="utf-8") as handle:
        payload = json.load(handle)
    assert payload["parent_id"] in expected_ids
    assert payload["fragments"]
|
|
|
|
|
|
def test_fragment_csv_raises_for_invalid_or_ambiguous_rows(tmp_path):
    """Expect ``fragment_csv`` to raise when the CSV contains an ambiguous row.

    The input mixes one row from ``build_macrolactone`` with one row from
    ``build_ambiguous_smiles``; the raised message must match either
    "ambiguous" or "Multiple valid macrolactone candidates".
    """
    from macro_lactone_toolkit.workflows import fragment_csv

    good = build_macrolactone(14)
    csv_path = tmp_path / "molecules.csv"

    rows = [
        {"id": "valid_1", "smiles": good.smiles},
        {"id": "ambiguous_1", "smiles": build_ambiguous_smiles()},
    ]
    pd.DataFrame(rows).to_csv(csv_path, index=False)

    with pytest.raises(Exception, match="ambiguous|Multiple valid macrolactone candidates"):
        fragment_csv(str(csv_path))
|
|
|
|
|
|
def test_export_numbered_macrolactone_csv_writes_status_and_images(tmp_path):
    """Export numbered images for a CSV of molecules and validate the report.

    Two molecules are written to an input CSV and exported with
    ``allowed_ring_atom_types=["C", "N"]``. The returned CSV must contain the
    status/classification columns, list both parent ids, report ``success``
    for every row, and reference an image file that exists on disk.
    """
    from macro_lactone_toolkit.workflows import export_numbered_macrolactone_csv

    valid = build_macrolactone(14)
    hetero = build_non_standard_ring_atom_macrolactone()
    input_path = tmp_path / "molecules.csv"
    output_dir = tmp_path / "numbered"

    pd.DataFrame(
        [
            {"id": "valid_1", "smiles": valid.smiles},
            {"id": "hetero_1", "smiles": hetero.smiles},
        ]
    ).to_csv(input_path, index=False)

    csv_path = export_numbered_macrolactone_csv(
        str(input_path),
        output_dir=output_dir,
        allowed_ring_atom_types=["C", "N"],
    )

    exported = pd.read_csv(csv_path)
    assert {"parent_id", "status", "image_path", "classification", "primary_reason_code", "ring_size"}.issubset(
        exported.columns
    )
    assert set(exported["parent_id"]) == {"valid_1", "hetero_1"}
    assert set(exported["status"]) == {"success"}

    for image_path in exported["image_path"]:
        # pandas reads an empty CSV cell back as NaN, and NaN is truthy, so a
        # bare ``assert image_path`` would not catch a missing path. Require a
        # real, non-empty string before using it as a path component.
        assert isinstance(image_path, str) and image_path, f"missing image_path: {image_path!r}"
        # Joining with tmp_path resolves a relative path against the test
        # directory and leaves an absolute path unchanged.
        assert (tmp_path / image_path).exists(), f"image not found: {image_path}"
|
|
|
|
|
|
def test_analyzer_bulk_helpers():
    """Cover the bulk and convenience helpers on ``MacroLactoneAnalyzer``.

    Three inputs are used: a ``build_macrolactone(12)`` fixture, the
    non-standard ring-atom fixture, and an all-carbon ring SMILES. The test
    expects one molecule per classification from ``analyze_many``, the first
    fixture alone in the size-12 group from ``classify_dataframe``, the other
    two in the rejected frame, and non-``None`` results from the SMARTS match
    and property calculation.
    """
    valid = build_macrolactone(12)
    hetero = build_non_standard_ring_atom_macrolactone()
    non_lactone = "C1CCCCCCCCCCC1"

    # One ordered table drives both the dataframe and the plain SMILES list.
    smiles_by_id = {
        "valid_1": valid.smiles,
        "hetero_1": hetero.smiles,
        "plain_1": non_lactone,
    }
    dataframe = pd.DataFrame(
        [{"id": mol_id, "smiles": smiles} for mol_id, smiles in smiles_by_id.items()]
    )
    analyzer = MacroLactoneAnalyzer()

    summary = analyzer.analyze_many(list(smiles_by_id.values()))
    ring_size_groups, rejected = analyzer.classify_dataframe(dataframe)
    smarts_match = analyzer.match_dynamic_smarts(valid.smiles, ring_size=12)
    properties = analyzer.calculate_properties(valid.smiles)

    assert summary["total"] == 3
    for label in ("standard_macrolactone", "non_standard_macrocycle", "not_macrolactone"):
        assert summary["classification_counts"][label] == 1

    assert 12 in ring_size_groups
    assert list(ring_size_groups[12]["id"]) == ["valid_1"]
    assert set(rejected["classification"]) == {"non_standard_macrocycle", "not_macrolactone"}

    assert smarts_match is not None
    assert properties is not None
    assert {"molecular_weight", "logp", "qed", "tpsa"} <= set(properties)
|