# macrolactone-toolkit/tests/test_visualization_and_workflows.py

from __future__ import annotations

import json

import pandas as pd
import pytest

from macro_lactone_toolkit import MacroLactoneAnalyzer, MacrolactoneFragmenter

from .helpers import (
    build_ambiguous_smiles,
    build_macrolactone,
    build_non_standard_ring_atom_macrolactone,
)


def test_visualization_exports_numbered_svg_and_png(tmp_path):
    """Check that the visualization helpers emit SVG markup and non-empty PNG files.

    A molecule from ``build_macrolactone(16, {5: "methyl"})`` is fragmented, and
    its first fragment is used for the fragment PNG. Each PNG saver must return
    the path it was given, and the file must exist with a non-zero size.
    """
    from macro_lactone_toolkit.visualization import (
        numbered_molecule_svg,
        save_fragment_png,
        save_numbered_molecule_png,
    )

    built = build_macrolactone(16, {5: "methyl"})
    fragmentation = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="viz_1")
    fragment = fragmentation.fragments[0]

    svg_markup = numbered_molecule_svg(built.smiles)
    assert "<svg" in svg_markup

    # Both PNG savers share the same contract, so check them with one loop.
    png_jobs = (
        (save_numbered_molecule_png, built.smiles, "numbered.png"),
        (save_fragment_png, fragment.fragment_smiles_labeled, "fragment.png"),
    )
    for save_png, smiles, filename in png_jobs:
        target = tmp_path / filename
        returned = save_png(smiles, target)
        assert returned == target
        assert target.exists()
        assert target.stat().st_size > 0


def test_visualization_supports_allowed_ring_atom_type_filtering():
    """Check that ``allowed_ring_atom_types`` gates numbered SVG rendering.

    For the non-standard ring-atom fixture, allowing ``["C", "N"]`` must yield
    SVG markup, while allowing only ``["C"]`` must raise ``ValueError`` with a
    message matching "allowed ring atom types".
    """
    from macro_lactone_toolkit.visualization import numbered_molecule_svg

    hetero_smiles = build_non_standard_ring_atom_macrolactone().smiles

    permissive_svg = numbered_molecule_svg(hetero_smiles, allowed_ring_atom_types=["C", "N"])
    assert "<svg" in permissive_svg

    with pytest.raises(ValueError, match="allowed ring atom types"):
        numbered_molecule_svg(hetero_smiles, allowed_ring_atom_types=["C"])


def test_fragment_csv_and_results_to_dataframe(tmp_path):
    """Run the CSV fragmentation workflow end to end and check its outputs.

    Two molecules are written to a CSV, fragmented with ``fragment_csv``, and
    flattened with ``results_to_dataframe``. The test checks the parent ids,
    the minimum set of dataframe columns, and that ``write_result_json``
    returns its target path and writes a payload with a known ``parent_id``
    and a non-empty ``fragments`` entry.
    """
    from macro_lactone_toolkit.workflows import fragment_csv, results_to_dataframe, write_result_json

    rows = [
        {"id": "mol_14", "smiles": build_macrolactone(14, {4: "methyl"}).smiles},
        {"id": "mol_16", "smiles": build_macrolactone(16, {6: "ethyl"}).smiles},
    ]
    csv_file = tmp_path / "molecules.csv"
    pd.DataFrame(rows).to_csv(csv_file, index=False)

    fragmented = fragment_csv(str(csv_file))
    frame = results_to_dataframe(fragmented)

    expected_parent_ids = {"mol_14", "mol_16"}
    assert {item.parent_id for item in fragmented} == expected_parent_ids

    required_columns = {
        "parent_id",
        "parent_smiles",
        "ring_size",
        "fragment_id",
        "cleavage_position",
        "attachment_atom_idx",
        "fragment_smiles_labeled",
        "fragment_smiles_plain",
        "atom_count",
        "molecular_weight",
    }
    assert required_columns.issubset(frame.columns)

    json_file = tmp_path / "result.json"
    assert write_result_json(fragmented[0], json_file) == json_file

    payload = json.loads(json_file.read_text(encoding="utf-8"))
    assert payload["parent_id"] in expected_parent_ids
    assert payload["fragments"]


def test_fragment_csv_raises_for_invalid_or_ambiguous_rows(tmp_path):
    """Check that ``fragment_csv`` raises when the CSV contains an ambiguous row.

    The input has one row from ``build_macrolactone(14)`` and one from
    ``build_ambiguous_smiles()``. The raised message must match "ambiguous" or
    "Multiple valid macrolactone candidates".
    """
    from macro_lactone_toolkit.workflows import fragment_csv

    rows = [
        {"id": "valid_1", "smiles": build_macrolactone(14).smiles},
        {"id": "ambiguous_1", "smiles": build_ambiguous_smiles()},
    ]
    csv_file = tmp_path / "molecules.csv"
    pd.DataFrame(rows).to_csv(csv_file, index=False)

    expected_message = "ambiguous|Multiple valid macrolactone candidates"
    with pytest.raises(Exception, match=expected_message):
        fragment_csv(str(csv_file))


def test_export_numbered_macrolactone_csv_writes_status_and_images(tmp_path):
    """Check the numbered-image export workflow's CSV report and image files.

    With ring atom types ``["C", "N"]`` allowed, both input rows must be
    reported with status "success". Every ``image_path`` must be truthy and
    must exist when joined onto ``tmp_path``.
    """
    from macro_lactone_toolkit.workflows import export_numbered_macrolactone_csv

    rows = [
        {"id": "valid_1", "smiles": build_macrolactone(14).smiles},
        {"id": "hetero_1", "smiles": build_non_standard_ring_atom_macrolactone().smiles},
    ]
    csv_file = tmp_path / "molecules.csv"
    image_dir = tmp_path / "numbered"
    pd.DataFrame(rows).to_csv(csv_file, index=False)

    report_path = export_numbered_macrolactone_csv(
        str(csv_file),
        output_dir=image_dir,
        allowed_ring_atom_types=["C", "N"],
    )
    exported = pd.read_csv(report_path)

    required_columns = {
        "parent_id",
        "status",
        "image_path",
        "classification",
        "primary_reason_code",
        "ring_size",
    }
    assert required_columns.issubset(exported.columns)
    assert set(exported["parent_id"]) == {"valid_1", "hetero_1"}
    assert set(exported["status"]) == {"success"}

    for image_path in exported["image_path"]:
        assert image_path
        resolved = tmp_path / image_path
        assert resolved.exists()


def test_analyzer_bulk_helpers():
    """Exercise the bulk helpers on ``MacroLactoneAnalyzer`` with three molecules.

    The inputs are a ``build_macrolactone(12)`` molecule, the non-standard
    ring-atom fixture, and a plain 12-membered carbocycle. The test checks
    ``analyze_many`` counts, the ``classify_dataframe`` grouping and rejects,
    that ``match_dynamic_smarts`` returns something, and that
    ``calculate_properties`` exposes the expected keys.
    """
    valid_smiles = build_macrolactone(12).smiles
    hetero_smiles = build_non_standard_ring_atom_macrolactone().smiles
    carbocycle_smiles = "C1CCCCCCCCCCC1"

    records = [
        {"id": "valid_1", "smiles": valid_smiles},
        {"id": "hetero_1", "smiles": hetero_smiles},
        {"id": "plain_1", "smiles": carbocycle_smiles},
    ]
    frame = pd.DataFrame(records)
    analyzer = MacroLactoneAnalyzer()

    summary = analyzer.analyze_many([valid_smiles, hetero_smiles, carbocycle_smiles])
    groups_by_ring_size, rejected = analyzer.classify_dataframe(frame)
    smarts_match = analyzer.match_dynamic_smarts(valid_smiles, ring_size=12)
    properties = analyzer.calculate_properties(valid_smiles)

    assert summary["total"] == 3
    # Each input is expected to land in its own classification bucket.
    for label in ("standard_macrolactone", "non_standard_macrocycle", "not_macrolactone"):
        assert summary["classification_counts"][label] == 1

    assert 12 in groups_by_ring_size
    assert list(groups_by_ring_size[12]["id"]) == ["valid_1"]
    assert set(rejected["classification"]) == {"non_standard_macrocycle", "not_macrolactone"}

    assert smarts_match is not None
    assert properties is not None
    expected_property_keys = {"molecular_weight", "logp", "qed", "tpsa"}
    assert expected_property_keys.issubset(properties)