feat(toolkit): add classification and migration

Implement the standard/non-standard/not-macrolactone classification layer and integrate it into the analyzer, fragmenter, and CLI outputs. Port the remaining legacy package capabilities into the new visualization and workflow modules, restore the batch, statistics, and SDF scripts on top of the flat CSV workflow, and update the active docs to use the new package API.
2026-03-18 23:56:41 +08:00
parent 9ccbcfcd04
commit c0ead42384
24 changed files with 1497 additions and 313 deletions
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -16,11 +16,13 @@ class BuiltMacrolactone:
 def build_macrolactone(
    ring_size: int,
    side_chains: Mapping[int, str] | None = None,
+    ring_atom_symbols: Mapping[int, str] | None = None,
 ) -> BuiltMacrolactone:
    if not 12 <= ring_size <= 20:
        raise ValueError("ring_size must be between 12 and 20")

    side_chains = dict(side_chains or {})
+    ring_atom_symbols = dict(ring_atom_symbols or {})
    rwmol = Chem.RWMol()

    position_to_atom: dict[int, int] = {
@@ -28,7 +30,7 @@ def build_macrolactone(
        2: rwmol.AddAtom(Chem.Atom("O")),
    }
    for position in range(3, ring_size + 1):
-        position_to_atom[position] = rwmol.AddAtom(Chem.Atom("C"))
+        position_to_atom[position] = rwmol.AddAtom(Chem.Atom(ring_atom_symbols.get(position, "C")))

    carbonyl_oxygen_idx = rwmol.AddAtom(Chem.Atom("O"))

@@ -63,6 +65,109 @@ def build_ambiguous_smiles() -> str:
    return Chem.MolToSmiles(combined, isomericSmiles=True)


+def build_non_standard_ring_atom_macrolactone(
+    ring_size: int = 16,
+    hetero_position: int = 5,
+    atom_symbol: str = "N",
+) -> BuiltMacrolactone:
+    if hetero_position < 3 or hetero_position > ring_size:
+        raise ValueError("hetero_position must be between 3 and ring_size")
+    return build_macrolactone(
+        ring_size=ring_size,
+        ring_atom_symbols={hetero_position: atom_symbol},
+    )
+
+
+def build_overlapping_candidate_macrolactone() -> BuiltMacrolactone:
+    rwmol = Chem.RWMol()
+
+    atom_labels = (
+        "A1",
+        "A2",
+        "S1",
+        "S2",
+        "S3",
+        "S4",
+        "A5",
+        "A6",
+        "A7",
+        "A8",
+        "A9",
+        "A10",
+        "B1",
+        "B2",
+        "B5",
+        "B6",
+        "B7",
+        "B8",
+        "B9",
+        "B10",
+        "AO",
+        "BO",
+    )
+    atom_symbols = {
+        "A1": "C",
+        "A2": "O",
+        "S1": "C",
+        "S2": "C",
+        "S3": "C",
+        "S4": "C",
+        "A5": "C",
+        "A6": "C",
+        "A7": "C",
+        "A8": "C",
+        "A9": "C",
+        "A10": "C",
+        "B1": "C",
+        "B2": "O",
+        "B5": "C",
+        "B6": "C",
+        "B7": "C",
+        "B8": "C",
+        "B9": "C",
+        "B10": "C",
+        "AO": "O",
+        "BO": "O",
+    }
+    atoms = {label: rwmol.AddAtom(Chem.Atom(atom_symbols[label])) for label in atom_labels}
+
+    for atom_a, atom_b in (
+        ("A1", "A2"),
+        ("A2", "S1"),
+        ("S1", "S2"),
+        ("S2", "S3"),
+        ("S3", "S4"),
+        ("S4", "A5"),
+        ("A5", "A6"),
+        ("A6", "A7"),
+        ("A7", "A8"),
+        ("A8", "A9"),
+        ("A9", "A10"),
+        ("A10", "A1"),
+        ("B1", "B2"),
+        ("B2", "S1"),
+        ("S4", "B5"),
+        ("B5", "B6"),
+        ("B6", "B7"),
+        ("B7", "B8"),
+        ("B8", "B9"),
+        ("B9", "B10"),
+        ("B10", "B1"),
+    ):
+        rwmol.AddBond(atoms[atom_a], atoms[atom_b], Chem.BondType.SINGLE)
+
+    rwmol.AddBond(atoms["A1"], atoms["AO"], Chem.BondType.DOUBLE)
+    rwmol.AddBond(atoms["B1"], atoms["BO"], Chem.BondType.DOUBLE)
+
+    mol = rwmol.GetMol()
+    Chem.SanitizeMol(mol)
+    return BuiltMacrolactone(
+        mol=mol,
+        smiles=Chem.MolToSmiles(mol, isomericSmiles=True),
+        position_to_atom={},
+    )
+
+
 def canonicalize(smiles_or_mol: str | Chem.Mol) -> str:
    if isinstance(smiles_or_mol, Chem.Mol):
        mol = smiles_or_mol
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -6,7 +6,12 @@ import sys

 import pandas as pd

-from .helpers import build_ambiguous_smiles, build_macrolactone
+from .helpers import (
+    build_ambiguous_smiles,
+    build_macrolactone,
+    build_non_standard_ring_atom_macrolactone,
+    build_overlapping_candidate_macrolactone,
+)


 def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
@@ -24,7 +29,10 @@ def test_cli_smoke_commands():
    analyze = run_cli("analyze", "--smiles", built.smiles)
    assert analyze.returncode == 0, analyze.stderr
    analyze_payload = json.loads(analyze.stdout)
-    assert analyze_payload["valid_ring_sizes"] == [16]
+    assert analyze_payload["classification"] == "standard_macrolactone"
+    assert analyze_payload["ring_size"] == 16
+    assert analyze_payload["primary_reason_code"] is None
+    assert analyze_payload["candidate_ring_sizes"] == [16]

    number = run_cli("number", "--smiles", built.smiles)
    assert number.returncode == 0, number.stderr
@@ -40,6 +48,55 @@ def test_cli_smoke_commands():
    assert fragment_payload["fragments"][0]["fragment_smiles_labeled"]


+def test_cli_analyze_reports_non_standard_classifications():
+    hetero = build_non_standard_ring_atom_macrolactone()
+    overlap = build_overlapping_candidate_macrolactone()
+
+    hetero_result = run_cli("analyze", "--smiles", hetero.smiles)
+    assert hetero_result.returncode == 0, hetero_result.stderr
+    hetero_payload = json.loads(hetero_result.stdout)
+    assert hetero_payload["classification"] == "non_standard_macrocycle"
+    assert hetero_payload["primary_reason_code"] == "contains_non_carbon_ring_atoms_outside_positions_1_2"
+    assert hetero_payload["ring_size"] == 16
+
+    overlap_result = run_cli("analyze", "--smiles", overlap.smiles)
+    assert overlap_result.returncode == 0, overlap_result.stderr
+    overlap_payload = json.loads(overlap_result.stdout)
+    assert overlap_payload["classification"] == "non_standard_macrocycle"
+    assert overlap_payload["primary_reason_code"] == "multiple_overlapping_macrocycle_candidates"
+    assert overlap_payload["ring_size"] == 12
+
+
+def test_cli_analyze_csv_reports_classification_fields(tmp_path):
+    valid = build_macrolactone(14)
+    hetero = build_non_standard_ring_atom_macrolactone()
+    input_path = tmp_path / "molecules.csv"
+    output_path = tmp_path / "analysis.csv"
+
+    pd.DataFrame(
+        [
+            {"id": "valid_1", "smiles": valid.smiles},
+            {"id": "hetero_1", "smiles": hetero.smiles},
+        ]
+    ).to_csv(input_path, index=False)
+
+    completed = run_cli(
+        "analyze",
+        "--input",
+        str(input_path),
+        "--output",
+        str(output_path),
+    )
+
+    assert completed.returncode == 0, completed.stderr
+
+    analysis = pd.read_csv(output_path)
+    assert set(analysis["parent_id"]) == {"valid_1", "hetero_1"}
+    assert set(analysis["classification"]) == {"standard_macrolactone", "non_standard_macrocycle"}
+    assert "primary_reason_code" in analysis.columns
+    assert "ring_size" in analysis.columns
+
+
 def test_cli_fragment_csv_skips_ambiguous_and_records_errors(tmp_path):
    valid = build_macrolactone(14, {4: "methyl"})
    ambiguous = build_ambiguous_smiles()
--- a/tests/test_detection_and_numbering.py
+++ b/tests/test_detection_and_numbering.py
@@ -8,7 +8,12 @@ from macro_lactone_toolkit import (
    MacrolactoneFragmenter,
 )

-from .helpers import build_ambiguous_smiles, build_macrolactone
+from .helpers import (
+    build_ambiguous_smiles,
+    build_macrolactone,
+    build_non_standard_ring_atom_macrolactone,
+    build_overlapping_candidate_macrolactone,
+)


@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
@@ -25,6 +30,77 @@ def test_analyzer_rejects_non_lactone_macrocycle():
    assert analyzer.get_valid_ring_sizes("C1CCCCCCCCCCC1") == []


+@pytest.mark.parametrize("ring_size", [12, 14, 16, 20])
+def test_analyzer_classifies_supported_ring_sizes(ring_size: int):
+    built = build_macrolactone(ring_size)
+    analyzer = MacroLactoneAnalyzer()
+
+    result = analyzer.classify_macrocycle(built.smiles)
+
+    assert result.classification == "standard_macrolactone"
+    assert result.ring_size == ring_size
+    assert result.primary_reason_code is None
+    assert result.primary_reason_message is None
+    assert result.all_reason_codes == []
+    assert result.all_reason_messages == []
+    assert result.candidate_ring_sizes == [ring_size]
+
+
+def test_analyzer_classifies_ring_heteroatom_as_non_standard():
+    built = build_non_standard_ring_atom_macrolactone()
+    analyzer = MacroLactoneAnalyzer()
+
+    result = analyzer.classify_macrocycle(built.smiles)
+
+    assert result.classification == "non_standard_macrocycle"
+    assert result.ring_size == 16
+    assert result.primary_reason_code == "contains_non_carbon_ring_atoms_outside_positions_1_2"
+    assert result.primary_reason_message == "Ring positions 3..N contain non-carbon atoms."
+    assert result.all_reason_codes == ["contains_non_carbon_ring_atoms_outside_positions_1_2"]
+    assert result.candidate_ring_sizes == [16]
+
+
+def test_analyzer_classifies_overlapping_candidates_as_non_standard():
+    built = build_overlapping_candidate_macrolactone()
+    analyzer = MacroLactoneAnalyzer()
+
+    result = analyzer.classify_macrocycle(built.smiles)
+
+    assert result.classification == "non_standard_macrocycle"
+    assert result.ring_size == 12
+    assert result.primary_reason_code == "multiple_overlapping_macrocycle_candidates"
+    assert result.primary_reason_message == "Overlapping macrolactone candidate rings were detected."
+    assert result.all_reason_codes == ["multiple_overlapping_macrocycle_candidates"]
+    assert result.candidate_ring_sizes == [12]
+
+
+def test_analyzer_classifies_non_lactone_macrocycle():
+    analyzer = MacroLactoneAnalyzer()
+
+    result = analyzer.classify_macrocycle("C1CCCCCCCCCCC1")
+
+    assert result.classification == "not_macrolactone"
+    assert result.ring_size is None
+    assert result.primary_reason_code == "no_lactone_ring_in_12_to_20_range"
+    assert result.primary_reason_message == "No 12-20 membered lactone ring was detected."
+    assert result.all_reason_codes == ["no_lactone_ring_in_12_to_20_range"]
+    assert result.candidate_ring_sizes == []
+
+
+def test_analyzer_explicit_ring_size_miss_returns_requested_ring_not_found():
+    built = build_macrolactone(12)
+    analyzer = MacroLactoneAnalyzer()
+
+    result = analyzer.classify_macrocycle(built.smiles, ring_size=16)
+
+    assert result.classification == "not_macrolactone"
+    assert result.ring_size is None
+    assert result.primary_reason_code == "requested_ring_size_not_found"
+    assert result.primary_reason_message == "The requested ring size was not detected as a lactone ring."
+    assert result.all_reason_codes == ["requested_ring_size_not_found"]
+    assert result.candidate_ring_sizes == []
+
+
 def test_fragmenter_auto_numbers_ring_with_expected_positions():
    built = build_macrolactone(16, {5: "methyl"})
    result = MacrolactoneFragmenter().number_molecule(built.mol)
@@ -55,10 +131,35 @@ def test_fragmenter_requires_explicit_ring_size_for_ambiguous_molecule():


 def test_fragmenter_raises_for_missing_macrolactone():
-    with pytest.raises(MacrolactoneDetectionError):
+    with pytest.raises(
+        MacrolactoneDetectionError,
+        match="classification=not_macrolactone primary_reason_code=no_lactone_ring_in_12_to_20_range",
+    ):
        MacrolactoneFragmenter().number_molecule("CCO")


+def test_fragmenter_rejects_non_standard_macrocycle_with_reason_code():
+    built = build_non_standard_ring_atom_macrolactone()
+
+    with pytest.raises(
+        MacrolactoneDetectionError,
+        match="classification=non_standard_macrocycle "
+        "primary_reason_code=contains_non_carbon_ring_atoms_outside_positions_1_2",
+    ):
+        MacrolactoneFragmenter().number_molecule(built.smiles)
+
+
+def test_fragmenter_rejects_non_standard_macrocycle_during_fragmentation():
+    built = build_overlapping_candidate_macrolactone()
+
+    with pytest.raises(
+        MacrolactoneDetectionError,
+        match="classification=non_standard_macrocycle "
+        "primary_reason_code=multiple_overlapping_macrocycle_candidates",
+    ):
+        MacrolactoneFragmenter().fragment_molecule(built.smiles)
+
+
 def test_explicit_ring_size_selects_requested_ring():
    built = build_macrolactone(14)
    result = MacrolactoneFragmenter(ring_size=14).number_molecule(built.smiles)
--- a/tests/test_scripts_and_docs.py
+++ b/tests/test_scripts_and_docs.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+import pandas as pd
+
+from macro_lactone_toolkit import MacrolactoneFragmenter
+
+from .helpers import build_ambiguous_smiles, build_macrolactone
+
+
+PROJECT_ROOT = Path(__file__).resolve().parents[1]
+ACTIVE_TEXT_ASSETS = [
+    PROJECT_ROOT / "scripts" / "README.md",
+    PROJECT_ROOT / "docs" / "SUMMARY.md",
+    PROJECT_ROOT / "docs" / "project-docs" / "QUICK_COMMANDS.md",
+    PROJECT_ROOT / "notebooks" / "README_analyze_ring16.md",
+]
+
+
+def run_script(script_name: str, *args: str) -> subprocess.CompletedProcess[str]:
+    return subprocess.run(
+        [sys.executable, str(PROJECT_ROOT / "scripts" / script_name), *args],
+        capture_output=True,
+        text=True,
+        check=False,
+        cwd=PROJECT_ROOT,
+    )
+
+
+def test_batch_process_script_writes_flat_outputs_and_summary(tmp_path):
+    valid = build_macrolactone(14, {4: "methyl"})
+    ambiguous = build_ambiguous_smiles()
+    input_path = tmp_path / "molecules.csv"
+    output_path = tmp_path / "fragments.csv"
+    errors_path = tmp_path / "errors.csv"
+    summary_path = tmp_path / "summary.json"
+
+    pd.DataFrame(
+        [
+            {"id": "valid_1", "smiles": valid.smiles},
+            {"id": "ambiguous_1", "smiles": ambiguous},
+        ]
+    ).to_csv(input_path, index=False)
+
+    completed = run_script(
+        "batch_process.py",
+        "--input",
+        str(input_path),
+        "--output",
+        str(output_path),
+        "--errors-output",
+        str(errors_path),
+        "--summary-output",
+        str(summary_path),
+    )
+
+    assert completed.returncode == 0, completed.stderr
+    assert output_path.exists()
+    assert errors_path.exists()
+    assert summary_path.exists()
+
+    summary = json.loads(summary_path.read_text(encoding="utf-8"))
+    assert summary["processed"] == 2
+    assert summary["successful"] == 1
+    assert summary["failed"] == 1
+
+
+def test_analyze_fragments_script_generates_reports_and_plot(tmp_path):
+    built = build_macrolactone(16, {5: "methyl", 7: "ethyl"})
+    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="analysis_1")
+    fragments = pd.DataFrame(
+        [
+            {
+                "parent_id": result.parent_id,
+                "parent_smiles": result.parent_smiles,
+                "ring_size": result.ring_size,
+                **fragment.to_dict(),
+            }
+            for fragment in result.fragments
+        ]
+    )
+    input_path = tmp_path / "fragments.csv"
+    output_dir = tmp_path / "analysis"
+    fragments.to_csv(input_path, index=False)
+
+    completed = run_script(
+        "analyze_fragments.py",
+        "--input",
+        str(input_path),
+        "--output-dir",
+        str(output_dir),
+    )
+
+    assert completed.returncode == 0, completed.stderr
+    assert (output_dir / "position_statistics.csv").exists()
+    assert (output_dir / "fragment_property_summary.csv").exists()
+    assert (output_dir / "position_frequencies.png").exists()
+    assert (output_dir / "analysis_summary.txt").exists()
+
+
+def test_generate_sdf_and_statistics_script_generates_artifacts(tmp_path):
+    built = build_macrolactone(16, {5: "methyl"})
+    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="sdf_1")
+    fragments = pd.DataFrame(
+        [
+            {
+                "parent_id": result.parent_id,
+                "parent_smiles": result.parent_smiles,
+                "ring_size": result.ring_size,
+                **fragment.to_dict(),
+            }
+            for fragment in result.fragments
+        ]
+    )
+    input_path = tmp_path / "fragments.csv"
+    output_dir = tmp_path / "sdf_output"
+    fragments.to_csv(input_path, index=False)
+
+    completed = run_script(
+        "generate_sdf_and_statistics.py",
+        "--input",
+        str(input_path),
+        "--output-dir",
+        str(output_dir),
+    )
+
+    assert completed.returncode == 0, completed.stderr
+    assert (output_dir / "cleavage_position_statistics.json").exists()
+    assert (output_dir / "sdf" / "sdf_1_3d.sdf").exists()
+
+
+def test_active_text_assets_do_not_reference_legacy_api():
+    forbidden_patterns = [
+        "from src.",
+        "import src.",
+        "process_csv(",
+        "batch_to_dataframe(",
+        "visualize_molecule(",
+        "save_to_json(",
+    ]
+
+    for path in ACTIVE_TEXT_ASSETS:
+        text = path.read_text(encoding="utf-8")
+        for pattern in forbidden_patterns:
+            assert pattern not in text, f"{path} still contains legacy reference: {pattern}"
--- a/tests/test_visualization_and_workflows.py
+++ b/tests/test_visualization_and_workflows.py
@@ -0,0 +1,171 @@
+from __future__ import annotations
+
+import json
+
+import pandas as pd
+import pytest
+
+from macro_lactone_toolkit import MacroLactoneAnalyzer, MacrolactoneFragmenter
+
+from .helpers import (
+    build_ambiguous_smiles,
+    build_macrolactone,
+    build_non_standard_ring_atom_macrolactone,
+)
+
+
+def test_visualization_exports_numbered_svg_and_png(tmp_path):
+    from macro_lactone_toolkit.visualization import (
+        numbered_molecule_svg,
+        save_fragment_png,
+        save_numbered_molecule_png,
+    )
+
+    built = build_macrolactone(16, {5: "methyl"})
+    fragment = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id="viz_1").fragments[0]
+
+    svg = numbered_molecule_svg(built.smiles)
+    assert "<svg" in svg
+
+    numbered_path = tmp_path / "numbered.png"
+    returned_numbered_path = save_numbered_molecule_png(built.smiles, numbered_path)
+    assert returned_numbered_path == numbered_path
+    assert numbered_path.exists()
+    assert numbered_path.stat().st_size > 0
+
+    fragment_path = tmp_path / "fragment.png"
+    returned_fragment_path = save_fragment_png(fragment.fragment_smiles_labeled, fragment_path)
+    assert returned_fragment_path == fragment_path
+    assert fragment_path.exists()
+    assert fragment_path.stat().st_size > 0
+
+
+def test_visualization_supports_allowed_ring_atom_type_filtering():
+    from macro_lactone_toolkit.visualization import numbered_molecule_svg
+
+    hetero = build_non_standard_ring_atom_macrolactone()
+
+    svg = numbered_molecule_svg(hetero.smiles, allowed_ring_atom_types=["C", "N"])
+    assert "<svg" in svg
+
+    with pytest.raises(ValueError, match="allowed ring atom types"):
+        numbered_molecule_svg(hetero.smiles, allowed_ring_atom_types=["C"])
+
+
+def test_fragment_csv_and_results_to_dataframe(tmp_path):
+    from macro_lactone_toolkit.workflows import fragment_csv, results_to_dataframe, write_result_json
+
+    valid_14 = build_macrolactone(14, {4: "methyl"})
+    valid_16 = build_macrolactone(16, {6: "ethyl"})
+    input_path = tmp_path / "molecules.csv"
+
+    pd.DataFrame(
+        [
+            {"id": "mol_14", "smiles": valid_14.smiles},
+            {"id": "mol_16", "smiles": valid_16.smiles},
+        ]
+    ).to_csv(input_path, index=False)
+
+    results = fragment_csv(str(input_path))
+    dataframe = results_to_dataframe(results)
+
+    assert {result.parent_id for result in results} == {"mol_14", "mol_16"}
+    assert {
+        "parent_id",
+        "parent_smiles",
+        "ring_size",
+        "fragment_id",
+        "cleavage_position",
+        "attachment_atom_idx",
+        "fragment_smiles_labeled",
+        "fragment_smiles_plain",
+        "atom_count",
+        "molecular_weight",
+    }.issubset(dataframe.columns)
+
+    json_path = tmp_path / "result.json"
+    returned_json_path = write_result_json(results[0], json_path)
+    assert returned_json_path == json_path
+    payload = json.loads(json_path.read_text(encoding="utf-8"))
+    assert payload["parent_id"] in {"mol_14", "mol_16"}
+    assert payload["fragments"]
+
+
+def test_fragment_csv_raises_for_invalid_or_ambiguous_rows(tmp_path):
+    from macro_lactone_toolkit.workflows import fragment_csv
+
+    valid = build_macrolactone(14)
+    input_path = tmp_path / "molecules.csv"
+
+    pd.DataFrame(
+        [
+            {"id": "valid_1", "smiles": valid.smiles},
+            {"id": "ambiguous_1", "smiles": build_ambiguous_smiles()},
+        ]
+    ).to_csv(input_path, index=False)
+
+    with pytest.raises(Exception, match="ambiguous|Multiple valid macrolactone candidates"):
+        fragment_csv(str(input_path))
+
+
+def test_export_numbered_macrolactone_csv_writes_status_and_images(tmp_path):
+    from macro_lactone_toolkit.workflows import export_numbered_macrolactone_csv
+
+    valid = build_macrolactone(14)
+    hetero = build_non_standard_ring_atom_macrolactone()
+    input_path = tmp_path / "molecules.csv"
+    output_dir = tmp_path / "numbered"
+
+    pd.DataFrame(
+        [
+            {"id": "valid_1", "smiles": valid.smiles},
+            {"id": "hetero_1", "smiles": hetero.smiles},
+        ]
+    ).to_csv(input_path, index=False)
+
+    csv_path = export_numbered_macrolactone_csv(
+        str(input_path),
+        output_dir=output_dir,
+        allowed_ring_atom_types=["C", "N"],
+    )
+
+    exported = pd.read_csv(csv_path)
+    assert {"parent_id", "status", "image_path", "classification", "primary_reason_code", "ring_size"}.issubset(
+        exported.columns
+    )
+    assert set(exported["parent_id"]) == {"valid_1", "hetero_1"}
+    assert set(exported["status"]) == {"success"}
+
+    for image_path in exported["image_path"]:
+        assert image_path
+        assert (tmp_path / image_path).exists()
+
+
+def test_analyzer_bulk_helpers():
+    valid = build_macrolactone(12)
+    hetero = build_non_standard_ring_atom_macrolactone()
+    non_lactone = "C1CCCCCCCCCCC1"
+    dataframe = pd.DataFrame(
+        [
+            {"id": "valid_1", "smiles": valid.smiles},
+            {"id": "hetero_1", "smiles": hetero.smiles},
+            {"id": "plain_1", "smiles": non_lactone},
+        ]
+    )
+    analyzer = MacroLactoneAnalyzer()
+
+    summary = analyzer.analyze_many([valid.smiles, hetero.smiles, non_lactone])
+    ring_size_groups, rejected = analyzer.classify_dataframe(dataframe)
+    smarts_match = analyzer.match_dynamic_smarts(valid.smiles, ring_size=12)
+    properties = analyzer.calculate_properties(valid.smiles)
+
+    assert summary["total"] == 3
+    assert summary["classification_counts"]["standard_macrolactone"] == 1
+    assert summary["classification_counts"]["non_standard_macrocycle"] == 1
+    assert summary["classification_counts"]["not_macrolactone"] == 1
+    assert 12 in ring_size_groups
+    assert list(ring_size_groups[12]["id"]) == ["valid_1"]
+    assert set(rejected["classification"]) == {"non_standard_macrocycle", "not_macrolactone"}
+    assert smarts_match is not None
+    assert properties is not None
+    assert {"molecular_weight", "logp", "qed", "tpsa"}.issubset(properties)