"""Smoke tests for the command-line scripts and the active text assets.

Each script test runs a file from ``scripts/`` in a subprocess against a
small input written under ``tmp_path`` and checks the exit code and the
expected output artifacts.
"""

from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

import pandas as pd

from macro_lactone_toolkit import MacrolactoneFragmenter

from .helpers import build_ambiguous_smiles, build_macrolactone

PROJECT_ROOT = Path(__file__).resolve().parents[1]

# Text files that are scanned for references to the legacy API.
ACTIVE_TEXT_ASSETS = [
    PROJECT_ROOT / "scripts" / "README.md",
    PROJECT_ROOT / "docs" / "SUMMARY.md",
    PROJECT_ROOT / "docs" / "project-docs" / "QUICK_COMMANDS.md",
    PROJECT_ROOT / "notebooks" / "README_analyze_ring16.md",
]


def run_script(script_name: str, *args: str) -> subprocess.CompletedProcess[str]:
    """Run ``scripts/<script_name>`` with the current interpreter.

    Args:
        script_name: File name of the script inside ``PROJECT_ROOT / "scripts"``.
        *args: Command-line arguments forwarded to the script unchanged.

    Returns:
        The completed process with stdout/stderr captured as text. A non-zero
        exit code does not raise (``check=False``); callers assert on
        ``returncode`` themselves.
    """
    script_path = PROJECT_ROOT / "scripts" / script_name
    return subprocess.run(
        [sys.executable, str(script_path), *args],
        capture_output=True,
        text=True,
        check=False,
        cwd=PROJECT_ROOT,
    )


def _write_fragments_csv(
    tmp_path: Path,
    ring_size: int,
    substituents: dict[int, str],
    parent_id: str,
) -> Path:
    """Fragment one built macrolactone and write its fragments to a CSV.

    Shared by the fragment-consuming script tests so the input table is
    built the same way in each of them.

    Args:
        tmp_path: Directory in which ``fragments.csv`` is created.
        ring_size: First argument passed to ``build_macrolactone``.
        substituents: Second argument passed to ``build_macrolactone``
            (position -> substituent name in the callers below).
        parent_id: Identifier passed to ``fragment_molecule``.

    Returns:
        Path of the written ``fragments.csv``.
    """
    built = build_macrolactone(ring_size, substituents)
    result = MacrolactoneFragmenter().fragment_molecule(built.smiles, parent_id=parent_id)
    # One row per fragment: parent-level columns followed by the fragment's own fields.
    rows = [
        {
            "parent_id": result.parent_id,
            "parent_smiles": result.parent_smiles,
            "ring_size": result.ring_size,
            **fragment.to_dict(),
        }
        for fragment in result.fragments
    ]
    input_path = tmp_path / "fragments.csv"
    pd.DataFrame(rows).to_csv(input_path, index=False)
    return input_path


def test_batch_process_script_writes_flat_outputs_and_summary(tmp_path):
    """batch_process.py writes all three outputs and counts one success, one failure."""
    valid = build_macrolactone(14, {4: "methyl"})
    ambiguous = build_ambiguous_smiles()

    input_path = tmp_path / "molecules.csv"
    output_path = tmp_path / "fragments.csv"
    errors_path = tmp_path / "errors.csv"
    summary_path = tmp_path / "summary.json"

    pd.DataFrame(
        [
            {"id": "valid_1", "smiles": valid.smiles},
            {"id": "ambiguous_1", "smiles": ambiguous},
        ]
    ).to_csv(input_path, index=False)

    completed = run_script(
        "batch_process.py",
        "--input",
        str(input_path),
        "--output",
        str(output_path),
        "--errors-output",
        str(errors_path),
        "--summary-output",
        str(summary_path),
    )

    assert completed.returncode == 0, completed.stderr
    assert output_path.exists()
    assert errors_path.exists()
    assert summary_path.exists()

    summary = json.loads(summary_path.read_text(encoding="utf-8"))
    assert summary["processed"] == 2
    assert summary["successful"] == 1
    assert summary["failed"] == 1


def test_analyze_fragments_script_generates_reports_and_plot(tmp_path):
    """analyze_fragments.py produces its CSV reports, plot and text summary."""
    input_path = _write_fragments_csv(
        tmp_path, 16, {5: "methyl", 7: "ethyl"}, "analysis_1"
    )
    output_dir = tmp_path / "analysis"

    completed = run_script(
        "analyze_fragments.py",
        "--input",
        str(input_path),
        "--output-dir",
        str(output_dir),
    )

    assert completed.returncode == 0, completed.stderr
    assert (output_dir / "position_statistics.csv").exists()
    assert (output_dir / "fragment_property_summary.csv").exists()
    assert (output_dir / "position_frequencies.png").exists()
    assert (output_dir / "analysis_summary.txt").exists()


def test_generate_sdf_and_statistics_script_generates_artifacts(tmp_path):
    """generate_sdf_and_statistics.py writes the statistics JSON and the parent's SDF."""
    input_path = _write_fragments_csv(tmp_path, 16, {5: "methyl"}, "sdf_1")
    output_dir = tmp_path / "sdf_output"

    completed = run_script(
        "generate_sdf_and_statistics.py",
        "--input",
        str(input_path),
        "--output-dir",
        str(output_dir),
    )

    assert completed.returncode == 0, completed.stderr
    assert (output_dir / "cleavage_position_statistics.json").exists()
    assert (output_dir / "sdf" / "sdf_1_3d.sdf").exists()


def test_active_text_assets_do_not_reference_legacy_api():
    """None of the active text assets contains a legacy import or function call."""
    forbidden_patterns = [
        "from src.",
        "import src.",
        "process_csv(",
        "batch_to_dataframe(",
        "visualize_molecule(",
        "save_to_json(",
    ]

    for path in ACTIVE_TEXT_ASSETS:
        text = path.read_text(encoding="utf-8")
        for pattern in forbidden_patterns:
            assert pattern not in text, f"{path} still contains legacy reference: {pattern}"