from __future__ import annotations import json import subprocess import sys import pandas as pd from .helpers import ( build_ambiguous_smiles, build_macrolactone, build_non_standard_ring_atom_macrolactone, build_overlapping_candidate_macrolactone, ) def run_cli(*args: str) -> subprocess.CompletedProcess[str]: return subprocess.run( [sys.executable, "-m", "macro_lactone_toolkit.cli", *args], capture_output=True, text=True, check=False, ) def test_cli_smoke_commands(): built = build_macrolactone(16, {5: "methyl"}) analyze = run_cli("analyze", "--smiles", built.smiles) assert analyze.returncode == 0, analyze.stderr analyze_payload = json.loads(analyze.stdout) assert analyze_payload["classification"] == "standard_macrolactone" assert analyze_payload["ring_size"] == 16 assert analyze_payload["primary_reason_code"] is None assert analyze_payload["candidate_ring_sizes"] == [16] number = run_cli("number", "--smiles", built.smiles) assert number.returncode == 0, number.stderr number_payload = json.loads(number.stdout) assert number_payload["ring_size"] == 16 assert number_payload["position_to_atom"]["1"] >= 0 fragment = run_cli("fragment", "--smiles", built.smiles, "--parent-id", "cli_1") assert fragment.returncode == 0, fragment.stderr fragment_payload = json.loads(fragment.stdout) assert fragment_payload["parent_id"] == "cli_1" assert fragment_payload["ring_size"] == 16 assert fragment_payload["fragments"][0]["fragment_smiles_labeled"] def test_cli_analyze_reports_non_standard_classifications(): hetero = build_non_standard_ring_atom_macrolactone() overlap = build_overlapping_candidate_macrolactone() hetero_result = run_cli("analyze", "--smiles", hetero.smiles) assert hetero_result.returncode == 0, hetero_result.stderr hetero_payload = json.loads(hetero_result.stdout) assert hetero_payload["classification"] == "non_standard_macrocycle" assert hetero_payload["primary_reason_code"] == "contains_non_carbon_ring_atoms_outside_positions_1_2" assert hetero_payload["ring_size"] == 16 overlap_result = run_cli("analyze", "--smiles", overlap.smiles) assert overlap_result.returncode == 0, overlap_result.stderr overlap_payload = json.loads(overlap_result.stdout) assert overlap_payload["classification"] == "non_standard_macrocycle" assert overlap_payload["primary_reason_code"] == "multiple_overlapping_macrocycle_candidates" assert overlap_payload["ring_size"] == 12 def test_cli_analyze_csv_reports_classification_fields(tmp_path): valid = build_macrolactone(14) hetero = build_non_standard_ring_atom_macrolactone() input_path = tmp_path / "molecules.csv" output_path = tmp_path / "analysis.csv" pd.DataFrame( [ {"id": "valid_1", "smiles": valid.smiles}, {"id": "hetero_1", "smiles": hetero.smiles}, ] ).to_csv(input_path, index=False) completed = run_cli( "analyze", "--input", str(input_path), "--output", str(output_path), ) assert completed.returncode == 0, completed.stderr analysis = pd.read_csv(output_path) assert set(analysis["parent_id"]) == {"valid_1", "hetero_1"} assert set(analysis["classification"]) == {"standard_macrolactone", "non_standard_macrocycle"} assert "primary_reason_code" in analysis.columns assert "ring_size" in analysis.columns def test_cli_fragment_csv_skips_ambiguous_and_records_errors(tmp_path): valid = build_macrolactone(14, {4: "methyl"}) ambiguous = build_ambiguous_smiles() input_path = tmp_path / "molecules.csv" output_path = tmp_path / "fragments.csv" errors_path = tmp_path / "errors.csv" pd.DataFrame( [ {"id": "valid_1", "smiles": valid.smiles}, {"id": "ambiguous_1", "smiles": ambiguous}, ] ).to_csv(input_path, index=False) completed = run_cli( "fragment", "--input", str(input_path), "--output", str(output_path), "--errors-output", str(errors_path), ) assert completed.returncode == 0, completed.stderr fragments = pd.read_csv(output_path) errors = pd.read_csv(errors_path) assert set(fragments["parent_id"]) == {"valid_1"} assert errors.loc[0, "parent_id"] == "ambiguous_1" assert errors.loc[0, "error_type"] == "AmbiguousMacrolactoneError"