from __future__ import annotations

import json
import sqlite3
from contextlib import closing

import pandas as pd

from macro_lactone_toolkit.validation.validator import MacrolactoneValidator

from ..helpers import build_macrolactone_with_fused_side_ring


def test_validator_exports_only_single_anchor_fragments_and_fragment_library(tmp_path):
    """Run the validator on one molecule and check every exported artifact.

    A single-row input CSV is built from the helper-generated molecule
    (requested with one ``"methyl"`` side chain at position 10), the
    validator is run over it with ``sample_ratio=1.0``, and the test then
    asserts on:

    * the summary dict returned by ``validator.run``;
    * the ``side_chain_fragments`` and ``fragment_library_entries`` tables
      in ``fragments.db``;
    * the first row of ``summary.csv``;
    * the first row of ``fragment_library.csv``.

    Args:
        tmp_path: pytest-provided temporary directory holding the input CSV
            and the validator's output directory.
    """
    # NOTE(review): presumably the helper also adds a fused side ring that
    # must NOT be exported as a fragment (per the helper/test names) --
    # confirm against ..helpers.
    built = build_macrolactone_with_fused_side_ring(side_chains={10: "methyl"})
    input_path = tmp_path / "input.csv"
    output_dir = tmp_path / "validation_output"

    pd.DataFrame(
        [
            {
                "ml_id": "ML00000001",
                "IDs": "CHEMBL0001",
                "smiles": built.smiles,
            }
        ]
    ).to_csv(input_path, index=False)

    validator = MacrolactoneValidator(output_dir=output_dir, sample_ratio=1.0)
    results = validator.run(input_path)

    assert results == {"total": 1, "success": 1, "failed": 0, "skipped": 0}

    # Adjacent literals concatenate to exactly the SQL text the original
    # triple-quoted string carried (including the leading/trailing space).
    library_query = (
        " SELECT source_type, source_parent_ml_id, source_parent_chembl_id,"
        " cleavage_position, has_dummy_atom, dummy_atom_count, splice_ready"
        " FROM fragment_library_entries "
    )

    # ``with sqlite3.connect(...)`` alone only commits/rolls back the
    # transaction and leaves the connection open; ``closing`` guarantees the
    # database file handle is released before the temp directory is removed.
    # Only SELECTs are issued here, so no commit is needed.
    with closing(sqlite3.connect(output_dir / "fragments.db")) as connection:
        fragments = connection.execute(
            "SELECT cleavage_position, has_dummy_atom, dummy_atom_count FROM side_chain_fragments"
        ).fetchall()
        library_entries = connection.execute(library_query).fetchall()

    # Exactly one fragment row: position 10, flagged with a single dummy atom.
    assert fragments == [(10, 1, 1)]
    assert library_entries == [("validation_extract", "ML00000001", "CHEMBL0001", 10, 1, 1, 1)]

    summary = pd.read_csv(output_dir / "summary.csv")
    assert summary.loc[0, "num_sidechains"] == 1
    # The CSV cell holds a JSON-encoded list, hence the json.loads round trip.
    assert json.loads(summary.loc[0, "cleavage_positions"]) == [10]

    fragment_library = pd.read_csv(output_dir / "fragment_library.csv")
    assert fragment_library.loc[0, "source_type"] == "validation_extract"
    # Cast pandas/NumPy scalars to builtins before comparing, so the
    # ``is True`` identity checks operate on real Python bools.
    assert int(fragment_library.loc[0, "cleavage_position"]) == 10
    assert bool(fragment_library.loc[0, "has_dummy_atom"]) is True
    assert int(fragment_library.loc[0, "dummy_atom_count"]) == 1
    assert bool(fragment_library.loc[0, "splice_ready"]) is True