Files
macrolactone-toolkit/tests/validation/test_validator.py
lingyuzeng 46a438dd36 feat(validation): enforce single-anchor fragments
- skip fused/shared/multi-anchor side systems during extraction
- add fragment library schema and fragment_library.csv export
- make scaffold prep strict for non-spliceable positions
2026-03-19 14:20:32 +08:00

58 lines
2.1 KiB
Python

from __future__ import annotations

import json
import sqlite3
from contextlib import closing

import pandas as pd

from macro_lactone_toolkit.validation.validator import MacrolactoneValidator

from ..helpers import build_macrolactone_with_fused_side_ring
def test_validator_exports_only_single_anchor_fragments_and_fragment_library(tmp_path):
    """Run the validator on one molecule and check every exported artifact.

    The input molecule comes from ``build_macrolactone_with_fused_side_ring``
    with a single ``methyl`` side chain requested at position 10.  After
    ``MacrolactoneValidator.run`` the test expects exactly one extracted
    fragment (cleavage position 10, one dummy atom) in ``fragments.db``, one
    matching ``fragment_library_entries`` row, and consistent first rows in
    ``summary.csv`` and ``fragment_library.csv``.

    NOTE(review): presumably the fused side ring built by the helper is the
    part that must *not* be exported as a fragment -- confirm against the
    helper and the validator implementation.

    Args:
        tmp_path: pytest-provided temporary directory used for the input CSV
            and the validator output directory.
    """
    built = build_macrolactone_with_fused_side_ring(side_chains={10: "methyl"})
    input_path = tmp_path / "input.csv"
    output_dir = tmp_path / "validation_output"

    # Single-row input table consumed by the validator.
    pd.DataFrame(
        [
            {
                "ml_id": "ML00000001",
                "IDs": "CHEMBL0001",
                "smiles": built.smiles,
            }
        ]
    ).to_csv(input_path, index=False)

    validator = MacrolactoneValidator(output_dir=output_dir, sample_ratio=1.0)
    results = validator.run(input_path)

    assert results == {"total": 1, "success": 1, "failed": 0, "skipped": 0}

    # sqlite3's own context manager only commits/rolls back the transaction;
    # it does not close the connection.  ``closing`` guarantees the handle on
    # fragments.db is released even if a query raises.
    with closing(sqlite3.connect(output_dir / "fragments.db")) as connection:
        fragments = connection.execute(
            "SELECT cleavage_position, has_dummy_atom, dummy_atom_count FROM side_chain_fragments"
        ).fetchall()
        library_entries = connection.execute(
            """
            SELECT source_type, source_parent_ml_id, source_parent_chembl_id,
            cleavage_position, has_dummy_atom, dummy_atom_count, splice_ready
            FROM fragment_library_entries
            """
        ).fetchall()

    assert fragments == [(10, 1, 1)]
    assert library_entries == [("validation_extract", "ML00000001", "CHEMBL0001", 10, 1, 1, 1)]

    summary = pd.read_csv(output_dir / "summary.csv")
    assert summary.loc[0, "num_sidechains"] == 1
    # cleavage_positions is stored in the CSV as a JSON-encoded list.
    assert json.loads(summary.loc[0, "cleavage_positions"]) == [10]

    fragment_library = pd.read_csv(output_dir / "fragment_library.csv")
    assert fragment_library.loc[0, "source_type"] == "validation_extract"
    # Explicit int()/bool() casts normalise pandas/numpy scalar types before
    # comparing against plain Python values.
    assert int(fragment_library.loc[0, "cleavage_position"]) == 10
    assert bool(fragment_library.loc[0, "has_dummy_atom"]) is True
    assert int(fragment_library.loc[0, "dummy_atom_count"]) == 1
    assert bool(fragment_library.loc[0, "splice_ready"]) is True