feat(validation): enforce single-anchor fragments
- skip fused/shared/multi-anchor side systems during extraction
- add fragment library schema and fragment_library.csv export
- make scaffold prep strict for non-spliceable positions
This commit is contained in:
57
tests/validation/test_validator.py
Normal file
57
tests/validation/test_validator.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from macro_lactone_toolkit.validation.validator import MacrolactoneValidator
|
||||
|
||||
from ..helpers import build_macrolactone_with_fused_side_ring
|
||||
|
||||
|
||||
def test_validator_exports_only_single_anchor_fragments_and_fragment_library(tmp_path):
    """Run the validator end to end on one molecule and check every export.

    The input molecule comes from ``build_macrolactone_with_fused_side_ring``
    with a single ``methyl`` side chain requested at position 10.  After
    ``MacrolactoneValidator.run`` the test expects:

    * the run summary to report exactly one successful molecule;
    * ``fragments.db`` to hold one row in ``side_chain_fragments`` and one
      matching row in ``fragment_library_entries``, both at cleavage
      position 10 with a single dummy atom;
    * ``summary.csv`` and ``fragment_library.csv`` to agree with the database.

    NOTE(review): going by the test and helper names, the fused side ring is
    presumably what must be skipped so that only the position-10 methyl is
    exported -- confirm against the helper's implementation.

    Args:
        tmp_path: pytest-provided temporary directory used for the input CSV
            and the validator's output directory.
    """
    # Imported locally so this test carries its own dependency.
    from contextlib import closing

    built = build_macrolactone_with_fused_side_ring(side_chains={10: "methyl"})
    input_path = tmp_path / "input.csv"
    output_dir = tmp_path / "validation_output"

    # One-row input table in the column layout the validator is fed here.
    pd.DataFrame(
        [
            {
                "ml_id": "ML00000001",
                "IDs": "CHEMBL0001",
                "smiles": built.smiles,
            }
        ]
    ).to_csv(input_path, index=False)

    validator = MacrolactoneValidator(output_dir=output_dir, sample_ratio=1.0)
    results = validator.run(input_path)

    assert results == {"total": 1, "success": 1, "failed": 0, "skipped": 0}

    # sqlite3's own context manager only commits/rolls back; it does not close
    # the connection.  ``closing`` guarantees the database file handle is
    # released before pytest tears down ``tmp_path``.  Both queries are
    # read-only, so no commit is required.
    with closing(sqlite3.connect(output_dir / "fragments.db")) as connection:
        fragments = connection.execute(
            "SELECT cleavage_position, has_dummy_atom, dummy_atom_count FROM side_chain_fragments"
        ).fetchall()
        library_entries = connection.execute(
            """
            SELECT source_type, source_parent_ml_id, source_parent_chembl_id,
            cleavage_position, has_dummy_atom, dummy_atom_count, splice_ready
            FROM fragment_library_entries
            """
        ).fetchall()

    # Exactly one fragment and one library entry, both anchored at position 10.
    assert fragments == [(10, 1, 1)]
    assert library_entries == [("validation_extract", "ML00000001", "CHEMBL0001", 10, 1, 1, 1)]

    summary = pd.read_csv(output_dir / "summary.csv")
    assert summary.loc[0, "num_sidechains"] == 1
    # ``cleavage_positions`` is stored as a JSON-encoded list in the CSV cell.
    assert json.loads(summary.loc[0, "cleavage_positions"]) == [10]

    # Values read back from CSV are coerced explicitly because pandas may
    # return NumPy scalar types rather than plain ``int``/``bool``.
    fragment_library = pd.read_csv(output_dir / "fragment_library.csv")
    assert fragment_library.loc[0, "source_type"] == "validation_extract"
    assert int(fragment_library.loc[0, "cleavage_position"]) == 10
    assert bool(fragment_library.loc[0, "has_dummy_atom"]) is True
    assert int(fragment_library.loc[0, "dummy_atom_count"]) == 1
    assert bool(fragment_library.loc[0, "splice_ready"]) is True
|
||||
Reference in New Issue
Block a user