# - skip fused/shared/multi-anchor side systems during extraction
# - add fragment library schema and fragment_library.csv export
# - make scaffold prep strict for non-spliceable positions
58 lines
2.1 KiB
Python
58 lines
2.1 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import sqlite3
|
|
|
|
import pandas as pd
|
|
|
|
from macro_lactone_toolkit.validation.validator import MacrolactoneValidator
|
|
|
|
from ..helpers import build_macrolactone_with_fused_side_ring
|
|
|
|
|
|
def test_validator_exports_only_single_anchor_fragments_and_fragment_library(tmp_path):
    """Check the validator's fragment exports for a single-row input.

    The fixture molecule comes from ``build_macrolactone_with_fused_side_ring``
    with a ``"methyl"`` side chain requested at position 10. It is written to
    a one-row CSV and passed through ``MacrolactoneValidator.run``. The test
    then asserts that:

    * the run reports one successful molecule and nothing failed or skipped;
    * ``fragments.db`` holds exactly one ``side_chain_fragments`` row and one
      ``fragment_library_entries`` row, both at cleavage position 10 with a
      single dummy atom;
    * ``summary.csv`` and ``fragment_library.csv`` agree with the database.

    NOTE(review): judging by the helper's name, the fixture also carries a
    fused side ring that extraction is expected to skip, which is why only
    position 10 shows up -- confirm against the helper's implementation.

    Args:
        tmp_path: pytest-provided temporary directory used for the input CSV
            and the validator's output directory.
    """
    built = build_macrolactone_with_fused_side_ring(side_chains={10: "methyl"})
    input_path = tmp_path / "input.csv"
    output_dir = tmp_path / "validation_output"

    pd.DataFrame(
        [
            {
                "ml_id": "ML00000001",
                "IDs": "CHEMBL0001",
                "smiles": built.smiles,
            }
        ]
    ).to_csv(input_path, index=False)

    validator = MacrolactoneValidator(output_dir=output_dir, sample_ratio=1.0)
    results = validator.run(input_path)

    assert results == {"total": 1, "success": 1, "failed": 0, "skipped": 0}

    # sqlite3's connection context manager only commits or rolls back the
    # transaction; it does not close the connection. Close it explicitly so
    # the database file handle is not leaked into later tests.
    connection = sqlite3.connect(output_dir / "fragments.db")
    try:
        fragments = connection.execute(
            "SELECT cleavage_position, has_dummy_atom, dummy_atom_count FROM side_chain_fragments"
        ).fetchall()
        library_entries = connection.execute(
            """
            SELECT source_type, source_parent_ml_id, source_parent_chembl_id,
            cleavage_position, has_dummy_atom, dummy_atom_count, splice_ready
            FROM fragment_library_entries
            """
        ).fetchall()
    finally:
        connection.close()

    assert fragments == [(10, 1, 1)]
    assert library_entries == [("validation_extract", "ML00000001", "CHEMBL0001", 10, 1, 1, 1)]

    summary = pd.read_csv(output_dir / "summary.csv")
    assert summary.loc[0, "num_sidechains"] == 1
    # cleavage_positions is stored in the CSV as a JSON-encoded list.
    assert json.loads(summary.loc[0, "cleavage_positions"]) == [10]

    fragment_library = pd.read_csv(output_dir / "fragment_library.csv")
    assert fragment_library.loc[0, "source_type"] == "validation_extract"
    # Values read back from CSV are normalized with int()/bool() before
    # comparing.
    assert int(fragment_library.loc[0, "cleavage_position"]) == 10
    assert bool(fragment_library.loc[0, "has_dummy_atom"]) is True
    assert int(fragment_library.loc[0, "dummy_atom_count"]) == 1
    assert bool(fragment_library.loc[0, "splice_ready"]) is True