Keep key validation outputs and analysis tables tracked directly, package analysis plot PNGs into a small tar.gz backup, and add analysis scripts plus tests so the stored results remain reproducible without flooding git with large image trees.
41 lines
1.5 KiB
Python
41 lines
1.5 KiB
Python
from __future__ import annotations
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from macro_lactone_toolkit.validation.fragment_library_analysis import (
|
|
build_position_diversity_table,
|
|
count_non_dummy_atoms,
|
|
)
|
|
|
|
|
|
def test_count_non_dummy_atoms_excludes_dummy_atoms() -> None:
    """Check that the ``*`` placeholder is left out of the reported atom count.

    Each fragment SMILES below starts with ``*``; the expected value is the
    number of remaining atoms written in the string.
    """
    expected_counts = {
        "*O": 1,
        "*C": 1,
        "*C(C)C": 3,
    }
    for fragment_smiles, expected in expected_counts.items():
        assert count_non_dummy_atoms(fragment_smiles) == expected
|
|
|
|
|
|
def test_build_position_diversity_table_combines_frequency_and_structure_metrics() -> None:
    """Run ``build_position_diversity_table`` on a small two-position fixture.

    Position 3 holds four fragment rows with three distinct SMILES strings,
    so its unique ratio is expected to be 3/4 and its entropy and distance
    metrics strictly positive. Position 4 holds a single row, for which the
    mean pairwise Tanimoto distance is expected to be exactly 0.0.
    """
    fragments_by_position = [
        (3, "*C"),
        (3, "*CC"),
        (3, "*CC"),
        (3, "*O"),
        (4, "*C"),
    ]
    dataframe = pd.DataFrame(
        [
            {"cleavage_position": position, "fragment_smiles_plain": smiles}
            for position, smiles in fragments_by_position
        ]
    )

    diversity_table = build_position_diversity_table(dataframe)
    summary = diversity_table.set_index("cleavage_position")

    # Position 3: several fragments, some repeated.
    assert summary.loc[3, "total_fragments"] == 4
    assert summary.loc[3, "unique_fragments"] == 3
    assert summary.loc[3, "normalized_unique_ratio"] == pytest.approx(0.75)
    strictly_positive_metrics = (
        "shannon_entropy",
        "normalized_shannon_entropy",
        "mean_pairwise_tanimoto_distance",
    )
    for metric in strictly_positive_metrics:
        assert summary.loc[3, metric] > 0.0

    # Position 4: a single fragment.
    for count_column in ("total_fragments", "unique_fragments"):
        assert summary.loc[4, count_column] == 1
    assert summary.loc[4, "mean_pairwise_tanimoto_distance"] == 0.0
|