Files
macrolactone-toolkit/tests/validation/test_fragment_library_analysis.py
lingyuzeng 8071a141ee feat(validation): archive key result assets
Keep key validation outputs and analysis tables tracked directly,
package analysis plot PNGs into a small tar.gz backup, and add
analysis scripts plus tests so the stored results remain
reproducible without flooding git with large image trees.
2026-03-19 21:34:27 +08:00

41 lines
1.5 KiB
Python

from __future__ import annotations
import pandas as pd
import pytest
from macro_lactone_toolkit.validation.fragment_library_analysis import (
build_position_diversity_table,
count_non_dummy_atoms,
)
def test_count_non_dummy_atoms_excludes_dummy_atoms() -> None:
    """Check that the ``*`` placeholder atom is left out of the atom count."""
    # Each fragment SMILES carries exactly one ``*``; the expected value is
    # the number of remaining atoms written in the string.
    expected_counts = {
        "*O": 1,
        "*C": 1,
        "*C(C)C": 3,
    }
    for smiles, expected in expected_counts.items():
        assert count_non_dummy_atoms(smiles) == expected
def test_build_position_diversity_table_combines_frequency_and_structure_metrics() -> None:
    """Check the per-position summary built from a small fragment table.

    The fixture has four fragments at cleavage position 3 (three distinct
    SMILES, one of them repeated) and a single fragment at position 4.
    """
    dataframe = pd.DataFrame(
        [
            {"cleavage_position": 3, "fragment_smiles_plain": "*C"},
            {"cleavage_position": 3, "fragment_smiles_plain": "*CC"},
            {"cleavage_position": 3, "fragment_smiles_plain": "*CC"},
            {"cleavage_position": 3, "fragment_smiles_plain": "*O"},
            {"cleavage_position": 4, "fragment_smiles_plain": "*C"},
        ]
    )

    # Index by position so each assertion can address a row directly.
    summary = build_position_diversity_table(dataframe).set_index("cleavage_position")

    # Position 3: 4 rows, 3 distinct fragments -> ratio 3/4, non-zero diversity.
    assert summary.loc[3, "total_fragments"] == 4
    assert summary.loc[3, "unique_fragments"] == 3
    assert summary.loc[3, "normalized_unique_ratio"] == pytest.approx(0.75)
    assert summary.loc[3, "shannon_entropy"] > 0.0
    assert summary.loc[3, "normalized_shannon_entropy"] > 0.0
    assert summary.loc[3, "mean_pairwise_tanimoto_distance"] > 0.0

    # Position 4: a single fragment, so no pairwise distance is expected.
    assert summary.loc[4, "total_fragments"] == 1
    assert summary.loc[4, "unique_fragments"] == 1
    # Compare the float metric with a tolerance, matching the ratio check above,
    # rather than relying on exact floating-point equality.
    assert summary.loc[4, "mean_pairwise_tanimoto_distance"] == pytest.approx(0.0)