from __future__ import annotations import pandas as pd import pytest from macro_lactone_toolkit.validation.fragment_library_analysis import ( build_position_diversity_table, count_non_dummy_atoms, ) def test_count_non_dummy_atoms_excludes_dummy_atoms() -> None: assert count_non_dummy_atoms("*O") == 1 assert count_non_dummy_atoms("*C") == 1 assert count_non_dummy_atoms("*C(C)C") == 3 def test_build_position_diversity_table_combines_frequency_and_structure_metrics() -> None: dataframe = pd.DataFrame( [ {"cleavage_position": 3, "fragment_smiles_plain": "*C"}, {"cleavage_position": 3, "fragment_smiles_plain": "*CC"}, {"cleavage_position": 3, "fragment_smiles_plain": "*CC"}, {"cleavage_position": 3, "fragment_smiles_plain": "*O"}, {"cleavage_position": 4, "fragment_smiles_plain": "*C"}, ] ) summary = build_position_diversity_table(dataframe).set_index("cleavage_position") assert summary.loc[3, "total_fragments"] == 4 assert summary.loc[3, "unique_fragments"] == 3 assert summary.loc[3, "normalized_unique_ratio"] == pytest.approx(0.75) assert summary.loc[3, "shannon_entropy"] > 0.0 assert summary.loc[3, "normalized_shannon_entropy"] > 0.0 assert summary.loc[3, "mean_pairwise_tanimoto_distance"] > 0.0 assert summary.loc[4, "total_fragments"] == 1 assert summary.loc[4, "unique_fragments"] == 1 assert summary.loc[4, "mean_pairwise_tanimoto_distance"] == 0.0