feat(validation): add stratified sampling by ring size

This commit is contained in:
2026-03-19 10:28:38 +08:00
parent 2e3b52d049
commit 1e36e52112
2 changed files with 78 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
import pandas as pd
import pytest
from macro_lactone_toolkit.validation.sampling import stratified_sample_by_ring_size
def test_stratified_sample():
    """Smoke-test ``stratified_sample_by_ring_size`` on a three-row frame.

    The frame holds three lactone SMILES, annotated below as 16-, 14- and
    18-membered rings, with ids "A", "B" and "C". The sampler is called
    with ``sample_ratio=0.5`` and a fixed ``random_state``.

    Only the size of the result is checked: it must contain between one
    and three rows.
    NOTE(review): the original comment expected at least one row per ring
    size, but no per-ring-size assertion is made here -- confirm the
    intended contract before tightening the check.
    """
    lactone_smiles = [
        "O=C1CCCCCCCCCCCCCCO1",  # 16-membered
        "O=C1CCCCCCCCCCCCO1",  # 14-membered
        "O=C1CCCCCCCCCCCCCCCCO1",  # 18-membered
    ]
    molecule_ids = ["A", "B", "C"]
    frame = pd.DataFrame({"smiles": lactone_smiles, "id": molecule_ids})

    subset = stratified_sample_by_ring_size(
        frame,
        sample_ratio=0.5,
        random_state=42,
    )

    # The result may be neither empty nor larger than the three-row input.
    row_count = len(subset)
    assert row_count >= 1
    assert row_count <= 3