From 13dd336e377e266fb4ed6238439e71d3c7fed568 Mon Sep 17 00:00:00 2001 From: lingyuzeng Date: Thu, 19 Mar 2026 10:33:10 +0800 Subject: [PATCH] fix(validation): handle exceptions in sampling and processing --- .../validation/sampling.py | 11 ++++++++--- .../validation/validator.py | 17 ++++++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/macro_lactone_toolkit/validation/sampling.py b/src/macro_lactone_toolkit/validation/sampling.py index e932a04..9c6bb00 100644 --- a/src/macro_lactone_toolkit/validation/sampling.py +++ b/src/macro_lactone_toolkit/validation/sampling.py @@ -23,9 +23,14 @@ def stratified_sample_by_ring_size( ring_sizes = [] for smiles in df[smiles_col]: - result = analyzer.classify_macrocycle(smiles) - classifications.append(result.classification) - ring_sizes.append(result.ring_size) + try: + result = analyzer.classify_macrocycle(smiles) + classifications.append(result.classification) + ring_sizes.append(result.ring_size) + except Exception: + # Handle ambiguous or invalid molecules + classifications.append("not_macrolactone") + ring_sizes.append(None) df = df.copy() df["_classification"] = classifications diff --git a/src/macro_lactone_toolkit/validation/validator.py b/src/macro_lactone_toolkit/validation/validator.py index c141d7c..5642c5f 100644 --- a/src/macro_lactone_toolkit/validation/validator.py +++ b/src/macro_lactone_toolkit/validation/validator.py @@ -88,9 +88,20 @@ class MacrolactoneValidator: name = row.get("molecule_pref_name", None) # Classify - classification_result = self.analyzer.classify_macrocycle(smiles) - classification = classification_result.classification - ring_size = classification_result.ring_size + try: + classification_result = self.analyzer.classify_macrocycle(smiles) + classification = classification_result.classification + ring_size = classification_result.ring_size + except Exception as e: + # Handle classification errors - treat as not_macrolactone + classification = ClassificationType.NOT_MACROLACTONE + ring_size = None + classification_result = type('obj', (object,), { + 'classification': classification, + 'ring_size': ring_size, + 'primary_reason_code': 'classification_error', + 'primary_reason_message': str(e) + })() # Create parent record parent = ParentMolecule(