fix(validation): handle exceptions in sampling and processing
This commit is contained in:
@@ -23,9 +23,14 @@ def stratified_sample_by_ring_size(
|
|||||||
ring_sizes = []
|
ring_sizes = []
|
||||||
|
|
||||||
for smiles in df[smiles_col]:
|
for smiles in df[smiles_col]:
|
||||||
result = analyzer.classify_macrocycle(smiles)
|
try:
|
||||||
classifications.append(result.classification)
|
result = analyzer.classify_macrocycle(smiles)
|
||||||
ring_sizes.append(result.ring_size)
|
classifications.append(result.classification)
|
||||||
|
ring_sizes.append(result.ring_size)
|
||||||
|
except Exception:
|
||||||
|
# Handle ambiguous or invalid molecules
|
||||||
|
classifications.append("not_macrolactone")
|
||||||
|
ring_sizes.append(None)
|
||||||
|
|
||||||
df = df.copy()
|
df = df.copy()
|
||||||
df["_classification"] = classifications
|
df["_classification"] = classifications
|
||||||
|
|||||||
@@ -88,9 +88,20 @@ class MacrolactoneValidator:
|
|||||||
name = row.get("molecule_pref_name", None)
|
name = row.get("molecule_pref_name", None)
|
||||||
|
|
||||||
# Classify
|
# Classify
|
||||||
classification_result = self.analyzer.classify_macrocycle(smiles)
|
try:
|
||||||
classification = classification_result.classification
|
classification_result = self.analyzer.classify_macrocycle(smiles)
|
||||||
ring_size = classification_result.ring_size
|
classification = classification_result.classification
|
||||||
|
ring_size = classification_result.ring_size
|
||||||
|
except Exception as e:
|
||||||
|
# Handle classification errors - treat as not_macrolactone
|
||||||
|
classification = ClassificationType.NOT_MACROLACTONE
|
||||||
|
ring_size = None
|
||||||
|
classification_result = type('obj', (object,), {
|
||||||
|
'classification': classification,
|
||||||
|
'ring_size': ring_size,
|
||||||
|
'primary_reason_code': 'classification_error',
|
||||||
|
'primary_reason_message': str(e)
|
||||||
|
})()
|
||||||
|
|
||||||
# Create parent record
|
# Create parent record
|
||||||
parent = ParentMolecule(
|
parent = ParentMolecule(
|
||||||
|
|||||||
Reference in New Issue
Block a user