feat(validation): enforce single-anchor fragments

- skip fused/shared/multi-anchor side systems during extraction
- add fragment library schema and fragment_library.csv export
- make scaffold prep strict for non-spliceable positions
This commit is contained in:
2026-03-19 14:20:32 +08:00
parent 07ba27be2b
commit 46a438dd36
10 changed files with 383 additions and 21 deletions

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from datetime import datetime
from datetime import UTC, datetime
from typing import List, Optional
from sqlalchemy.orm import Mapped, mapped_column, relationship
@@ -40,7 +40,7 @@ class ParentMolecule(SQLModel, table=True):
num_sidechains: Optional[int] = None
cleavage_positions: Optional[str] = None
numbered_image_path: Optional[str] = None
created_at: datetime = Field(default_factory=datetime.utcnow)
created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
processed_at: Optional[datetime] = None
@@ -72,6 +72,8 @@ class SideChainFragment(SQLModel, table=True):
fragment_smiles_labeled: str
fragment_smiles_plain: str
dummy_isotope: int
has_dummy_atom: bool = Field(default=True)
dummy_atom_count: int = Field(default=1)
atom_count: int
heavy_atom_count: int
molecular_weight: float
@@ -79,6 +81,26 @@ class SideChainFragment(SQLModel, table=True):
image_path: Optional[str] = None
class FragmentLibraryEntry(SQLModel, table=True):
"""Unified fragment library entries.

One row per fragment, stored in the ``fragment_library_entries`` table.
A row carries the fragment SMILES (plain form required, labeled form
optional), optional references to where the fragment came from, and
flags describing its dummy atoms and splice readiness.

NOTE(review): this view shows the class with flattened indentation; the
statements below are the class body.
"""
__tablename__ = "fragment_library_entries"
# Primary key; defaults to None (presumably filled in by the database on
# insert -- confirm against the session/engine setup).
id: Optional[int] = Field(default=None, primary_key=True)
# Required and indexed. The set of allowed values is not visible here.
source_type: str = Field(index=True)
# Optional, indexed source references. Their exact meaning is inferred from
# the field names only -- confirm against the code that populates them.
source_fragment_id: Optional[str] = Field(default=None, index=True)
source_parent_ml_id: Optional[str] = Field(default=None, index=True)
source_parent_chembl_id: Optional[str] = Field(default=None, index=True)
# Optional, indexed integer position.
cleavage_position: Optional[int] = Field(default=None, index=True)
# Labeled SMILES may be absent; the plain SMILES is the only field besides
# source_type that has no default and must always be supplied.
fragment_smiles_labeled: Optional[str] = None
fragment_smiles_plain: str
# Defaults here are False / 0, whereas SideChainFragment defaults the
# same-named fields to True / 1.
has_dummy_atom: bool = Field(default=False)
dummy_atom_count: int = Field(default=0)
# Indexed flag, False by default. No rule tying it to the dummy-atom fields
# is enforced in this class; any such check must live in the caller.
splice_ready: bool = Field(default=False, index=True)
original_bond_type: Optional[str] = None
# Timezone-aware UTC timestamp. default_factory calls the lambda when a
# default is needed, not once at import time. datetime.UTC needs Python 3.11+.
created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
class ValidationResult(SQLModel, table=True):
"""Manual validation records."""