feat(validation): add SQLModel database models

This commit is contained in:
2026-03-19 10:23:47 +08:00
parent c0ead42384
commit d0cdf50ed5
3 changed files with 100 additions and 0 deletions

View File

@@ -19,6 +19,7 @@ rdkit = ">=2025.9.1,<2026"
pandas = ">=2.3.3,<3"
numpy = ">=2.3.4,<3"
matplotlib = ">=3.10,<4"
sqlmodel = ">=0.0.37,<0.0.38"
[pypi-dependencies]
macro_lactone_toolkit = { path = ".", editable = true }

View File

@@ -0,0 +1,99 @@
from __future__ import annotations

from datetime import datetime, timezone
from enum import Enum
from typing import List, Optional

from sqlmodel import Field, Relationship, SQLModel
class ClassificationType(str, Enum):
    """Classification labels that can be assigned to a parent molecule.

    The ``str`` mixin makes every member compare equal to (and behave as)
    its plain string value.
    """

    # Member order is kept as declared; iteration follows it.
    STANDARD = "standard_macrolactone"
    NON_STANDARD = "non_standard_macrocycle"
    NOT_MACROLACTONE = "not_macrolactone"
class ProcessingStatus(str, Enum):
    """Processing states recorded for a parent molecule.

    The ``str`` mixin makes every member compare equal to (and behave as)
    its plain string value.
    """

    # Member order is kept as declared; iteration follows it.
    PENDING = "pending"
    SUCCESS = "success"
    FAILED = "failed"
    SKIPPED = "skipped"
def _utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Drop-in replacement for ``datetime.utcnow`` (deprecated since Python
    3.12). The tzinfo is stripped so the value stays naive, exactly like the
    value ``utcnow`` produced, and remains comparable with other naive
    timestamps.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class ParentMolecule(SQLModel, table=True):
    """Original molecule information.

    Stored in the ``parent_molecules`` table. Each row owns a list of
    ``SideChainFragment`` rows and at most one ``RingNumbering`` row.
    """

    __tablename__ = "parent_molecules"

    # --- identity -----------------------------------------------------
    # Primary key; None until the database assigns a value.
    id: Optional[int] = Field(default=None, primary_key=True)
    source_id: str = Field(index=True)
    molecule_name: Optional[str] = None
    smiles: str = Field(index=True)

    # --- classification -----------------------------------------------
    classification: ClassificationType = Field(index=True)
    ring_size: Optional[int] = Field(default=None, index=True)
    primary_reason_code: Optional[str] = None
    primary_reason_message: Optional[str] = None

    # --- processing outcome -------------------------------------------
    # New rows start out as PENDING.
    processing_status: ProcessingStatus = Field(default=ProcessingStatus.PENDING)
    error_message: Optional[str] = None
    num_sidechains: Optional[int] = None
    # Stored as text; presumably a serialized list of positions -- verify
    # against the code that writes this column.
    cleavage_positions: Optional[str] = None
    numbered_image_path: Optional[str] = None

    # --- timestamps ---------------------------------------------------
    # Naive UTC timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=_utc_now)
    processed_at: Optional[datetime] = None

    # --- relationships ------------------------------------------------
    # NOTE(review): this module enables ``from __future__ import annotations``;
    # stringified annotations have been a known source of trouble for
    # SQLModel relationship resolution -- confirm mapper configuration
    # succeeds with the pinned sqlmodel version.
    fragments: List["SideChainFragment"] = Relationship(back_populates="parent")
    # One-to-one: ``RingNumbering.parent_id`` is declared unique, so this
    # side is a scalar. ``uselist=False`` states that explicitly instead of
    # relying on annotation inference.
    numbering: Optional["RingNumbering"] = Relationship(
        back_populates="parent",
        sa_relationship_kwargs={"uselist": False},
    )
class RingNumbering(SQLModel, table=True):
    """Ring numbering details.

    Stored in the ``ring_numberings`` table. ``parent_id`` is unique, so a
    parent molecule has at most one numbering row.
    """

    __tablename__ = "ring_numberings"

    # Primary key; None until the database assigns a value.
    id: Optional[int] = Field(primary_key=True, default=None)
    # Owning parent molecule; uniqueness enforces the one-to-one link.
    parent_id: int = Field(unique=True, foreign_key="parent_molecules.id")

    ring_size: int
    carbonyl_carbon_idx: int
    ester_oxygen_idx: int

    # Both mappings are stored as text; presumably serialized dictionaries
    # (e.g. JSON) -- verify against the code that writes these columns.
    position_to_atom: str
    atom_to_position: str

    # Reverse side of ``ParentMolecule.numbering``.
    parent: Optional[ParentMolecule] = Relationship(back_populates="numbering")
class SideChainFragment(SQLModel, table=True):
    """Side chain fragments from cleavage.

    Stored in the ``side_chain_fragments`` table; each row references one
    parent molecule through ``parent_id``.
    """

    __tablename__ = "side_chain_fragments"

    # --- keys ---------------------------------------------------------
    # Primary key; None until the database assigns a value.
    id: Optional[int] = Field(primary_key=True, default=None)
    parent_id: int = Field(index=True, foreign_key="parent_molecules.id")
    fragment_id: str = Field(index=True)

    # --- attachment point ---------------------------------------------
    cleavage_position: int = Field(index=True)
    attachment_atom_idx: int
    attachment_atom_symbol: str

    # --- fragment structure -------------------------------------------
    fragment_smiles_labeled: str
    fragment_smiles_plain: str
    dummy_isotope: int

    # --- descriptors --------------------------------------------------
    atom_count: int
    heavy_atom_count: int
    molecular_weight: float
    original_bond_type: str

    # --- artifacts ----------------------------------------------------
    image_path: Optional[str] = None

    # Reverse side of ``ParentMolecule.fragments``.
    parent: Optional[ParentMolecule] = Relationship(back_populates="fragments")
class ValidationResult(SQLModel, table=True):
    """Manual validation records.

    Stored in the ``validation_results`` table; each row references one
    parent molecule through ``parent_id``. No ORM relationship is declared
    here, so the link is navigated by querying on ``parent_id``.
    """

    __tablename__ = "validation_results"

    # Primary key; None until the database assigns a value.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Indexed like ``SideChainFragment.parent_id`` so lookups of validation
    # records by parent molecule do not scan the whole table.
    parent_id: int = Field(foreign_key="parent_molecules.id", index=True)

    # Per-aspect verdicts; all default to None (presumably meaning "not yet
    # reviewed" -- confirm with the validation workflow).
    numbering_correct: Optional[bool] = None
    cleavage_correct: Optional[bool] = None
    classification_correct: Optional[bool] = None

    notes: Optional[str] = None
    validated_by: Optional[str] = None
    validated_at: Optional[datetime] = None