diff --git a/pixi.toml b/pixi.toml
index c53bd4d..747f0da 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -19,6 +19,7 @@ rdkit = ">=2025.9.1,<2026"
 pandas = ">=2.3.3,<3"
 numpy = ">=2.3.4,<3"
 matplotlib = ">=3.10,<4"
+sqlmodel = ">=0.0.37,<0.0.38"
 
 [pypi-dependencies]
 macro_lactone_toolkit = { path = ".", editable = true }
diff --git a/src/macro_lactone_toolkit/validation/__init__.py b/src/macro_lactone_toolkit/validation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/macro_lactone_toolkit/validation/models.py b/src/macro_lactone_toolkit/validation/models.py
new file mode 100644
index 0000000..f43e352
--- /dev/null
+++ b/src/macro_lactone_toolkit/validation/models.py
@@ -0,0 +1,136 @@
+"""SQLModel table models and enums used by the validation package."""
+
+from datetime import datetime, timezone
+from enum import Enum
+from typing import List, Optional
+
+from sqlmodel import Field, Relationship, SQLModel
+
+# NOTE: ``from __future__ import annotations`` is deliberately not used in this
+# module. It would turn the ``Relationship`` annotations below into plain
+# strings such as 'List["SideChainFragment"]', which SQLModel has historically
+# failed to resolve when the mappers are configured.
+
+
+def _utcnow() -> datetime:
+    """Return the current UTC time as a naive ``datetime``.
+
+    Yields the same naive-UTC values as ``datetime.utcnow()``, which is
+    deprecated since Python 3.12.
+    """
+    return datetime.now(timezone.utc).replace(tzinfo=None)
+
+
+class ClassificationType(str, Enum):
+    """Classification labels assigned to a parent molecule."""
+
+    STANDARD = "standard_macrolactone"
+    NON_STANDARD = "non_standard_macrocycle"
+    NOT_MACROLACTONE = "not_macrolactone"
+
+
+class ProcessingStatus(str, Enum):
+    """Processing states of a parent molecule."""
+
+    PENDING = "pending"
+    SUCCESS = "success"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+
+
+class ParentMolecule(SQLModel, table=True):
+    """Original molecule information.
+
+    Linked to many ``SideChainFragment`` rows through ``fragments`` and to at
+    most one ``RingNumbering`` row through ``numbering``.
+    """
+
+    __tablename__ = "parent_molecules"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    source_id: str = Field(index=True)
+    molecule_name: Optional[str] = None
+    smiles: str = Field(index=True)
+    classification: ClassificationType = Field(index=True)
+    ring_size: Optional[int] = Field(default=None, index=True)
+    primary_reason_code: Optional[str] = None
+    primary_reason_message: Optional[str] = None
+    processing_status: ProcessingStatus = Field(default=ProcessingStatus.PENDING)
+    error_message: Optional[str] = None
+    num_sidechains: Optional[int] = None
+    # Stored as text; presumably a serialized list of positions -- confirm the
+    # encoding against the code that writes this column.
+    cleavage_positions: Optional[str] = None
+    numbered_image_path: Optional[str] = None
+    # Naive UTC timestamp assigned when the object is instantiated.
+    created_at: datetime = Field(default_factory=_utcnow)
+    processed_at: Optional[datetime] = None
+
+    fragments: List["SideChainFragment"] = Relationship(back_populates="parent")
+    numbering: Optional["RingNumbering"] = Relationship(back_populates="parent")
+
+
+class RingNumbering(SQLModel, table=True):
+    """Ring numbering details.
+
+    ``parent_id`` is unique, so each parent molecule has at most one row.
+    """
+
+    __tablename__ = "ring_numberings"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    parent_id: int = Field(foreign_key="parent_molecules.id", unique=True)
+    ring_size: int
+    carbonyl_carbon_idx: int
+    ester_oxygen_idx: int
+    # Both mappings are stored as text; presumably serialized (e.g. JSON) --
+    # confirm the encoding against the code that writes these columns.
+    position_to_atom: str
+    atom_to_position: str
+
+    parent: Optional[ParentMolecule] = Relationship(back_populates="numbering")
+
+
+class SideChainFragment(SQLModel, table=True):
+    """Side chain fragments from cleavage.
+
+    Each row references its source molecule through ``parent_id``.
+    """
+
+    __tablename__ = "side_chain_fragments"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    parent_id: int = Field(foreign_key="parent_molecules.id", index=True)
+    fragment_id: str = Field(index=True)
+    cleavage_position: int = Field(index=True)
+    attachment_atom_idx: int
+    attachment_atom_symbol: str
+    fragment_smiles_labeled: str
+    fragment_smiles_plain: str
+    dummy_isotope: int
+    atom_count: int
+    heavy_atom_count: int
+    molecular_weight: float
+    original_bond_type: str
+    image_path: Optional[str] = None
+
+    parent: Optional[ParentMolecule] = Relationship(back_populates="fragments")
+
+
+class ValidationResult(SQLModel, table=True):
+    """Manual validation records.
+
+    Each row references the reviewed molecule through ``parent_id``.
+    """
+
+    __tablename__ = "validation_results"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    # Indexed like ``SideChainFragment.parent_id`` for lookups by parent.
+    parent_id: int = Field(foreign_key="parent_molecules.id", index=True)
+    numbering_correct: Optional[bool] = None
+    cleavage_correct: Optional[bool] = None
+    classification_correct: Optional[bool] = None
+    notes: Optional[str] = None
+    validated_by: Optional[str] = None
+    validated_at: Optional[datetime] = None