- skip fused/shared/multi-anchor side systems during extraction - add fragment library schema and fragment_library.csv export - make scaffold prep strict for non-spliceable positions
117 lines
4.0 KiB
Python
117 lines
4.0 KiB
Python
from __future__ import annotations
|
|
|
|
from datetime import UTC, datetime
|
|
from typing import List, Optional
|
|
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
from sqlmodel import Field, SQLModel
|
|
|
|
|
|
class ClassificationType:
    """String labels used to classify a molecule.

    This is a plain namespace class rather than an enum, so every member
    is an ordinary ``str`` and compares equal to its literal value.
    """

    STANDARD: str = "standard_macrolactone"
    NON_STANDARD: str = "non_standard_macrocycle"
    NOT_MACROLACTONE: str = "not_macrolactone"
|
|
|
|
|
|
class ProcessingStatus:
    """String labels describing the processing state of a record.

    This is a plain namespace class rather than an enum, so every member
    is an ordinary ``str`` and compares equal to its literal value.
    """

    PENDING: str = "pending"
    SUCCESS: str = "success"
    FAILED: str = "failed"
    SKIPPED: str = "skipped"
|
|
|
|
|
|
# Define all tables without relationships first
|
|
class ParentMolecule(SQLModel, table=True):
    """Row model for the ``parent_molecules`` table (original molecule data)."""

    __tablename__ = "parent_molecules"

    # Primary key; defaults to None on a freshly constructed instance.
    id: Optional[int] = Field(default=None, primary_key=True)

    # --- Identity -----------------------------------------------------
    # NOTE(review): described as a unique ID, but only indexed here; no
    # ``unique=True`` constraint is declared.
    ml_id: str = Field(index=True)  # MacrolactoneDB unique ID (e.g., ML00000001)
    chembl_id: Optional[str] = Field(default=None, index=True)  # Original CHEMBL ID
    molecule_name: Optional[str] = None
    smiles: str = Field(index=True)

    # --- Classification -----------------------------------------------
    # presumably one of the ClassificationType labels; verify against callers
    classification: str = Field(index=True)
    ring_size: Optional[int] = Field(default=None, index=True)
    primary_reason_code: Optional[str] = None
    primary_reason_message: Optional[str] = None

    # --- Processing outcome -------------------------------------------
    processing_status: str = ProcessingStatus.PENDING
    error_message: Optional[str] = None
    num_sidechains: Optional[int] = None
    # Stored as text; the serialisation format is not visible in this model.
    cleavage_positions: Optional[str] = None
    numbered_image_path: Optional[str] = None

    # --- Timestamps ---------------------------------------------------
    # Timezone-aware UTC timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
    processed_at: Optional[datetime] = None
|
|
|
|
|
|
class RingNumbering(SQLModel, table=True):
    """Row model for the ``ring_numberings`` table (ring numbering details)."""

    __tablename__ = "ring_numberings"

    # Primary key; defaults to None on a freshly constructed instance.
    id: Optional[int] = Field(default=None, primary_key=True)

    # References ``parent_molecules.id``; the unique constraint allows at
    # most one numbering row per parent molecule.
    parent_id: int = Field(foreign_key="parent_molecules.id", unique=True)

    ring_size: int
    carbonyl_carbon_idx: int
    ester_oxygen_idx: int

    # Both mappings are stored as text; presumably serialised
    # position<->atom-index lookups -- confirm the format with the writer.
    position_to_atom: str
    atom_to_position: str
|
|
|
|
|
|
class SideChainFragment(SQLModel, table=True):
    """Row model for the ``side_chain_fragments`` table (cleavage fragments)."""

    __tablename__ = "side_chain_fragments"

    # Primary key; defaults to None on a freshly constructed instance.
    id: Optional[int] = Field(default=None, primary_key=True)

    # --- Provenance ---------------------------------------------------
    # References ``parent_molecules.id``; indexed, not unique, so several
    # fragment rows may point at the same parent.
    parent_id: int = Field(foreign_key="parent_molecules.id", index=True)
    fragment_id: str = Field(index=True)
    cleavage_position: int = Field(index=True)
    attachment_atom_idx: int
    attachment_atom_symbol: str

    # --- Fragment structure -------------------------------------------
    fragment_smiles_labeled: str
    fragment_smiles_plain: str
    dummy_isotope: int
    has_dummy_atom: bool = True
    dummy_atom_count: int = 1

    # --- Computed properties ------------------------------------------
    atom_count: int
    heavy_atom_count: int
    molecular_weight: float
    original_bond_type: str

    image_path: Optional[str] = None
|
|
|
|
|
|
class FragmentLibraryEntry(SQLModel, table=True):
    """Row model for the ``fragment_library_entries`` table (unified library)."""

    __tablename__ = "fragment_library_entries"

    # Primary key; defaults to None on a freshly constructed instance.
    id: Optional[int] = Field(default=None, primary_key=True)

    # --- Provenance ---------------------------------------------------
    # The source_* references are plain indexed columns; no foreign-key
    # constraint is declared on any of them.
    source_type: str = Field(index=True)
    source_fragment_id: Optional[str] = Field(default=None, index=True)
    source_parent_ml_id: Optional[str] = Field(default=None, index=True)
    source_parent_chembl_id: Optional[str] = Field(default=None, index=True)
    cleavage_position: Optional[int] = Field(default=None, index=True)

    # --- Fragment structure -------------------------------------------
    # Only the plain SMILES is required; the labeled form is optional.
    fragment_smiles_labeled: Optional[str] = None
    fragment_smiles_plain: str
    has_dummy_atom: bool = False
    dummy_atom_count: int = 0
    splice_ready: bool = Field(default=False, index=True)
    original_bond_type: Optional[str] = None

    # Timezone-aware UTC timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
|
|
|
|
|
|
class ValidationResult(SQLModel, table=True):
    """Row model for the ``validation_results`` table (manual validation records)."""

    __tablename__ = "validation_results"

    # Primary key; defaults to None on a freshly constructed instance.
    id: Optional[int] = Field(default=None, primary_key=True)

    # References ``parent_molecules.id``. Indexed so that looking up the
    # validations of one parent does not scan the whole table, consistent
    # with ``SideChainFragment.parent_id``. Not unique: no constraint
    # prevents several validation rows for the same parent.
    parent_id: int = Field(foreign_key="parent_molecules.id", index=True)

    # Tri-state verdicts: None means the aspect has no recorded verdict.
    numbering_correct: Optional[bool] = None
    cleavage_correct: Optional[bool] = None
    classification_correct: Optional[bool] = None

    notes: Optional[str] = None
    validated_by: Optional[str] = None
    # No default is supplied; the writer must set this explicitly.
    validated_at: Optional[datetime] = None
|