first add
This commit is contained in:
292
notebooks/01_cheminformatics_quickstart.ipynb
Normal file
292
notebooks/01_cheminformatics_quickstart.ipynb
Normal file
@@ -0,0 +1,292 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# sqlmodel-pg-kit — Cheminformatics Quickstart\n\n",
|
||||
"This notebook demonstrates how to use `sqlmodel-pg-kit` for day-to-day data work, including:\n",
|
||||
"- Environment setup (SQLite smoke test vs. PostgreSQL)\n",
|
||||
"- Core CRUD with SQLModel sessions\n",
|
||||
"- Multi-table schema and joins for molecules and datasets\n",
|
||||
"- Optional RDKit + Mordred descriptor computation and storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 0. Install (in Jupyter)\n",
|
||||
"Uncomment as needed to install the package and optional chem deps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %pip install -e . pytest\n",
|
||||
"# Optional: RDKit + Mordred\n",
|
||||
"# %pip install rdkit-pypi mordred\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Basic kit usage (SQLite smoke)\n",
|
||||
"Use the built-in smoke test pattern with an in-memory SQLite DB to verify CRUD paths."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Add the src directory to the path so we can import sqlmodel_pg_kit\n",
|
||||
"sys.path.insert(0, os.path.join(os.getcwd(), '..', 'src'))\n",
|
||||
"\n",
|
||||
"from sqlmodel_pg_kit import db, create_all, Repository\n",
|
||||
"\n",
|
||||
"# Override to SQLite in-memory for quick check\n",
|
||||
"db.cfg = db.DatabaseConfig(host='', port=0, user='', password='', database=':memory:', sslmode='disable')\n",
|
||||
"db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n",
|
||||
"\n",
|
||||
"from typing import Optional\n",
|
||||
"from sqlmodel import SQLModel, Field\n",
|
||||
"\n",
|
||||
"class Hero(SQLModel, table=True):\n",
|
||||
" id: Optional[int] = Field(default=None, primary_key=True)\n",
|
||||
" name: str = Field(index=True)\n",
|
||||
" age: Optional[int] = None\n",
|
||||
"\n",
|
||||
"create_all()\n",
|
||||
"repo = Repository(Hero)\n",
|
||||
"from sqlmodel_pg_kit.db import get_session\n",
|
||||
"with get_session() as s:\n",
|
||||
" repo.create(s, {'name': 'Iron Man', 'age': 45})\n",
|
||||
" print([h.name for h in repo.list(s)])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Connect to PostgreSQL (optional)\n",
|
||||
"Export `SQL_*` or `PG*` variables in your shell before starting Jupyter or set them here."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"# Example (adjust to your environment, uncomment to set in-process):\n",
|
||||
"# os.environ['SQL_HOST'] = '127.0.0.1'\n",
|
||||
"# os.environ['SQL_PORT'] = '5432'\n",
|
||||
"# os.environ['SQL_USER'] = 'postgres'\n",
|
||||
"# os.environ['SQL_PASSWORD'] = 'change-me-strong'\n",
|
||||
"# os.environ['SQL_DATABASE'] = 'appdb'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Cheminformatics schema (Molecule, Dataset, MoleculeDataset)\n",
|
||||
"Define models inline (you can also place them in your own package)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from __future__ import annotations\n",
|
||||
"from dataclasses import dataclass\n",
|
||||
"from datetime import datetime\n",
|
||||
"from typing import List, Optional\n",
|
||||
"\n",
|
||||
"from sqlmodel_pg_kit.db import get_session, engine\n",
|
||||
"from sqlmodel import SQLModel, Field, Relationship, select\n",
|
||||
"from sqlalchemy.orm import Mapped\n",
|
||||
"\n",
|
||||
"class MoleculeDataset(SQLModel, table=True):\n",
|
||||
" molecule_id: int = Field(foreign_key='molecule.id', primary_key=True)\n",
|
||||
" dataset_id: int = Field(foreign_key='dataset.id', primary_key=True)\n",
|
||||
" added_at: datetime = Field(default_factory=datetime.utcnow)\n",
|
||||
"\n",
|
||||
"class Molecule(SQLModel, table=True):\n",
|
||||
" id: Optional[int] = Field(default=None, primary_key=True)\n",
|
||||
" smiles: str = Field(index=True)\n",
|
||||
" selfies: Optional[str] = Field(default=None)\n",
|
||||
" qed: Optional[float] = Field(default=None, index=True)\n",
|
||||
" sa_score: Optional[float] = Field(default=None, index=True)\n",
|
||||
" created_at: datetime = Field(default_factory=datetime.utcnow)\n",
|
||||
" updated_at: datetime = Field(default_factory=datetime.utcnow)\n",
|
||||
" datasets: Mapped[List[\"Dataset\"]] = Relationship(back_populates=\"molecules\", link_model=MoleculeDataset)\n",
|
||||
"\n",
|
||||
"class Dataset(SQLModel, table=True):\n",
|
||||
" id: Optional[int] = Field(default=None, primary_key=True)\n",
|
||||
" name: str = Field(index=True)\n",
|
||||
" molecules: Mapped[List[\"Molecule\"]] = Relationship(back_populates=\"datasets\", link_model=MoleculeDataset)\n",
|
||||
"\n",
|
||||
"@dataclass\n",
|
||||
"class MoleculeDTO:\n",
|
||||
" smiles: str\n",
|
||||
" selfies: Optional[str] = None\n",
|
||||
" qed: Optional[float] = None\n",
|
||||
" sa_score: Optional[float] = None\n",
|
||||
" def to_model(self) -> Molecule:\n",
|
||||
" return Molecule(**vars(self))\n",
|
||||
"\n",
|
||||
"# Create tables for these models\n",
|
||||
"SQLModel.metadata.create_all(engine)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. CRUD and common queries\n",
|
||||
"Insert molecules/datasets, link them, filter and join."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Clean\n",
|
||||
"with get_session() as s:\n",
|
||||
" s.execute(MoleculeDataset.__table__.delete())\n",
|
||||
" s.execute(Molecule.__table__.delete())\n",
|
||||
" s.execute(Dataset.__table__.delete())\n",
|
||||
" s.commit()\n",
|
||||
"\n",
|
||||
"# Insert molecules via DTO\n",
|
||||
"mols = [\n",
|
||||
" MoleculeDTO(smiles='CCO', qed=0.45, sa_score=2.1),\n",
|
||||
" MoleculeDTO(smiles='c1ccccc1', qed=0.76, sa_score=3.5),\n",
|
||||
" MoleculeDTO(smiles='CCN(CC)CC', qed=0.62, sa_score=2.8),\n",
|
||||
"]\n",
|
||||
"with get_session() as s:\n",
|
||||
" s.add_all([dto.to_model() for dto in mols])\n",
|
||||
" s.commit()\n",
|
||||
"\n",
|
||||
"# Datasets and linking\n",
|
||||
"with get_session() as s:\n",
|
||||
" ds_train = Dataset(name='train'); ds_holdout = Dataset(name='holdout')\n",
|
||||
" s.add_all([ds_train, ds_holdout]); s.commit(); s.refresh(ds_train); s.refresh(ds_holdout)\n",
|
||||
" mol_list = s.exec(select(Molecule).order_by(Molecule.id.asc())).all()\n",
|
||||
" links = [\n",
|
||||
" MoleculeDataset(molecule_id=mol_list[0].id, dataset_id=ds_train.id),\n",
|
||||
" MoleculeDataset(molecule_id=mol_list[1].id, dataset_id=ds_train.id),\n",
|
||||
" MoleculeDataset(molecule_id=mol_list[2].id, dataset_id=ds_holdout.id),\n",
|
||||
" ]\n",
|
||||
" s.add_all(links); s.commit()\n",
|
||||
"\n",
|
||||
"# Update one\n",
|
||||
"with get_session() as s:\n",
|
||||
" mol = s.exec(select(Molecule).where(Molecule.smiles=='CCO')).one()\n",
|
||||
" mol.qed = 0.50; s.add(mol); s.commit(); s.refresh(mol)\n",
|
||||
" print(mol.qed)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Filters and joins\n",
|
||||
"from sqlalchemy.orm import selectinload\n",
|
||||
"\n",
|
||||
"with get_session() as s:\n",
|
||||
" hi_qed = s.exec(select(Molecule).where(Molecule.qed>=0.6).order_by(Molecule.sa_score.asc())).all()\n",
|
||||
" hi_qed_view = [(m.smiles, m.qed, m.sa_score) for m in hi_qed]\n",
|
||||
"\n",
|
||||
"with get_session() as s:\n",
|
||||
" stmt = select(Molecule).options(selectinload(Molecule.datasets)).order_by(Molecule.id.asc())\n",
|
||||
" mols_with_ds = s.exec(stmt).all()\n",
|
||||
" mols_with_ds_view = [(m.smiles, [d.name for d in m.datasets]) for m in mols_with_ds]\n",
|
||||
"\n",
|
||||
"with get_session() as s:\n",
|
||||
" stmt = (select(Molecule)\n",
|
||||
" .join(MoleculeDataset, Molecule.id==MoleculeDataset.molecule_id)\n",
|
||||
" .join(Dataset, Dataset.id==MoleculeDataset.dataset_id)\n",
|
||||
" .where(Dataset.name=='train')\n",
|
||||
" .order_by(Molecule.id.asc()))\n",
|
||||
" train_mols = s.exec(stmt).all()\n",
|
||||
" train_mols_view = [m.smiles for m in train_mols]\n",
|
||||
"\n",
|
||||
"hi_qed_view, mols_with_ds_view, train_mols_view\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Optional: RDKit + Mordred integration\n",
|
||||
"Compute descriptors and store them. If you prefer flexible storage, use a JSONB column or a normalized EAV table."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" from rdkit import Chem\n",
|
||||
" from rdkit.Chem import QED\n",
|
||||
" from mordred import Calculator, descriptors\n",
|
||||
" rdkit_ok = True\n",
|
||||
"except Exception as e:\n",
|
||||
" rdkit_ok = False\n",
|
||||
" print('RDKit/Mordred not available in this environment. Skipping demo.\\n', e)\n",
|
||||
"\n",
|
||||
"if rdkit_ok:\n",
|
||||
" mol = Chem.MolFromSmiles('c1ccccc1O')\n",
|
||||
" qed = float(QED.qed(mol))\n",
|
||||
" calc = Calculator(descriptors, ignore_3D=True)\n",
|
||||
" md = calc(mol)\n",
|
||||
" # keep numeric descriptors only\n",
|
||||
" desc = {k: float(v) for k, v in md.items() if v is not None and isinstance(v, (int, float))}\n",
|
||||
" print('qed:', qed, 'num_desc:', len(desc))\n",
|
||||
"\n",
|
||||
" # Upsert molecule with refined qed as a column; optionally also persist `desc` via JSONB/EAV patterns.\n",
|
||||
" with get_session() as s:\n",
|
||||
" m = s.exec(select(Molecule).where(Molecule.smiles=='c1ccccc1O')).first()\n",
|
||||
" if m is None:\n",
|
||||
" m = Molecule(smiles='c1ccccc1O', qed=qed)\n",
|
||||
" else:\n",
|
||||
" m.qed = qed\n",
|
||||
" s.add(m); s.commit(); s.refresh(m)\n",
|
||||
" print('Stored molecule id:', m.id)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
18
notebooks/01_sync_crud.ipynb
Normal file
18
notebooks/01_sync_crud.ipynb
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"cells": [
|
||||
{"cell_type":"markdown","metadata":{},"source":["# 01 — Sync CRUD Tutorial\n\n","Covers create/get/list/update/delete using the kit's helpers. Optional SQLite override for quick demo."]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 0. Install (optional)\n","Uncomment if you haven't installed the package in this env."]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# %pip install -e . pytest\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 1. Optional: use in-memory SQLite for demo\n","Skip this cell if you want to use Postgres via `SQL_*`/`PG*` env vars."]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from sqlmodel_pg_kit import db\n","db.cfg = db.DatabaseConfig(host='', port=0, user='', password='', database=':memory:', sslmode='disable')\n","db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 2. CRUD operations (generic Repository)\n","Define a simple model and use the generic Repository for CRUD."]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from typing import Optional\n","from sqlmodel import SQLModel, Field\n","from sqlmodel_pg_kit import create_all, Repository\n","from sqlmodel_pg_kit.db import get_session\n","\n","class Hero(SQLModel, table=True):\n"," id: Optional[int] = Field(default=None, primary_key=True)\n"," name: str = Field(index=True)\n"," age: Optional[int] = None\n","\n","create_all()\n","repo = Repository(Hero)\n","with get_session() as s:\n"," h = repo.create(s, {'name': 'Alice', 'age': 20})\n"," print(h)\n"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["with get_session() as s:\n"," h2 = repo.get(s, h.id)\n"," print(h2)\n"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["with get_session() as s:\n"," h3 = repo.update(s, h.id, age=21)\n"," print(h3)\n"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["with get_session() as s:\n"," page = repo.list(s, page=1, size=5)\n"," print([(x.id, x.name, x.age) for x in page])\n"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["with get_session() as s:\n"," ok = repo.delete(s, h.id)\n"," print(ok)\n"]}
|
||||
],
|
||||
"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"name": "python", "pygments_lexer": "ipython3"}},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
15
notebooks/02_bulk_and_filters.ipynb
Normal file
15
notebooks/02_bulk_and_filters.ipynb
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{"cell_type":"markdown","metadata":{},"source":["# 02 — Bulk Insert and Filters\n\n","Demonstrates bulk inserts and filtering with SQLModel expressions."]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 0. Install (optional)"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# %pip install -e . pytest\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 1. Optional: SQLite in-memory for demo"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from sqlmodel_pg_kit import db\n","db.cfg = db.DatabaseConfig(host='', port=0, user='', password='', database=':memory:', sslmode='disable')\n","db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 2. Bulk insert and query"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from typing import List, Optional\n","from sqlmodel import select, SQLModel, Field\n","from sqlmodel_pg_kit import create_all, Repository\n","from sqlmodel_pg_kit.db import get_session\n","\n","class Hero(SQLModel, table=True):\n"," id: Optional[int] = Field(default=None, primary_key=True)\n"," name: str = Field(index=True)\n"," age: Optional[int] = None\n","\n","create_all()\n","repo = Repository(Hero)\n","# Clean slate\n","with get_session() as s:\n"," s.exec(select(Hero)) # warmup\n"," s.execute(Hero.__table__.delete())\n"," s.commit()\n"," rows = [\n"," {'name': 'PG Hero', 'age': 1},\n"," {'name': 'PG Hero', 'age': 2},\n"," {'name': 'Bob', 'age': 30},\n"," {'name': 'Carol', 'age': 40},\n"," ]\n"," repo.bulk_insert(s, rows)\n","\n","with get_session() as s:\n"," heroes: List[Hero] = s.exec(select(Hero).where(Hero.name=='PG Hero')).all()\n"," print([(h.id, h.age) for h in heroes])\n"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["with get_session() as s:\n"," res = s.exec(select(Hero).where(Hero.age>=2).order_by(Hero.age.asc())).all()\n","[(h.name, h.age) for h in res]\n"]}
|
||||
],
|
||||
"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"name": "python", "pygments_lexer": "ipython3"}},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
14
notebooks/03_relationships.ipynb
Normal file
14
notebooks/03_relationships.ipynb
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{"cell_type":"markdown","metadata":{},"source":["# 03 — Relationships (Team ↔ Hero)\n\n","Demonstrates one-to-many relationship and eager loading with selectinload."]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 0. Install (optional)"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# %pip install -e . pytest\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 1. Optional: SQLite in-memory for demo"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from sqlmodel_pg_kit import db\n","db.cfg = db.DatabaseConfig(host='', port=0, user='', password='', database=':memory:', sslmode='disable')\n","db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 2. Create Team and Hero, eager load with selectinload"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from typing import List, Optional\n","from sqlalchemy.orm import selectinload\n","from sqlmodel import select, SQLModel, Field, Relationship\n","from sqlmodel_pg_kit import create_all\n","from sqlmodel_pg_kit.db import get_session\n","\n","class Team(SQLModel, table=True):\n"," id: Optional[int] = Field(default=None, primary_key=True)\n"," name: str = Field(index=True)\n"," heroes: List['Hero'] = Relationship(back_populates='team')\n","\n","class Hero(SQLModel, table=True):\n"," id: Optional[int] = Field(default=None, primary_key=True)\n"," name: str = Field(index=True)\n"," age: Optional[int] = None\n"," team_id: Optional[int] = Field(default=None, foreign_key='team.id')\n"," team: Optional[Team] = Relationship(back_populates='heroes')\n","\n","create_all()\n","with get_session() as s:\n"," s.execute(Hero.__table__.delete())\n"," s.execute(Team.__table__.delete())\n"," s.commit()\n"," t = Team(name='Avengers'); s.add(t); s.commit(); s.refresh(t)\n"," s.add(Hero(name='Thor', age=1500, team_id=t.id))\n"," s.add(Hero(name='Hulk', age=49, team_id=t.id))\n"," s.commit()\n","\n","stmt = select(Hero).options(selectinload(Hero.team)).order_by(Hero.id.asc())\n","with get_session() as s:\n"," heroes: List[Hero] = s.exec(stmt).all()\n","[(h.name, h.team.name if h.team else None) for h in heroes]\n"]}
|
||||
],
|
||||
"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"name": "python", "pygments_lexer": "ipython3"}},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
14
notebooks/04_async_crud.ipynb
Normal file
14
notebooks/04_async_crud.ipynb
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{"cell_type":"markdown","metadata":{},"source":["# 04 — Async CRUD Tutorial\n\n","Demonstrates async session usage. Includes optional SQLite async override."]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 0. Install (optional)"]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# %pip install -e . pytest\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 1. Optional: SQLite async override for demo\n","If you don't have Postgres ready, configure an async SQLite engine."]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker\n","from sqlmodel_pg_kit import db, create_all\n","# Keep sync paths on SQLite in-memory for create_all\n","db.cfg = db.DatabaseConfig(host='', port=0, user='', password='', database=':memory:', sslmode='disable')\n","db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n","create_all()\n","# Now override async engine/session to SQLite as well\n","db.async_engine = create_async_engine('sqlite+aiosqlite:///:memory:', echo=False)\n","db.AsyncSessionLocal = async_sessionmaker(db.async_engine, expire_on_commit=False)\n"]},
|
||||
{"cell_type":"markdown","metadata":{},"source":["## 2. Async CRUD\n","Clean table, insert, and read back using async session."]},
|
||||
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["import asyncio\n","from typing import Optional\n","from sqlmodel import select, SQLModel, Field\n","from sqlmodel_pg_kit import AsyncRepository, create_all\n","from sqlmodel_pg_kit.db import get_async_session\n","\n","class Hero(SQLModel, table=True):\n"," id: Optional[int] = Field(default=None, primary_key=True)\n"," name: str = Field(index=True)\n"," age: Optional[int] = None\n","\n","create_all()\n","repo = AsyncRepository(Hero)\n","\n","async def amain():\n"," async with get_async_session() as s:\n"," await s.execute(Hero.__table__.delete())\n"," await s.commit()\n"," await repo.create(s, {'name': 'Async Hero', 'age': 7})\n"," res = await s.execute(select(Hero))\n"," heroes = res.scalars().all()\n"," return [h.name for h in heroes]\n","\n","# Jupyter already runs an event loop, so asyncio.run() would raise RuntimeError; use top-level await.\n","await amain()\n"]}
|
||||
],
|
||||
"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"name": "python", "pygments_lexer": "ipython3"}},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
372
notebooks/05_cheminformatics.ipynb
Normal file
372
notebooks/05_cheminformatics.ipynb
Normal file
@@ -0,0 +1,372 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cheminformatics Tutorial — Molecules, Datasets, CRUD & Joins\n\n",
|
||||
"This notebook is a teaching version of `examples/05_cheminformatics.py`.\n",
|
||||
"It demonstrates:\n",
|
||||
"- Modeling molecules with descriptors (smiles, selfies, qed, sa_score)\n",
|
||||
"- Linking molecules to datasets (many-to-many)\n",
|
||||
"- Dataclass interop for fast inserts\n",
|
||||
"- Common CRUD, filtering, eager loading, and joins\n",
|
||||
"- Optional RDKit + Mordred descriptor computation (if installed)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 0. Environment (micromamba)\n",
|
||||
"In your shell, activate the env before launching Jupyter:\n",
|
||||
"```bash\n",
|
||||
"micromamba activate sqlmodel\n",
|
||||
"jupyter lab # or jupyter notebook\n",
|
||||
"```\n\n",
|
||||
"Optional installs inside Jupyter (uncomment to run):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %pip install -e . pytest\n",
|
||||
"# Optional cheminformatics packages:\n",
|
||||
"# %pip install rdkit-pypi mordred\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Configure database connection\n",
|
||||
"- For a quick smoke test with in-memory SQLite, see the cell below.\n",
|
||||
"- For PostgreSQL, ensure `SQL_*` or `PG*` env vars are set before starting Jupyter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sqlmodel_pg_kit import db, create_all as _create_all\n",
|
||||
"\n",
|
||||
"# QUICK OPTION: Use SQLite in-memory for learning/demo.\n",
|
||||
"# Comment these two lines out if you prefer to use Postgres via environment variables.\n",
|
||||
"db.cfg = db.DatabaseConfig(host='', port=0, user='', password='', database=':memory:', sslmode='disable')\n",
|
||||
"db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n",
|
||||
"_create_all() # create base kit models if any\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Define models & dataclass\n",
|
||||
"We define `Molecule`, `Dataset`, and the link table `MoleculeDataset`.\n",
|
||||
"We also provide a `MoleculeDTO` dataclass to show how to bring computed values\n",
|
||||
"(e.g., from RDKit/Mordred pipelines) into SQLModel quickly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from __future__ import annotations\n",
|
||||
"from dataclasses import dataclass\n",
|
||||
"from datetime import datetime\n",
|
||||
"from typing import List, Optional\n",
|
||||
"\n",
|
||||
"from sqlalchemy.orm import selectinload\n",
|
||||
"from sqlmodel import SQLModel, Field, Relationship, select\n",
|
||||
"from sqlmodel_pg_kit.db import get_session, engine\n",
|
||||
"\n",
|
||||
"class MoleculeDataset(SQLModel, table=True):\n",
|
||||
" molecule_id: int = Field(foreign_key='molecule.id', primary_key=True)\n",
|
||||
" dataset_id: int = Field(foreign_key='dataset.id', primary_key=True)\n",
|
||||
" added_at: datetime = Field(default_factory=datetime.utcnow)\n",
|
||||
"\n",
|
||||
"class Molecule(SQLModel, table=True):\n",
|
||||
" id: Optional[int] = Field(default=None, primary_key=True)\n",
|
||||
" smiles: str = Field(index=True)\n",
|
||||
" selfies: Optional[str] = Field(default=None)\n",
|
||||
" qed: Optional[float] = Field(default=None, index=True)\n",
|
||||
" sa_score: Optional[float] = Field(default=None, index=True)\n",
|
||||
" created_at: datetime = Field(default_factory=datetime.utcnow)\n",
|
||||
" updated_at: datetime = Field(default_factory=datetime.utcnow)\n",
|
||||
" datasets: List['Dataset'] = Relationship(back_populates='molecules', link_model=MoleculeDataset)\n",
|
||||
"\n",
|
||||
"class Dataset(SQLModel, table=True):\n",
|
||||
" id: Optional[int] = Field(default=None, primary_key=True)\n",
|
||||
" name: str = Field(index=True)\n",
|
||||
" molecules: List['Molecule'] = Relationship(back_populates='datasets', link_model=MoleculeDataset)\n",
|
||||
"\n",
|
||||
"@dataclass\n",
|
||||
"class MoleculeDTO:\n",
|
||||
" smiles: str\n",
|
||||
" selfies: Optional[str] = None\n",
|
||||
" qed: Optional[float] = None\n",
|
||||
" sa_score: Optional[float] = None\n",
|
||||
" def to_model(self) -> Molecule:\n",
|
||||
" return Molecule(**vars(self))\n",
|
||||
"\n",
|
||||
"# Create the tables defined above\n",
|
||||
"SQLModel.metadata.create_all(engine)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Clean slate (idempotent runs)\n",
|
||||
"We delete existing rows to make this notebook repeatable."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with get_session() as s:\n",
|
||||
" s.execute(MoleculeDataset.__table__.delete())\n",
|
||||
" s.execute(Molecule.__table__.delete())\n",
|
||||
" s.execute(Dataset.__table__.delete())\n",
|
||||
" s.commit()\n",
|
||||
"'cleaned'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Insert molecules via dataclass\n",
|
||||
"Create a few molecules as you would after computing descriptors upstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mols = [\n",
|
||||
" MoleculeDTO(smiles='CCO', qed=0.45, sa_score=2.1),\n",
|
||||
" MoleculeDTO(smiles='c1ccccc1', qed=0.76, sa_score=3.5),\n",
|
||||
" MoleculeDTO(smiles='CCN(CC)CC', qed=0.62, sa_score=2.8),\n",
|
||||
"]\n",
|
||||
"with get_session() as s:\n",
|
||||
" for dto in mols:\n",
|
||||
" s.add(dto.to_model())\n",
|
||||
" s.commit()\n",
|
||||
"\n",
|
||||
"with get_session() as s:\n",
|
||||
" inserted = s.exec(select(Molecule).order_by(Molecule.id.asc())).all()\n",
|
||||
"[(m.id, m.smiles, m.qed, m.sa_score) for m in inserted]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Create datasets and link molecules\n",
|
||||
"Use a many-to-many link table to assign molecules to `train` or `holdout`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with get_session() as s:\n",
|
||||
" ds_train = Dataset(name='train')\n",
|
||||
" ds_holdout = Dataset(name='holdout')\n",
|
||||
" s.add(ds_train); s.add(ds_holdout); s.commit()\n",
|
||||
" s.refresh(ds_train); s.refresh(ds_holdout)\n",
|
||||
" mol_list: List[Molecule] = s.exec(select(Molecule).order_by(Molecule.id.asc())).all()\n",
|
||||
" links = [\n",
|
||||
" MoleculeDataset(molecule_id=mol_list[0].id, dataset_id=ds_train.id),\n",
|
||||
" MoleculeDataset(molecule_id=mol_list[1].id, dataset_id=ds_train.id),\n",
|
||||
" MoleculeDataset(molecule_id=mol_list[2].id, dataset_id=ds_holdout.id),\n",
|
||||
" ]\n",
|
||||
" s.add_all(links); s.commit()\n",
|
||||
"[(l.molecule_id, l.dataset_id) for l in links]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Update a descriptor (refined QED)\n",
|
||||
"Typical pattern: load → modify → commit → refresh."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"with get_session() as s:\n",
|
||||
" mol = s.exec(select(Molecule).where(Molecule.smiles=='CCO')).one()\n",
|
||||
" mol.qed = 0.50\n",
|
||||
" mol.updated_at = datetime.utcnow()\n",
|
||||
" s.add(mol); s.commit(); s.refresh(mol)\n",
|
||||
"(mol.id, mol.smiles, mol.qed)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Filtering and ordering\n",
|
||||
"Examples: threshold on `qed` and ordering by `sa_score`; prefix search on smiles."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with get_session() as s:\n",
|
||||
" hi_qed = s.exec(select(Molecule).where(Molecule.qed>=0.6).order_by(Molecule.sa_score.asc())).all()\n",
|
||||
" hi_qed_view = [(m.smiles, m.qed, m.sa_score) for m in hi_qed]\n",
|
||||
"\n",
|
||||
"with get_session() as s:\n",
|
||||
" starts_with_cc = s.exec(select(Molecule).where(Molecule.smiles.like('CC%'))).all()\n",
|
||||
" starts_with_cc_view = [m.smiles for m in starts_with_cc]\n",
|
||||
"\n",
|
||||
"hi_qed_view, starts_with_cc_view\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 8. Eager loading relationships (avoid N+1)\n",
|
||||
"Read molecules with their datasets efficiently using `selectinload`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with get_session() as s:\n",
|
||||
" stmt = select(Molecule).options(selectinload(Molecule.datasets)).order_by(Molecule.id.asc())\n",
|
||||
" molecules = s.exec(stmt).all()\n",
|
||||
"[(m.smiles, [d.name for d in m.datasets]) for m in molecules]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 9. Join filtering\n",
|
||||
"Return only molecules that belong to the `train` dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with get_session() as s:\n",
|
||||
" stmt = (select(Molecule)\n",
|
||||
" .join(MoleculeDataset, Molecule.id==MoleculeDataset.molecule_id)\n",
|
||||
" .join(Dataset, Dataset.id==MoleculeDataset.dataset_id)\n",
|
||||
" .where(Dataset.name=='train')\n",
|
||||
" .order_by(Molecule.id.asc()))\n",
|
||||
" train_mols = s.exec(stmt).all()\n",
|
||||
"[m.smiles for m in train_mols]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 10. Delete a molecule\n",
|
||||
"Load → delete → commit; verify remaining molecules."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with get_session() as s:\n",
|
||||
" target = s.exec(select(Molecule).where(Molecule.smiles=='CCN(CC)CC')).one()\n",
|
||||
" s.delete(target); s.commit()\n",
|
||||
" left = s.exec(select(Molecule).order_by(Molecule.id.asc())).all()\n",
|
||||
"[m.smiles for m in left]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 11. Optional: RDKit + Mordred computation\n",
|
||||
"If installed, compute descriptors and update a molecule (e.g., refine QED)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" from rdkit import Chem\n",
|
||||
" from rdkit.Chem import QED\n",
|
||||
" from mordred import Calculator, descriptors\n",
|
||||
" ok = True\n",
|
||||
"except Exception as e:\n",
|
||||
" ok = False\n",
|
||||
" print('RDKit/Mordred not available; skipping.\\n', e)\n",
|
||||
"\n",
|
||||
"if ok:\n",
|
||||
" mol = Chem.MolFromSmiles('c1ccccc1O')\n",
|
||||
" qed_val = float(QED.qed(mol))\n",
|
||||
" calc = Calculator(descriptors, ignore_3D=True)\n",
|
||||
" md = calc(mol)\n",
|
||||
" num_desc = sum(1 for _ in md.items())\n",
|
||||
" print('Computed QED:', qed_val, 'Mordred descriptors:', num_desc)\n",
|
||||
" with get_session() as s:\n",
|
||||
" m = s.exec(select(Molecule).where(Molecule.smiles=='c1ccccc1O')).first()\n",
|
||||
" if m is None:\n",
|
||||
" m = Molecule(smiles='c1ccccc1O', qed=qed_val)\n",
|
||||
" else:\n",
|
||||
" m.qed = qed_val\n",
|
||||
" s.add(m); s.commit(); s.refresh(m)\n",
|
||||
" print((m.id, m.smiles, m.qed))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
172
notebooks/06_csv_import.ipynb
Normal file
172
notebooks/06_csv_import.ipynb
Normal file
@@ -0,0 +1,172 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 06 — CSV → SQLModel → Table\n\n",
|
||||
"Auto-generate a SQLModel from a CSV header, import rows, and query.\n",
|
||||
"Demonstrates SQLite/Postgres switching and simple filtering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 0. Install (optional) and choose backend\n",
|
||||
"- Use SQLite in-memory or file by overriding `db.engine`\n",
|
||||
"- Or rely on Postgres via `SQL_*`/`PG*` env vars"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %pip install -e . pytest\n",
|
||||
"from sqlmodel_pg_kit import db\n",
|
||||
"# Uncomment ONE of the following to use SQLite:\n",
|
||||
"# db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n",
|
||||
"# db.engine = db.create_engine('sqlite:///./demo.db', echo=False)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Build model from CSV and insert rows\n",
|
||||
"We use the sample CSV at `data/molecules_sample.csv`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Parameters\n",
|
||||
"Set CSV path, optional class/table names, null sentinels, type overrides, and column renames."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# CSV path and optional names\n",
|
||||
"csv_path = 'data/molecules_sample.csv'\n",
|
||||
"class_name_override = None # e.g., 'Molecules'\n",
|
||||
"table_name_override = None # e.g., 'molecules'\n",
|
||||
"\n",
|
||||
"# Null sentinels (in addition to default: '', na, nan, none, null)\n",
|
||||
"custom_nulls = ['N/A']\n",
|
||||
"\n",
|
||||
"# Type overrides: name -> type (bool/int/float/str)\n",
|
||||
"type_overrides = { # e.g., 'count': int, 'qed': float\n",
|
||||
" # 'count': int,\n",
|
||||
"} \n",
|
||||
"\n",
|
||||
"# Rename mappings: original header -> new name before sanitization\n",
|
||||
"rename_map = { # e.g., 'sa': 'sa_score'\n",
|
||||
" # 'sa': 'sa_score',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Columns to index after import (B-Tree)\n",
|
||||
"index_columns = ['qed', 'sa_score']\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sqlmodel_pg_kit.csv_import import build_model_from_csv, insert_rows, create_indexes\n",
|
||||
"from sqlmodel_pg_kit import create_all\n",
|
||||
"from sqlmodel_pg_kit.db import get_session\n",
|
||||
"\n",
|
||||
"spec, rows = build_model_from_csv(\n",
|
||||
" csv_path,\n",
|
||||
" class_name=class_name_override,\n",
|
||||
" table_name=table_name_override,\n",
|
||||
" null_values=custom_nulls,\n",
|
||||
" type_overrides=type_overrides,\n",
|
||||
" rename_map=rename_map,\n",
|
||||
" warn_on_nulls=True,\n",
|
||||
")\n",
|
||||
"spec.model, spec.table_name, spec.columns\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create table and insert\n",
|
||||
"create_all()\n",
|
||||
"with get_session() as s:\n",
|
||||
" n = insert_rows(spec.model, rows, s)\n",
|
||||
"n\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optionally create B-Tree indexes on selected columns\n",
|
||||
"from sqlmodel_pg_kit import db\n",
|
||||
"created = create_indexes(spec.model, index_columns, db.engine)\n",
|
||||
"created\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Query a few rows and filters\n",
|
||||
"Use SQLModel/SQLAlchemy expressions to filter by inferred columns."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sqlmodel import select\n",
|
||||
"\n",
|
||||
"with get_session() as s:\n",
|
||||
" all_rows = s.exec(select(spec.model).order_by(spec.model.id.asc())).all()\n",
|
||||
"[(r.id, r.smiles, r.qed, r.sa_score, r.active) for r in all_rows]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Filter: high QED and active\n",
|
||||
"with get_session() as s:\n",
|
||||
" hi = s.exec(select(spec.model).where((spec.model.qed>=0.6) & (spec.model.active==True))).all()\n",
|
||||
"[(r.smiles, r.qed, r.active) for r in hi]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user