173 lines
5.1 KiB
Plaintext
173 lines
5.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# 06 — CSV → SQLModel → Table\n\n",
|
|
"Auto-generate a SQLModel from a CSV header, import rows, and query.\n",
|
|
"Demonstrates SQLite/Postgres switching and simple filtering."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 0. Install (optional) and choose backend\n",
|
|
"- Use SQLite in-memory or file by overriding `db.engine`\n",
|
|
"- Or rely on Postgres via `SQL_*`/`PG*` env vars"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# %pip install -e . pytest\n",
|
|
"from sqlmodel_pg_kit import db\n",
|
|
"# Uncomment ONE of the following to use SQLite:\n",
|
|
"# db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n",
|
|
"# db.engine = db.create_engine('sqlite:///./demo.db', echo=False)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 1. Build model from CSV and insert rows\n",
|
|
"We use the sample CSV at `data/molecules_sample.csv`."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Parameters\n",
|
|
"Set CSV path, optional class/table names, null sentinels, type overrides, and column renames."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# CSV path and optional names\n",
|
|
"csv_path = 'data/molecules_sample.csv'\n",
|
|
"class_name_override = None # e.g., 'Molecules'\n",
|
|
"table_name_override = None # e.g., 'molecules'\n",
|
|
"\n",
|
|
"# Null sentinels (in addition to default: '', na, nan, none, null)\n",
|
|
"custom_nulls = ['N/A']\n",
|
|
"\n",
|
|
"# Type overrides: name -> type (bool/int/float/str)\n",
|
|
"type_overrides = { # e.g., 'count': int, 'qed': float\n",
|
|
" # 'count': int,\n",
|
|
"} \n",
|
|
"\n",
|
|
"# Rename mappings: original header -> new name before sanitization\n",
|
|
"rename_map = { # e.g., 'sa': 'sa_score'\n",
|
|
" # 'sa': 'sa_score',\n",
|
|
"}\n",
|
|
"\n",
|
|
"# Columns to index after import (B-Tree)\n",
|
|
"index_columns = ['qed', 'sa_score']\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sqlmodel_pg_kit.csv_import import build_model_from_csv, insert_rows, create_indexes\n",
|
|
"from sqlmodel_pg_kit import create_all\n",
|
|
"from sqlmodel_pg_kit.db import get_session\n",
|
|
"\n",
|
|
"spec, rows = build_model_from_csv(\n",
|
|
" csv_path,\n",
|
|
" class_name=class_name_override,\n",
|
|
" table_name=table_name_override,\n",
|
|
" null_values=custom_nulls,\n",
|
|
" type_overrides=type_overrides,\n",
|
|
" rename_map=rename_map,\n",
|
|
" warn_on_nulls=True,\n",
|
|
")\n",
|
|
"spec.model, spec.table_name, spec.columns\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create table and insert\n",
|
|
"create_all()\n",
|
|
"with get_session() as s:\n",
|
|
" n = insert_rows(spec.model, rows, s)\n",
|
|
"n\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Optionally create B-Tree indexes on selected columns\n",
|
|
"from sqlmodel_pg_kit import db\n",
|
|
"created = create_indexes(spec.model, index_columns, db.engine)\n",
|
|
"created\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 2. Query a few rows and filters\n",
|
|
"Use SQLModel/SQLAlchemy expressions to filter by inferred columns."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sqlmodel import select\n",
|
|
"\n",
|
|
"with get_session() as s:\n",
|
|
" all_rows = s.exec(select(spec.model).order_by(spec.model.id.asc())).all()\n",
|
|
"[(r.id, r.smiles, r.qed, r.sa_score, r.active) for r in all_rows]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Filter: high QED and active\n",
|
|
"with get_session() as s:\n",
|
|
" hi = s.exec(select(spec.model).where((spec.model.qed>=0.6) & (spec.model.active==True))).all()\n",
|
|
"[(r.smiles, r.qed, r.active) for r in hi]\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"pygments_lexer": "ipython3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|