{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 06 — CSV → SQLModel → Table\n\n", "Auto-generate a SQLModel from a CSV header, import rows, and query.\n", "Demonstrates SQLite/Postgres switching and simple filtering." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 0. Install (optional) and choose backend\n", "- Use SQLite in-memory or file by overriding `db.engine`\n", "- Or rely on Postgres via `SQL_*`/`PG*` env vars" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# %pip install -e . pytest\n", "from sqlmodel_pg_kit import db\n", "# Uncomment ONE of the following to use SQLite:\n", "# db.engine = db.create_engine('sqlite:///:memory:', echo=False)\n", "# db.engine = db.create_engine('sqlite:///./demo.db', echo=False)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Build model from CSV and insert rows\n", "We use the sample CSV at `data/molecules_sample.csv`." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Parameters\n", "Set CSV path, optional class/table names, null sentinels, type overrides, and column renames." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# CSV path and optional names\n", "csv_path = 'data/molecules_sample.csv'\n", "class_name_override = None # e.g., 'Molecules'\n", "table_name_override = None # e.g., 'molecules'\n", "\n", "# Null sentinels (in addition to default: '', na, nan, none, null)\n", "custom_nulls = ['N/A']\n", "\n", "# Type overrides: name -> type (bool/int/float/str)\n", "type_overrides = { # e.g., 'count': int, 'qed': float\n", " # 'count': int,\n", "} \n", "\n", "# Rename mappings: original header -> new name before sanitization\n", "rename_map = { # e.g., 'sa': 'sa_score'\n", " # 'sa': 'sa_score',\n", "}\n", "\n", "# Columns to index after import (B-Tree)\n", "index_columns = ['qed', 'sa_score']\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sqlmodel_pg_kit.csv_import import build_model_from_csv, insert_rows, create_indexes\n", "from sqlmodel_pg_kit import create_all\n", "from sqlmodel_pg_kit.db import get_session\n", "\n", "spec, rows = build_model_from_csv(\n", " csv_path,\n", " class_name=class_name_override,\n", " table_name=table_name_override,\n", " null_values=custom_nulls,\n", " type_overrides=type_overrides,\n", " rename_map=rename_map,\n", " warn_on_nulls=True,\n", ")\n", "spec.model, spec.table_name, spec.columns\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create table and insert\n", "create_all()\n", "with get_session() as s:\n", " n = insert_rows(spec.model, rows, s)\n", "n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Optionally create B-Tree indexes on selected columns\n", "from sqlmodel_pg_kit import db\n", "created = create_indexes(spec.model, index_columns, db.engine)\n", "created\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Query a few rows and filters\n", "Use SQLModel/SQLAlchemy expressions to filter by inferred columns." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sqlmodel import select\n", "\n", "with get_session() as s:\n", " all_rows = s.exec(select(spec.model).order_by(spec.model.id.asc())).all()\n", "[(r.id, r.smiles, r.qed, r.sa_score, r.active) for r in all_rows]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Filter: high QED and active\n", "with get_session() as s:\n", " hi = s.exec(select(spec.model).where((spec.model.qed>=0.6) & (spec.model.active==True))).all()\n", "[(r.smiles, r.qed, r.active) for r in hi]\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 5 }