Files
sqlmodel-pg-kit/examples/06_csv_to_sqlmodel.py
2025-08-17 22:18:45 +08:00

121 lines
3.7 KiB
Python

"""
Import a CSV into the database by auto-generating a SQLModel class.
Usage:
uv run python examples/06_csv_to_sqlmodel.py --csv path/to/file.csv \
[--class-name MyTable] [--table-name my_table] \
[--sqlite-memory | --sqlite FILE]
If neither --sqlite-* is given, the script uses Postgres via SQL_* / PG* env vars.
"""
from __future__ import annotations
import argparse
from typing import Optional
from sqlmodel_pg_kit import create_all
from sqlmodel_pg_kit.csv_import import build_model_from_csv, insert_rows, create_indexes
from sqlmodel_pg_kit import db
from sqlmodel_pg_kit.db import get_session
def parse_args() -> argparse.Namespace:
ap = argparse.ArgumentParser("csv-to-sqlmodel")
ap.add_argument("--csv", required=True, help="Path to CSV file")
ap.add_argument("--class-name", help="Override generated SQLModel class name (default: CSV stem)")
ap.add_argument("--table-name", help="Override table name (default: snake_case of class name)")
g = ap.add_mutually_exclusive_group()
g.add_argument("--sqlite-memory", action="store_true", help="Use in-memory SQLite")
g.add_argument("--sqlite", help="Use SQLite file path (e.g., ./demo.db)")
ap.add_argument(
"--null",
action="append",
default=[],
help="Add custom null sentinel (repeatable). Default includes '',na,nan,none,null",
)
ap.add_argument(
"--type",
action="append",
default=[],
help="Type override mapping NAME=TYPE (TYPE in bool,int,float,str). Repeatable.",
)
ap.add_argument(
"--rename",
action="append",
default=[],
help="Rename mapping OLD=NEW for columns before sanitization. Repeatable.",
)
ap.add_argument(
"--index",
action="append",
default=[],
help="Create B-Tree index on column (repeatable).",
)
return ap.parse_args()
def maybe_override_engine(args: argparse.Namespace) -> None:
if args.sqlite_memory:
db.engine = db.create_engine("sqlite:///:memory:", echo=False)
elif args.sqlite:
db.engine = db.create_engine(f"sqlite:///{args.sqlite}", echo=False)
def main() -> None:
args = parse_args()
maybe_override_engine(args)
# Parse mappings
def parse_kv(items):
m = {}
for it in items:
if "=" not in it:
raise SystemExit(f"Invalid mapping '{it}', expected NAME=VALUE")
k, v = it.split("=", 1)
m[k.strip()] = v.strip()
return m
rename_map = parse_kv(args.rename)
type_map_raw = parse_kv(args.type)
type_map = {}
for k, v in type_map_raw.items():
v_lower = v.lower()
if v_lower == "bool":
type_map[k] = bool
elif v_lower == "int":
type_map[k] = int
elif v_lower == "float":
type_map[k] = float
elif v_lower == "str":
type_map[k] = str
else:
raise SystemExit(f"Unsupported type '{v}' for column '{k}' (use bool,int,float,str)")
spec, rows = build_model_from_csv(
args.csv,
class_name=args.class_name,
table_name=args.table_name,
null_values=args.null or None,
type_overrides=type_map or None,
rename_map=rename_map or None,
warn_on_nulls=True,
)
# Create table for the generated model
create_all()
# Insert rows
with get_session() as s:
n = insert_rows(spec.model, rows, s)
print(f"Created table '{spec.table_name}' and inserted {n} rows.")
# Create indexes if requested
if args.index:
created = create_indexes(spec.model, args.index, db.engine)
print("Created indexes:", created)
if __name__ == "__main__":
main()