feat: add LLM SQL query assistant and dataset sampler

- ask.py: Python script that queries Base dos Dados in natural language using Gemini;
  it generates and executes DuckDB SQL from Portuguese questions
- ask/ (Rust): CLI companion for the SQL query assistant with system prompt
- sample_datasets.py: samples parquet files from S3 into a local DuckDB for exploration
- sample_datasets/ (Rust): CLI for dataset sampling
- context/: LLM context bundle (schemas, join keys, file tree) for query generation
This commit is contained in:
2026-03-28 11:23:51 +01:00
parent 6801db427e
commit b5d84e3556
13 changed files with 156445 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
# Cargo manifest for the `sample_datasets` crate (a single-binary Rust CLI).
[package]
name = "sample_datasets"
version = "0.1.0"
edition = "2021"
# One explicit binary target, built from src/main.rs and named after the package.
[[bin]]
name = "sample_datasets"
path = "src/main.rs"
[dependencies]
# DuckDB bindings with the `bundled` feature enabled.
# NOTE(review): `bundled` presumably compiles DuckDB from source instead of
# linking a system library — confirm against the duckdb crate docs.
duckdb = { version = "1", features = ["bundled"] }
# Exact-version pin (`=`), with the `prettyprint` feature enabled.
# NOTE(review): the pin presumably keeps arrow in lockstep with the arrow
# version the duckdb crate depends on — confirm before bumping either one.
arrow = { version = "=58.0.0", features = ["prettyprint"] }
# NOTE(review): presumably used to load settings from a `.env` file — verify in src/main.rs.
dotenvy = "0.15"
# NOTE(review): presumably installs a Ctrl-C handler — verify in src/main.rs.
ctrlc = "3"
# NOTE(review): presumably the error type used by the binary — verify in src/main.rs.
anyhow = "1"