#!/usr/bin/env python3
"""Load a MySQL dump (``dump.sql``) into a DuckDB database file.

The dump text is rewritten with regular expressions into SQL that avoids
MySQL-only syntax, split into individual statements, and executed one by
one against ``ambienteja.duckdb``.  Statements that fail are collected and
reported at the end instead of aborting the import.
"""
import re

# Tokens that matter when looking for statement boundaries.  Quoted tokens
# are matched only so that a ';' or '--' inside them is not misread.
_SQL_TOKEN_RE = re.compile(
    r"""
      '[^'\\]*(?:\\.[^'\\]*)*'   # single-quoted string, honouring backslash escapes
    | "[^"]*"                    # double-quoted identifier
    | --[^\n]*                   # line comment
    | /\*.*?\*/                  # block comment
    | ;                          # statement terminator
    """,
    re.VERBOSE | re.DOTALL,
)

# Secondary index lines inside CREATE TABLE.  Identifiers are already
# double-quoted when this is applied.  PRIMARY KEY lines do not match.
_INDEX_LINE_RE = re.compile(
    r'(?:(?:UNIQUE|FULLTEXT|SPATIAL)\s+)?KEY\s+"', re.IGNORECASE
)


def _clean_create_table(match):
    """Drop secondary index lines from one CREATE TABLE statement."""
    kept = [
        line
        for line in match.group(0).split('\n')
        if not _INDEX_LINE_RE.match(line.strip().rstrip(','))
    ]
    cleaned = '\n'.join(kept)
    # Removing the last index line can leave a dangling comma on the line
    # just before the closing parenthesis.
    return re.sub(r',\s*\n(\s*\))', r'\n\1', cleaned)


def convert_mysql_to_duckdb(sql):
    """Rewrite the text of a MySQL dump into DuckDB-oriented SQL.

    Args:
        sql: Full text of the dump.

    Returns:
        The rewritten SQL text, still one string with ';' terminators.

    Note:
        Every rewrite is purely textual and is applied to the whole input,
        so words such as ``unsigned`` or ``datetime`` are also rewritten
        when they occur inside string literals.  MySQL backslash escapes in
        string literals are left untouched.
    """
    # Remove MySQL-specific conditional comments /*!...*/
    sql = re.sub(r'/\*!.*?\*/', '', sql, flags=re.DOTALL)

    # Remove SET statements
    sql = re.sub(r'^SET\s+.*?;', '', sql, flags=re.MULTILINE | re.IGNORECASE)

    # Remove LOCK/UNLOCK TABLES
    sql = re.sub(r'^LOCK TABLES.*?;', '', sql,
                 flags=re.MULTILINE | re.IGNORECASE)
    sql = re.sub(r'^UNLOCK TABLES.*?;', '', sql,
                 flags=re.MULTILINE | re.IGNORECASE)

    # Keep DROP TABLE IF EXISTS, but with a double-quoted table name
    sql = re.sub(r'^DROP TABLE IF EXISTS\s+`(\w+)`;',
                 r'DROP TABLE IF EXISTS "\1";', sql,
                 flags=re.MULTILINE | re.IGNORECASE)

    # Replace backtick identifiers with double-quoted identifiers
    sql = re.sub(r'`(\w+)`', r'"\1"', sql)

    # Remove trailing table options (ENGINE=..., AUTO_INCREMENT=...,
    # DEFAULT CHARSET=...) that follow the closing parenthesis
    sql = re.sub(r'\)\s*ENGINE\s*=\s*\w+[^;]*;', r');', sql,
                 flags=re.IGNORECASE)

    # Remove KEY / UNIQUE KEY / FULLTEXT KEY / SPATIAL KEY definitions from
    # each CREATE TABLE statement, keeping PRIMARY KEY
    sql = re.sub(r'CREATE TABLE.*?;', _clean_create_table, sql,
                 flags=re.DOTALL | re.IGNORECASE)

    # Drop display widths: int(N) -> int, bigint(N) -> bigint, ...
    sql = re.sub(r'\b(int|bigint|tinyint|smallint|mediumint)\(\d+\)', r'\1',
                 sql, flags=re.IGNORECASE)

    # Map MySQL type names to the names used for the DuckDB schema
    sql = re.sub(r'\blongtext\b', 'text', sql, flags=re.IGNORECASE)
    sql = re.sub(r'\bmediumtext\b', 'text', sql, flags=re.IGNORECASE)
    sql = re.sub(r'\bmediumblob\b', 'blob', sql, flags=re.IGNORECASE)
    sql = re.sub(r'\bdatetime\b', 'timestamp', sql, flags=re.IGNORECASE)

    # Remove AUTO_INCREMENT from column definitions
    sql = re.sub(r'\s+AUTO_INCREMENT\b', '', sql, flags=re.IGNORECASE)

    # Remove CHARACTER SET and COLLATE clauses from column definitions
    sql = re.sub(r'\s+CHARACTER SET\s+\w+', '', sql, flags=re.IGNORECASE)
    sql = re.sub(r'\s+COLLATE\s+\w+', '', sql, flags=re.IGNORECASE)

    # Drop MySQL's trailing "unsigned" modifier; the column keeps its
    # signed base type
    sql = re.sub(r'\s+unsigned\b', '', sql, flags=re.IGNORECASE)

    return sql


def split_sql_statements(sql):
    """Split SQL text into statements at top-level semicolons.

    Semicolons inside quoted strings, quoted identifiers and comments do
    not end a statement.  Comments outside quotes are removed, so a
    statement that follows a comment banner is kept and a chunk made only
    of comments yields nothing.

    Args:
        sql: SQL text containing zero or more ';'-terminated statements.

    Returns:
        A list of non-empty statements, stripped of surrounding whitespace
        and without the terminating ';'.
    """
    statements = []
    pending = []  # text fragments of the statement being assembled
    cursor = 0    # start of the text not yet copied into `pending`

    def flush():
        statement = ''.join(pending).strip()
        if statement:
            statements.append(statement)
        pending.clear()

    for match in _SQL_TOKEN_RE.finditer(sql):
        token = match.group(0)
        if token == ';':
            pending.append(sql[cursor:match.start()])
            cursor = match.end()
            flush()
        elif token.startswith(('--', '/*')):
            # Replace the comment with a space so neighbours stay separated.
            pending.append(sql[cursor:match.start()])
            pending.append(' ')
            cursor = match.end()
        # Quoted tokens stay in place; they are copied with the next slice.

    pending.append(sql[cursor:])
    flush()
    return statements


def main():
    """Convert ``dump.sql`` and execute it against ``ambienteja.duckdb``."""
    # Imported here so the text helpers above can be used without the driver.
    import duckdb

    print("Reading dump.sql...")
    with open('dump.sql', 'r', encoding='utf-8') as f:
        sql = f.read()

    print("Converting MySQL SQL to DuckDB-compatible SQL...")
    converted = convert_mysql_to_duckdb(sql)

    # Split into individual statements
    statements = split_sql_statements(converted)

    print("Connecting to ambienteja.duckdb...")
    con = duckdb.connect('ambienteja.duckdb')

    errors = []
    ok = 0
    try:
        for stmt in statements:
            try:
                con.execute(stmt)
            except Exception as e:
                # Best effort: record the failure and keep importing.
                errors.append((stmt[:80].replace('\n', ' '), str(e)))
            else:
                ok += 1
    finally:
        con.close()

    print(f"\nDone: {ok} statements executed successfully.")
    if errors:
        print(f"{len(errors)} errors:")
        for stmt_preview, err in errors[:20]:
            print(f"  STMT: {stmt_preview}")
            print(f"  ERR:  {err}\n")


if __name__ == '__main__':
    main()