#!/usr/bin/env python3
import re
import duckdb

# Comments and quoted tokens, scanned left to right so that a quote character
# inside a comment (or a "--" inside a string) is never misread as the start
# of the other kind of token.
_QUOTED_OR_COMMENT_RE = re.compile(
    r"--[^\n]*"                        # line comment (kept verbatim)
    r"|/\*.*?\*/"                      # block comment, incl. /*! ... */
    r"|(?P<quoted>"
    r"'[^'\\]*(?:\\.[^'\\]*)*'"        # single-quoted string, backslash escapes
    r'|"[^"\\]*(?:\\.[^"\\]*)*"'       # double-quoted string or identifier
    r"|`[^`]*`"                        # backtick-quoted identifier
    r")",
    re.DOTALL,
)
_PLACEHOLDER_RE = re.compile(r'\x00(\d+)\x00')

# MySQL integer types followed by UNSIGNED -> DuckDB unsigned integer types.
_UNSIGNED_INT_TYPES = {
    'tinyint': 'utinyint',
    'smallint': 'usmallint',
    'mediumint': 'uinteger',
    'int': 'uinteger',
    'bigint': 'ubigint',
}
_UNSIGNED_INT_RE = re.compile(
    r'\b(tinyint|smallint|mediumint|bigint|int)\s+unsigned\b', re.IGNORECASE
)

# MySQL-only type names -> the DuckDB type used in their place.
_TYPE_RENAMES = {
    'tinytext': 'text',
    'mediumtext': 'text',
    'longtext': 'text',
    'tinyblob': 'blob',
    'mediumblob': 'blob',
    'longblob': 'blob',
    'mediumint': 'integer',
    'datetime': 'timestamp',
}
_TYPE_RENAME_RE = re.compile(
    r'\b(' + '|'.join(_TYPE_RENAMES) + r')\b', re.IGNORECASE
)

# Secondary index lines inside CREATE TABLE; the name is a masked identifier.
_INDEX_LINE_RE = re.compile(
    r'(?:(?:UNIQUE|FULLTEXT|SPATIAL)\s+)?(?:KEY|INDEX)\s+\x00', re.IGNORECASE
)


def _mask_quoted(sql):
    """Replace every quoted token in *sql* with an opaque placeholder.

    Returns ``(masked_sql, tokens)`` where ``tokens[i]`` is the original text
    of placeholder ``i``. Comments are recognised only so that quotes inside
    them are ignored; they are left in the text unchanged.
    """
    tokens = []

    def stash(match):
        if match.group('quoted') is None:
            return match.group(0)
        tokens.append(match.group(0))
        return f'\x00{len(tokens) - 1}\x00'

    return _QUOTED_OR_COMMENT_RE.sub(stash, sql), tokens


def _unmask_quoted(sql, tokens):
    """Put masked tokens back, turning backtick identifiers into "..." ones."""
    def restore(match):
        index = int(match.group(1))
        if index >= len(tokens):
            # Not one of our placeholders (stray NUL bytes in the input).
            return match.group(0)
        token = tokens[index]
        if token.startswith('`'):
            return '"' + token[1:-1].replace('"', '""') + '"'
        return token

    return _PLACEHOLDER_RE.sub(restore, sql)


def _drop_index_lines(match):
    """Remove secondary index lines from one CREATE TABLE statement."""
    kept = [
        line for line in match.group(0).split('\n')
        if not _INDEX_LINE_RE.match(line.strip())
    ]
    # Dropping the last entries can leave a dangling comma before ")".
    return re.sub(r',\s*\n(\s*\))', r'\n\1', '\n'.join(kept))


def convert_mysql_to_duckdb(sql):
    """Rewrite a MySQL dump so that DuckDB can execute it.

    String literals and quoted identifiers are masked before any rewrite and
    restored afterwards, so row data and column names are never altered by
    the keyword substitutions, and semicolons inside literals cannot end a
    statement match early.

    Rewrites applied to the remaining SQL text:
      * ``/*! ... */`` conditional comments, ``SET`` and ``LOCK``/``UNLOCK
        TABLES`` statements are removed;
      * table options after the closing parenthesis (``ENGINE=...``) are
        removed;
      * ``KEY`` / ``INDEX`` lines (plain, ``UNIQUE``, ``FULLTEXT``,
        ``SPATIAL``) are removed from ``CREATE TABLE``; ``PRIMARY KEY`` stays;
      * integer display widths are dropped, ``<int type> unsigned`` becomes
        the matching DuckDB unsigned type, MySQL-only type names are renamed;
      * ``AUTO_INCREMENT``, ``CHARACTER SET`` and ``COLLATE`` are removed;
      * backtick identifiers become double-quoted identifiers.

    NOTE: the contents of string literals are passed through untouched, so
    MySQL backslash escapes inside them are not translated.

    Args:
        sql: Text of the MySQL dump.

    Returns:
        The converted SQL text.
    """
    masked, tokens = _mask_quoted(sql)

    # Statements and comments that only make sense to a MySQL server.
    masked = re.sub(r'/\*!.*?\*/', '', masked, flags=re.DOTALL)
    masked = re.sub(r'^SET\s+.*?;', '', masked,
                    flags=re.MULTILINE | re.IGNORECASE)
    masked = re.sub(r'^(?:UN)?LOCK TABLES.*?;', '', masked,
                    flags=re.MULTILINE | re.IGNORECASE)

    # Trailing table options: ENGINE=..., AUTO_INCREMENT=..., CHARSET=...
    masked = re.sub(r'\)\s*ENGINE\s*=\s*\w+[^;]*;', ');', masked,
                    flags=re.IGNORECASE)

    masked = re.sub(r'CREATE TABLE.*?;', _drop_index_lines, masked,
                    flags=re.DOTALL | re.IGNORECASE)

    # Display widths first, so "int(10) unsigned" reads as "int unsigned".
    masked = re.sub(r'\b(int|bigint|tinyint|smallint|mediumint)\(\d+\)',
                    r'\1', masked, flags=re.IGNORECASE)
    masked = _UNSIGNED_INT_RE.sub(
        lambda m: _UNSIGNED_INT_TYPES[m.group(1).lower()], masked
    )
    # UNSIGNED on non-integer types (decimal, float, ...) has no counterpart.
    masked = re.sub(r'\s+unsigned\b', '', masked, flags=re.IGNORECASE)
    masked = _TYPE_RENAME_RE.sub(
        lambda m: _TYPE_RENAMES[m.group(1).lower()], masked
    )

    # Column attributes with no DuckDB counterpart.
    masked = re.sub(r'\s+AUTO_INCREMENT\b', '', masked, flags=re.IGNORECASE)
    masked = re.sub(r'\s+CHARACTER SET\s+\w+', '', masked, flags=re.IGNORECASE)
    masked = re.sub(r'\s+COLLATE\s+\w+', '', masked, flags=re.IGNORECASE)

    return _unmask_quoted(masked, tokens)


# Tokens that matter when cutting a script into statements: comments, quoted
# text (inside which ";" is data, not a terminator) and the terminator itself.
# Single-quoted strings follow MySQL dump rules, i.e. backslash escapes.
_STATEMENT_TOKEN_RE = re.compile(
    r"(?P<comment>--[^\n]*|/\*.*?\*/)"
    r"|'[^'\\]*(?:\\.[^'\\]*)*'"
    r'|"[^"]*"'
    r"|(?P<end>;)",
    re.DOTALL,
)


def _split_statements(sql):
    """Split *sql* into statements on semicolons outside quotes and comments.

    Comments are replaced by a single space, so a statement that follows a
    comment is kept and a chunk holding nothing but comments is skipped.
    Returns the non-empty statements, stripped, without the trailing ";".
    """
    statements = []
    pieces = []
    cursor = 0

    def flush():
        text = ''.join(pieces).strip()
        pieces.clear()
        if text:
            statements.append(text)

    for token in _STATEMENT_TOKEN_RE.finditer(sql):
        pieces.append(sql[cursor:token.start()])
        cursor = token.end()
        if token.group('end'):
            flush()
        elif token.group('comment'):
            pieces.append(' ')
        else:
            pieces.append(token.group(0))  # quoted text, kept verbatim
    pieces.append(sql[cursor:])
    flush()
    return statements


def main():
    """Convert ``dump.sql`` and load it into ``ambienteja.duckdb``.

    Reads ``dump.sql`` from the current directory, converts it with
    ``convert_mysql_to_duckdb``, executes the resulting statements one by one
    and prints how many succeeded, followed by up to 20 failures.
    """
    print("Reading dump.sql...")
    with open('dump.sql', 'r', encoding='utf-8') as f:
        sql = f.read()

    print("Converting MySQL SQL to DuckDB-compatible SQL...")
    converted = convert_mysql_to_duckdb(sql)

    # Split into individual statements
    statements = _split_statements(converted)

    print("Connecting to ambienteja.duckdb...")
    con = duckdb.connect('ambienteja.duckdb')

    errors = []
    ok = 0
    try:
        for stmt in statements:
            try:
                con.execute(stmt)
            except Exception as e:
                # Best effort on purpose: record the failure and carry on
                # with the remaining statements.
                errors.append((stmt[:80].replace('\n', ' '), str(e)))
            else:
                ok += 1
    finally:
        con.close()

    print(f"\nDone: {ok} statements executed successfully.")
    if errors:
        print(f"{len(errors)} errors:")
        for stmt_preview, err in errors[:20]:
            print(f"  STMT: {stmt_preview}")
            print(f"  ERR:  {err}\n")
        if len(errors) > 20:
            print(f"  ... {len(errors) - 20} more errors not shown.")


# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()