Deploy
- .dockerignore +45 -0
- .gitignore +45 -0
- Dockerfile +23 -47
- README.md +1 -1
- database_api.py +426 -0
- main.py +372 -169
- requirements.txt +3 -2
- test_api.py +246 -0
.dockerignore
ADDED
@@ -0,0 +1,45 @@
+# .dockerignore
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+env/
+.env
+.venv/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+.pytest_cache/
+.mypy_cache/
+.nox/
+.tox/
+.coverage
+.coverage.*
+coverage.xml
+htmlcov/
+.hypothesis/
+
+*.db
+*.db.wal
+*.log
+*.sqlite
+*.sqlite3
+
+# Ignore specific generated files if needed
+api_database.db
+api_database.db.wal
+my_duckdb_api_db.db
+my_duckdb_api_db.db.wal
+exported_db/
+duckdb_api_exports/ # Don't copy local temp exports
+
+# OS-specific files
+.DS_Store
+Thumbs.db
+
+# IDE files
+.idea/
+.vscode/
.gitignore
ADDED
@@ -0,0 +1,45 @@
+# .dockerignore
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+env/
+.env
+.venv/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+.pytest_cache/
+.mypy_cache/
+.nox/
+.tox/
+.coverage
+.coverage.*
+coverage.xml
+htmlcov/
+.hypothesis/
+
+*.db
+*.db.wal
+*.log
+*.sqlite
+*.sqlite3
+
+# Ignore specific generated files if needed
+api_database.db
+api_database.db.wal
+my_duckdb_api_db.db
+my_duckdb_api_db.db.wal
+exported_db/
+duckdb_api_exports/ # Don't copy local temp exports
+
+# OS-specific files
+.DS_Store
+Thumbs.db
+
+# IDE files
+.idea/
+.vscode/
Dockerfile
CHANGED
@@ -1,57 +1,33 @@
-#
-FROM python:3.10-slim
-#
-#
-useradd --system --uid ${USER_ID} --gid appgroup --shell /sbin/nologin --create-home appuser
-# Set the working directory
 WORKDIR /app
-#
-# DuckDB UI often uses ~/.duckdb (which will be /home/appuser/.duckdb)
-# Ensure these are owned by the user *before* VOLUME instruction
-RUN mkdir -p /app/data /home/appuser/.duckdb && \
-    chown -R ${USER_ID}:${GROUP_ID} /app /home/appuser/.duckdb
-# Switch context to the non-root user early for subsequent RUN/COPY commands
-USER appuser
-# Copy requirements file (as appuser)
 COPY requirements.txt .
-# Install dependencies
-#
-# Copy application code
-COPY
-#
-# These paths MUST match the directories the 'appuser' process will write to.
-# Note: We created and chowned these earlier.
-VOLUME /app/data
-VOLUME /home/appuser/.duckdb
-# --- End Define Volumes ---
-# Expose ports
 EXPOSE 8000
-#
-# Ensure Python user packages are in the path
-ENV PATH="/home/appuser/.local/bin:${PATH}"
-# Set HOME so things like ~/.duckdb resolve correctly
-ENV HOME=/home/appuser
-# Command to run the application (now runs as appuser)
-# No chmod needed here. Ownership was handled during build.
-CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+# Dockerfile
+
+# 1. Choose a base Python image
+# Using a specific version is recommended for reproducibility.
+# The '-slim' variant is smaller.
+FROM python:3.12-slim
+
+# 2. Set environment variables (optional but good practice)
+ENV PYTHONDONTWRITEBYTECODE 1
+ENV PYTHONUNBUFFERED 1
+
+# 3. Set the working directory inside the container
 WORKDIR /app
+
+# 4. Copy only the requirements file first to leverage Docker cache
 COPY requirements.txt .
+
+# 5. Install dependencies
+# --no-cache-dir makes the image smaller
+# --upgrade pip ensures we have the latest pip
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+
+# 6. Copy the rest of the application code into the working directory
+COPY . .
+
+# 7. Expose the port the app runs on (uvicorn default is 8000)
 EXPOSE 8000
+
+# 8. Define the default command to run when the container starts
+# Use exec form for proper signal handling.
+# Do NOT use --reload in production.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
README.md
CHANGED
@@ -7,7 +7,7 @@ sdk: docker
 pinned: false
 license: mit
 short_description: DuckDB Hosting with UI & FastAPI 4 SQL Calls & DB Downloads
-port:
+port: 8000
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
database_api.py
ADDED
@@ -0,0 +1,426 @@
+# database_api.py
+import duckdb
+import pandas as pd
+import pyarrow as pa
+import pyarrow.ipc
+from pathlib import Path
+import tempfile
+import os
+import shutil
+from typing import Optional, List, Dict, Any, Union, Iterator, Generator, Tuple
+# No need for pybind11 import here anymore
+
+# --- Custom Exceptions ---
+class DatabaseAPIError(Exception):
+    """Base exception for our custom API."""
+    pass
+
+class QueryError(DatabaseAPIError):
+    """Exception raised for errors during query execution."""
+    pass
+
+# --- Helper function to format COPY options ---
+def _format_copy_options(options: Optional[Dict[str, Any]]) -> str:
+    if not options:
+        return ""
+    opts_parts = []
+    for k, v in options.items():
+        key_upper = k.upper()
+        if isinstance(v, bool):
+            value_repr = str(v).upper()
+        elif isinstance(v, (int, float)):
+            value_repr = str(v)
+        elif isinstance(v, str):
+            escaped_v = v.replace("'", "''")
+            value_repr = f"'{escaped_v}'"
+        else:
+            value_repr = repr(v)
+        opts_parts.append(f"{key_upper} {value_repr}")
+
+    opts_str = ", ".join(opts_parts)
+    return f"WITH ({opts_str})"
+
+# --- Main DatabaseAPI Class ---
+class DatabaseAPI:
+    def __init__(self,
+                 db_path: Union[str, Path] = ":memory:",
+                 read_only: bool = False,
+                 config: Optional[Dict[str, str]] = None):
+        self._db_path = str(db_path)
+        self._config = config or {}
+        self._read_only = read_only
+        self._conn: Optional[duckdb.DuckDBPyConnection] = None
+        try:
+            self._conn = duckdb.connect(
+                database=self._db_path,
+                read_only=self._read_only,
+                config=self._config
+            )
+            print(f"Connected to DuckDB database at '{self._db_path}'")
+        except duckdb.Error as e:
+            print(f"Failed to connect to DuckDB: {e}")
+            raise DatabaseAPIError(f"Failed to connect to DuckDB: {e}") from e
+
+    def _ensure_connection(self):
+        if self._conn is None:
+            raise DatabaseAPIError("Database connection is not established or has been closed.")
+        try:
+            self._conn.execute("SELECT 1", [])
+        except (duckdb.ConnectionException, RuntimeError) as e:
+            if "Connection has already been closed" in str(e) or "connection closed" in str(e).lower():
+                self._conn = None
+                raise DatabaseAPIError("Database connection is closed.") from e
+            else:
+                raise DatabaseAPIError(f"Database connection error: {e}") from e
+
+    # --- Basic Query Methods --- (Keep as before)
+    def execute_sql(self, sql: str, parameters: Optional[List[Any]] = None) -> None:
+        self._ensure_connection()
+        print(f"Executing SQL: {sql}")
+        try:
+            self._conn.execute(sql, parameters)
+        except duckdb.Error as e:
+            print(f"Error executing SQL: {e}")
+            raise QueryError(f"Error executing SQL: {e}") from e
+
+    def query_sql(self, sql: str, parameters: Optional[List[Any]] = None) -> duckdb.DuckDBPyRelation:
+        self._ensure_connection()
+        print(f"Querying SQL: {sql}")
+        try:
+            return self._conn.sql(sql, params=parameters)
+        except duckdb.Error as e:
+            print(f"Error querying SQL: {e}")
+            raise QueryError(f"Error querying SQL: {e}") from e
+
+    def query_df(self, sql: str, parameters: Optional[List[Any]] = None) -> pd.DataFrame:
+        self._ensure_connection()
+        print(f"Querying SQL to DataFrame: {sql}")
+        try:
+            return self._conn.execute(sql, parameters).df()
+        except ImportError:
+            print("Pandas library is required for DataFrame operations.")
+            raise
+        except duckdb.Error as e:
+            print(f"Error querying SQL to DataFrame: {e}")
+            raise QueryError(f"Error querying SQL to DataFrame: {e}") from e
+
+    def query_arrow(self, sql: str, parameters: Optional[List[Any]] = None) -> pa.Table:
+        self._ensure_connection()
+        print(f"Querying SQL to Arrow Table: {sql}")
+        try:
+            return self._conn.execute(sql, parameters).arrow()
+        except ImportError:
+            print("PyArrow library is required for Arrow operations.")
+            raise
+        except duckdb.Error as e:
+            print(f"Error querying SQL to Arrow Table: {e}")
+            raise QueryError(f"Error querying SQL to Arrow Table: {e}") from e
+
+    def query_fetchall(self, sql: str, parameters: Optional[List[Any]] = None) -> List[Tuple[Any, ...]]:
+        self._ensure_connection()
+        print(f"Querying SQL and fetching all: {sql}")
+        try:
+            return self._conn.execute(sql, parameters).fetchall()
+        except duckdb.Error as e:
+            print(f"Error querying SQL: {e}")
+            raise QueryError(f"Error querying SQL: {e}") from e
+
+    def query_fetchone(self, sql: str, parameters: Optional[List[Any]] = None) -> Optional[Tuple[Any, ...]]:
+        self._ensure_connection()
+        print(f"Querying SQL and fetching one: {sql}")
+        try:
+            return self._conn.execute(sql, parameters).fetchone()
+        except duckdb.Error as e:
+            print(f"Error querying SQL: {e}")
+            raise QueryError(f"Error querying SQL: {e}") from e
+
+    # --- Registration Methods --- (Keep as before)
+    def register_df(self, name: str, df: pd.DataFrame):
+        self._ensure_connection()
+        print(f"Registering DataFrame as '{name}'")
+        try:
+            self._conn.register(name, df)
+        except duckdb.Error as e:
+            print(f"Error registering DataFrame: {e}")
+            raise QueryError(f"Error registering DataFrame: {e}") from e
+
+    def unregister_df(self, name: str):
+        self._ensure_connection()
+        print(f"Unregistering virtual table '{name}'")
+        try:
+            self._conn.unregister(name)
+        except duckdb.Error as e:
+            if "not found" in str(e).lower():
+                print(f"Warning: Virtual table '{name}' not found for unregistering.")
+            else:
+                print(f"Error unregistering virtual table: {e}")
+                raise QueryError(f"Error unregistering virtual table: {e}") from e
+
+    # --- Extension Methods --- (Keep as before)
+    def install_extension(self, extension_name: str, force_install: bool = False):
+        self._ensure_connection()
+        print(f"Installing extension: {extension_name}")
+        try:
+            self._conn.install_extension(extension_name, force_install=force_install)
+        except duckdb.Error as e:
+            print(f"Error installing extension '{extension_name}': {e}")
+            raise DatabaseAPIError(f"Error installing extension '{extension_name}': {e}") from e
+
+    def load_extension(self, extension_name: str):
+        self._ensure_connection()
+        print(f"Loading extension: {extension_name}")
+        try:
+            self._conn.load_extension(extension_name)
+        # Catch specific DuckDB errors that indicate failure but aren't API errors
+        except (duckdb.IOException, duckdb.CatalogException) as load_err:
+            print(f"Error loading extension '{extension_name}': {load_err}")
+            raise QueryError(f"Error loading extension '{extension_name}': {load_err}") from load_err
+        except duckdb.Error as e:  # Catch other DuckDB errors
+            print(f"Unexpected DuckDB error loading extension '{extension_name}': {e}")
+            raise DatabaseAPIError(f"Unexpected DuckDB error loading extension '{extension_name}': {e}") from e
+
+    # --- Export Methods ---
+    def export_database(self, directory_path: Union[str, Path]):
+        self._ensure_connection()
+        path_str = str(directory_path)
+        if not os.path.isdir(path_str):
+            try:
+                os.makedirs(path_str)
+                print(f"Created export directory: {path_str}")
+            except OSError as e:
+                raise DatabaseAPIError(f"Could not create export directory '{path_str}': {e}") from e
+        print(f"Exporting database to directory: {path_str}")
+        sql = f"EXPORT DATABASE '{path_str}' (FORMAT CSV)"
+        try:
+            self._conn.execute(sql)
+            print("Database export completed successfully.")
+        except duckdb.Error as e:
+            print(f"Error exporting database: {e}")
+            raise DatabaseAPIError(f"Error exporting database: {e}") from e
+
+    def _export_data(self,
+                     source: str,
+                     output_path: Union[str, Path],
+                     file_format: str,
+                     options: Optional[Dict[str, Any]] = None):
+        self._ensure_connection()
+        path_str = str(output_path)
+        options_str = _format_copy_options(options)
+        source_safe = source.strip()
+        # --- MODIFIED: Use f-string quoting instead of quote_identifier ---
+        if ' ' in source_safe or source_safe.upper().startswith(('SELECT', 'WITH', 'VALUES')):
+            copy_source = f"({source})"
+        else:
+            # Simple quoting, might need refinement for complex identifiers
+            copy_source = f'"{source_safe}"'
+        # --- END MODIFICATION ---
+
+        sql = f"COPY {copy_source} TO '{path_str}' {options_str}"
+        print(f"Exporting data to {path_str} (Format: {file_format}) with options: {options or {}}")
+        try:
+            self._conn.execute(sql)
+            print("Data export completed successfully.")
+        except duckdb.Error as e:
+            print(f"Error exporting data: {e}")
+            raise QueryError(f"Error exporting data to {file_format}: {e}") from e
+
+    # --- Keep export_data_to_csv, parquet, json, jsonl as before ---
+    def export_data_to_csv(self,
+                           source: str,
+                           output_path: Union[str, Path],
+                           options: Optional[Dict[str, Any]] = None):
+        csv_options = options.copy() if options else {}
+        csv_options['FORMAT'] = 'CSV'
+        if 'HEADER' not in {k.upper() for k in csv_options}:
+            csv_options['HEADER'] = True
+        self._export_data(source, output_path, "CSV", csv_options)
+
+    def export_data_to_parquet(self,
+                               source: str,
+                               output_path: Union[str, Path],
+                               options: Optional[Dict[str, Any]] = None):
+        parquet_options = options.copy() if options else {}
+        parquet_options['FORMAT'] = 'PARQUET'
+        self._export_data(source, output_path, "Parquet", parquet_options)
+
+    def export_data_to_json(self,
+                            source: str,
+                            output_path: Union[str, Path],
+                            array_format: bool = True,
+                            options: Optional[Dict[str, Any]] = None):
+        json_options = options.copy() if options else {}
+        json_options['FORMAT'] = 'JSON'
+        if 'ARRAY' not in {k.upper() for k in json_options}:
+            json_options['ARRAY'] = array_format
+        self._export_data(source, output_path, "JSON", json_options)
+
+    def export_data_to_jsonl(self,
+                             source: str,
+                             output_path: Union[str, Path],
+                             options: Optional[Dict[str, Any]] = None):
+        self.export_data_to_json(source, output_path, array_format=False, options=options)
+
+
+    # # --- Streaming Read Methods --- (Keep as before)
+    # def stream_query_arrow(self,
+    #                        sql: str,
+    #                        parameters: Optional[List[Any]] = None,
+    #                        batch_size: int = 1000000
+    #                        ) -> Iterator[pa.RecordBatch]:
+    #     self._ensure_connection()
+    #     print(f"Streaming Arrow query (batch size {batch_size}): {sql}")
+    #     try:
+    #         result_set = self._conn.execute(sql, parameters)
+    #         while True:
+    #             batch = result_set.fetch_record_batch(batch_size)
+    #             if not batch:
+    #                 break
+    #             yield batch
+    #     except ImportError:
+    #         print("PyArrow library is required for Arrow streaming.")
+    #         raise
+    #     except duckdb.Error as e:
+    #         print(f"Error streaming Arrow query: {e}")
+    #         raise QueryError(f"Error streaming Arrow query: {e}") from e
+
+    def stream_query_df(self,
+                        sql: str,
+                        parameters: Optional[List[Any]] = None,
+                        vectors_per_chunk: int = 1
+                        ) -> Iterator[pd.DataFrame]:
+        self._ensure_connection()
+        print(f"Streaming DataFrame query (vectors per chunk {vectors_per_chunk}): {sql}")
+        try:
+            result_set = self._conn.execute(sql, parameters)
+            while True:
+                chunk_df = result_set.fetch_df_chunk(vectors_per_chunk)
+                if chunk_df.empty:
+                    break
+                yield chunk_df
+        except ImportError:
+            print("Pandas library is required for DataFrame streaming.")
+            raise
+        except duckdb.Error as e:
+            print(f"Error streaming DataFrame query: {e}")
+            raise QueryError(f"Error streaming DataFrame query: {e}") from e
+
+    def stream_query_arrow(self,
+                           sql: str,
+                           parameters: Optional[List[Any]] = None,
+                           batch_size: int = 1000000
+                           ) -> Iterator[pa.RecordBatch]:
+        """
+        Executes a SQL query and streams the results as Arrow RecordBatches.
+        Useful for processing large results iteratively in Python without
+        loading the entire result set into memory.
+
+        Args:
+            sql: The SQL query to execute.
+            parameters: Optional list of parameters for prepared statements.
+            batch_size: The approximate number of rows per Arrow RecordBatch.
+
+        Yields:
+            pyarrow.RecordBatch: Chunks of the result set.
+
+        Raises:
+            QueryError: If the query execution or fetching fails.
+            ImportError: If pyarrow is not installed.
+        """
+        self._ensure_connection()
+        print(f"Streaming Arrow query (batch size {batch_size}): {sql}")
+        record_batch_reader = None
+        try:
+            # Use execute() to get a result object that supports streaming fetch
+            result_set = self._conn.execute(sql, parameters)
+            # --- MODIFICATION: Get the reader first ---
+            record_batch_reader = result_set.fetch_record_batch(batch_size)
+            # --- Iterate through the reader ---
+            for batch in record_batch_reader:
+                yield batch
+            # --- END MODIFICATION ---
+        except ImportError:
+            print("PyArrow library is required for Arrow streaming.")
+            raise
+        except duckdb.Error as e:
+            print(f"Error streaming Arrow query: {e}")
+            raise QueryError(f"Error streaming Arrow query: {e}") from e
+        finally:
+            # Clean up the reader if it was created
+            if record_batch_reader is not None:
+                # PyArrow readers don't have an explicit close, relying on GC.
+                # Forcing cleanup might involve ensuring references are dropped.
+                del record_batch_reader  # Help GC potentially
+            # The original result_set from execute() might also hold resources,
+            # although fetch_record_batch typically consumes it.
+            # Explicitly closing it if possible, or letting it go out of scope.
+            if 'result_set' in locals() and result_set:
+                try:
+                    # DuckDBPyResult doesn't have an explicit close, relies on __del__
+                    del result_set
+                except Exception:
+                    pass  # Best effort
+
+    # --- Resource Management Methods --- (Keep as before)
+    def close(self):
+        if self._conn:
+            conn_id = id(self._conn)
+            print(f"Closing connection to '{self._db_path}' (ID: {conn_id})")
+            try:
+                self._conn.close()
+            except duckdb.Error as e:
+                print(f"Error closing DuckDB connection (ID: {conn_id}): {e}")
+            finally:
+                self._conn = None
+        else:
+            print("Connection already closed or never opened.")
+
+    def __enter__(self):
+        self._ensure_connection()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def __del__(self):
+        if self._conn:
+            print(f"ResourceWarning: DatabaseAPI for '{self._db_path}' was not explicitly closed. Closing now in __del__.")
+            try:
+                self.close()
+            except Exception as e:
+                print(f"Exception during implicit close in __del__: {e}")
+            self._conn = None
+
+
+# --- Example Usage --- (Keep as before)
+if __name__ == "__main__":
+    # ... (rest of the example usage code from previous response) ...
+    temp_dir_obj = tempfile.TemporaryDirectory()
+    temp_dir = temp_dir_obj.name
+    print(f"\n--- Using temporary directory: {temp_dir} ---")
+    db_file = Path(temp_dir) / "export_test.db"
+    try:
+        with DatabaseAPI(db_path=db_file) as db_api:
+            db_api.execute_sql("CREATE OR REPLACE TABLE products(id INTEGER, name VARCHAR, price DECIMAL(8,2))")
+            db_api.execute_sql("INSERT INTO products VALUES (101, 'Gadget', 19.99), (102, 'Widget', 35.00), (103, 'Thing''amajig', 9.50)")
+            db_api.execute_sql("CREATE OR REPLACE TABLE sales(product_id INTEGER, sale_date DATE, quantity INTEGER)")
+            db_api.execute_sql("INSERT INTO sales VALUES (101, '2023-10-26', 5), (102, '2023-10-26', 2), (101, '2023-10-27', 3)")
+            export_dir = Path(temp_dir) / "exported_db"
+            db_api.export_database(export_dir)
+            csv_path = Path(temp_dir) / "products_export.csv"
+            db_api.export_data_to_csv('products', csv_path, options={'HEADER': True})
+            parquet_path = Path(temp_dir) / "high_value_products.parquet"
+            db_api.export_data_to_parquet("SELECT * FROM products WHERE price > 20", parquet_path, options={'COMPRESSION': 'SNAPPY'})
+            json_path = Path(temp_dir) / "sales.json"
+            db_api.export_data_to_json("SELECT * FROM sales", json_path, array_format=True)
+            jsonl_path = Path(temp_dir) / "sales.jsonl"
+            db_api.export_data_to_jsonl("SELECT * FROM sales ORDER BY sale_date", jsonl_path)
+
+        with DatabaseAPI() as db_api:
+            db_api.execute_sql("CREATE TABLE large_range AS SELECT range AS id, range % 100 AS category FROM range(1000)")
+            for batch in db_api.stream_query_arrow("SELECT * FROM large_range", batch_size=200):
+                pass
+            for df_chunk in db_api.stream_query_df("SELECT * FROM large_range", vectors_per_chunk=1):
+                pass
+    finally:
+        temp_dir_obj.cleanup()
+        print(f"\n--- Cleaned up temporary directory: {temp_dir} ---")
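A minimal usage sketch of the streaming reader above, assuming the in-memory default database and a throwaway table t (hypothetical names, not part of the commit): the RecordBatches yielded by stream_query_arrow can be reassembled into a single pyarrow Table on the caller's side.

# Hypothetical sketch: rebuild a Table from streamed RecordBatches.
import pyarrow as pa
from database_api import DatabaseAPI

with DatabaseAPI() as db:  # defaults to an in-memory DuckDB
    db.execute_sql("CREATE TABLE t AS SELECT range AS id FROM range(1000)")
    batches = list(db.stream_query_arrow("SELECT * FROM t", batch_size=250))
    # from_batches needs at least one batch (or an explicit schema)
    table = pa.Table.from_batches(batches) if batches else pa.table({})
    print(table.num_rows)  # 1000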
main.py
CHANGED
@@ -1,186 +1,389 @@
 import duckdb
 from pathlib import Path
-import
-import
-import
-DB_FILE = DB_DIR / DB_FILENAME
-UI_EXPECTED_PORT = 8080 # Default port DuckDB UI often tries first
-DB_DIR.mkdir(parents=True, exist_ok=True)
-# ---
-# ---
-)
-# --- Pydantic Models ---
 class QueryRequest(BaseModel):
-    sql: str
-class QueryResponse(BaseModel):
-    columns: list[str] | None = None
-    rows: list[dict] | None = None
-    message: str | None = None
-    error: str | None = None
-# --- Helper Function ---
-def execute_duckdb_query(sql_query: str, db_path: str = str(DB_FILE)):
-    """Connects to DuckDB, executes a query, and returns results or error."""
-    con = None
-    try:
-        logger.info(f"Connecting to database: {db_path}")
-        con = duckdb.connect(database=db_path, read_only=False)
-        logger.info(f"Executing SQL: {sql_query[:200]}{'...' if len(sql_query) > 200 else ''}")
-        con.begin()
-        result_relation = con.execute(sql_query)
-        response_data = {"columns": None, "rows": None, "message": None, "error": None}
-        if result_relation.description:
-            columns = [desc[0] for desc in result_relation.description]
-            rows_raw = result_relation.fetchall()
-            rows_dict = [dict(zip(columns, row)) for row in rows_raw]
-            response_data["columns"] = columns
-            response_data["rows"] = rows_dict
-            response_data["message"] = f"Query executed successfully. Fetched {len(rows_dict)} row(s)."
-            logger.info(f"Query successful, returned {len(rows_dict)} rows.")
-        else:
-            response_data["message"] = "Query executed successfully (no data returned)."
-            logger.info("Query successful (no data returned).")
-        return {"columns": None, "rows": None, "message": None, "error": str(e)}
-    except Exception as e:
-        logger.error(f"General Error: {e}")
-        if con: con.rollback()
-        return {"columns": None, "rows": None, "message": None, "error": f"An unexpected error occurred: {e}"}
-    finally:
-        if con:
-            con.close()
-            logger.info("Database connection closed.")
-# --- FastAPI Startup Event ---
 @app.on_event("startup")
 async def startup_event():
-    try:
-    except
     except Exception as e:
     finally:
-    if
-@app.
-async def
-    """
-    path
+# main.py
 import duckdb
+import pandas as pd
+import pyarrow as pa
+import pyarrow.ipc
 from pathlib import Path
+import tempfile
+import os
+import shutil
+from typing import Optional, List, Dict, Any, Union, Iterator, Generator, Tuple
+
+from fastapi import FastAPI, HTTPException, Body, Query, BackgroundTasks, Depends
+from fastapi.responses import StreamingResponse, FileResponse
+from pydantic import BaseModel, Field
+
+from database_api import DatabaseAPI, DatabaseAPIError, QueryError
+
+# --- Configuration --- (Keep as before)
+DUCKDB_API_DB_PATH = os.getenv("DUCKDB_API_DB_PATH", "api_database.db")
+DUCKDB_API_READ_ONLY = os.getenv("DUCKDB_API_READ_ONLY", False)
+DUCKDB_API_CONFIG = {}
+TEMP_EXPORT_DIR = Path(tempfile.gettempdir()) / "duckdb_api_exports"
+TEMP_EXPORT_DIR.mkdir(exist_ok=True)
+print(f"Using temporary directory for exports: {TEMP_EXPORT_DIR}")
+
+# --- Pydantic Models --- (Keep as before)
+class StatusResponse(BaseModel):
+    status: str
+    message: Optional[str] = None
+
+class ExecuteRequest(BaseModel):
+    sql: str
+    parameters: Optional[List[Any]] = None
+
 class QueryRequest(BaseModel):
+    sql: str
+    parameters: Optional[List[Any]] = None
+
+class DataFrameResponse(BaseModel):
+    columns: List[str]
+    records: List[Dict[str, Any]]
+
+class InstallRequest(BaseModel):
+    extension_name: str
+    force_install: bool = False
+
+class LoadRequest(BaseModel):
+    extension_name: str
+
+class ExportDataRequest(BaseModel):
+    source: str = Field(..., description="Table name or SQL SELECT query to export")
+    options: Optional[Dict[str, Any]] = Field(None, description="Format-specific export options")
+
+# --- FastAPI Application --- (Keep as before)
+app = FastAPI(
+    title="DuckDB API Wrapper",
+    description="Exposes DuckDB functionalities via a RESTful API.",
+    version="0.2.1"  # Incremented version
+)
+
+# --- Global DatabaseAPI Instance & Lifecycle --- (Keep as before)
+db_api_instance: Optional[DatabaseAPI] = None
+
 @app.on_event("startup")
 async def startup_event():
+    global db_api_instance
+    print("Starting up DuckDB API...")
+    try:
+        db_api_instance = DatabaseAPI(db_path=DUCKDB_API_DB_PATH, read_only=DUCKDB_API_READ_ONLY, config=DUCKDB_API_CONFIG)
+    except DatabaseAPIError as e:
+        print(f"FATAL: Could not initialize DatabaseAPI on startup: {e}")
+        db_api_instance = None
+
+@app.on_event("shutdown")
+def shutdown_event():
+    print("Shutting down DuckDB API...")
+    if db_api_instance:
+        db_api_instance.close()
+
+# --- Dependency to get the DB API instance --- (Keep as before)
+def get_db_api() -> DatabaseAPI:
+    if db_api_instance is None:
+        raise HTTPException(status_code=503, detail="Database service is unavailable (failed to initialize).")
+    try:
+        db_api_instance._ensure_connection()
+        return db_api_instance
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=503, detail=f"Database service error: {e}")
+
+# --- API Endpoints ---
+
+# --- CRUD and Querying Endpoints (Keep as before) ---
+@app.post("/execute", response_model=StatusResponse, tags=["CRUD"])
+async def execute_statement(request: ExecuteRequest, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        api.execute_sql(request.sql, request.parameters)
+        return {"status": "success", "message": None}  # Explicitly return None for message
+    except QueryError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/query/fetchall", response_model=List[tuple], tags=["Querying"])
+async def query_fetchall_endpoint(request: QueryRequest, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        return api.query_fetchall(request.sql, request.parameters)
+    except QueryError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/query/dataframe", response_model=DataFrameResponse, tags=["Querying"])
+async def query_dataframe_endpoint(request: QueryRequest, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        df = api.query_df(request.sql, request.parameters)
+        df_serializable = df.replace({pd.NA: None, pd.NaT: None, float('nan'): None})
+        return {"columns": df_serializable.columns.tolist(), "records": df_serializable.to_dict(orient='records')}
+    except (QueryError, ImportError) as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+# --- Streaming Endpoints ---
+
+# --- CORRECTED _stream_arrow_ipc ---
+async def _stream_arrow_ipc(record_batch_iterator: Iterator[pa.RecordBatch]) -> Generator[bytes, None, None]:
+    """Helper generator to stream Arrow IPC Stream format."""
+    writer = None
+    sink = pa.BufferOutputStream()  # Create sink once
+    try:
+        first_batch = next(record_batch_iterator)
+        writer = pa.ipc.new_stream(sink, first_batch.schema)
+        writer.write_batch(first_batch)
+        # Do NOT yield yet, wait for potential subsequent batches or closure
+
+        for batch in record_batch_iterator:
+            # Write subsequent batches to the SAME writer
+            writer.write_batch(batch)
+
+    except StopIteration:
+        # Handles the case where the iterator was empty initially
+        if writer is None:  # No batches were ever processed
+            print("Warning: Arrow stream iterator was empty.")
+            # Yield empty bytes or handle as needed, depends on client expectation
+            # yield b''  # Option 1: empty bytes
+            return  # Option 2: Just finish generator
+
+    except Exception as e:
+        print(f"Error during Arrow streaming generator: {e}")
+        # Consider how to signal error downstream if possible
+    finally:
+        if writer:
+            try:
+                print("Closing Arrow IPC Stream Writer...")
+                writer.close()  # Close the writer to finalize the stream in the sink
+                print("Writer closed.")
+            except Exception as close_e:
+                print(f"Error closing Arrow writer: {close_e}")
+        if sink:
+            try:
+                buffer = sink.getvalue()
+                if buffer:
+                    print(f"Yielding final Arrow buffer (size: {len(buffer.to_pybytes())})...")
+                    yield buffer.to_pybytes()  # Yield the complete stream buffer
+                else:
+                    print("Arrow sink buffer was empty after closing writer.")
+                sink.close()
+            except Exception as close_e:
+                print(f"Error closing or getting value from Arrow sink: {close_e}")
+# --- END CORRECTION ---
+
+
+@app.post("/query/stream/arrow", tags=["Streaming"])
+async def query_stream_arrow_endpoint(request: QueryRequest, api: DatabaseAPI = Depends(get_db_api)):
+    """Executes a SQL query and streams results as Arrow IPC Stream format."""
+    try:
+        iterator = api.stream_query_arrow(request.sql, request.parameters)
+        return StreamingResponse(
+            _stream_arrow_ipc(iterator),
+            media_type="application/vnd.apache.arrow.stream"
+        )
+    except (QueryError, ImportError) as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+# --- _stream_jsonl (Keep as before) ---
+async def _stream_jsonl(dataframe_iterator: Iterator[pd.DataFrame]) -> Generator[bytes, None, None]:
+    try:
+        for df_chunk in dataframe_iterator:
+            df_serializable = df_chunk.replace({pd.NA: None, pd.NaT: None, float('nan'): None})
+            jsonl_string = df_serializable.to_json(orient='records', lines=True, date_format='iso')
+            if jsonl_string:
+                # pandas>=1.5.0 adds newline by default
+                if not jsonl_string.endswith('\n'):
+                    jsonl_string += '\n'
+                yield jsonl_string.encode('utf-8')
+    except Exception as e:
+        print(f"Error during JSONL streaming generator: {e}")
+
+@app.post("/query/stream/jsonl", tags=["Streaming"])
+async def query_stream_jsonl_endpoint(request: QueryRequest, api: DatabaseAPI = Depends(get_db_api)):
+    """Executes a SQL query and streams results as JSON Lines (JSONL)."""
+    try:
+        iterator = api.stream_query_df(request.sql, request.parameters)
+        return StreamingResponse(_stream_jsonl(iterator), media_type="application/jsonl")
+    except (QueryError, ImportError) as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# --- Download / Export Endpoints (Keep as before, uses corrected _export_data) ---
+def _cleanup_temp_file(path: Union[str, Path]):
+    try:
+        if Path(path).is_file():
+            os.remove(path)
+            print(f"Cleaned up temporary file: {path}")
+    except OSError as e:
+        print(f"Error cleaning up temporary file {path}: {e}")
+
+async def _create_temp_export(
+        api: DatabaseAPI,
+        source: str,
+        export_format: str,
+        options: Optional[Dict[str, Any]] = None,
+        suffix: str = ".tmp"
+) -> Path:
+    fd, temp_path_str = tempfile.mkstemp(suffix=suffix, dir=TEMP_EXPORT_DIR)
+    os.close(fd)
+    temp_file_path = Path(temp_path_str)
+
+    try:
+        print(f"Exporting to temporary file: {temp_file_path}")
+        if export_format == 'csv':
+            api.export_data_to_csv(source, temp_file_path, options)
+        elif export_format == 'parquet':
+            api.export_data_to_parquet(source, temp_file_path, options)
+        elif export_format == 'json':
+            api.export_data_to_json(source, temp_file_path, array_format=True, options=options)
+        elif export_format == 'jsonl':
+            api.export_data_to_jsonl(source, temp_file_path, options=options)
+        else:
+            raise ValueError(f"Unsupported export format: {export_format}")
+        return temp_file_path
+    except Exception as e:
+        _cleanup_temp_file(temp_file_path)
+        raise e
+
+@app.post("/export/data/csv", response_class=FileResponse, tags=["Export / Download"])
+async def export_csv_endpoint(request: ExportDataRequest, background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        temp_file_path = await _create_temp_export(api, request.source, 'csv', request.options, suffix=".csv")
+        background_tasks.add_task(_cleanup_temp_file, temp_file_path)
+        filename = f"export_{Path(request.source).stem if '.' not in request.source else 'query'}.csv"
+        return FileResponse(temp_file_path, media_type='text/csv', filename=filename)
+    except (QueryError, ValueError) as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Unexpected error during CSV export: {e}")
+
+@app.post("/export/data/parquet", response_class=FileResponse, tags=["Export / Download"])
+async def export_parquet_endpoint(request: ExportDataRequest, background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        temp_file_path = await _create_temp_export(api, request.source, 'parquet', request.options, suffix=".parquet")
+        background_tasks.add_task(_cleanup_temp_file, temp_file_path)
+        filename = f"export_{Path(request.source).stem if '.' not in request.source else 'query'}.parquet"
+        return FileResponse(temp_file_path, media_type='application/vnd.apache.parquet', filename=filename)
+    except (QueryError, ValueError) as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Unexpected error during Parquet export: {e}")
+
+@app.post("/export/data/json", response_class=FileResponse, tags=["Export / Download"])
+async def export_json_endpoint(request: ExportDataRequest, background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        temp_file_path = await _create_temp_export(api, request.source, 'json', request.options, suffix=".json")
+        background_tasks.add_task(_cleanup_temp_file, temp_file_path)
+        filename = f"export_{Path(request.source).stem if '.' not in request.source else 'query'}.json"
+        return FileResponse(temp_file_path, media_type='application/json', filename=filename)
+    except (QueryError, ValueError) as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Unexpected error during JSON export: {e}")
+
+@app.post("/export/data/jsonl", response_class=FileResponse, tags=["Export / Download"])
+async def export_jsonl_endpoint(request: ExportDataRequest, background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        temp_file_path = await _create_temp_export(api, request.source, 'jsonl', request.options, suffix=".jsonl")
+        background_tasks.add_task(_cleanup_temp_file, temp_file_path)
+        filename = f"export_{Path(request.source).stem if '.' not in request.source else 'query'}.jsonl"
+        return FileResponse(temp_file_path, media_type='application/jsonl', filename=filename)
+    except (QueryError, ValueError) as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Unexpected error during JSONL export: {e}")
+
+@app.post("/export/database", response_class=FileResponse, tags=["Export / Download"])
+async def export_database_endpoint(background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
+    export_target_dir = Path(tempfile.mkdtemp(dir=TEMP_EXPORT_DIR))
+    fd, zip_path_str = tempfile.mkstemp(suffix=".zip", dir=TEMP_EXPORT_DIR)
+    os.close(fd)
+    zip_file_path = Path(zip_path_str)
+    try:
+        print(f"Exporting database to temporary directory: {export_target_dir}")
+        api.export_database(export_target_dir)
+        print(f"Creating zip archive at: {zip_file_path}")
+        shutil.make_archive(str(zip_file_path.with_suffix('')), 'zip', str(export_target_dir))
+        print(f"Zip archive created: {zip_file_path}")
+        background_tasks.add_task(shutil.rmtree, export_target_dir, ignore_errors=True)
+        background_tasks.add_task(_cleanup_temp_file, zip_file_path)
+        db_name = Path(api._db_path).stem if api._db_path != ':memory:' else 'in_memory_db'
+        return FileResponse(zip_file_path, media_type='application/zip', filename=f"{db_name}_export.zip")
+    except (QueryError, ValueError, OSError, DatabaseAPIError) as e:
+        print(f"Error during database export: {e}")
+        shutil.rmtree(export_target_dir, ignore_errors=True)
+        _cleanup_temp_file(zip_file_path)
+        if isinstance(e, DatabaseAPIError):
+            raise HTTPException(status_code=500, detail=str(e))
+        else:
+            raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        print(f"Unexpected error during database export: {e}")
+        shutil.rmtree(export_target_dir, ignore_errors=True)
+        _cleanup_temp_file(zip_file_path)
+        raise HTTPException(status_code=500, detail=f"Unexpected error during database export: {e}")
+
+# --- Extension Management Endpoints ---
+
+@app.post("/extensions/install", response_model=StatusResponse, tags=["Extensions"])
+async def install_extension_endpoint(request: InstallRequest, api: DatabaseAPI = Depends(get_db_api)):
+    try:
+        api.install_extension(request.extension_name, request.force_install)
+        return {"status": "success", "message": f"Extension '{request.extension_name}' installed."}
+    except DatabaseAPIError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    # Catch specific DuckDB errors that should be client errors (400)
+    except (duckdb.IOException, duckdb.CatalogException, duckdb.InvalidInputException) as e:
+        raise HTTPException(status_code=400, detail=f"DuckDB Error during install: {e}")
+    except duckdb.Error as e:  # Catch other potential DuckDB errors as 500
+        raise HTTPException(status_code=500, detail=f"Unexpected DuckDB Error during install: {e}")
+
+
+@app.post("/extensions/load", response_model=StatusResponse, tags=["Extensions"])
+async def load_extension_endpoint(request: LoadRequest, api: DatabaseAPI = Depends(get_db_api)):
+    """Loads an installed DuckDB extension."""
+    try:
+        api.load_extension(request.extension_name)
+        return {"status": "success", "message": f"Extension '{request.extension_name}' loaded."}
+    # --- MODIFIED Exception Handling ---
+    except QueryError as e:  # If api.load_extension raised QueryError (e.g., IO/Catalog)
+        raise HTTPException(status_code=400, detail=str(e))
+    except DatabaseAPIError as e:  # For other API-level issues
+        raise HTTPException(status_code=500, detail=str(e))
+    # Catch specific DuckDB errors that should be client errors (400)
+    except (duckdb.IOException, duckdb.CatalogException) as e:
+        raise HTTPException(status_code=400, detail=f"DuckDB Error during load: {e}")
+    except duckdb.Error as e:  # Catch other potential DuckDB errors as 500
+        raise HTTPException(status_code=500, detail=f"Unexpected DuckDB Error during load: {e}")
+    # --- END MODIFICATION ---
+
+# --- Health Check --- (Keep as before)
+@app.get("/health", response_model=StatusResponse, tags=["Health"])
+async def health_check():
+    """Basic health check."""
+    try:
+        _ = get_db_api()
+        return {"status": "ok", "message": None}  # Explicitly return None for message
+    except HTTPException as e:
+        raise e
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Health check failed unexpectedly: {e}")
+
+# --- Run the app --- (Keep as before)
+if __name__ == "__main__":
+    import uvicorn
+    print(f"Starting DuckDB API server...")
+    print(f"Database file configured at: {DUCKDB_API_DB_PATH}")
+    print(f"Read-only mode: {DUCKDB_API_READ_ONLY}")
+    print(f"Temporary export directory: {TEMP_EXPORT_DIR}")
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
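A minimal client-side sketch for the /execute and /query/fetchall endpoints defined above, assuming the server is running locally on port 8000 and using the httpx dependency listed in requirements.txt below; the demo table and values are hypothetical, not part of the commit.

# Hypothetical client sketch, not part of the commit.
import httpx

with httpx.Client(base_url="http://localhost:8000") as client:
    client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE demo(id INTEGER, name VARCHAR)"})
    client.post("/execute", json={"sql": "INSERT INTO demo VALUES (?, ?)", "parameters": [1, "Ada"]})
    rows = client.post("/query/fetchall", json={"sql": "SELECT * FROM demo"}).json()
    print(rows)  # expected: [[1, "Ada"]]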
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
 fastapi
 uvicorn[standard]
-duckdb>=1.
+duckdb>=1.2.1
 pydantic
-python-multipart
+python-multipart
+httpx
test_api.py
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
import os
|
3 |
+
import shutil
|
4 |
+
import tempfile
|
5 |
+
import zipfile
|
6 |
+
import json
|
7 |
+
from pathlib import Path
|
8 |
+
from typing import List, Dict, Any
|
9 |
+
from unittest.mock import patch
|
10 |
+
|
11 |
+
pd = pytest.importorskip("pandas")
|
12 |
+
pa = pytest.importorskip("pyarrow")
|
13 |
+
pa_ipc = pytest.importorskip("pyarrow.ipc")
|
14 |
+
|
15 |
+
from fastapi.testclient import TestClient
|
16 |
+
import main # Import main to reload and access config
|
17 |
+
|
18 |
+
# --- Test Fixtures --- (Keep client fixture as before)
|
19 |
+
@pytest.fixture(scope="module")
|
20 |
+
def client():
|
21 |
+
with patch.dict(os.environ, {"DUCKDB_API_DB_PATH": ":memory:"}):
|
22 |
+
import importlib
|
23 |
+
importlib.reload(main)
|
24 |
+
main.TEMP_EXPORT_DIR.mkdir(exist_ok=True)
|
25 |
+
print(f"TestClient using temp export dir: {main.TEMP_EXPORT_DIR}")
|
26 |
+
with TestClient(main.app) as c:
|
27 |
+
yield c
|
28 |
+
print(f"Cleaning up test export dir: {main.TEMP_EXPORT_DIR}")
|
29 |
+
for item in main.TEMP_EXPORT_DIR.iterdir():
|
30 |
+
try:
|
31 |
+
if item.is_file():
|
32 |
+
os.remove(item)
|
33 |
+
elif item.is_dir():
|
34 |
+
shutil.rmtree(item)
|
35 |
+
except Exception as e:
|
36 |
+
print(f"Error cleaning up {item}: {e}")
|
37 |
+
|
38 |
+
# --- Test Classes ---
|
39 |
+
|
40 |
+
class TestHealth: # (Keep as before)
|
41 |
+
def test_health_check(self, client: TestClient):
|
42 |
+
response = client.get("/health")
|
43 |
+
assert response.status_code == 200
|
44 |
+
assert response.json() == {"status": "ok", "message": None}
|
45 |
+
|
class TestExecution:
    def test_execute_create(self, client: TestClient):
        response = client.post("/execute", json={"sql": "CREATE TABLE test_table(id INTEGER, name VARCHAR);"})
        assert response.status_code == 200
        assert response.json() == {"status": "success", "message": None}
        response_fail = client.post("/execute", json={"sql": "CREATE TABLE test_table(id INTEGER);"})
        assert response_fail.status_code == 400

    def test_execute_insert(self, client: TestClient):
        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE test_table(id INTEGER, name VARCHAR);"})
        response = client.post("/execute", json={"sql": "INSERT INTO test_table VALUES (1, 'Alice')"})
        assert response.status_code == 200
        query_response = client.post("/query/fetchall", json={"sql": "SELECT COUNT(*) FROM test_table"})
        assert query_response.status_code == 200
        assert query_response.json() == [[1]]

    def test_execute_insert_params(self, client: TestClient):
        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE test_table(id INTEGER, name VARCHAR);"})
        response = client.post("/execute", json={"sql": "INSERT INTO test_table VALUES (?, ?)", "parameters": [2, "Bob"]})
        assert response.status_code == 200
        query_response = client.post("/query/fetchall", json={"sql": "SELECT * FROM test_table WHERE id = 2"})
        assert query_response.status_code == 200
        assert query_response.json() == [[2, "Bob"]]

    def test_execute_invalid_sql(self, client: TestClient):
        response = client.post("/execute", json={"sql": "INVALID SQL STATEMENT"})
        assert response.status_code == 400
        assert "Parser Error" in response.json()["detail"]

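# /query/fetchall returns rows as JSON arrays; /query/dataframe returns a
# {"columns": [...], "records": [...]} payload. Binder errors map to HTTP 400.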
class TestQuerying:
    @pytest.fixture(scope="class", autouse=True)
    def setup_data(self, client: TestClient):
        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE query_test(id INTEGER, val VARCHAR)"})
        client.post("/execute", json={"sql": "INSERT INTO query_test VALUES (1, 'one'), (2, 'two'), (3, 'three')"})

    def test_query_fetchall(self, client: TestClient):
        response = client.post("/query/fetchall", json={"sql": "SELECT * FROM query_test ORDER BY id"})
        assert response.status_code == 200
        assert response.json() == [[1, 'one'], [2, 'two'], [3, 'three']]

    def test_query_fetchall_params(self, client: TestClient):
        response = client.post("/query/fetchall", json={"sql": "SELECT * FROM query_test WHERE id > ? ORDER BY id", "parameters": [1]})
        assert response.status_code == 200
        assert response.json() == [[2, 'two'], [3, 'three']]

    def test_query_fetchall_empty(self, client: TestClient):
        response = client.post("/query/fetchall", json={"sql": "SELECT * FROM query_test WHERE id > 100"})
        assert response.status_code == 200
        assert response.json() == []

    def test_query_dataframe(self, client: TestClient):
        response = client.post("/query/dataframe", json={"sql": "SELECT * FROM query_test ORDER BY id"})
        assert response.status_code == 200
        data = response.json()
        assert data["columns"] == ["id", "val"]
        assert data["records"] == [
            {"id": 1, "val": "one"},
            {"id": 2, "val": "two"},
            {"id": 3, "val": "three"}
        ]

    def test_query_dataframe_invalid_sql(self, client: TestClient):
        response = client.post("/query/dataframe", json={"sql": "SELECT non_existent FROM query_test"})
        assert response.status_code == 400
        assert "Binder Error" in response.json()["detail"]

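# Streaming endpoints: /query/stream/arrow yields an Arrow IPC stream (readable
# with pyarrow.ipc.open_stream), /query/stream/jsonl yields one JSON object per
# line. Empty result sets may produce an empty body, which the tests tolerate.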
class TestStreaming:
    @pytest.fixture(scope="class", autouse=True)
    def setup_data(self, client: TestClient):
        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE stream_test AS SELECT range AS id, range % 5 AS category FROM range(10)"})

    def test_stream_arrow(self, client: TestClient):
        response = client.post("/query/stream/arrow", json={"sql": "SELECT * FROM stream_test"})
        assert response.status_code == 200
        assert response.headers["content-type"] == "application/vnd.apache.arrow.stream"
        if not response.content:
            pytest.fail("Arrow stream response content is empty")
        try:
            reader = pa_ipc.open_stream(response.content)
            table = reader.read_all()
        except pa.ArrowInvalid as e:
            pytest.fail(f"Failed to read Arrow stream: {e}")
        assert table.num_rows == 10
        assert table.column_names == ["id", "category"]
        assert table.column('id').to_pylist() == list(range(10))

    def test_stream_arrow_empty(self, client: TestClient):
        response = client.post("/query/stream/arrow", json={"sql": "SELECT * FROM stream_test WHERE id < 0"})
        assert response.status_code == 200
        assert response.headers["content-type"] == "application/vnd.apache.arrow.stream"
        try:
            reader = pa_ipc.open_stream(response.content)
            table = reader.read_all()
            assert table.num_rows == 0
        except pa.ArrowInvalid as e:
            print(f"Received ArrowInvalid for empty stream, which is acceptable: {e}")
            assert response.content == b''

    def test_stream_jsonl(self, client: TestClient):
        response = client.post("/query/stream/jsonl", json={"sql": "SELECT * FROM stream_test ORDER BY id"})
        assert response.status_code == 200
        assert response.headers["content-type"] == "application/jsonl"
        lines = response.text.strip().split('\n')
        records = [json.loads(line) for line in lines if line]
        assert len(records) == 10
        assert records[0] == {"id": 0, "category": 0}
        assert records[9] == {"id": 9, "category": 4}

    def test_stream_jsonl_empty(self, client: TestClient):
        response = client.post("/query/stream/jsonl", json={"sql": "SELECT * FROM stream_test WHERE id < 0"})
        assert response.status_code == 200
        assert response.headers["content-type"] == "application/jsonl"
        assert response.text.strip() == ""

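# /export/data/{format} downloads a table or query result as CSV, Parquet, JSON,
# or JSONL with a Content-Disposition filename; /export/database returns a zip
# archive containing schema.sql, load.sql, and per-table data files.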
class TestExportDownload:
    @pytest.fixture(scope="class", autouse=True)
    def setup_data(self, client: TestClient):
        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE export_table(id INTEGER, name VARCHAR, price DECIMAL(5,2))"})
        client.post("/execute", json={"sql": "INSERT INTO export_table VALUES (1, 'Apple', 0.50), (2, 'Banana', 0.30), (3, 'Orange', 0.75)"})

    @pytest.mark.parametrize(
        "endpoint_suffix, expected_content_type, expected_filename_ext, validation_fn",
        [
            ("csv", "text/csv", ".csv", lambda c: b"id,name,price\n1,Apple,0.50\n" in c),
            ("parquet", "application/vnd.apache.parquet", ".parquet", lambda c: c.startswith(b"PAR1")),
            ("json", "application/json", ".json", lambda c: c.strip().startswith(b'[') and c.strip().endswith(b']')),
            ("jsonl", "application/jsonl", ".jsonl", lambda c: b'"id":1' in c and b'"name":"Apple"' in c and b'\n' in c),
        ]
    )
    def test_export_data(self, client: TestClient, endpoint_suffix, expected_content_type, expected_filename_ext, validation_fn, tmp_path):
        endpoint = f"/export/data/{endpoint_suffix}"
        payload = {"source": "export_table"}
        if endpoint_suffix == 'csv':
            payload['options'] = {'HEADER': True}

        response = client.post(endpoint, json=payload)

        assert response.status_code == 200, f"Request to {endpoint} failed: {response.text}"
        assert response.headers["content-type"].startswith(expected_content_type)
        assert "content-disposition" in response.headers
        assert f'filename="export_export_table{expected_filename_ext}"' in response.headers["content-disposition"]

        downloaded_path = tmp_path / f"downloaded{expected_filename_ext}"
        with open(downloaded_path, "wb") as f:
            f.write(response.content)
        assert downloaded_path.exists()
        assert validation_fn(response.content), f"Validation failed for {endpoint_suffix}"

        # Test with a query source
        payload = {"source": "SELECT id, name FROM export_table WHERE price > 0.40 ORDER BY id"}
        response = client.post(endpoint, json=payload)
        assert response.status_code == 200
        assert f'filename="export_query{expected_filename_ext}"' in response.headers["content-disposition"]
        assert len(response.content) > 0

    def test_export_database(self, client: TestClient, tmp_path):
        client.post("/execute", json={"sql": "CREATE TABLE IF NOT EXISTS another_table(x int)"})
        response = client.post("/export/database")
        assert response.status_code == 200
        assert response.headers["content-type"] == "application/zip"
        assert "content-disposition" in response.headers
        assert response.headers["content-disposition"].startswith("attachment; filename=")
        assert 'filename="in_memory_db_export.zip"' in response.headers["content-disposition"]
        zip_path = tmp_path / "db_export.zip"
        with open(zip_path, "wb") as f:
            f.write(response.content)
        assert zip_path.exists()
        with zipfile.ZipFile(zip_path, 'r') as z:
            print(f"Zip contents: {z.namelist()}")
            assert "schema.sql" in z.namelist()
            assert "load.sql" in z.namelist()
            assert any(name.startswith("export_table") for name in z.namelist())
            assert any(name.startswith("another_table") for name in z.namelist())

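# /extensions/install and /extensions/load: failures for an unknown extension
# come back as HTTP errors with a descriptive "detail" message. The httpfs
# round-trip test is skipped by default, per its skip reason, because it needs
# the httpfs extension to actually be installable in the test environment.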
class TestExtensions:
    def test_install_extension_fail(self, client: TestClient):
        response = client.post("/extensions/install", json={"extension_name": "nonexistent_dummy_ext"})
        assert response.status_code >= 400
        assert "Error during install" in response.json()["detail"] or "Failed to download" in response.json()["detail"]

    def test_load_extension_fail(self, client: TestClient):
        response = client.post("/extensions/load", json={"extension_name": "nonexistent_dummy_ext"})
        assert response.status_code == 400
        assert "Error loading extension" in response.json()["detail"]
        assert "not found" in response.json()["detail"].lower()

    @pytest.mark.skip(reason="Requires httpfs extension to be available for install/load")
    def test_install_and_load_httpfs(self, client: TestClient):
        install_response = client.post("/extensions/install", json={"extension_name": "httpfs"})
        assert install_response.status_code == 200
        assert install_response.json()["status"] == "success"

        load_response = client.post("/extensions/load", json={"extension_name": "httpfs"})
        assert load_response.status_code == 200
        assert load_response.json()["status"] == "success"