Spaces:

aproli90
/

query-sql

Running

+import streamlit as st
+import os
+import pandas as pd
+from typing import Literal, TypedDict
+from sqlalchemy import create_engine, inspect
+import json
+from transformers import AutoTokenizer
+from utils import pprint
+import time
+import re
+from openai import OpenAI
+import anthropic
+from clients.openRouter import OpenRouter
+# Load environment variables
+from dotenv import load_dotenv
+load_dotenv()
+ModelType = Literal["GPT_4o", "GPT_o1", "CLAUDE", "LLAMA", "DEEPSEEK", "DEEPSEEK_R1", "DEEPSEEK_R1_DISTILL"]
+ModelConfig = TypedDict("ModelConfig", {
+    "client": OpenAI | anthropic.Anthropic,
+    "model": str,
+    "max_context": int,
+    "tokenizer": AutoTokenizer
+})
+MODEL_CONFIG: dict[ModelType, ModelConfig] = {
+    "CLAUDE": {
+        "client": anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")),
+        "model": "claude-3-5-haiku-20241022",
+        # "model": "claude-3-5-sonnet-20241022",
+        # "model": "claude-3-5-sonnet-20240620",
+        "max_context": 40000,
+        "tokenizer": AutoTokenizer.from_pretrained("Xenova/claude-tokenizer")
+    },
+    "GPT_4o": {
+        "client": OpenAI(api_key=os.environ.get("OPENAI_API_KEY")),
+        "model": "gpt-4o",
+        "max_context": 15000,
+        "tokenizer": AutoTokenizer.from_pretrained("Xenova/gpt-4o")
+    },
+    # "GPT_o1": {
+    #     "client": OpenAI(api_key=os.environ.get("OPENAI_API_KEY")),
+    #     "model": "o1-preview",
+    #     "max_context": 15000,
+    #     "tokenizer": AutoTokenizer.from_pretrained("Xenova/gpt-4o")
+    # },
+    "DEEPSEEK": {
+        "client": OpenRouter(
+            api_key=os.environ.get("OPENROUTER_API_KEY"),
+        ),
+        "model": "deepseek/deepseek-chat",
+        "max_context": 30000,
+        "tokenizer": AutoTokenizer.from_pretrained("Xenova/gpt-4o")
+    },
+    "DEEPSEEK_R1": {
+        "client": OpenRouter(
+            api_key=os.environ.get("OPENROUTER_API_KEY"),
+        ),
+        "model": "deepseek/deepseek-r1",
+        "max_context": 30000,
+        "tokenizer": AutoTokenizer.from_pretrained("Xenova/gpt-4o")
+    },
+}
+def get_model_type():
+    """
+    Get the model type from Streamlit sidebar with model names
+    """
+    # Get the available model types from the MODEL_CONFIG keys
+    available_models = list(MODEL_CONFIG.keys())
+    # Create a list of display labels with just the model names
+    model_display_labels = [
+        MODEL_CONFIG[model_type]['model']
+        for model_type in available_models
+    ]
+    # Add a sidebar selection for model name
+    selected_model_name = st.sidebar.selectbox(
+        "Select AI Model",
+        model_display_labels,
+        index=0
+    )
+    # Find the corresponding model type for the selected model name
+    selected_model_type = next(
+        model_type for model_type in available_models
+        if MODEL_CONFIG[model_type]['model'] == selected_model_name
+    )
+    return selected_model_type
+# In the main application flow, replace the previous modelType assignment
+modelType = get_model_type()
+client = MODEL_CONFIG[modelType]["client"]
+MODEL = MODEL_CONFIG[modelType]["model"]
+TOOLS_MODEL = MODEL_CONFIG[modelType].get("tools_model") or MODEL
+MAX_CONTEXT = MODEL_CONFIG[modelType]["max_context"]
+tokenizer = MODEL_CONFIG[modelType]["tokenizer"]
+isClaudeModel = modelType == "CLAUDE"
+isDeepSeekModel = modelType.startswith("DEEPSEEK")
+def __countTokens(text):
+    text = str(text)
+    tokens = tokenizer.encode(text, add_special_tokens=False)
+    return len(tokens)
+# Initialize session state variables
+if "ipAddress" not in st.session_state:
+    st.session_state.ipAddress = st.context.headers.get("x-forwarded-for")
+if "connection_string" not in st.session_state:
+    st.session_state.connection_string = None
+if "selected_table" not in st.session_state:
+    st.session_state.selected_table = None
+if "table_schema" not in st.session_state:
+    st.session_state.table_schema = None
+if "sample_data" not in st.session_state:
+    st.session_state.sample_data = None
+if "engine" not in st.session_state:
+    st.session_state.engine = None
+def connect_to_db(connection_string):
+    try:
+        engine = create_engine(connection_string)
+        # Test the connection
+        with engine.connect():
+            pass
+        st.session_state.engine = engine
+        return True
+    except Exception as e:
+        st.error(f"Failed to connect to database: {str(e)}")
+        return False
+def get_table_schema(table_name):
+    if not st.session_state.engine:
+        return None
+    inspector = inspect(st.session_state.engine)
+    columns = inspector.get_columns(table_name)
+    return {col['name']: str(col['type']) for col in columns}
+def get_sample_data(table_name):
+    if not st.session_state.engine:
+        return None
+    query = f"SELECT * FROM {table_name} ORDER BY 1 DESC LIMIT 3"
+    try:
+        with st.session_state.engine.connect() as conn:
+            df = pd.read_sql(query, conn)
+            return df
+    except Exception as e:
+        st.error(f"Error fetching sample data: {str(e)}")
+        return None
+def clean_sql_response(response: str) -> str:
+    """Extract clean SQL query from a potentially formatted response."""
+    # If response contains SQL code block, extract it
+    sql_block_match = re.search(r'```sql\n(.*?)\n```', response, re.DOTALL)
+    if sql_block_match:
+        return sql_block_match.group(1).strip()
+    return response.strip()
+def execute_query(query):
+    if not st.session_state.engine:
+        return None
+    try:
+        start_time = time.time()
+        with st.spinner("Executing SQL query..."):
+            with st.session_state.engine.connect() as conn:
+                df = pd.read_sql(query, conn)
+            execution_time = time.time() - start_time
+            pprint(f"[Query Execution] Latency: {execution_time:.2f}s")
+        return df
+    except Exception as e:
+        st.error(f"Error executing query: {str(e)}")
+        return None
+def generate_sql_query(user_query):
+    prompt = f"""You are a SQL expert. Generate a valid PostgreSQL query based on the following context and user query.
+Table Name: {st.session_state.selected_table}
+Table Schema:
+{json.dumps(st.session_state.table_schema, indent=2)}
+Sample Data:
+{st.session_state.sample_data.to_markdown(index=False)}
+Important:
+1. Only return the SQL query, nothing else
+2. The query should be valid PostgreSQL syntax
+3. Do not include any explanations or comments
+4. Make sure to handle NULL values appropriately
+5. Use the table name '{st.session_state.selected_table}' in your query
+User Query: {user_query}
+"""
+    prompt_tokens = __countTokens(prompt)
+    pprint(f"[{MODEL}] Prompt tokens for SQL generation: {prompt_tokens}")
+    # Debug prompt in a Streamlit expander for better organization
+    with st.expander("Debug: Prompt Generation"):
+        st.write(f"\nUser Query: {user_query}")
+        st.write("\nFull Prompt:")
+        st.code(prompt, language="text")
+    start_time = time.time()
+    with st.spinner(f"Generating SQL query using {MODEL}..."):
+        if isClaudeModel:
+            response = client.messages.create(
+                model=MODEL,
+                max_tokens=1000,
+                messages=[
+                    {"role": "user", "content": prompt},
+                ]
+            )
+            raw_response = response.content[0].text
+        else:
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                    {"role": "user", "content": prompt},
+                ]
+            )
+            raw_response = response.choices[0].message.content
+        generation_time = time.time() - start_time
+        pprint(f"[{MODEL}] Query Generation Latency: {generation_time:.2f}s")
+    return clean_sql_response(raw_response)
+# UI Components
+st.title("SQL Query Assistant")
+# Database Connection Section
+st.header("1. Database Connection")
+connection_string = st.text_input(
+    "Enter PostgreSQL Connection String",
+    value=st.session_state.connection_string if st.session_state.connection_string else "",
+    type="password"
+)
+if connection_string and connection_string != st.session_state.connection_string:
+    if connect_to_db(connection_string):
+        st.session_state.connection_string = connection_string
+        st.success("Successfully connected to database!")
+# Table Selection Section
+if st.session_state.connection_string:
+    st.header("2. Table Selection")
+    inspector = inspect(st.session_state.engine)
+    tables = inspector.get_table_names()
+    # Set default index to 'lsq_leads' if present, otherwise 0
+    default_index = tables.index('lsq_leads') if 'lsq_leads' in tables else 0
+    selected_table = st.selectbox("Select a table", tables, index=default_index)
+    # Create containers for schema and data
+    schema_container = st.container()
+    data_container = st.container()
+    # Always load table data if we have a selected table
+    if selected_table:
+        # Update session state
+        if selected_table != st.session_state.selected_table:
+            st.session_state.selected_table = selected_table
+        # Always fetch schema and sample data
+        st.session_state.table_schema = get_table_schema(selected_table)
+        st.session_state.sample_data = get_sample_data(selected_table)
+        # Always display schema and sample data if available
+        with schema_container:
+            if st.session_state.table_schema:
+                st.subheader("Table Schema")
+                # Force immediate rendering with an empty element
+                st.empty()
+                st.json(st.session_state.table_schema)
+        with data_container:
+            if st.session_state.sample_data is not None:
+                st.subheader("Sample Data (Last 3 rows)")
+                # Force immediate rendering with an empty element
+                st.empty()
+                st.dataframe(
+                    st.session_state.sample_data,
+                    use_container_width=True,
+                    hide_index=True
+                )
+# Query Input Section
+if st.session_state.selected_table:
+    st.header("3. Query Input")
+    user_query = st.text_area("Enter your query in plain English")
+    if st.button("Generate and Execute Query"):
+        if user_query:
+            # Generate SQL query
+            sql_query = generate_sql_query(user_query)
+            # Display the generated query
+            st.subheader("Generated SQL Query")
+            st.code(sql_query, language="sql")
+            # Execute the query
+            results = execute_query(sql_query)
+            if results is not None:
+                st.subheader("Query Results")
+                st.dataframe(results)

clients/openRouter.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import requests
+import json
+from typing import List, Dict, Optional
+class ResponseWrapper:
+    def __init__(self, response_data):
+        """
+        Wrap the response data to support both dict-like and attribute-like access
+        :param response_data: The raw response dictionary from OpenRouter
+        """
+        self._data = response_data
+    def __getattr__(self, name):
+        """
+        Allow attribute-style access to the response data
+        :param name: Attribute name to access
+        :return: Corresponding value from the response data
+        """
+        if name in self._data:
+            value = self._data[name]
+            return self._wrap(value)
+        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
+    def __getitem__(self, key):
+        """
+        Allow dictionary-style access to the response data
+        :param key: Key to access
+        :return: Corresponding value from the response data
+        """
+        value = self._data[key]
+        return self._wrap(value)
+    def _wrap(self, value):
+        """
+        Recursively wrap dictionaries and lists to support attribute access
+        :param value: Value to wrap
+        :return: Wrapped value
+        """
+        if isinstance(value, dict):
+            return ResponseWrapper(value)
+        elif isinstance(value, list):
+            return [self._wrap(item) for item in value]
+        return value
+    def __iter__(self):
+        """
+        Allow iteration over the wrapped dictionary
+        """
+        return iter(self._data)
+    def get(self, key, default=None):
+        """
+        Provide a get method similar to dictionary
+        """
+        return self._wrap(self._data.get(key, default))
+    def keys(self):
+        """
+        Return dictionary keys
+        """
+        return self._data.keys()
+    def items(self):
+        """
+        Return dictionary items
+        """
+        return [(k, self._wrap(v)) for k, v in self._data.items()]
+    def __str__(self):
+        """
+        Return a JSON string representation of the response data
+        :return: JSON-formatted string of the response
+        """
+        return json.dumps(self._data, indent=2)
+    def __repr__(self):
+        """
+        Return a string representation for debugging
+        :return: Representation of the ResponseWrapper
+        """
+        return f"ResponseWrapper({json.dumps(self._data, indent=2)})"
+class OpenRouter:
+    def __init__(self, api_key: str, base_url: str = "https://openrouter.ai/api/v1"):
+        """
+        Initialize OpenRouter client
+        :param api_key: API key for OpenRouter
+        :param base_url: Base URL for OpenRouter API (default is standard endpoint)
+        """
+        self.api_key = api_key
+        self.base_url = base_url
+        self.chat = self.ChatNamespace(self)
+    class ChatNamespace:
+        def __init__(self, client):
+            self._client = client
+            self.completions = self.CompletionsNamespace(client)
+        class CompletionsNamespace:
+            def __init__(self, client):
+                self._client = client
+            def create(
+                self,
+                model: str,
+                messages: List[Dict[str, str]],
+                temperature: float = 0.7,
+                max_tokens: Optional[int] = None,
+                **kwargs
+            ):
+                """
+                Create a chat completion request
+                :param model: Model to use
+                :param messages: List of message dictionaries
+                :param temperature: Sampling temperature
+                :param max_tokens: Maximum number of tokens to generate
+                :return: Wrapped response object
+                """
+                headers = {
+                    "Authorization": f"Bearer {self._client.api_key}",
+                    "Content-Type": "application/json",
+                    "HTTP-Referer": kwargs.get("http_referer", "https://your-app-domain.com"),
+                    "X-Title": kwargs.get("x_title", "AI Ad Generator")
+                }
+                payload = {
+                    "model": model,
+                    "messages": messages,
+                    "temperature": temperature,
+                }
+                if model.startswith("deepseek"):
+                    payload["provider"] = {
+                        "order": [
+                            "DeepSeek",
+                            "DeepInfra",
+                            "Fireworks",
+                        ],
+                        "allow_fallbacks": False
+                    }
+                if max_tokens is not None:
+                    payload["max_tokens"] = max_tokens
+                # Add any additional parameters
+                payload.update({k: v for k, v in kwargs.items()
+                                if k not in ["http_referer", "x_title"]})
+                try:
+                    response = requests.post(
+                        f"{self._client.base_url}/chat/completions",
+                        headers=headers,
+                        data=json.dumps(payload)
+                    )
+                    response.raise_for_status()
+                    # Wrap the response data
+                    return ResponseWrapper(response.json())
+                except requests.RequestException as e:
+                    raise Exception(f"OpenRouter API request failed: {e}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+# streamlit
+# pandas
+python-dotenv
+# groq
+openai
+transformers
+# gradio_client
+anthropic
+sqlalchemy
+psycopg2-binary

utils.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import datetime as DT
+import pytz
+import streamlit as st
+FONTS = [
+    # "Poppins:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;0,800;0,900;1,100;1,200;1,300;1,400;1,500;1,600;1,700;1,800;1,900",
+    # "Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900",
+    # "Raleway:ital,wght@0,100..900;1,100..900",
+    # "Lato:ital,wght@0,100;0,300;0,400;0,700;0,900;1,100;1,300;1,400;1,700;1,900",
+    # "Nunito:ital,wght@0,200..1000;1,200..1000",
+    # "Quicksand:[email protected]",
+    "Montserrat:ital,wght@0,100..900;1,100..900",
+    # "Edu+AU+VIC+WA+NT+Dots:[email protected]",
+    "Whisper",
+    # "Merienda:[email protected]",
+    "Playwrite+DE+Grund:[email protected]",
+    # "Roboto+Slab:[email protected]",
+    # "Open+Sans:ital,wght@0,300..800;1,300..800",
+    # "Nunito+Sans:ital,opsz,wght@0,6..12,200..1000;1,6..12,200..1000",
+    # "Ubuntu:ital,wght@0,300;0,400;0,500;0,700;1,300;1,400;1,500;1,700",
+]
+def __nowInIST() -> DT.datetime:
+    return DT.datetime.now(pytz.timezone("Asia/Kolkata"))
+def pprint(log: str):
+    now = __nowInIST()
+    now = now.strftime("%Y-%m-%d %H:%M:%S")
+    print(f"[{now}] [{st.session_state.ipAddress}] {log}")
+def getFontsUrl():
+    baseLink = "https://fonts.googleapis.com/css2"
+    params = "&".join([f"family={font}" for font in FONTS])
+    params = f"{params}&display=swap"
+    fontsUrl = f"{baseLink}?{params}"
+    # pprint(f"{fontsUrl=}")
+    return fontsUrl