Spaces:

claudion-ai
/

Frappe

Sleeping

File size: 1,723 Bytes

f4f80f2
 
748a3d0
b8c4f28
748a3d0
b8c4f28
 
f4f80f2
 
 
 
 
b8c4f28
 
 
 
 
7e713f6
b8c4f28
 
 
 
 
 
 
 
 
 
 
aaa6d98
7e713f6
 
 
 
b8c4f28
1eb39b3
b8c4f28
7e713f6
aaa6d98
7e713f6

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
from difflib import get_close_matches

# Only the first 100 rows of the WikiSQL training split are loaded; every
# lookup performed by this app is limited to that slice.
wikisql_dataset = load_dataset("wikisql", split='train[:100]')

# The tokenizer and the seq2seq model are both taken from the same checkpoint.
# NOTE(review): no code visible in this file references `tokenizer` or
# `model` -- confirm whether loading them at import time is still required.
_T5_WIKISQL_CHECKPOINT = "mrm8488/t5-base-finetuned-wikiSQL"
tokenizer = AutoTokenizer.from_pretrained(_T5_WIKISQL_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_T5_WIKISQL_CHECKPOINT)

def find_closest_match(query, dataset):
    """Return the question in ``dataset`` that best resembles ``query``.

    Args:
        query: Text to compare against the dataset's questions.
        dataset: Iterable of mappings, each providing a ``'question'`` entry.

    Returns:
        The top-ranked question string according to
        ``difflib.get_close_matches`` (with its default similarity cutoff),
        or ``None`` when no question is similar enough.
    """
    candidate_questions = [record['question'] for record in dataset]
    # n=1 asks difflib for the single best candidate only; the result is a
    # list with zero or one element.
    ranked = get_close_matches(query, candidate_questions, n=1)
    return next(iter(ranked), None)

def generate_sql_from_user_input(query):
    """Look up the dataset question closest to ``query`` and its stored SQL.

    No SQL is generated here: the result is read from the loaded
    ``wikisql_dataset`` rows.

    Args:
        query: Natural-language text entered by the user.

    Returns:
        A 2-tuple of strings. On success it is
        ``(matched question, human-readable SQL of that row)``. Otherwise the
        first element is an explanatory message and the second is ``""``.
    """
    closest_question = find_closest_match(query, wikisql_dataset)
    if not closest_question:
        return "No close match found in the dataset.", ""

    # Take the first row whose question equals the matched text; the
    # generator stops scanning as soon as one is found.
    matching_row = next(
        (row for row in wikisql_dataset if row['question'] == closest_question),
        None,
    )
    if matching_row is None:
        return "Match found, but corresponding SQL query not found in dataset.", ""

    return closest_question, matching_row['sql']['human_readable']

# Build the Gradio UI: one input textbox for the user's query and two output
# textboxes, matching the 2-tuple returned by generate_sql_from_user_input.
_query_box = gr.Textbox(label="Enter your natural language query")
_matched_question_box = gr.Textbox(label="Matched Query from Dataset")
_matched_sql_box = gr.Textbox(label="Corresponding SQL Query from Dataset")

interface = gr.Interface(
    fn=generate_sql_from_user_input,
    inputs=_query_box,
    outputs=[_matched_question_box, _matched_sql_box],
    title="NL to SQL with T5 using WikiSQL Dataset",
    description="This model finds the closest match in the WikiSQL dataset for your query and returns the corresponding SQL query from the dataset.",
)

# Start the web app only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    interface.launch()