HusnaManakkot committed on
Commit 72e6803 · verified · 1 Parent(s): a588039

Update app.py

Files changed (1)
  1. app.py +21 -21
app.py CHANGED
@@ -1,37 +1,37 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+from datasets import load_dataset

-# Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base-multi-summarization-sql-en")
-model = AutoModelForSeq2SeqLM.from_pretrained("Salesforce/codet5-base-multi-summarization-sql-en")
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("hrshtsharma2012/NL2SQL-Picard-final")
+model = AutoModelForSeq2SeqLM.from_pretrained("hrshtsharma2012/NL2SQL-Picard-final")

-def generate_sql(natural_language_query):
-    # Tokenize the input query
-    input_ids = tokenizer(natural_language_query, return_tensors="pt").input_ids
+# Initialize the pipeline
+nl2sql_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

-    # Generate the SQL query
-    output_ids = model.generate(input_ids, max_length=512)[0]
+# Load a part of the WikiSQL dataset
+wikisql_dataset = load_dataset("wikisql", split='train[:5]')

-    # Decode the generated SQL query
-    sql_query = tokenizer.decode(output_ids, skip_special_tokens=True)
+def generate_sql(query):
+    results = nl2sql_pipeline(query)
+    sql_query = results[0]['generated_text']
+    # Post-process the output to ensure it's a valid SQL query
+    sql_query = sql_query.replace('<pad>', '').replace('</s>', '').strip()
     return sql_query

-# Example questions for the interface
-example_questions = [
-    "What is the average salary of employees?",
-    "List the names of employees who work in the IT department.",
-    "Count the number of employees who joined after 2015."
-]
+# Use examples from the WikiSQL dataset
+example_questions = [(question['question'],) for question in wikisql_dataset]

-# Create the Gradio interface
+# Create a Gradio interface
 interface = gr.Interface(
     fn=generate_sql,
     inputs=gr.Textbox(lines=2, placeholder="Enter your natural language query here..."),
     outputs="text",
     examples=example_questions,
-    title="NL to SQL with CodeT5",
+    title="NL to SQL with Picard",
     description="This model converts natural language queries into SQL using the WikiSQL dataset. Try one of the example questions or enter your own!"
 )

-# Launch the interface
-interface.launch()
+# Launch the app
+if __name__ == "__main__":
+    interface.launch()
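
Because the new version guards interface.launch() behind if __name__ == "__main__", the module can now be imported without starting the Gradio UI, which allows a quick local check of generate_sql. A minimal sketch, assuming app.py is on the import path and the hrshtsharma2012/NL2SQL-Picard-final checkpoint and WikiSQL sample download successfully; the question below is a made-up example, not an output guaranteed by the model:

# Sketch: call the updated generate_sql without launching the Gradio interface.
# Importing app runs the module top level (model, pipeline, and the five-row
# WikiSQL sample are loaded), but interface.launch() is skipped because of the
# __main__ guard added in this commit.
import app

sql = app.generate_sql("How many employees joined after 2015?")
print(sql)  # the generated SQL string, with <pad> and </s> tokens stripped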