NLSQL

Runtime error

HusnaManakkot committed on Feb 26, 2024

Commit

5665aa8

verified ·

1 Parent(s): d82d943

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,37 +1,21 @@
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
-from datasets import load_dataset
-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("hrshtsharma2012/NL2SQL-Picard-final")
-model = AutoModelForSeq2SeqLM.from_pretrained("hrshtsharma2012/NL2SQL-Picard-final")
-# Initialize the pipeline
-nl2sql_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
-# Load a part of the WikiSQL dataset
-wikisql_dataset = load_dataset("wikisql", split='train[:5]')
-def generate_sql(query):
-    results = nl2sql_pipeline(query)
-    sql_query = results[0]['generated_text']
-    # Post-process the output to ensure it's a valid SQL query
-    sql_query = sql_query.replace('<pad>', '').replace('</s>', '').strip()
     return sql_query
-# Use examples from the WikiSQL dataset
-example_questions = [(question['question'],) for question in wikisql_dataset]
-# Create a Gradio interface
-interface = gr.Interface(
-    fn=generate_sql,
-    inputs=gr.Textbox(lines=2, placeholder="Enter your natural language query here..."),
-    outputs="text",
-    examples=example_questions,
-    title="NL to SQL with Picard",
-    description="This model converts natural language queries into SQL using the WikiSQL dataset. Try one of the example questions or enter your own!"
-)
-# Launch the app
-if __name__ == "__main__":
-    interface.launch()

+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base-multi-summarization-sql-en")
+model = AutoModelForSeq2SeqLM.from_pretrained("Salesforce/codet5-base-multi-summarization-sql-en")
+def nl_to_sql(natural_language_query):
+    # Tokenize the input query
+    input_ids = tokenizer(natural_language_query, return_tensors="pt").input_ids
+    # Generate the SQL query
+    output_ids = model.generate(input_ids, max_length=512)[0]
+    # Decode the generated SQL query
+    sql_query = tokenizer.decode(output_ids, skip_special_tokens=True)
     return sql_query
+# Example usage
+natural_language_query = "What is the average salary of employees?"
+sql_query = nl_to_sql(natural_language_query)
+print(f"SQL Query: {sql_query}")