import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
import gradio as gr
import json
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, AutoModelForSequenceClassification
import spaces
# Pick the compute device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the CSV file with precomputed embeddings
df = pd.read_csv('RBDx10kstats.csv')
df['embedding'] = df['embedding'].apply(json.loads)  # Convert JSON string back to list

# Convert embeddings to a tensor for efficient retrieval
embeddings = torch.tensor(df['embedding'].tolist(), device=device)
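# Assumed data layout (the CSV itself is not shown here): each row of
# RBDx10kstats.csv carries at least an 'Abstract' text column and an
# 'embedding' column holding a JSON-encoded list of floats, e.g.
# "[0.01, -0.23, ...]", produced by the same SentenceTransformer model
# loaded below.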
# Load the Sentence Transformer model
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
# Load the extractive QA model for response generation
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-distilled-squad")
model_response = AutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased-distilled-squad").to(device)
# Load the NLU model for intent detection (an SST-2 classification checkpoint
# used as a stand-in intent classifier; it shares the DistilBERT tokenizer loaded above)
nlu_model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english").to(device)
# Define the function to find the most relevant document
@spaces.GPU(duration=120)
def retrieve_relevant_doc(query):
    query_embedding = model.encode(query, convert_to_tensor=True, device=device)
    similarities = util.pytorch_cos_sim(query_embedding, embeddings)[0]
    best_match_idx = torch.argmax(similarities).item()
    return df.iloc[best_match_idx]['Abstract']
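# Example: retrieve_relevant_doc("What causes RBD?") embeds the query with the
# same MiniLM model, scores it against every precomputed document embedding by
# cosine similarity, and returns the 'Abstract' of the best-scoring row.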
# Define the function to detect intent
@spaces.GPU(duration=120)
def detect_intent(query):
    inputs = tokenizer(query, return_tensors="pt").to(device)
    outputs = nlu_model(**inputs)
    intent = torch.argmax(outputs.logits).item()
    return intent
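# Note: this SST-2 checkpoint outputs two classes (0 and 1), which the code
# below treats as coarse intent labels; a model fine-tuned on actual intent
# data would be a drop-in replacement here.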
# Define the function to generate a response
@spaces.GPU(duration=120)
def generate_response(query):
    relevant_doc = retrieve_relevant_doc(query)
    intent = detect_intent(query)
    if intent == 0:  # Handle intent 0 (e.g., informational query)
        # DistilBERT-SQuAD is an extractive QA model, so instead of generate()
        # we take the highest-scoring answer span from the retrieved document
        inputs = tokenizer(query, relevant_doc, return_tensors="pt", truncation=True, max_length=512).to(device)
        outputs = model_response(**inputs)
        answer_start = torch.argmax(outputs.start_logits)
        answer_end = torch.argmax(outputs.end_logits) + 1
        response = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end], skip_special_tokens=True)
    else:  # Other intents (e.g., opinion-based queries) fall back to a clarification prompt
        response = "I'm not sure I understand your question. Can you please rephrase?"
    return response
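# End-to-end flow: a query is matched to its closest abstract, classified for
# intent, and (for informational queries) answered with the span of that
# abstract the QA model scores highest against the question.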
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs="text",
    title="RAG Chatbot",
    description="This chatbot retrieves relevant documents based on your query and generates responses using AI models."
)
# Launch the Gradio interface
iface.launch()
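# For reference, a minimal sketch of how the 'embedding' column could have been
# precomputed offline (file and column names are assumptions based on the code above):
#
#   encoder = SentenceTransformer('all-MiniLM-L6-v2')
#   src = pd.read_csv('RBDx10kstats.csv')
#   src['embedding'] = src['Abstract'].apply(
#       lambda text: json.dumps(encoder.encode(text).tolist()))
#   src.to_csv('RBDx10kstats.csv', index=False)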