Spaces:

Aranwer
/

LegalAssistantChatbot

Running

App Files Files Community

Aranwer committed 27 days ago

Commit

dbe8ae7

verified ·

1 Parent(s): 088c109

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -16

app.py CHANGED Viewed

@@ -5,44 +5,54 @@ import faiss
 import numpy as np
 from transformers import pipeline
 dataset = load_dataset("lex_glue", "scotus")
-corpus = [doc['text'] for doc in dataset['train'].select(range(200))]
 embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 corpus_embeddings = embedder.encode(corpus, convert_to_numpy=True)
 dimension = corpus_embeddings.shape[1]
 index = faiss.IndexFlatL2(dimension)
 index.add(corpus_embeddings)
 gen_pipeline = pipeline("text2text-generation", model="facebook/bart-large-cnn")
 def rag_query(user_question):
     question_embedding = embedder.encode([user_question])
-    k = 3
     if index.ntotal < k:
-        k = index.ntotal
     _, indices = index.search(np.array(question_embedding), k=k)
-    if len(indices[0]) == 0:
         return "Sorry, no relevant documents were found."
-    context = " ".join([corpus[i] for i in indices[0] if i < len(corpus)])
     prompt = f"Question: {user_question}\nContext: {context}\nAnswer:"
     result = gen_pipeline(prompt, max_length=250, do_sample=False)[0]['generated_text']
     return result
 def chatbot_interface(query):
     return rag_query(query)
 css = """
     .gradio-container {
         background-color: #f0f4f8;
@@ -88,7 +98,7 @@ css = """
     }
 """
 iface = gr.Interface(
     fn=chatbot_interface,
     inputs="text",
@@ -99,5 +109,5 @@ iface = gr.Interface(
     css=css
 )
-iface.launch()

 import numpy as np
 from transformers import pipeline
+# Load dataset
 dataset = load_dataset("lex_glue", "scotus")
+corpus = [doc['text'] for doc in dataset['train'].select(range(200))]  # just 200 to keep it light
+# Embedding model
 embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 corpus_embeddings = embedder.encode(corpus, convert_to_numpy=True)
+# Build FAISS index
 dimension = corpus_embeddings.shape[1]
 index = faiss.IndexFlatL2(dimension)
 index.add(corpus_embeddings)
+# Text generation model
 gen_pipeline = pipeline("text2text-generation", model="facebook/bart-large-cnn")
+# RAG-like query function
 def rag_query(user_question):
+    # Encode the user question
     question_embedding = embedder.encode([user_question])
+    k = 3  # top 3 documents
     if index.ntotal < k:
+        k = index.ntotal  # Adjust if there are fewer documents than requested
+    # Perform the search in the FAISS index
     _, indices = index.search(np.array(question_embedding), k=k)
+    # Ensure indices are valid (within range of the corpus)
+    valid_indices = [i for i in indices[0] if i < len(corpus)]
+    if len(valid_indices) == 0:
         return "Sorry, no relevant documents were found."
+    # Extract relevant context from the corpus based on valid indices
+    context = " ".join([corpus[i] for i in valid_indices])
+    # Prepare the prompt and generate the response
     prompt = f"Question: {user_question}\nContext: {context}\nAnswer:"
     result = gen_pipeline(prompt, max_length=250, do_sample=False)[0]['generated_text']
     return result
+# Gradio UI
 def chatbot_interface(query):
     return rag_query(query)
+# Styling for the interface
 css = """
     .gradio-container {
         background-color: #f0f4f8;
     }
 """
+# Create the Gradio interface
 iface = gr.Interface(
     fn=chatbot_interface,
     inputs="text",
     css=css
 )
+# Launch the Gradio interface
+iface.launch()