Aranwer committed on
Commit
29bb18b
·
verified ·
1 Parent(s): 4bb226d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ from sentence_transformers import SentenceTransformer
4
+ import faiss
5
+ import numpy as np
6
+ from transformers import pipeline
7
+
8
# Retrieval corpus: the text of the first 200 training examples from the
# LexGLUE "scotus" configuration (just 200 to keep it light).
dataset = load_dataset("lex_glue", "scotus")
train_subset = dataset['train'].select(range(200))
corpus = [record['text'] for record in train_subset]

# Embed every corpus document once at start-up; the same model is reused
# to embed incoming questions so both live in the same vector space.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
corpus_embeddings = embedder.encode(corpus, convert_to_numpy=True)

# Flat L2 FAISS index sized to the embedding width, filled with the corpus
# vectors in corpus order (so a FAISS row id is also an index into `corpus`).
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(corpus_embeddings)

# Generator used to turn "question + retrieved context" into a reply.
gen_pipeline = pipeline(
    "text2text-generation",
    model="facebook/bart-large-cnn",
)
23
+
24
+ # RAG-like query function
25
def rag_query(user_question):
    """Answer a question using the most similar corpus documents as context.

    The question is embedded with ``embedder``, the three nearest corpus
    documents are fetched from the FAISS ``index``, and a
    "Question / Context / Answer" prompt is passed to ``gen_pipeline``.

    Args:
        user_question: Free-text question typed by the user. ``None`` and
            whitespace-only strings are treated as empty.

    Returns:
        The generated text as a string, or a short instruction message when
        the question is empty.
    """
    question = (user_question or "").strip()
    if not question:
        # Nothing to retrieve for; avoid a pointless model call.
        return "Please enter a question."

    # FAISS expects a 2-D float32 matrix of query vectors.
    question_embedding = np.asarray(
        embedder.encode([question], convert_to_numpy=True),
        dtype=np.float32,
    )
    _, indices = index.search(question_embedding, k=3)

    # FAISS reports missing neighbours as -1; indexing `corpus` with -1 would
    # silently pull in the last document, so drop those slots.
    context = " ".join(corpus[i] for i in indices[0] if i >= 0)

    prompt = f"Question: {question}\nContext: {context}\nAnswer:"
    # Three full court opinions are far longer than the generation model
    # accepts; without truncation the pipeline fails on over-long input.
    outputs = gen_pipeline(
        prompt,
        max_length=250,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]['generated_text']
33
+
34
+ # Gradio UI
35
def chatbot_interface(query):
    """Gradio callback: forward the submitted text to ``rag_query``.

    Args:
        query: Text entered in the interface's input box.

    Returns:
        Whatever ``rag_query`` returns for that text.
    """
    answer = rag_query(query)
    return answer
37
+
38
# Single text-in / text-out Gradio app wired to the chatbot callback.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="🧑‍⚖️ Legal Assistant Chatbot",
    description="Ask legal questions based on case data (LexGLUE - SCOTUS subset)",
)

# Start the web server for the interface.
iface.launch()