Aranwer's picture
Update app.py
0e1688f verified
import zipfile
import os
import pandas as pd
import numpy as np
import ast
import gradio as gr
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
"""
Legal Assistant Chatbot built on the LexGLUE dataset and GPT-2.
Keeps the last 5 question/answer pairs in memory and lists sample questions for user guidance.
"""
# --- Dataset ---------------------------------------------------------------
# Zip archive holding the dataset CSVs and the directory it is unpacked into.
zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
extract_dir = "lexglue_data"

# Unpack only when the target directory does not exist yet.
if not os.path.exists(extract_dir):
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_dir)

df = pd.read_csv(os.path.join(extract_dir, "case_hold_test.csv"))
df = df[['context', 'endings', 'label']]
# 'endings' is read from the CSV as text; parse each cell back into a Python
# object so the individual endings can be iterated below.
df['endings'] = df['endings'].apply(ast.literal_eval)

# --- Retrieval corpus --------------------------------------------------------
# One passage per (context, ending) pair: the stripped context followed by a
# single space and the stripped ending.
corpus = []
for context, endings in zip(df['context'], df['endings']):
    prefix = context.strip()
    corpus.extend(f"{prefix} {ending.strip()}" for ending in endings)

# --- Embeddings and search index ---------------------------------------------
embedder = SentenceTransformer('all-MiniLM-L6-v2')
corpus_embeddings = embedder.encode(corpus, show_progress_bar=True)
# The index is sized from the second axis of the embedding matrix.
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(corpus_embeddings))

# --- Text generation ---------------------------------------------------------
generator = pipeline("text-generation", model="gpt2")

# Recent (question, answer) pairs; legal_assistant_query appends to this list
# and trims it.
history = []
def legal_assistant_query(query: str) -> str:
    """Answer a legal question using retrieved corpus passages and GPT-2.

    The query is embedded, its nearest corpus passages are looked up in the
    FAISS index, and the first three are placed in a prompt for the
    text-generation pipeline. The answer is recorded in the module-level
    ``history`` list, which is trimmed to its five most recent entries.

    Args:
        query: The user's question as free text.

    Returns:
        The generated answer, followed by a separator and the recent
        question/answer pairs. For an empty or whitespace-only query a short
        message asking for a question is returned and nothing is recorded.
    """
    # Nothing to retrieve or generate for a blank question; also keeps blank
    # entries out of the history.
    if not query or not query.strip():
        return "Please enter a legal question."

    query_embedding = embedder.encode([query])
    _distances, neighbor_ids = index.search(np.array(query_embedding), k=5)
    # FAISS fills missing neighbours with label -1; without the filter that
    # would silently select corpus[-1].
    retrieved_docs = [corpus[i] for i in neighbor_ids[0] if i >= 0]

    # Only the first three passages are used, capped at 1024 characters to
    # bound the prompt length.
    context_combined = "\n\n".join(retrieved_docs[:3])[:1024]
    prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"

    generated = generator(prompt, max_new_tokens=200, do_sample=True)[0]['generated_text']
    if generated.startswith(prompt):
        # The pipeline returns prompt + continuation. Slicing off the known
        # prefix keeps the whole answer even when the model itself emits
        # another "Answer:" marker further on.
        answer = generated[len(prompt):].strip()
    else:
        # Fallback to the marker-based split if the prompt is not echoed back.
        answer = generated.split("Answer:")[-1].strip()

    # `history` is a module-level list, so it is shared by every caller of
    # this function.
    history.append((query, answer))
    del history[:-5]  # keep only the five most recent pairs

    formatted_history = "\n\n".join(f"Q: {q}\nA: {a}" for q, a in history)
    return f"{answer}\n\n---\nRecent Q&A:\n{formatted_history}"
# Example prompts listed in the interface description to guide users.
sample_questions = [
    "What rights does a person have under the Fourth Amendment?",
    "Explain due process in simple terms.",
    "What is double jeopardy?",
    "Can the police search your car without a warrant?",
    "What is considered a fair trial?",
]

# Single-textbox UI: the question goes to legal_assistant_query and the
# returned text (answer plus recent Q&A) is shown in the output box.
iface = gr.Interface(
    fn=legal_assistant_query,
    inputs=gr.Textbox(lines=2, placeholder="Ask a legal question...", label="Your Question"),
    outputs=gr.Textbox(label="Legal Response with History"),
    # The emoji were stored as mojibake (UTF-8 bytes decoded with a
    # single-byte code page). They are written as escapes so the source
    # survives re-encoding: U+1F9D1 U+200D U+2696 U+FE0F is the "judge" emoji.
    title="\U0001F9D1\u200D\u2696\uFE0F Legal Assistant Chatbot",
    description=(
        "Ask any legal question and get context-based case.\n\n"
        # U+1F4A1 is the "light bulb" emoji.
        "\U0001F4A1 Sample Questions:\n- " + "\n- ".join(sample_questions)
    ),
)
iface.launch()