Spaces:
Running
Running
import zipfile | |
import os | |
import pandas as pd | |
import numpy as np | |
import ast | |
import gradio as gr | |
import faiss | |
from sentence_transformers import SentenceTransformer | |
from transformers import pipeline | |
""" | |
Legal assistant chatbot built on the LexGLUE CaseHOLD data and GPT-2.

Keeps the five most recent question/answer pairs in a module-level history
and lists sample questions in the interface for user guidance.
""" | |
# --- Dataset -----------------------------------------------------------------
zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
extract_dir = "lexglue_data"

# Unpack the archive only when the target directory is missing; later runs
# reuse whatever is already extracted there.
if not os.path.exists(extract_dir):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

df = pd.read_csv(os.path.join(extract_dir, "case_hold_test.csv"))
# .copy() yields an independent frame, so the column assignment below does not
# raise pandas' SettingWithCopyWarning.
df = df[['context', 'endings', 'label']].copy()
# 'endings' is read as text; literal_eval turns each cell into the Python
# object (iterated below as a sequence of strings) without executing code.
df['endings'] = df['endings'].apply(ast.literal_eval)

# --- Retrieval corpus --------------------------------------------------------
# One passage per (context, ending) pair, in row order and then ending order.
# The position of a passage in `corpus` is also its id in the FAISS index.
corpus = [
    f"{context.strip()} {ending.strip()}"
    for context, endings in zip(df['context'], df['endings'])
    for ending in endings
]

# --- Embeddings and nearest-neighbour index ----------------------------------
embedder = SentenceTransformer('all-MiniLM-L6-v2')
corpus_embeddings = embedder.encode(corpus, show_progress_bar=True)
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
# FAISS only accepts float32 input; asarray avoids a copy when the embeddings
# already have that dtype.
index.add(np.asarray(corpus_embeddings, dtype=np.float32))

# --- Generation --------------------------------------------------------------
generator = pipeline("text-generation", model="gpt2")

# Most recent (question, answer) pairs. This is a single module-level list, so
# it is shared by every caller of legal_assistant_query in this process.
history = []
def legal_assistant_query(query):
    """Answer a legal question from retrieved case passages using GPT-2.

    The question is embedded, its nearest passages are looked up in the FAISS
    index, and the three closest ones (truncated to 1024 characters in total)
    are placed in a prompt for the text-generation pipeline. The exchange is
    appended to the module-level ``history`` list, which is capped at the
    five most recent entries.

    Args:
        query: The user's question as plain text.

    Returns:
        The generated answer followed by a "Recent Q&A" section listing the
        retained history, or a short prompt message when ``query`` is blank.
    """
    if not query or not query.strip():
        # Nothing to retrieve or generate; also keeps blank submissions out
        # of the shared history.
        return "Please enter a legal question."

    query_embedding = embedder.encode([query])
    _, neighbor_ids = index.search(np.asarray(query_embedding, dtype=np.float32), k=5)
    # FAISS pads the result with -1 when it finds fewer than k neighbours;
    # unfiltered, -1 would silently pick the last passage of the corpus.
    retrieved_docs = [corpus[i] for i in neighbor_ids[0] if i >= 0]
    context_combined = "\n\n".join(retrieved_docs[:3])[:1024]

    prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"
    result = generator(prompt, max_new_tokens=200, do_sample=True)[0]['generated_text']

    # The pipeline returns the prompt followed by the continuation. Strip the
    # prompt by position: splitting on "Answer:" and keeping the last piece
    # discards most of the output whenever the model (or the question itself)
    # contains that marker again.
    if result.startswith(prompt):
        answer = result[len(prompt):].strip()
    else:
        # Prompt not echoed verbatim; fall back to the marker-based split.
        answer = result.split("Answer:")[-1].strip()

    history.append((query, answer))
    # Keep only the five newest exchanges (no-op while the list is short).
    del history[:-5]

    formatted_history = "\n\n".join(f"Q: {q}\nA: {a}" for q, a in history)
    return f"{answer}\n\n---\nRecent Q&A:\n{formatted_history}"
# Example prompts listed in the interface description to guide users.
sample_questions = [
    "What rights does a person have under the Fourth Amendment?",
    "Explain due process in simple terms.",
    "What is double jeopardy?",
    "Can the police search your car without a warrant?",
    "What is considered a fair trial?",
]

# Single-textbox UI: the question goes to legal_assistant_query and the
# returned text (answer plus recent history) is shown in the output box.
# The emoji are written as escape sequences so the source stays ASCII and
# cannot be garbled by a wrong file encoding:
#   \U0001F9D1\u200D\u2696\uFE0F -> judge emoji, \U0001F4A1 -> light bulb.
iface = gr.Interface(
    fn=legal_assistant_query,
    inputs=gr.Textbox(lines=2, placeholder="Ask a legal question...", label="Your Question"),
    outputs=gr.Textbox(label="Legal Response with History"),
    title="\U0001F9D1\u200D\u2696\uFE0F Legal Assistant Chatbot",
    description="Ask any legal question and get context-based case.\n\n\U0001F4A1 Sample Questions:\n- " + "\n- ".join(sample_questions),
)

# Start the web server for the interface.
iface.launch()