# NOTE: removed web-scrape residue (Hugging Face Spaces file-listing metadata:
# commit hashes, file size, and the line-number gutter) that was not part of
# the original Python source and made the file unparseable.
import zipfile
import os
import pandas as pd
import numpy as np
import ast
import gradio as gr
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
"""
Legal Assistant Chatbot using LexGLUE dataset and GPT-2
Includes session memory for last 5 Q&A and sample questions for user guidance.
"""
# --- Dataset extraction and loading ---
# Unpack the LexGLUE archive once; later runs reuse the extracted directory.
zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
extract_dir = "lexglue_data"
if not os.path.exists(extract_dir):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

# CASE_HOLD test split: each row holds a case context, a list of candidate
# holdings ('endings'), and the index of the correct one ('label').
df = pd.read_csv(os.path.join(extract_dir, "case_hold_test.csv"))
df = df[['context', 'endings', 'label']]
# 'endings' is serialized as a stringified Python list in the CSV;
# literal_eval parses it back into an actual list (safe, unlike eval).
df['endings'] = df['endings'].apply(ast.literal_eval)
# Flatten every (context, candidate holding) pair into one retrievable
# passage: "<context> <ending>". One corpus entry per ending.
corpus = [
    f"{row['context'].strip()} {ending.strip()}"
    for _, row in df.iterrows()
    for ending in row['endings']
]
# --- Retrieval index and generator ---
# Sentence embeddings used for dense retrieval over the corpus.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
corpus_embeddings = embedder.encode(corpus, show_progress_bar=True)
# Exact (brute-force) L2 FAISS index; adequate at this corpus size,
# no approximate-nearest-neighbor structure needed.
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(corpus_embeddings))
# GPT-2 generator that drafts answers conditioned on retrieved passages.
generator = pipeline("text-generation", model="gpt2")
# Rolling session memory: most recent (question, answer) pairs, capped at 5
# inside legal_assistant_query.
history = []
def legal_assistant_query(query):
    """Answer a legal question via retrieval-augmented GPT-2 generation.

    Embeds the query, retrieves the 3 nearest LexGLUE passages from the
    FAISS index, prompts GPT-2 with them, and appends the Q&A pair to the
    rolling 5-entry session history.

    Args:
        query: The user's legal question as free text.

    Returns:
        The generated answer followed by the formatted recent Q&A history.
    """
    query = query.strip()
    if not query:
        # Robustness: avoid running retrieval/generation on an empty prompt.
        return "Please enter a legal question."

    # Fetch only the 3 passages we actually use. (The original searched
    # k=5 and discarded two; FAISS returns hits sorted by distance, so the
    # top 3 of k=3 are identical to the top 3 of k=5.)
    query_embedding = embedder.encode([query])
    _, indices = index.search(np.array(query_embedding), k=3)
    retrieved_docs = [corpus[i] for i in indices[0]]

    # Cap the retrieved context so the prompt stays small.
    # NOTE(review): this truncates at 1024 *characters* as a rough proxy for
    # GPT-2's 1024-token window — a very long query could still overflow;
    # confirm against the model tokenizer if this matters.
    context_combined = "\n\n".join(retrieved_docs)[:1024]

    prompt = (
        "Given the following legal references, answer the question:\n\n"
        f"{context_combined}\n\nQuestion: {query}\nAnswer:"
    )
    result = generator(prompt, max_new_tokens=200, do_sample=True)[0]['generated_text']
    # The model echoes the prompt; keep only the text after the last "Answer:".
    answer = result.split("Answer:")[-1].strip()

    # Rolling memory: keep only the 5 most recent Q&A pairs.
    history.append((query, answer))
    if len(history) > 5:
        history.pop(0)

    formatted_history = "\n\n".join(f"Q: {q}\nA: {a}" for q, a in history)
    return f"{answer}\n\n---\nRecent Q&A:\n{formatted_history}"
# Example prompts surfaced in the UI description to guide first-time users.
sample_questions = [
    "What rights does a person have under the Fourth Amendment?",
    "Explain due process in simple terms.",
    "What is double jeopardy?",
    "Can the police search your car without a warrant?",
    "What is considered a fair trial?"
]
# --- Gradio UI ---
# Single-textbox interface; the sample questions are embedded in the
# description so users see them without extra components.
# Fix: removed the stray " |" that trailed iface.launch() in the scraped
# source — it was file-listing residue and a SyntaxError.
iface = gr.Interface(
    fn=legal_assistant_query,
    inputs=gr.Textbox(lines=2, placeholder="Ask a legal question...", label="Your Question"),
    outputs=gr.Textbox(label="Legal Response with History"),
    title="🧑⚖️ Legal Assistant Chatbot",
    description="Ask any legal question and get context-based case.\n\n💡 Sample Questions:\n- " + "\n- ".join(sample_questions)
)
iface.launch()