"""Legal Assistant Chatbot using the LexGLUE dataset and GPT-2.

Builds a FAISS index over the CaseHOLD test split (each context paired with
each candidate ending), retrieves the passages closest to a user question,
and asks GPT-2 to continue a prompt built from those passages.

Includes session memory for the last 5 Q&A pairs and sample questions for
user guidance.
"""

import ast
import os
import zipfile
from collections import deque

import faiss
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import pipeline

zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
extract_dir = "lexglue_data"
csv_path = os.path.join(extract_dir, "case_hold_test.csv")

RETRIEVE_K = 5            # neighbours requested from the index
CONTEXT_DOCS = 3          # neighbours actually placed in the prompt
MAX_CONTEXT_CHARS = 1024  # character cap on the retrieved context
MAX_NEW_TOKENS = 200      # generation budget per answer
HISTORY_SIZE = 5          # number of recent Q&A pairs kept

# Check for the CSV itself rather than the directory: a leftover or partially
# extracted directory would otherwise skip extraction and fail at read_csv.
if not os.path.exists(csv_path):
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_dir)

df = pd.read_csv(csv_path)
df = df[["context", "endings", "label"]]
# 'endings' is stored as the string form of a Python list; literal_eval parses
# it without executing arbitrary code.
df["endings"] = df["endings"].apply(ast.literal_eval)

# One corpus entry per (context, ending) pair.
corpus = []
for context, endings in zip(df["context"], df["endings"]):
    stripped_context = context.strip()
    corpus.extend(f"{stripped_context} {ending.strip()}" for ending in endings)

embedder = SentenceTransformer("all-MiniLM-L6-v2")
# FAISS operates on float32 matrices; make the dtype explicit.
corpus_embeddings = np.asarray(
    embedder.encode(corpus, show_progress_bar=True), dtype="float32"
)

dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(corpus_embeddings)

generator = pipeline("text-generation", model="gpt2")

# Bounded buffer: appending beyond HISTORY_SIZE drops the oldest pair.
# NOTE: this is module-level state, so it is shared by every caller of
# legal_assistant_query in this process.
history = deque(maxlen=HISTORY_SIZE)


def legal_assistant_query(query):
    """Answer a legal question using retrieved passages and GPT-2.

    Args:
        query: The user's question as text.

    Returns:
        A string containing the generated answer followed by the most recent
        question/answer pairs (up to HISTORY_SIZE). For a blank query, a short
        message asking for input is returned and history is left untouched.
    """
    query = (query or "").strip()
    if not query:
        return "Please enter a legal question."

    query_embedding = np.asarray(embedder.encode([query]), dtype="float32")
    _, neighbor_ids = index.search(query_embedding, k=RETRIEVE_K)

    # FAISS fills missing neighbours with -1 when the index holds fewer than
    # k vectors; without the filter, corpus[-1] would silently be used.
    retrieved_docs = [corpus[i] for i in neighbor_ids[0] if i >= 0]

    context_combined = "\n\n".join(retrieved_docs[:CONTEXT_DOCS])
    context_combined = context_combined[:MAX_CONTEXT_CHARS]

    prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"

    # return_full_text=False yields only the continuation. Splitting the full
    # text on "Answer:" would drop the real answer whenever the model itself
    # generates another "Answer:" marker.
    generated = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]
    answer = generated.strip()

    history.append((query, answer))

    formatted_history = "\n\n".join(f"Q: {q}\nA: {a}" for q, a in history)

    return f"{answer}\n\n---\nRecent Q&A:\n{formatted_history}"


sample_questions = [
    "What rights does a person have under the Fourth Amendment?",
    "Explain due process in simple terms.",
    "What is double jeopardy?",
    "Can the police search your car without a warrant?",
    "What is considered a fair trial?",
]

# Emoji are written as escapes so the file stays ASCII-only and cannot be
# corrupted by an encoding mismatch: judge (person + ZWJ + scales) and
# light bulb.
JUDGE_EMOJI = "\U0001F9D1\u200D\u2696\uFE0F"
BULB_EMOJI = "\U0001F4A1"

iface = gr.Interface(
    fn=legal_assistant_query,
    inputs=gr.Textbox(lines=2, placeholder="Ask a legal question...", label="Your Question"),
    outputs=gr.Textbox(label="Legal Response with History"),
    title=f"{JUDGE_EMOJI} Legal Assistant Chatbot",
    description=(
        "Ask any legal question and get context-based case.\n\n"
        f"{BULB_EMOJI} Sample Questions:\n- " + "\n- ".join(sample_questions)
    ),
)

iface.launch()