File size: 2,637 Bytes
904c6a6
 
 
 
 
29bb18b
 
0e1688f
904c6a6
29bb18b
 
0e1688f
 
 
 
 
904c6a6
 
 
 
 
 
 
 
 
 
29bb18b
904c6a6
 
 
 
 
29bb18b
904c6a6
 
 
29bb18b
 
904c6a6
29bb18b
904c6a6
29bb18b
38b37ec
 
0e1688f
904c6a6
 
0e1688f
904c6a6
38b37ec
0e1688f
088c109
904c6a6
 
38b37ec
 
0e1688f
38b37ec
 
 
0e1688f
 
 
 
 
 
 
 
 
 
088c109
2a8a0e5
904c6a6
0e1688f
 
2a8a0e5
0e1688f
2a8a0e5
29bb18b
904c6a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import zipfile
import os
import pandas as pd
import numpy as np
import ast
import gradio as gr
import faiss

from sentence_transformers import SentenceTransformer
from transformers import pipeline

"""
Legal Assistant Chatbot using LexGLUE dataset and GPT-2
Includes session memory for last 5 Q&A and sample questions for user guidance.
"""

# Location of the raw LexGLUE archive and the directory it is unpacked into.
zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
extract_dir = "lexglue_data"

# Unpack the dataset once; later runs reuse the already-extracted directory.
if not os.path.exists(extract_dir):
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(extract_dir)

# Load the CASE-HOLD test split; each row carries a case context, a list of
# candidate holdings ("endings", stored as a stringified Python list), and a
# label column. Only these three columns are kept.
df = pd.read_csv(os.path.join(extract_dir, "case_hold_test.csv"))
df = df[['context', 'endings', 'label']]
df['endings'] = df['endings'].apply(ast.literal_eval)

# Flatten every (context, ending) pair into a single retrievable passage:
# "<stripped context> <stripped ending>".
corpus = [
    f"{row['context'].strip()} {ending.strip()}"
    for _, row in df.iterrows()
    for ending in row['endings']
]

# Sentence embedder used for BOTH the corpus and incoming queries — the same
# model must encode both sides so vectors are comparable in one space.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
corpus_embeddings = embedder.encode(corpus, show_progress_bar=True)

# Exact (brute-force) L2 nearest-neighbour index over the corpus embeddings.
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(corpus_embeddings))

# GPT-2 text-generation pipeline that drafts answers from retrieved context.
generator = pipeline("text-generation", model="gpt2")

# Rolling session memory: the last 5 (question, answer) pairs, shared across
# calls to legal_assistant_query below.
history = []

def legal_assistant_query(query, top_k=5, context_docs=3):
    """Answer a legal question using retrieved LexGLUE passages plus GPT-2.

    Retrieves the ``top_k`` nearest corpus passages for ``query``, feeds the
    first ``context_docs`` of them (truncated to 1024 characters) to GPT-2 as
    context, and returns the generated answer followed by a transcript of the
    last 5 Q&A pairs from the shared session ``history``.

    Args:
        query: The user's legal question (plain text).
        top_k: How many nearest passages to retrieve from the FAISS index.
        context_docs: How many of the retrieved passages to include in the
            prompt (the rest are discarded).

    Returns:
        A single string: the answer, a separator, and the recent Q&A history.
    """
    query_embedding = embedder.encode([query])
    D, I = index.search(np.array(query_embedding), k=top_k)

    # FAISS returns indices sorted by distance, so the first context_docs
    # entries are the best matches.
    retrieved_docs = [corpus[i] for i in I[0]]
    context_combined = "\n\n".join(retrieved_docs[:context_docs])
    # Keep the prompt short: GPT-2's context window is only 1024 tokens.
    context_combined = context_combined[:1024]

    prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"
    result = generator(prompt, max_new_tokens=200, do_sample=True)[0]['generated_text']
    # The pipeline returns prompt + continuation (return_full_text defaults to
    # True), so slice the prompt off instead of split("Answer:")[-1]: the model
    # may itself emit "Answer:" mid-continuation, and splitting on the LAST
    # occurrence would silently drop everything before that spurious marker.
    answer = result[len(prompt):].strip()

    # Session memory: keep only the 5 most recent Q&A pairs.
    history.append((query, answer))
    if len(history) > 5:
        history.pop(0)

    formatted_history = "\n\n".join([f"Q: {q}\nA: {a}" for q, a in history])
    return f"{answer}\n\n---\nRecent Q&A:\n{formatted_history}"

# Example prompts surfaced in the UI description to guide first-time users.
sample_questions = [
    "What rights does a person have under the Fourth Amendment?",
    "Explain due process in simple terms.",
    "What is double jeopardy?",
    "Can the police search your car without a warrant?",
    "What is considered a fair trial?"
]

# Single-textbox Gradio app: one question in, answer + recent history out.
# Fix: the description previously read "get context-based case." — broken
# user-facing English, corrected to "get context-based answers."
iface = gr.Interface(
    fn=legal_assistant_query,
    inputs=gr.Textbox(lines=2, placeholder="Ask a legal question...", label="Your Question"),
    outputs=gr.Textbox(label="Legal Response with History"),
    title="🧑‍⚖️ Legal Assistant Chatbot",
    description="Ask any legal question and get context-based answers.\n\n💡 Sample Questions:\n- " + "\n- ".join(sample_questions)
)

iface.launch()