import gradio as gr import pdfplumber import docx import os import datetime from transformers import pipeline # Load open-source LLMs summary_llm = pipeline("summarization", model="google/pegasus-xsum", tokenizer="google/pegasus-xsum") text_llm = pipeline("text2text-generation", model="MBZUAI/LaMini-T5-738M", tokenizer="MBZUAI/LaMini-T5-738M") # Extract text from files def extract_text(file): if file.name.endswith(".pdf"): with pdfplumber.open(file.name) as pdf: return "\n".join([p.extract_text() for p in pdf.pages if p.extract_text()]) elif file.name.endswith(".docx"): doc = docx.Document(file) return "\n".join([para.text for para in doc.paragraphs]) elif file.name.endswith(".txt"): return file.read().decode("utf-8") else: return "Unsupported file format." # Format glossary visually def format_glossary_html(glossary_text): lines = glossary_text.split('\n') html = "" for line in lines: if ":" in line: term, desc = line.split(":", 1) html += f"{term.strip()}: {desc.strip()}
" else: html += f"{line}
" return html # Generate summary def generate_summary(text): return summary_llm(text[:1024], max_length=250, min_length=80, do_sample=False)[0]["summary_text"] # Generate text (glossary/verdict/custom) def generate_text_response(prompt, max_len=512): return text_llm(prompt, max_length=max_len, do_sample=True)[0]["generated_text"] # Main document analyzer def analyze_document(file): filename = os.path.basename(file.name) text = extract_text(file) if not text.strip(): return "No content found in file.", "", "", "", "", None, "" short_text = text[:3000] # Enhanced prompts summary_prompt = f""" You are a legal assistant. Read the following legal document and generate a comprehensive summary. Include: parties involved, key facts, legal issues, arguments, court observations, and likely outcome. Document: {short_text} """ glossary_prompt = f""" Extract and explain all legal terms, laws, or references. Format: Term: ... Explanation: ... Document: {short_text} """ verdict_prompt = f""" Based on the document, predict the likely verdict in 2โ€“3 sentences using standard legal reasoning. Document: {short_text} """ # Run LLMs summary = generate_summary(short_text) glossary = generate_text_response(glossary_prompt) verdict = generate_text_response(verdict_prompt) glossary_html = format_glossary_html(glossary) # Save report timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") output_filename = f"LegalSummary_{timestamp}.txt" with open(output_filename, "w", encoding="utf-8") as f: f.write(f"๐Ÿ“„ File: {filename}\n๐Ÿ•’ Time: {timestamp}\n\n") f.write("=== ๐Ÿ“‘ Summary ===\n" + summary + "\n\n") f.write("=== ๐Ÿ“˜ Glossary ===\n" + glossary + "\n\n") f.write("=== โš–๏ธ Verdict ===\n" + verdict + "\n") return text, summary, glossary, glossary_html, verdict, output_filename, short_text # Custom prompt answer def custom_prompt_response(doc_text, user_prompt): if not doc_text.strip() or not user_prompt.strip(): return "โš ๏ธ Please provide both a document and a prompt." prompt = f""" You are a legal expert. Answer the question below using only the document provided. Question: {user_prompt.strip()} Document: {doc_text.strip()} """ return generate_text_response(prompt) # Gradio UI with gr.Blocks(css="body { background-color: #f9f9f9; font-family: 'Segoe UI'; }") as demo: with gr.Row(): with gr.Column(scale=3): gr.Markdown("""
๐Ÿงพ Legal Document Summarizer Using LLMs
Upload legal documents in PDF, DOCX, or TXT format to receive structured summaries, legal term glossaries, and AI-inferred verdicts using open-source language models.
""") file_input = gr.File(label="๐Ÿ“ Upload Legal Document") submit_btn = gr.Button("๐Ÿ” Analyze Document") download_btn = gr.File(label="โฌ‡๏ธ Download Report") with gr.Column(scale=1): gr.Markdown("### ๐Ÿ’ก Features") gr.Markdown(""" - ๐Ÿ“ AI-generated legal summaries - ๐Ÿ“˜ Glossary of legal terms - โš–๏ธ Inferred legal verdict - โ“ Custom Q&A based on the document """) extracted = gr.Textbox(label="๐Ÿ“„ Extracted Text", lines=10, interactive=False) summary = gr.Textbox(label="๐Ÿ“ Summary", lines=6, interactive=False) glossary_raw = gr.Textbox(visible=False) glossary_html = gr.HTML(label="๐Ÿ“˜ Glossary of Legal Terms") final_verdict = gr.Textbox(label="โš–๏ธ Verdict (AI Inferred)", lines=3, interactive=False) with gr.Row(): gr.Markdown("### โ“ Ask a Question About the Document") user_prompt = gr.Textbox(label="Your Question", placeholder="e.g., What is the legal issue?") custom_response = gr.Textbox(label="๐Ÿค– AI Answer", lines=4) custom_btn = gr.Button("๐Ÿง  Get Answer") hidden_doc_text = gr.Textbox(visible=False) submit_btn.click(fn=analyze_document, inputs=[file_input], outputs=[ extracted, summary, glossary_raw, glossary_html, final_verdict, download_btn, hidden_doc_text ]) custom_btn.click(fn=custom_prompt_response, inputs=[hidden_doc_text, user_prompt], outputs=custom_response) demo.launch()