fazil99 committed on
Commit
1d9d928
·
verified ·
1 Parent(s): 8d6bb81

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +154 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdfplumber
3
+ import docx
4
+ import os
5
+ import datetime
6
+ from transformers import pipeline
7
+
8
+ # Load open-source LLMs
9
# Checkpoint ids are named once so model and tokenizer always stay in sync.
_SUMMARY_CHECKPOINT = "google/pegasus-xsum"
_TEXT_CHECKPOINT = "MBZUAI/LaMini-T5-738M"

# Summarization pipeline used for the document summary.
summary_llm = pipeline(
    "summarization",
    model=_SUMMARY_CHECKPOINT,
    tokenizer=_SUMMARY_CHECKPOINT,
)

# Text2text pipeline used for the glossary, verdict and custom questions.
text_llm = pipeline(
    "text2text-generation",
    model=_TEXT_CHECKPOINT,
    tokenizer=_TEXT_CHECKPOINT,
)
11
+
12
+ # Extract text from files
13
def extract_text(file):
    """Return the plain text of an uploaded PDF, DOCX or TXT document.

    Args:
        file: The uploaded file. Either an object exposing its on-disk path
            as ``.name`` (what ``gr.File`` passes to callbacks) or a plain
            path string.

    Returns:
        The extracted text, or the literal ``"Unsupported file format."``
        when the name does not end in ``.pdf``, ``.docx`` or ``.txt``
        (matched case-insensitively).

    Raises:
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
    """
    # Always work from the path. Gradio 4's default "filepath" mode hands the
    # callback a str subclass that has ``.name`` but no ``.read()``, so reading
    # from the object itself fails for TXT uploads.
    path = os.fspath(getattr(file, "name", file))
    lowered = path.lower()  # accept ".PDF", ".Docx", ...

    if lowered.endswith(".pdf"):
        with pdfplumber.open(path) as pdf:
            # Extract each page exactly once and skip pages with no text.
            page_texts = (page.extract_text() for page in pdf.pages)
            return "\n".join(chunk for chunk in page_texts if chunk)

    if lowered.endswith(".docx"):
        return "\n".join(para.text for para in docx.Document(path).paragraphs)

    if lowered.endswith(".txt"):
        # Binary read + explicit decode keeps the original line endings.
        with open(path, "rb") as handle:
            return handle.read().decode("utf-8")

    return "Unsupported file format."
24
+
25
+ # Format glossary visually
26
def format_glossary_html(glossary_text):
    """Render newline-separated glossary text as an HTML fragment.

    A line containing a colon is split at the first colon: the part before it
    is shown in bold, coloured text and the remainder follows after ``": "``.
    Lines without a colon are emitted unchanged. Every line ends with
    ``<br>``.

    Args:
        glossary_text: Glossary text, one entry per line.

    Returns:
        The HTML fragment as a single string. All text taken from
        ``glossary_text`` is HTML-escaped.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from html import escape

    # The text originates from model output over an uploaded document, so it
    # is escaped before being placed into markup rendered by the browser.
    pieces = []
    for line in glossary_text.split("\n"):
        term, colon, desc = line.partition(":")
        if colon:
            pieces.append(
                f"<b style='color:#1e3a8a'>{escape(term.strip())}</b>: "
                f"{escape(desc.strip())}<br>"
            )
        else:
            pieces.append(f"{escape(line)}<br>")
    return "".join(pieces)
36
+
37
+ # Generate summary
38
def generate_summary(text):
    """Summarise the beginning of ``text`` with the summarization pipeline.

    Only the first 1024 characters are sent to the model. The pipeline is
    called with ``max_length=250``, ``min_length=80`` and sampling disabled.

    Args:
        text: The document text.

    Returns:
        The ``"summary_text"`` field of the first pipeline result.
    """
    excerpt = text[:1024]
    results = summary_llm(
        excerpt,
        max_length=250,
        min_length=80,
        do_sample=False,
    )
    first_result = results[0]
    return first_result["summary_text"]
40
+
41
+ # Generate text (glossary/verdict/custom)
42
def generate_text_response(prompt, max_len=512):
    """Generate text for ``prompt`` with the text2text pipeline.

    Used for the glossary, the verdict and custom questions. Sampling is
    enabled, so repeated calls with the same prompt may differ.

    Args:
        prompt: The full prompt passed to the model.
        max_len: Value forwarded as the pipeline's ``max_length``.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    candidates = text_llm(prompt, max_length=max_len, do_sample=True)
    best = candidates[0]
    return best["generated_text"]
44
+
45
+ # Main document analyzer
46
def analyze_document(file):
    """Extract, summarise and annotate an uploaded document, then save a report.

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input, or
            ``None`` when nothing has been uploaded.

    Returns:
        A 7-tuple in the order of the outputs wired to the Analyze button:
        ``(extracted_text, summary, glossary, glossary_html, verdict,
        report_path, short_text)``. When the document cannot be analysed, the
        first element holds a message, ``report_path`` is ``None`` and the
        remaining elements are empty strings.
    """

    def _failure(message):
        # Same shape as the success tuple so every output is still populated.
        return message, "", "", "", "", None, ""

    # Clicking Analyze without an upload passes None; fail with a message
    # instead of raising AttributeError on ``file.name``.
    if file is None:
        return _failure("No file uploaded.")

    filename = os.path.basename(getattr(file, "name", file))
    text = extract_text(file)
    if not text.strip():
        return _failure("No content found in file.")
    # extract_text signals an unknown extension with this literal; do not feed
    # the message itself to the models as though it were a document.
    if text == "Unsupported file format.":
        return _failure(text)

    # Cap the amount of document text embedded in each prompt.
    short_text = text[:3000]

    glossary_prompt = (
        "\n"
        "Extract and explain all legal terms, laws, or references. Format:\n"
        "\n"
        "Term: ...\n"
        "Explanation: ...\n"
        "\n"
        "Document:\n"
        f"{short_text}\n"
    )
    verdict_prompt = (
        "\n"
        "Based on the document, predict the likely verdict in 2–3 sentences "
        "using standard legal reasoning.\n"
        "\n"
        "Document:\n"
        f"{short_text}\n"
    )

    # Run the models. The summarizer receives the raw text, not a prompt.
    summary = generate_summary(short_text)
    glossary = generate_text_response(glossary_prompt)
    verdict = generate_text_response(verdict_prompt)
    glossary_html = format_glossary_html(glossary)

    # Save the report next to the app under a timestamped name.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_filename = f"LegalSummary_{timestamp}.txt"
    report = (
        f"📄 File: {filename}\n🕒 Time: {timestamp}\n\n"
        f"=== 📑 Summary ===\n{summary}\n\n"
        f"=== 📘 Glossary ===\n{glossary}\n\n"
        f"=== ⚖️ Verdict ===\n{verdict}\n"
    )
    with open(output_filename, "w", encoding="utf-8") as report_file:
        report_file.write(report)

    return text, summary, glossary, glossary_html, verdict, output_filename, short_text
95
+
96
+ # Custom prompt answer
97
def custom_prompt_response(doc_text, user_prompt):
    """Answer a user question about the document with the text model.

    Args:
        doc_text: The document text the answer should be based on.
        user_prompt: The user's question.

    Returns:
        The generated answer, or a warning message when either argument is
        empty or whitespace-only.
    """
    document = doc_text.strip()
    if not document:
        return "⚠️ Please provide both a document and a prompt."
    question = user_prompt.strip()
    if not question:
        return "⚠️ Please provide both a document and a prompt."

    # Assemble the prompt line by line; it starts and ends with a newline.
    prompt_lines = [
        "",
        "You are a legal expert. Answer the question below using only the document provided.",
        "",
        "Question:",
        question,
        "",
        "Document:",
        document,
        "",
    ]
    return generate_text_response("\n".join(prompt_lines))
110
+
111
+ # Gradio UI
112
# Page header: title plus a one-paragraph description, as raw HTML in Markdown.
_HEADER_MD = (
    "\n"
    "<div style='text-align: center; font-size: 28px; font-weight: bold; color: #1e3a8a; margin-bottom: 10px;'>\n"
    "🧾 Legal Document Summarizer Using LLMs\n"
    "</div>\n"
    "<div style='text-align: center; font-size: 16px; color: #444444; margin-bottom: 25px;'>\n"
    "Upload legal documents in PDF, DOCX, or TXT format to receive structured summaries, "
    "legal term glossaries, and AI-inferred verdicts using open-source language models.\n"
    "</div>\n"
)

# Bullet list shown in the narrow right-hand column.
_FEATURES_MD = (
    "\n"
    "- 📝 AI-generated legal summaries\n"
    "- 📘 Glossary of legal terms\n"
    "- ⚖️ Inferred legal verdict\n"
    "- ❓ Custom Q&A based on the document\n"
)

with gr.Blocks(css="body { background-color: #f9f9f9; font-family: 'Segoe UI'; }") as demo:
    with gr.Row():
        # Wide column: header, upload, action button and report download.
        with gr.Column(scale=3):
            gr.Markdown(_HEADER_MD)
            file_input = gr.File(label="📁 Upload Legal Document")
            submit_btn = gr.Button("🔍 Analyze Document")
            download_btn = gr.File(label="⬇️ Download Report")

        # Narrow column: static feature list.
        with gr.Column(scale=1):
            gr.Markdown("### 💡 Features")
            gr.Markdown(_FEATURES_MD)

    # Read-only result widgets filled by analyze_document.
    extracted = gr.Textbox(label="📄 Extracted Text", lines=10, interactive=False)
    summary = gr.Textbox(label="📝 Summary", lines=6, interactive=False)
    glossary_raw = gr.Textbox(visible=False)  # raw glossary text, not displayed
    glossary_html = gr.HTML(label="📘 Glossary of Legal Terms")
    final_verdict = gr.Textbox(label="⚖️ Verdict (AI Inferred)", lines=3, interactive=False)

    # NOTE(review): the original indentation was not recoverable; only the
    # heading is placed inside this row. Confirm whether the question widgets
    # below were meant to sit inside the row as well.
    with gr.Row():
        gr.Markdown("### ❓ Ask a Question About the Document")
    user_prompt = gr.Textbox(label="Your Question", placeholder="e.g., What is the legal issue?")
    custom_response = gr.Textbox(label="🤖 AI Answer", lines=4)
    custom_btn = gr.Button("🧠 Get Answer")
    # Invisible holder for the truncated document text reused by the Q&A handler.
    hidden_doc_text = gr.Textbox(visible=False)

    # The output order must match the tuple returned by analyze_document.
    submit_btn.click(
        fn=analyze_document,
        inputs=[file_input],
        outputs=[
            extracted,
            summary,
            glossary_raw,
            glossary_html,
            final_verdict,
            download_btn,
            hidden_doc_text,
        ],
    )
    custom_btn.click(
        fn=custom_prompt_response,
        inputs=[hidden_doc_text, user_prompt],
        outputs=custom_response,
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers==4.40.1
2
+ torch
3
+ gradio==4.14.0
4
+ pdfplumber==0.10.3
5
+ python-docx==1.1.0