Pradeepthi30 committed on
Commit
64d115b
·
verified ·
1 Parent(s): 37449ac

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +73 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains import RetrievalQA
8
+ from langchain.llms import HuggingFaceHub
9
+
10
# Hosted Flan-T5 model shared by the summary, glossary and
# question-answering steps.
llm = HuggingFaceHub(
    repo_id="google/flan-t5-base",
    model_kwargs={"temperature": 0.7, "max_length": 512},
)

# Module-level state overwritten each time a document is processed.
summary_cache = glossary_cache = ""
retriever_chain = None
16
+
17
def extract_text_and_summary(file):
    """Index an uploaded PDF and produce its summary and glossary.

    The PDF is split into overlapping chunks, the chunks are embedded into a
    FAISS index that backs the module-level question-answering chain, and the
    first 1500 characters of the chunked text are sent to the LLM for a
    summary and a glossary. Both results are also written to
    ``summary_output.txt`` so they can be downloaded.

    Args:
        file: The value delivered by the ``gr.File`` input. Either an object
            exposing the path as ``.name`` or a plain path string; ``None``
            when nothing has been uploaded.

    Returns:
        A 4-tuple ``(full_text, summary, glossary, download_path)``. When no
        file was supplied, or the PDF yields no text, the summary slot carries
        an explanatory message, the other text slots are empty and
        ``download_path`` is ``None``.
    """
    global retriever_chain, summary_cache, glossary_cache

    # Clicking the button before uploading passes None; report it the same
    # way answer_custom_question does instead of crashing on `.name`.
    if file is None:
        return "", "Please upload a PDF document first.", "", None

    # Accept both a file-like object carrying `.name` and a bare path string.
    pdf_path = getattr(file, "name", file)
    docs = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    splits = splitter.split_documents(docs)

    # A PDF without extractable text (e.g. a scan) yields no chunks, and
    # building a FAISS index from an empty list fails.
    if not splits:
        return "", "No extractable text was found in this PDF.", "", None

    full_text = "\n".join(doc.page_content for doc in splits)

    embeddings = HuggingFaceEmbeddings()
    db = FAISS.from_documents(splits, embeddings)
    retriever_chain = RetrievalQA.from_chain_type(
        llm=llm, retriever=db.as_retriever()
    )

    # Only the leading excerpt is sent, keeping the prompt small for the model.
    excerpt = full_text[:1500]
    summary_prompt = f"Summarize this legal document:\n{excerpt}"
    glossary_prompt = (
        f"Extract and define legal terms from the document:\n{excerpt}"
    )

    # A LangChain LLM returns the generated text as a plain string, so the
    # result is used directly; indexing it with [0]['generated_text'] raises
    # TypeError.
    summary_cache = llm.invoke(summary_prompt)
    glossary_cache = llm.invoke(glossary_prompt)

    # Save downloadable summary file
    filename = "summary_output.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write("=== Summary ===\n")
        f.write(summary_cache + "\n\n")
        f.write("=== Glossary ===\n")
        f.write(glossary_cache + "\n")

    return full_text, summary_cache, glossary_cache, filename
47
+
48
def answer_custom_question(question):
    """Answer a question about the most recently processed document.

    Returns the retrieval chain's answer for ``question``, or a prompt to
    upload a document when no chain has been built yet.
    """
    if not retriever_chain:
        return "Please upload and process a document first."
    return retriever_chain.run(question)
52
+
53
# Gradio UI: upload and process a PDF, show the extracted text, summary and
# glossary, then ask follow-up questions against the indexed document.
with gr.Blocks() as demo:
    gr.Markdown("## 🧾 Legal Document Summarizer Using LangChain")

    with gr.Row():
        file = gr.File(label="📁 Upload Legal PDF", file_types=[".pdf"])
        process_btn = gr.Button("🔍 Extract & Summarize")

    extracted_text = gr.Textbox(label="📄 Extracted Text", lines=10)
    summary_output = gr.Textbox(label="📝 Summary", lines=5)
    glossary_output = gr.Textbox(label="📘 Glossary", lines=5)
    download_link = gr.File(label="⬇️ Download Summary")

    with gr.Row():
        user_question = gr.Textbox(label="❓ Ask a Custom Question")
        custom_answer = gr.Textbox(label="🤖 AI Answer")
        ask_btn = gr.Button("🧠 Get Answer")

    # Event handlers are registered inside the Blocks context.
    process_btn.click(
        fn=extract_text_and_summary,
        inputs=file,
        outputs=[extracted_text, summary_output, glossary_output, download_link],
    )
    ask_btn.click(
        fn=answer_custom_question,
        inputs=user_question,
        outputs=custom_answer,
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
gradio==4.14.0
langchain==0.1.14
huggingface_hub
pdfplumber
python-docx
tiktoken
faiss-cpu
transformers==4.40.1
torch
# Needed at runtime by app.py: PyPDFLoader imports pypdf, and
# HuggingFaceEmbeddings imports sentence_transformers.
pypdf
sentence-transformers