Pradeepthi30 committed
Commit 9b4a2f7 · verified · 1 Parent(s): dd9ec35

Upload 2 files

Files changed (2)
  1. app.py +81 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,81 @@
+ import os
+ import gradio as gr
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain.chains import RetrievalQA
+ from langchain_community.llms import HuggingFaceEndpoint
+
+ # Load Hugging Face API token
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Load LLM with token; generation settings are passed as explicit parameters
+ # (HuggingFaceEndpoint rejects known parameters placed inside model_kwargs)
+ llm = HuggingFaceEndpoint(
+     repo_id="google/flan-t5-base",
+     huggingfacehub_api_token=hf_token,
+     temperature=0.7,
+     max_new_tokens=512,
+ )
+
+ # State shared between the two Gradio callbacks below
+ summary_cache = ""
+ glossary_cache = ""
+ retriever_chain = None
+
+ def extract_text_and_summary(file):
+     global retriever_chain, summary_cache, glossary_cache
+
+     # gr.File may pass a path string (Gradio 4 default) or a tempfile-like object
+     pdf_path = file.name if hasattr(file, "name") else file
+     loader = PyPDFLoader(pdf_path)
+     docs = loader.load()
+
+     # Chunk the document; keep the joined text for display and prompting
+     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+     splits = splitter.split_documents(docs)
+     full_text = "\n".join([doc.page_content for doc in splits])
+
+     # Build the FAISS index and the retrieval QA chain used for custom questions
+     embeddings = HuggingFaceEmbeddings()
+     db = FAISS.from_documents(splits, embeddings)
+     retriever = db.as_retriever()
+     retriever_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+
+     summary_prompt = f"Summarize this legal document:\n{full_text[:1500]}"
+     glossary_prompt = f"Extract and define legal terms from the document:\n{full_text[:1500]}"
+
+     summary_cache = llm.invoke(summary_prompt)
+     glossary_cache = llm.invoke(glossary_prompt)
+
+     # Write the summary and glossary to a downloadable text file
+     filename = "summary_output.txt"
+     with open(filename, "w", encoding="utf-8") as f:
+         f.write("=== Summary ===\n")
+         f.write(summary_cache + "\n\n")
+         f.write("=== Glossary ===\n")
+         f.write(glossary_cache + "\n")
+
+     return full_text, summary_cache, glossary_cache, filename
+
+ def answer_custom_question(question):
+     if retriever_chain:
+         return retriever_chain.run(question)
+     return "Please upload and process a document first."
+
+ with gr.Blocks() as demo:
+     gr.Markdown("## 🧾 Legal Document Summarizer Using LangChain")
+
+     with gr.Row():
+         file = gr.File(label="📁 Upload Legal PDF", file_types=[".pdf"])
+         process_btn = gr.Button("🔍 Extract & Summarize")
+
+     extracted_text = gr.Textbox(label="📄 Extracted Text", lines=10)
+     summary_output = gr.Textbox(label="📝 Summary", lines=5)
+     glossary_output = gr.Textbox(label="📘 Glossary", lines=5)
+     download_link = gr.File(label="⬇️ Download Summary")
+
+     with gr.Row():
+         user_question = gr.Textbox(label="❓ Ask a Custom Question")
+         custom_answer = gr.Textbox(label="🤖 AI Answer")
+         ask_btn = gr.Button("🧠 Get Answer")
+
+     process_btn.click(fn=extract_text_and_summary, inputs=file, outputs=[
+         extracted_text, summary_output, glossary_output, download_link
+     ])
+     ask_btn.click(fn=answer_custom_question, inputs=user_question, outputs=custom_answer)
+
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio==4.14.0
+ langchain==0.1.14
+ langchain-community
+ huggingface_hub
+ pypdf  # required by PyPDFLoader
+ sentence-transformers  # required by HuggingFaceEmbeddings
+ pdfplumber
+ python-docx
+ tiktoken
+ faiss-cpu
+ transformers==4.40.1
+ torch
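
A minimal sketch for sanity-checking the endpoint configuration outside the Gradio UI, assuming HUGGINGFACEHUB_API_TOKEN is set in the environment; the repo_id and environment variable match app.py above, and the prompt text is only illustrative:

import os
from langchain_community.llms import HuggingFaceEndpoint

# Same repo_id and token source as app.py; small max_new_tokens keeps the check cheap.
llm = HuggingFaceEndpoint(
    repo_id="google/flan-t5-base",
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    temperature=0.7,
    max_new_tokens=64,
)
print(llm.invoke("Summarize: A lease is a contract between a landlord and a tenant."))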