Shivamsinghtomar78 committed on
Commit
64a73c3
·
verified ·
1 Parent(s): 0287e6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -2
app.py CHANGED
@@ -17,6 +17,7 @@ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
17
  from langchain_community.vectorstores import FAISS
18
  from langchain_text_splitters import RecursiveCharacterTextSplitter
19
 
 
20
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
21
  HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
22
 
@@ -28,6 +29,7 @@ if not HUGGINGFACE_ACCESS_TOKEN:
28
  st.error("❌ HUGGINGFACE_ACCESS_TOKEN not found.")
29
  st.stop()
30
 
 
31
  llm = ChatGoogleGenerativeAI(
32
  model="gemini-1.5-pro",
33
  google_api_key=GOOGLE_API_KEY
@@ -38,6 +40,7 @@ embeddings = HuggingFaceInferenceAPIEmbeddings(
38
  model_name="BAAI/bge-small-en-v1.5"
39
  )
40
 
 
41
  class KeyPoint(BaseModel):
42
  point: str = Field(description="A key point extracted from the document.")
43
 
@@ -48,8 +51,10 @@ class DocumentAnalysis(BaseModel):
48
  key_points: List[KeyPoint] = Field(description="List of key points from the document.")
49
  summary: Summary = Field(description="Summary of the document.")
50
 
 
51
  parser = PydanticOutputParser(pydantic_object=DocumentAnalysis)
52
 
 
53
  prompt_template = """
54
  Analyze the following text and extract key points and a summary.
55
  {format_instructions}
@@ -61,15 +66,19 @@ prompt = PromptTemplate(
61
  partial_variables={"format_instructions": parser.get_format_instructions()}
62
  )
63
 
 
64
  chain = LLMChain(llm=llm, prompt=prompt, output_parser=parser)
65
 
 
66
  def analyze_text_structured(text):
67
  return chain.run(text=text)
68
 
 
69
  def extract_text_from_pdf(pdf_file):
70
  pdf_reader = PyPDF2.PdfReader(pdf_file)
71
  return "".join(page.extract_text() for page in pdf_reader.pages)
72
 
 
73
  def json_to_text(analysis):
74
  text_output = "=== Summary ===\n" + f"{analysis.summary.summary}\n\n"
75
  text_output += "=== Key Points ===\n"
@@ -77,6 +86,7 @@ def json_to_text(analysis):
77
  text_output += f"{i}. {key_point.point}\n"
78
  return text_output
79
 
 
80
  def create_pdf_report(analysis):
81
  pdf = FPDF()
82
  pdf.add_page()
@@ -84,22 +94,31 @@ def create_pdf_report(analysis):
84
  pdf.cell(200, 10, txt="PDF Analysis Report", ln=True, align='C')
85
  pdf.cell(200, 10, txt=f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True, align='C')
86
  pdf.multi_cell(0, 10, txt=json_to_text(analysis))
87
- pdf_output = pdf.output(dest='S')
88
- return pdf_output.encode('latin-1') if isinstance(pdf_output, str) else pdf_output
 
 
 
 
89
 
 
90
  def create_word_report(analysis):
91
  doc = Document()
92
  doc.add_heading('PDF Analysis Report', 0)
93
  doc.add_paragraph(f'Generated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
94
  doc.add_heading('Analysis', level=1)
95
  doc.add_paragraph(json_to_text(analysis))
 
 
96
  docx_bytes = io.BytesIO()
97
  doc.save(docx_bytes)
98
  docx_bytes.seek(0)
99
  return docx_bytes.getvalue()
100
 
 
101
  st.set_page_config(page_title="Chat With PDF", page_icon="πŸ“„")
102
 
 
103
  def local_css():
104
  st.markdown("""
105
  <style>
@@ -184,6 +203,7 @@ def local_css():
184
 
185
  local_css()
186
 
 
187
  if "current_file" not in st.session_state:
188
  st.session_state.current_file = None
189
  if "pdf_summary" not in st.session_state:
@@ -199,12 +219,14 @@ if "vectorstore" not in st.session_state:
199
  if "messages" not in st.session_state:
200
  st.session_state.messages = []
201
 
 
202
  st.markdown('<div class="main-header">', unsafe_allow_html=True)
203
  st.markdown('<div class="flag-stripe"></div>', unsafe_allow_html=True)
204
  st.title("πŸ“„ Chat With PDF")
205
  st.caption("Your AI-powered Document Analyzer")
206
  st.markdown('</div>', unsafe_allow_html=True)
207
 
 
208
  with st.container():
209
  st.markdown('<div class="card animate-fadeIn">', unsafe_allow_html=True)
210
  uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
@@ -254,6 +276,7 @@ with st.container():
254
  )
255
  st.markdown('</div>', unsafe_allow_html=True)
256
 
 
257
  if "vectorstore" in st.session_state and st.session_state.vectorstore is not None:
258
  st.subheader("Chat with the Document")
259
 
@@ -282,6 +305,7 @@ if "vectorstore" in st.session_state and st.session_state.vectorstore is not Non
282
 
283
  st.session_state.messages.append({"role": "assistant", "content": response.content})
284
 
 
285
  st.markdown(
286
  f'<div class="footer">Analysis Time: {st.session_state.analysis_time:.1f}s | Powered by Google Generative AI</div>',
287
  unsafe_allow_html=True
 
17
  from langchain_community.vectorstores import FAISS
18
  from langchain_text_splitters import RecursiveCharacterTextSplitter
19
 
20
+ # Environment Variable Checks
21
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
22
  HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
23
 
 
29
  st.error("❌ HUGGINGFACE_ACCESS_TOKEN not found.")
30
  st.stop()
31
 
32
+ # Initialize LLM and Embeddings
33
  llm = ChatGoogleGenerativeAI(
34
  model="gemini-1.5-pro",
35
  google_api_key=GOOGLE_API_KEY
 
40
  model_name="BAAI/bge-small-en-v1.5"
41
  )
42
 
43
+ # Pydantic Models for Structured Output
44
  class KeyPoint(BaseModel):
45
  point: str = Field(description="A key point extracted from the document.")
46
 
 
51
  key_points: List[KeyPoint] = Field(description="List of key points from the document.")
52
  summary: Summary = Field(description="Summary of the document.")
53
 
54
+ # Output Parser
55
  parser = PydanticOutputParser(pydantic_object=DocumentAnalysis)
56
 
57
+ # Prompt Template
58
  prompt_template = """
59
  Analyze the following text and extract key points and a summary.
60
  {format_instructions}
 
66
  partial_variables={"format_instructions": parser.get_format_instructions()}
67
  )
68
 
69
+ # LLM Chain
70
  chain = LLMChain(llm=llm, prompt=prompt, output_parser=parser)
71
 
72
# Text Analysis Function
def analyze_text_structured(text):
    """Run *text* through the LLM chain and return the parsed DocumentAnalysis.

    The chain's output parser converts the model response into the
    DocumentAnalysis pydantic object declared above.
    """
    analysis_result = chain.run(text=text)
    return analysis_result
75
 
76
# PDF Text Extraction
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page of an uploaded PDF.

    Args:
        pdf_file: a path or binary file-like object accepted by
            ``PyPDF2.PdfReader`` (here, the Streamlit upload buffer).

    Returns:
        A single string with all pages' text concatenated. Pages with no
        extractable text (e.g. scanned/image-only pages) contribute "".
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # extract_text() may return None for pages without a text layer;
    # coalesce to "" so str.join does not raise TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
80
 
81
+ # JSON to Readable Text
82
  def json_to_text(analysis):
83
  text_output = "=== Summary ===\n" + f"{analysis.summary.summary}\n\n"
84
  text_output += "=== Key Points ===\n"
 
86
  text_output += f"{i}. {key_point.point}\n"
87
  return text_output
88
 
89
+ # PDF Report Generation (Updated)
90
  def create_pdf_report(analysis):
91
  pdf = FPDF()
92
  pdf.add_page()
 
94
  pdf.cell(200, 10, txt="PDF Analysis Report", ln=True, align='C')
95
  pdf.cell(200, 10, txt=f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True, align='C')
96
  pdf.multi_cell(0, 10, txt=json_to_text(analysis))
97
+
98
+ # Use BytesIO to create a bytes-like object
99
+ pdf_bytes = io.BytesIO()
100
+ pdf.output(pdf_bytes, dest='S')
101
+ pdf_bytes.seek(0)
102
+ return pdf_bytes.getvalue()
103
 
104
# Word Report Generation (Updated)
def create_word_report(analysis):
    """Render *analysis* as a Word (.docx) document and return its raw bytes.

    The report contains a title heading, a generation timestamp, and the
    plain-text rendering produced by json_to_text().
    """
    report = Document()
    report.add_heading('PDF Analysis Report', 0)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    report.add_paragraph(f'Generated on: {timestamp}')
    report.add_heading('Analysis', level=1)
    report.add_paragraph(json_to_text(analysis))

    # Serialize into an in-memory buffer; getvalue() returns the full
    # contents regardless of the stream position.
    buffer = io.BytesIO()
    report.save(buffer)
    return buffer.getvalue()
117
 
118
+ # Streamlit Page Configuration
119
  st.set_page_config(page_title="Chat With PDF", page_icon="πŸ“„")
120
 
121
+ # Custom CSS
122
  def local_css():
123
  st.markdown("""
124
  <style>
 
203
 
204
  local_css()
205
 
206
+ # Session State Initialization
207
  if "current_file" not in st.session_state:
208
  st.session_state.current_file = None
209
  if "pdf_summary" not in st.session_state:
 
219
  if "messages" not in st.session_state:
220
  st.session_state.messages = []
221
 
222
+ # Main App Layout
223
  st.markdown('<div class="main-header">', unsafe_allow_html=True)
224
  st.markdown('<div class="flag-stripe"></div>', unsafe_allow_html=True)
225
  st.title("πŸ“„ Chat With PDF")
226
  st.caption("Your AI-powered Document Analyzer")
227
  st.markdown('</div>', unsafe_allow_html=True)
228
 
229
+ # PDF Upload and Analysis Section
230
  with st.container():
231
  st.markdown('<div class="card animate-fadeIn">', unsafe_allow_html=True)
232
  uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
 
276
  )
277
  st.markdown('</div>', unsafe_allow_html=True)
278
 
279
+ # Document Chat Section
280
  if "vectorstore" in st.session_state and st.session_state.vectorstore is not None:
281
  st.subheader("Chat with the Document")
282
 
 
305
 
306
  st.session_state.messages.append({"role": "assistant", "content": response.content})
307
 
308
+ # Footer
309
  st.markdown(
310
  f'<div class="footer">Analysis Time: {st.session_state.analysis_time:.1f}s | Powered by Google Generative AI</div>',
311
  unsafe_allow_html=True