pradeepsengarr committed (verified) · Commit 52807fc · 1 Parent(s): 6956d92

Update app.py

Files changed (1):
  1. app.py +302 -75
app.py CHANGED
@@ -208,53 +208,263 @@
  # else:
  # st.sidebar.info("Upload PDF files to get started!")

- # -------
  import os
  import logging
  import math
  import streamlit as st
  import fitz  # PyMuPDF
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
- # from langchain_community.document_loaders import PDFMinerLoader
- from langchain_community.document_loaders import PyMuPDFLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain_community.embeddings import SentenceTransformerEmbeddings
  from langchain_community.vectorstores import Chroma
  from langchain_community.llms import HuggingFacePipeline
  from langchain.chains import RetrievalQA

- # Configuration
  device = 'cpu'
  persist_directory = "db"
  uploaded_files_dir = "uploaded_files"

- # Setup logging
- logging.basicConfig(level=logging.INFO)
-
- # Streamlit Page Setup
- st.set_page_config(page_title="RAG Chatbot", layout="wide")
- st.title("📚 RAG-based PDF Assistant")

- # Load LLM model
  checkpoint = "MBZUAI/LaMini-T5-738M"
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

- # ---------------- HELPER FUNCTIONS ---------------- #

- def extract_outline_from_pdf(path):
      try:
-         doc = fitz.open(path)
-         outline_text = ""
-         for page_num in range(len(doc)):
-             page = doc[page_num]
-             outline_text += f"### Page {page_num+1}:\n{page.get_text('text')[:500]}\n---\n"
-         return outline_text if outline_text else "No preview available."
      except Exception as e:
-         return f"Could not preview PDF: {e}"

  def data_ingestion():
-     """Load PDFs, validate content, and generate embeddings."""
      try:
          logging.info("Starting data ingestion")

@@ -268,21 +478,18 @@ def data_ingestion():
                  logging.info(f"Processing file: {file_path}")

                  try:
-                     loader = PyMuPDFLoader(file_path)
                      loaded_docs = loader.load()
-
-                     # Check if any content exists in loaded_docs
-                     if not loaded_docs or len(loaded_docs[0].page_content.strip()) == 0:
-                         logging.warning(f"No readable text found in {file_path}. Might be a scanned image or unsupported format.")
                          continue
-
                      for doc in loaded_docs:
                          if hasattr(doc, 'page_content') and len(doc.page_content.strip()) > 0:
                              documents.append(doc)
                          else:
                              logging.warning(f"Skipping invalid document structure in {file_path}")
-
-                 except Exception as e:
                      logging.error(f"Skipping {file_path}: {str(e)}")
                      continue

@@ -304,6 +511,7 @@ def data_ingestion():

          embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

          MAX_BATCH_SIZE = 5461
          total_batches = math.ceil(len(texts) / MAX_BATCH_SIZE)

@@ -324,13 +532,14 @@ def data_ingestion():

          db.persist()
          logging.info("Data ingestion completed successfully")
-
      except Exception as e:
          logging.error(f"Error during data ingestion: {str(e)}")
          raise

-
  def llm_pipeline():
      pipe = pipeline(
          'text2text-generation',
          model=base_model,
@@ -341,80 +550,98 @@ def llm_pipeline():
          top_p=0.95,
          device=device
      )
-     return HuggingFacePipeline(pipeline=pipe)

  def qa_llm():
      llm = llm_pipeline()
      embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
      db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
-     retriever = db.as_retriever()
-     return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)

  def process_answer(user_question):
-     """Generate an answer to the user’s question using a general RAG-based prompt."""
      try:
          logging.info("Processing user question")
-         qa = qa_llm()  # Set up the retrieval-based QA chain

-         # Generalized, flexible prompt for any kind of PDF (resume, legal doc, etc.)
          tailored_prompt = f"""
-         You are an intelligent and helpful AI assistant that provides answers strictly based on the provided document contents.
-         If the question cannot be answered using the documents, say: 'The document does not contain this information.'
-         Otherwise, respond clearly and concisely with relevant and factual details from the PDF.
-
-         Question: {user_question}
-         """

          generated_text = qa({"query": tailored_prompt})
          answer = generated_text['result']

-         # Add a safeguard for uncertain or hallucinated answers
-         if "not provide" in answer.lower() or "no information" in answer.lower() or len(answer.strip()) < 10:
-             return "The document does not contain this information."

          logging.info("Answer generated successfully")
          return answer

      except Exception as e:
          logging.error(f"Error during answer generation: {str(e)}")
-         return "Sorry, something went wrong while processing your question."
-

- # ---------------- STREAMLIT UI ---------------- #
-
- # Sidebar Upload
- st.sidebar.header("📤 Upload PDF Files")
- uploaded_files = st.sidebar.file_uploader("Select one or more PDF files", type="pdf", accept_multiple_files=True)

  if uploaded_files:
      if not os.path.exists(uploaded_files_dir):
          os.makedirs(uploaded_files_dir)

-     for file in uploaded_files:
-         path = os.path.join(uploaded_files_dir, file.name)
-         with open(path, "wb") as f:
-             f.write(file.getbuffer())
-
-     st.sidebar.success(f"{len(uploaded_files)} file(s) uploaded.")

-     # Display previews
-     st.subheader("📄 Uploaded PDF Previews")
-     for file in uploaded_files:
-         with st.expander(file.name):
-             st.text(extract_outline_from_pdf(os.path.join(uploaded_files_dir, file.name)))

-     # Trigger ingestion
-     with st.spinner("🔄 Ingesting uploaded documents..."):
-         data_ingestion()

-     # Ask a question
-     st.header("❓ Ask a Question from Your Documents")
-     user_input = st.text_input("Enter your question:")
-     if user_input:
-         with st.spinner("💬 Generating response..."):
-             response = process_answer(user_input)
-             st.success(response)

  else:
-     st.sidebar.info("Upload PDFs to begin your QA journey.")

 
  # else:
  # st.sidebar.info("Upload PDF files to get started!")

+ # # -------this is the second code!!!
+ # import os
+ # import logging
+ # import math
+ # import streamlit as st
+ # import fitz  # PyMuPDF
+ # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+ # # from langchain_community.document_loaders import PDFMinerLoader
+ # from langchain_community.document_loaders import PyMuPDFLoader
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
+ # from langchain_community.embeddings import SentenceTransformerEmbeddings
+ # from langchain_community.vectorstores import Chroma
+ # from langchain_community.llms import HuggingFacePipeline
+ # from langchain.chains import RetrievalQA
+
+ # device = 'cpu'
+ # persist_directory = "db"
+ # uploaded_files_dir = "uploaded_files"
+
+
+ # logging.basicConfig(level=logging.INFO)
+
+ # # for main Page Setup
+ # st.set_page_config(page_title="RAG Chatbot", layout="wide")
+ # st.title("📚 RAG-based PDF Assistant")
+
+ # # Load my model
+ # checkpoint = "MBZUAI/LaMini-T5-738M"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ # base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+
+ # # ------------------------------- #
+
+ # def extract_outline_from_pdf(path):
+ #     try:
+ #         doc = fitz.open(path)
+ #         outline_text = ""
+ #         for page_num in range(len(doc)):
+ #             page = doc[page_num]
+ #             outline_text += f"### Page {page_num+1}:\n{page.get_text('text')[:500]}\n---\n"
+ #         return outline_text if outline_text else "No preview available."
+ #     except Exception as e:
+ #         return f"Could not preview PDF: {e}"
+
+ # def data_ingestion():
+ #     """Load PDFs, validate content, and generate embeddings."""
+ #     try:
+ #         logging.info("Starting data ingestion")
+
+ #         if not os.path.exists(uploaded_files_dir):
+ #             os.makedirs(uploaded_files_dir)
+
+ #         documents = []
+ #         for filename in os.listdir(uploaded_files_dir):
+ #             if filename.endswith(".pdf"):
+ #                 file_path = os.path.join(uploaded_files_dir, filename)
+ #                 logging.info(f"Processing file: {file_path}")
+
+ #                 try:
+ #                     loader = PyMuPDFLoader(file_path)
+ #                     loaded_docs = loader.load()
+
+ #                     # Check if any content exists in loaded_docs
+ #                     if not loaded_docs or len(loaded_docs[0].page_content.strip()) == 0:
+ #                         logging.warning(f"No readable text found in {file_path}. Might be a scanned image or unsupported format.")
+ #                         continue
+
+ #                     for doc in loaded_docs:
+ #                         if hasattr(doc, 'page_content') and len(doc.page_content.strip()) > 0:
+ #                             documents.append(doc)
+ #                         else:
+ #                             logging.warning(f"Skipping invalid document structure in {file_path}")
+
+ #                 except Exception as e:
+ #                     logging.error(f"Skipping {file_path}: {str(e)}")
+ #                     continue
+
+ #         if not documents:
+ #             logging.error("No valid documents found to process.")
+ #             return
+
+ #         logging.info(f"Total valid documents: {len(documents)}")
+
+ #         # Proceed with splitting and embedding documents
+ #         text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+ #         texts = text_splitter.split_documents(documents)
+
+ #         logging.info(f"Total text chunks created: {len(texts)}")
+
+ #         if not texts:
+ #             logging.error("No valid text chunks to create embeddings.")
+ #             return
+
+ #         embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+ #         MAX_BATCH_SIZE = 5461
+ #         total_batches = math.ceil(len(texts) / MAX_BATCH_SIZE)
+
+ #         logging.info(f"Processing {len(texts)} text chunks in {total_batches} batches...")
+
+ #         db = None
+ #         for i in range(total_batches):
+ #             batch_start = i * MAX_BATCH_SIZE
+ #             batch_end = min((i + 1) * MAX_BATCH_SIZE, len(texts))
+ #             text_batch = texts[batch_start:batch_end]
+
+ #             logging.info(f"Processing batch {i + 1}/{total_batches}, size: {len(text_batch)}")
+
+ #             if db is None:
+ #                 db = Chroma.from_documents(text_batch, embeddings, persist_directory=persist_directory)
+ #             else:
+ #                 db.add_documents(text_batch)
+
+ #         db.persist()
+ #         logging.info("Data ingestion completed successfully")
+
+ #     except Exception as e:
+ #         logging.error(f"Error during data ingestion: {str(e)}")
+ #         raise
+
+
+ # def llm_pipeline():
+ #     pipe = pipeline(
+ #         'text2text-generation',
+ #         model=base_model,
+ #         tokenizer=tokenizer,
+ #         max_length=256,
+ #         do_sample=True,
+ #         temperature=0.3,
+ #         top_p=0.95,
+ #         device=device
+ #     )
+ #     return HuggingFacePipeline(pipeline=pipe)
+
+ # def qa_llm():
+ #     llm = llm_pipeline()
+ #     embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+ #     db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
+ #     retriever = db.as_retriever()
+ #     return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
+
+ # def process_answer(user_question):
+ #     """Generate an answer to the user’s question using a general RAG-based prompt."""
+ #     try:
+ #         logging.info("Processing user question")
+ #         qa = qa_llm()  # Set up the retrieval-based QA chain
+
+ #         # Generalized, flexible prompt for any kind of PDF (resume, legal doc, etc.)
+ #         tailored_prompt = f"""
+ #         You are an intelligent and helpful AI assistant that provides answers strictly based on the provided document contents.
+ #         If the question cannot be answered using the documents, say: 'The document does not contain this information.'
+ #         Otherwise, respond clearly and concisely with relevant and factual details from the PDF.
+
+ #         Question: {user_question}
+ #         """
+
+ #         generated_text = qa({"query": tailored_prompt})
+ #         answer = generated_text['result']
+
+ #         # Add a safeguard for hallucinated answers
+ #         if "not provide" in answer.lower() or "no information" in answer.lower() or len(answer.strip()) < 10:
+ #             return "The document does not contain this information."
+
+ #         logging.info("Answer generated successfully")
+ #         return answer
+
+ #     except Exception as e:
+ #         logging.error(f"Error during answer generation: {str(e)}")
+ #         return "Sorry, something went wrong while processing your question."
+
+
+ # # ---------------- STREAMLIT UI ---------------- #
+
+ # # Sidebar Upload
+ # st.sidebar.header("📤 Upload PDF Files")
+ # uploaded_files = st.sidebar.file_uploader("Select one or more PDF files", type="pdf", accept_multiple_files=True)
+
+ # if uploaded_files:
+ #     if not os.path.exists(uploaded_files_dir):
+ #         os.makedirs(uploaded_files_dir)
+
+ #     for file in uploaded_files:
+ #         path = os.path.join(uploaded_files_dir, file.name)
+ #         with open(path, "wb") as f:
+ #             f.write(file.getbuffer())
+
+ #     st.sidebar.success(f"{len(uploaded_files)} file(s) uploaded.")
+
+ #     # Display previews
+ #     st.subheader("📄 Uploaded PDF Previews")
+ #     for file in uploaded_files:
+ #         with st.expander(file.name):
+ #             st.text(extract_outline_from_pdf(os.path.join(uploaded_files_dir, file.name)))
+
+ #     # Trigger ingestion
+ #     with st.spinner("🔄 Ingesting uploaded documents..."):
+ #         data_ingestion()
+
+ #     # Ask a question
+ #     st.header("❓ Ask a Question from Your Documents")
+ #     user_input = st.text_input("Enter your question:")
+ #     if user_input:
+ #         with st.spinner("💬 Generating response..."):
+ #             response = process_answer(user_input)
+ #             st.success(response)
+
+ # else:
+ #     st.sidebar.info("Upload PDFs to begin your QA journey.")
+
+
  import os
  import logging
  import math
  import streamlit as st
  import fitz  # PyMuPDF
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+ from langchain_community.document_loaders import PDFMinerLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain_community.embeddings import SentenceTransformerEmbeddings
  from langchain_community.vectorstores import Chroma
  from langchain_community.llms import HuggingFacePipeline
  from langchain.chains import RetrievalQA

+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+
+ # Define global variables
  device = 'cpu'
  persist_directory = "db"
  uploaded_files_dir = "uploaded_files"

+ # Streamlit app configuration
+ st.set_page_config(page_title="Audit Assistant", layout="wide")
+ st.title("Audit Assistant")

+ # Load the model
  checkpoint = "MBZUAI/LaMini-T5-738M"
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

+ # Helper Functions

+ def extract_text_from_pdf(file_path):
+     """Extract text from a PDF using PyMuPDF (fitz)."""
      try:
+         doc = fitz.open(file_path)
+         text = ""
+         for page_num in range(doc.page_count):
+             page = doc.load_page(page_num)
+             text += page.get_text("text")
+         return text
      except Exception as e:
+         logging.error(f"Error reading PDF {file_path}: {e}")
+         return None

  def data_ingestion():
+     """Function to load PDFs and create embeddings with improved error handling and efficiency."""
      try:
          logging.info("Starting data ingestion")

                  logging.info(f"Processing file: {file_path}")

                  try:
+                     loader = PDFMinerLoader(file_path)
                      loaded_docs = loader.load()
+                     if not loaded_docs:
+                         logging.warning(f"Skipping file with missing or invalid metadata: {file_path}")
                          continue
+
                      for doc in loaded_docs:
                          if hasattr(doc, 'page_content') and len(doc.page_content.strip()) > 0:
                              documents.append(doc)
                          else:
                              logging.warning(f"Skipping invalid document structure in {file_path}")
+                 except ValueError as e:
                      logging.error(f"Skipping {file_path}: {str(e)}")
                      continue

          embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

+         # Proceed to split and embed the documents
          MAX_BATCH_SIZE = 5461
          total_batches = math.ceil(len(texts) / MAX_BATCH_SIZE)

          db.persist()
          logging.info("Data ingestion completed successfully")
+
      except Exception as e:
          logging.error(f"Error during data ingestion: {str(e)}")
          raise

  def llm_pipeline():
+     """Set up the language model pipeline."""
+     logging.info("Setting up LLM pipeline")
      pipe = pipeline(
          'text2text-generation',
          model=base_model,
          top_p=0.95,
          device=device
      )
+     local_llm = HuggingFacePipeline(pipeline=pipe)
+     logging.info("LLM pipeline setup complete")
+     return local_llm

  def qa_llm():
+     """Set up the question-answering chain."""
+     logging.info("Setting up QA model")
      llm = llm_pipeline()
      embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
      db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
+     retriever = db.as_retriever()  # Set up the retriever for the vector store
+     qa = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         return_source_documents=True
+     )
+     logging.info("QA model setup complete")
+     return qa

  def process_answer(user_question):
+     """Generate an answer to the user’s question."""
      try:
          logging.info("Processing user question")
+         qa = qa_llm()

          tailored_prompt = f"""
+         You are an expert chatbot designed to assist the user in the field of audits or any topic the user wants.
+         Your goal is to provide accurate and comprehensive answers to any questions related to audit policies, procedures,
+         and accounting standards based on the provided PDF documents.
+         Please respond effectively and refer to the relevant standards and policies whenever applicable.
+         User question: {user_question}
+         """

          generated_text = qa({"query": tailored_prompt})
          answer = generated_text['result']

+         if "not provide" in answer or "no information" in answer:
+             return "The document does not provide sufficient information to answer your question."

          logging.info("Answer generated successfully")
          return answer

      except Exception as e:
          logging.error(f"Error during answer generation: {str(e)}")
+         return "Error processing the question."

+ # Streamlit UI Setup
+ st.sidebar.header("File Upload")
+ uploaded_files = st.sidebar.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)

  if uploaded_files:
+     # Save uploaded files
      if not os.path.exists(uploaded_files_dir):
          os.makedirs(uploaded_files_dir)

+     uploaded_file_names = []
+     for uploaded_file in uploaded_files:
+         file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
+         with open(file_path, "wb") as f:
+             f.write(uploaded_file.getbuffer())
+
+         uploaded_file_names.append(uploaded_file.name)
+
+     st.sidebar.success(f"Uploaded {len(uploaded_files)} file(s) successfully!")
+
+     # Show uploaded PDFs
+     st.header("Uploaded PDF Files")
+     for filename in uploaded_file_names:
+         st.write(f"- {filename}")
+         # Suggestion buttons to generate summary
+         if st.button(f"Generate summary of {filename}"):
+             # Generate summary (you can customize this to use LLM or simple summarization)
+             text = extract_text_from_pdf(os.path.join(uploaded_files_dir, filename))
+             if text:
+                 summary = text[:1000]  # Taking first 1000 chars as a simple summary (use LLM or other methods for better summaries)
+                 st.write(f"Summary of {filename}:\n\n{summary}")
+             else:
+                 st.write(f"Could not extract text from {filename}.")

+     # Run data ingestion when files are uploaded
+     data_ingestion()

+     # Display UI for Q&A
+     st.header("Ask a Question")
+     user_question = st.text_input("Enter your question here:")

+     if user_question:
+         answer = process_answer(user_question)
+         st.write(answer)

  else:
+     st.sidebar.info("Upload PDF files to get started!")
+
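
For reference, a minimal sketch (not part of this commit) of how the RetrievalQA chain that app.py assembles could be exercised outside Streamlit. It reuses only APIs already imported in the diff above; the `ask` helper and the sample question are illustrative assumptions, and it presumes data_ingestion() has already populated the "db" persist directory.

import logging

from langchain.chains import RetrievalQA
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import Chroma
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

logging.basicConfig(level=logging.INFO)

# Same checkpoint and generation settings as app.py
checkpoint = "MBZUAI/LaMini-T5-738M"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

llm = HuggingFacePipeline(pipeline=pipeline(
    'text2text-generation',
    model=base_model,
    tokenizer=tokenizer,
    max_length=256,
    do_sample=True,
    temperature=0.3,
    top_p=0.95,
))

# Reopen the Chroma store that data_ingestion() persisted under "db"
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(persist_directory="db", embedding_function=embeddings)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
    return_source_documents=True,
)

def ask(question: str) -> str:
    # `ask` is a hypothetical helper, not defined in app.py
    result = qa({"query": question})
    return result['result']

if __name__ == "__main__":
    print(ask("Which auditing standards does the document reference?"))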