pradeepsengarr committed on
Commit
83e3bd0
·
verified ·
1 Parent(s): 52807fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -440,8 +440,8 @@ persist_directory = "db"
440
  uploaded_files_dir = "uploaded_files"
441
 
442
  # Streamlit app configuration
443
- st.set_page_config(page_title="Audit Assistant", layout="wide")
444
- st.title("Audit Assistant")
445
 
446
  # Load the model
447
  checkpoint = "MBZUAI/LaMini-T5-738M"
@@ -451,7 +451,7 @@ base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
451
  # Helper Functions
452
 
453
  def extract_text_from_pdf(file_path):
454
- """Extract text from a PDF using PyMuPDF (fitz)."""
455
  try:
456
  doc = fitz.open(file_path)
457
  text = ""
@@ -577,10 +577,8 @@ def process_answer(user_question):
577
  qa = qa_llm()
578
 
579
  tailored_prompt = f"""
580
- You are an expert chatbot designed to assist the user in the field of audits or any topic the user wants.
581
- Your goal is to provide accurate and comprehensive answers to any questions related to audit policies, procedures,
582
- and accounting standards based on the provided PDF documents.
583
- Please respond effectively and refer to the relevant standards and policies whenever applicable.
584
  User question: {user_question}
585
  """
586
 
@@ -602,34 +600,43 @@ st.sidebar.header("File Upload")
602
  uploaded_files = st.sidebar.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
603
 
604
  if uploaded_files:
605
- # Save uploaded files
606
  if not os.path.exists(uploaded_files_dir):
607
  os.makedirs(uploaded_files_dir)
608
 
609
- uploaded_file_names = []
610
  for uploaded_file in uploaded_files:
611
  file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
612
  with open(file_path, "wb") as f:
613
  f.write(uploaded_file.getbuffer())
614
-
615
- uploaded_file_names.append(uploaded_file.name)
616
 
617
  st.sidebar.success(f"Uploaded {len(uploaded_files)} file(s) successfully!")
618
 
619
- # Show uploaded PDFs
620
- st.header("Uploaded PDF Files")
621
- for filename in uploaded_file_names:
622
- st.write(f"- {filename}")
623
- # Suggestion buttons to generate summary
624
- if st.button(f"Generate summary of {filename}"):
625
- # Generate summary (you can customize this to use LLM or simple summarization)
626
- text = extract_text_from_pdf(os.path.join(uploaded_files_dir, filename))
627
- if text:
628
- summary = text[:1000] # Taking first 1000 chars as a simple summary (use LLM or other methods for better summaries)
629
- st.write(f"Summary of {filename}:\n\n{summary}")
630
- else:
631
- st.write(f"Could not extract text from {filename}.")
632
-
 
 
 
 
 
 
 
 
 
 
 
 
633
  # Run data ingestion when files are uploaded
634
  data_ingestion()
635
 
@@ -643,5 +650,3 @@ if uploaded_files:
643
 
644
  else:
645
  st.sidebar.info("Upload PDF files to get started!")
646
-
647
-
 
440
  uploaded_files_dir = "uploaded_files"
441
 
442
  # Streamlit app configuration
443
+ st.set_page_config(page_title="RAG-based Chatbot", layout="wide")
444
+ st.title("RAG-based Chatbot")
445
 
446
  # Load the model
447
  checkpoint = "MBZUAI/LaMini-T5-738M"
 
451
  # Helper Functions
452
 
453
  def extract_text_from_pdf(file_path):
454
+ """Extract full text from a PDF using PyMuPDF (fitz)."""
455
  try:
456
  doc = fitz.open(file_path)
457
  text = ""
 
577
  qa = qa_llm()
578
 
579
  tailored_prompt = f"""
580
+ You are an expert chatbot designed to assist with any topic, providing accurate and detailed answers based on the provided PDFs.
581
+ Your goal is to deliver the most relevant information and resources based on the question asked.
 
 
582
  User question: {user_question}
583
  """
584
 
 
600
  uploaded_files = st.sidebar.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
601
 
602
  if uploaded_files:
603
+ # Save uploaded files and extract their text
604
  if not os.path.exists(uploaded_files_dir):
605
  os.makedirs(uploaded_files_dir)
606
 
 
607
  for uploaded_file in uploaded_files:
608
  file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
609
  with open(file_path, "wb") as f:
610
  f.write(uploaded_file.getbuffer())
 
 
611
 
612
  st.sidebar.success(f"Uploaded {len(uploaded_files)} file(s) successfully!")
613
 
614
# Show every uploaded PDF together with a download button and its extracted
# text. This section belongs to the `if uploaded_files:` branch.
st.subheader("Uploaded PDF(s):")
for file_index, uploaded_file in enumerate(uploaded_files):
    st.write(uploaded_file.name)

    # Rebuild the on-disk path for *this* file. Reusing `file_path` from the
    # save loop would always point at the last file that was written, so every
    # entry would show the same PDF.
    saved_path = os.path.join(uploaded_files_dir, uploaded_file.name)

    # Offer the saved copy for download.
    with open(saved_path, "rb") as f:
        file_bytes = f.read()
    st.download_button(
        label="Download PDF",
        data=file_bytes,
        file_name=uploaded_file.name,
        mime="application/pdf",
        # Explicit per-file key: widgets created in a loop with identical
        # arguments would otherwise collide on the same widget ID.
        key=f"download_pdf_{file_index}",
    )

    # Extract and display the full text of this PDF.
    st.subheader("Full Text from the PDF:")
    full_text = extract_text_from_pdf(saved_path)
    if full_text:
        st.text_area(
            "PDF Text",
            full_text,
            height=300,
            key=f"pdf_text_{file_index}",
        )
    else:
        # A falsy result from extract_text_from_pdf is treated as a failure.
        st.warning("Failed to extract text from this PDF.")
635
+
636
+ # Generate summary option
637
+ if st.button("Generate Summary of Document"):
638
+ st.write("Summary: [Provide the generated summary here]")
639
+
640
  # Run data ingestion when files are uploaded
641
  data_ingestion()
642
 
 
650
 
651
  else:
652
  st.sidebar.info("Upload PDF files to get started!")