Spaces:

pradeepsengarr
/

Bot_RAG

Sleeping

App Files Community

pradeepsengarr committed on Apr 15

Commit

83e3bd0

verified ·

1 Parent(s): 52807fc

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -27

app.py CHANGED Viewed

@@ -440,8 +440,8 @@ persist_directory = "db"
 uploaded_files_dir = "uploaded_files"
 # Streamlit app configuration
-st.set_page_config(page_title="Audit Assistant", layout="wide")
-st.title("Audit Assistant")
 # Load the model
 checkpoint = "MBZUAI/LaMini-T5-738M"
@@ -451,7 +451,7 @@ base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
 # Helper Functions
 def extract_text_from_pdf(file_path):
-    """Extract text from a PDF using PyMuPDF (fitz)."""
     try:
         doc = fitz.open(file_path)
         text = ""
@@ -577,10 +577,8 @@ def process_answer(user_question):
         qa = qa_llm()
         tailored_prompt = f"""
-        You are an expert chatbot designed to assist the user in the field of audits or any topic the user wants.
-        Your goal is to provide accurate and comprehensive answers to any questions related to audit policies, procedures,
-        and accounting standards based on the provided PDF documents.
-        Please respond effectively and refer to the relevant standards and policies whenever applicable.
         User question: {user_question}
         """
@@ -602,34 +600,43 @@ st.sidebar.header("File Upload")
 uploaded_files = st.sidebar.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
 if uploaded_files:
-    # Save uploaded files
     if not os.path.exists(uploaded_files_dir):
         os.makedirs(uploaded_files_dir)
-    uploaded_file_names = []
     for uploaded_file in uploaded_files:
         file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
         with open(file_path, "wb") as f:
             f.write(uploaded_file.getbuffer())
-        uploaded_file_names.append(uploaded_file.name)
     st.sidebar.success(f"Uploaded {len(uploaded_files)} file(s) successfully!")
-    # Show uploaded PDFs
-    st.header("Uploaded PDF Files")
-    for filename in uploaded_file_names:
-        st.write(f"- {filename}")
-        # Suggestion buttons to generate summary
-        if st.button(f"Generate summary of {filename}"):
-            # Generate summary (you can customize this to use LLM or simple summarization)
-            text = extract_text_from_pdf(os.path.join(uploaded_files_dir, filename))
-            if text:
-                summary = text[:1000]  # Taking first 1000 chars as a simple summary (use LLM or other methods for better summaries)
-                st.write(f"Summary of {filename}:\n\n{summary}")
-            else:
-                st.write(f"Could not extract text from {filename}.")
     # Run data ingestion when files are uploaded
     data_ingestion()
@@ -643,5 +650,3 @@ if uploaded_files:
 else:
     st.sidebar.info("Upload PDF files to get started!")

 uploaded_files_dir = "uploaded_files"
 # Streamlit app configuration
+st.set_page_config(page_title="RAG-based Chatbot", layout="wide")
+st.title("RAG-based Chatbot")
 # Load the model
 checkpoint = "MBZUAI/LaMini-T5-738M"
 # Helper Functions
 def extract_text_from_pdf(file_path):
+    """Extract full text from a PDF using PyMuPDF (fitz)."""
     try:
         doc = fitz.open(file_path)
         text = ""
         qa = qa_llm()
         tailored_prompt = f"""
+        You are an expert chatbot designed to assist with any topic, providing accurate and detailed answers based on the provided PDFs.
+        Your goal is to deliver the most relevant information and resources based on the question asked.
         User question: {user_question}
         """
 uploaded_files = st.sidebar.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
 if uploaded_files:
+    # Save uploaded files and extract their text
     if not os.path.exists(uploaded_files_dir):
         os.makedirs(uploaded_files_dir)
     for uploaded_file in uploaded_files:
         file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
         with open(file_path, "wb") as f:
             f.write(uploaded_file.getbuffer())
     st.sidebar.success(f"Uploaded {len(uploaded_files)} file(s) successfully!")
+    # Show the uploaded files' names
+    st.subheader("Uploaded PDF(s):")
+    for uploaded_file in uploaded_files:
+        st.write(uploaded_file.name)
+        # Display PDF preview link if possible
+        with open(file_path, "rb") as f:
+            file_bytes = f.read()
+            st.download_button(
+                label="Download PDF",
+                data=file_bytes,
+                file_name=uploaded_file.name,
+                mime="application/pdf",
+            )
+        # Extract and display the full text from the PDF
+        st.subheader("Full Text from the PDF:")
+        full_text = extract_text_from_pdf(file_path)
+        if full_text:
+            st.text_area("PDF Text", full_text, height=300)
+        else:
+            st.warning("Failed to extract text from this PDF.")
+    # Generate summary option
+    if st.button("Generate Summary of Document"):
+        st.write("Summary: [Provide the generated summary here]")
     # Run data ingestion when files are uploaded
     data_ingestion()
 else:
     st.sidebar.info("Upload PDF files to get started!")