Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -440,8 +440,8 @@ persist_directory = "db"
|
|
440 |
uploaded_files_dir = "uploaded_files"
|
441 |
|
442 |
# Streamlit app configuration
|
443 |
-
st.set_page_config(page_title="
|
444 |
-
st.title("
|
445 |
|
446 |
# Load the model
|
447 |
checkpoint = "MBZUAI/LaMini-T5-738M"
|
@@ -451,7 +451,7 @@ base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
|
|
451 |
# Helper Functions
|
452 |
|
453 |
def extract_text_from_pdf(file_path):
|
454 |
-
"""Extract text from a PDF using PyMuPDF (fitz)."""
|
455 |
try:
|
456 |
doc = fitz.open(file_path)
|
457 |
text = ""
|
@@ -577,10 +577,8 @@ def process_answer(user_question):
|
|
577 |
qa = qa_llm()
|
578 |
|
579 |
tailored_prompt = f"""
|
580 |
-
You are an expert chatbot designed to assist
|
581 |
-
Your goal is to
|
582 |
-
and accounting standards based on the provided PDF documents.
|
583 |
-
Please respond effectively and refer to the relevant standards and policies whenever applicable.
|
584 |
User question: {user_question}
|
585 |
"""
|
586 |
|
@@ -602,34 +600,43 @@ st.sidebar.header("File Upload")
|
|
602 |
uploaded_files = st.sidebar.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
|
603 |
|
604 |
if uploaded_files:
|
605 |
-
# Save uploaded files
|
606 |
if not os.path.exists(uploaded_files_dir):
|
607 |
os.makedirs(uploaded_files_dir)
|
608 |
|
609 |
-
uploaded_file_names = []
|
610 |
for uploaded_file in uploaded_files:
|
611 |
file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
|
612 |
with open(file_path, "wb") as f:
|
613 |
f.write(uploaded_file.getbuffer())
|
614 |
-
|
615 |
-
uploaded_file_names.append(uploaded_file.name)
|
616 |
|
617 |
st.sidebar.success(f"Uploaded {len(uploaded_files)} file(s) successfully!")
|
618 |
|
619 |
-
# Show uploaded
|
620 |
-
st.
|
621 |
-
for
|
622 |
-
st.write(
|
623 |
-
#
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
633 |
# Run data ingestion when files are uploaded
|
634 |
data_ingestion()
|
635 |
|
@@ -643,5 +650,3 @@ if uploaded_files:
|
|
643 |
|
644 |
else:
|
645 |
st.sidebar.info("Upload PDF files to get started!")
|
646 |
-
|
647 |
-
|
|
|
440 |
uploaded_files_dir = "uploaded_files"
|
441 |
|
442 |
# Streamlit app configuration
|
443 |
+
st.set_page_config(page_title="RAG-based Chatbot", layout="wide")
|
444 |
+
st.title("RAG-based Chatbot")
|
445 |
|
446 |
# Load the model
|
447 |
checkpoint = "MBZUAI/LaMini-T5-738M"
|
|
|
451 |
# Helper Functions
|
452 |
|
453 |
def extract_text_from_pdf(file_path):
|
454 |
+
"""Extract full text from a PDF using PyMuPDF (fitz)."""
|
455 |
try:
|
456 |
doc = fitz.open(file_path)
|
457 |
text = ""
|
|
|
577 |
qa = qa_llm()
|
578 |
|
579 |
tailored_prompt = f"""
|
580 |
+
You are an expert chatbot designed to assist with any topic, providing accurate and detailed answers based on the provided PDFs.
|
581 |
+
Your goal is to deliver the most relevant information and resources based on the question asked.
|
|
|
|
|
582 |
User question: {user_question}
|
583 |
"""
|
584 |
|
|
|
600 |
uploaded_files = st.sidebar.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
|
601 |
|
602 |
if uploaded_files:
|
603 |
+
# Save uploaded files and extract their text
|
604 |
if not os.path.exists(uploaded_files_dir):
|
605 |
os.makedirs(uploaded_files_dir)
|
606 |
|
|
|
607 |
for uploaded_file in uploaded_files:
|
608 |
file_path = os.path.join(uploaded_files_dir, uploaded_file.name)
|
609 |
with open(file_path, "wb") as f:
|
610 |
f.write(uploaded_file.getbuffer())
|
|
|
|
|
611 |
|
612 |
st.sidebar.success(f"Uploaded {len(uploaded_files)} file(s) successfully!")
|
613 |
|
614 |
+
# Show the uploaded files' names
|
615 |
+
st.subheader("Uploaded PDF(s):")
|
616 |
+
for uploaded_file in uploaded_files:
|
617 |
+
st.write(uploaded_file.name)
|
618 |
+
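# Offer the PDF as a download (a download button, not an inline preview). NOTE(review): file_path is not reassigned inside this loop, so it appears to still point at the last file written by the save loop above — confirm the path is rebuilt per uploaded_file.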
|
619 |
+
with open(file_path, "rb") as f:
|
620 |
+
file_bytes = f.read()
|
621 |
+
st.download_button(
|
622 |
+
label="Download PDF",
|
623 |
+
data=file_bytes,
|
624 |
+
file_name=uploaded_file.name,
|
625 |
+
mime="application/pdf",
|
626 |
+
)
|
627 |
+
|
628 |
+
# Extract and display the full text from the PDF
|
629 |
+
st.subheader("Full Text from the PDF:")
|
630 |
+
full_text = extract_text_from_pdf(file_path)
|
631 |
+
if full_text:
|
632 |
+
st.text_area("PDF Text", full_text, height=300)
|
633 |
+
else:
|
634 |
+
st.warning("Failed to extract text from this PDF.")
|
635 |
+
|
636 |
+
# Generate summary option
|
637 |
+
if st.button("Generate Summary of Document"):
|
638 |
+
st.write("Summary: [Provide the generated summary here]")
|
639 |
+
|
640 |
# Run data ingestion when files are uploaded
|
641 |
data_ingestion()
|
642 |
|
|
|
650 |
|
651 |
else:
|
652 |
st.sidebar.info("Upload PDF files to get started!")
|
|
|
|