Spaces:

ivyblossom
/

question-answering

Running

App Files Community

ivyblossom committed on Aug 9, 2023

Commit

4688ae4

1 Parent(s): 2de15d0

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -37

app.py CHANGED Viewed

@@ -1,17 +1,14 @@
 import streamlit as st
-from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline
 from PyPDF2 import PdfReader
 # Function to perform question-answering
 def question_answering(questions, pdf_text):
-    # Load the model and tokenizer
-    model_name = "distilbert-base-cased-distilled-squad"
-    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    # Create a QuestionAnsweringPipeline instance
-    question_answerer = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
     answers = question_answerer(question=questions, context=pdf_text)
     return answers
@@ -19,41 +16,34 @@ def question_answering(questions, pdf_text):
 def main():
     st.title("Question Answering on PDF Files")
-    # Allow user to upload a single PDF file
-    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
-    if not uploaded_file:
-        st.warning("Please upload a PDF file.")
-        return
-    st.subheader(f"Processing PDF file: {uploaded_file.name}")
-    if uploaded_file.size == 0:
-        st.error(f"Error: File '{uploaded_file.name}' is empty.")
-        return
-    with uploaded_file:
-        pdf_reader = PdfReader(uploaded_file)
         pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
-        # Get questions from the user (allow for multiple questions separated by newlines)
-        user_input = st.text_area("Enter your question(s) separated by newlines:")
-        questions = user_input.strip().split("\n")
-        if not questions:
-            st.warning("No questions entered.")
-            return
-        if st.button("Get Answers"):
-            # Perform question-answering
-            answers = question_answering(questions, pdf_text)
-            st.subheader("Questions and Answers:")
-            for i, (question, answer) in enumerate(zip(questions, answers)):
-                st.write(f"Question {i + 1}: '{question}'")
-                st.write("Answer:", answer['answer'])  # Access the answer directly
-                st.write(f"Score: {answer['score']:.2f}")  # Format the score to 2 decimal places
-                st.write("")  # Add a new line after each answer
 if __name__ == "__main__":
     main()

+import os
 import streamlit as st
+from transformers import pipeline
 from PyPDF2 import PdfReader
+import tempfile
 # Function to perform question-answering
+@st.cache_data(show_spinner=False)
 def question_answering(questions, pdf_text):
+    # Perform question-answering using Hugging Face's Transformers
+    question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
     answers = question_answerer(question=questions, context=pdf_text)
     return answers
 def main():
     st.title("Question Answering on PDF Files")
+    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
+    st.write("Enter your question(s) below (separate multiple questions with new lines):")
+    question = st.text_area("Question(s)")
+    if st.button("Answer") and uploaded_file is not None:
+        pdf_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
+        with open(pdf_path, "wb") as f:
+            f.write(uploaded_file.read())
+        # Read PDF text once and cache it for batch processing
+        pdf_reader = PdfReader(pdf_path)
         pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
+        # Get a list of questions (assuming the user enters multiple questions separated by newlines)
+        questions = question.split('\n')
+        # Perform question-answering in batches
+        answers = question_answering(questions, pdf_text)
+        # Delete the uploaded file after processing
+        #os.remove(pdf_path)
+        st.write("Questions and Answers:")
+        for i, (question, answer) in enumerate(zip(questions, answers)):
+            st.write(f"Question {i + 1}: '{question}'")
+            st.write("Answer:", answer['answer'])
+            st.write("Score:", answer['score'])
 if __name__ == "__main__":
     main()