ivyblossom committed on
Commit
272eebb
·
1 Parent(s): bed9c2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -19
app.py CHANGED
@@ -1,11 +1,8 @@
1
- import os
2
  import streamlit as st
3
  from transformers import pipeline
4
  from PyPDF2 import PdfReader
5
- import tempfile
6
 
7
  # Function to perform question-answering
8
- @st.cache_data(show_spinner=False)
9
  def question_answering(questions, pdf_text):
10
  # Perform question-answering using Hugging Face's Transformers
11
  question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
@@ -16,31 +13,40 @@ def question_answering(questions, pdf_text):
16
  def main():
17
  st.title("Question Answering on PDF Files")
18
 
19
- uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
20
-
21
- st.write("Enter your question(s) below (separate multiple questions with new lines):")
22
- question = st.text_area("Question(s)")
23
 
24
- if st.button("Answer") and uploaded_file is not None:
25
- pdf_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
26
- with open(pdf_path, "wb") as f:
27
- f.write(uploaded_file.read())
28
 
29
- # Read PDF text once and cache it for batch processing
30
- pdf_reader = PdfReader(pdf_path)
 
 
 
 
 
 
31
  pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
32
 
33
- # Get a list of questions (assuming the user enters multiple questions separated by newlines)
34
- questions = question.split('\n')
 
 
 
 
 
35
 
36
- # Perform question-answering in batches
37
  answers = question_answering(questions, pdf_text)
38
 
39
- st.write("Questions and Answers:")
40
  for i, (question, answer) in enumerate(zip(questions, answers)):
41
  st.write(f"Question {i + 1}: '{question}'")
42
  st.write("Answer:", answer['answer'])
43
- st.write("Score:", answer['score'])
 
44
 
45
  if __name__ == "__main__":
46
- main()
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  from PyPDF2 import PdfReader
 
4
 
5
  # Function to perform question-answering
 
6
  def question_answering(questions, pdf_text):
7
  # Perform question-answering using Hugging Face's Transformers
8
  question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
 
13
  def main():
14
  st.title("Question Answering on PDF Files")
15
 
16
+ # Allow user to upload a single PDF file
17
+ uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
 
 
18
 
19
+ if not uploaded_file:
20
+ st.warning("Please upload a PDF file.")
21
+ return
 
22
 
23
+ st.subheader(f"Processing PDF file: {uploaded_file.name}")
24
+
25
+ if uploaded_file.size == 0:
26
+ st.error(f"Error: File '{uploaded_file.name}' is empty.")
27
+ return
28
+
29
+ with uploaded_file:
30
+ pdf_reader = PdfReader(uploaded_file)
31
  pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
32
 
33
+ # Get questions from the user (allow for multiple questions separated by newlines)
34
+ user_input = st.text_area("Enter your question(s) separated by newlines:")
35
+ questions = user_input.strip().split("\n")
36
+
37
+ if not questions:
38
+ st.warning("No questions entered.")
39
+ return
40
 
41
+ # Perform question-answering
42
  answers = question_answering(questions, pdf_text)
43
 
44
+ st.subheader("Questions and Answers:")
45
  for i, (question, answer) in enumerate(zip(questions, answers)):
46
  st.write(f"Question {i + 1}: '{question}'")
47
  st.write("Answer:", answer['answer'])
48
+ st.write(f"Score: {answer['score']:.2f}") # Format the score to 2 decimal places
49
+ st.write("") # Add a new line after each answer
50
 
51
  if __name__ == "__main__":
52
+ main()