ivyblossom committed on
Commit
4688ae4
·
1 Parent(s): 2de15d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -37
app.py CHANGED
@@ -1,17 +1,14 @@
 
1
  import streamlit as st
2
- from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline
3
  from PyPDF2 import PdfReader
 
4
 
5
  # Function to perform question-answering
 
6
  def question_answering(questions, pdf_text):
7
- # Load the model and tokenizer
8
- model_name = "distilbert-base-cased-distilled-squad"
9
- model = AutoModelForQuestionAnswering.from_pretrained(model_name)
10
- tokenizer = AutoTokenizer.from_pretrained(model_name)
11
-
12
- # Create a QuestionAnsweringPipeline instance
13
- question_answerer = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
14
-
15
  answers = question_answerer(question=questions, context=pdf_text)
16
 
17
  return answers
@@ -19,41 +16,34 @@ def question_answering(questions, pdf_text):
19
  def main():
20
  st.title("Question Answering on PDF Files")
21
 
22
- # Allow user to upload a single PDF file
23
- uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
24
-
25
- if not uploaded_file:
26
- st.warning("Please upload a PDF file.")
27
- return
28
-
29
- st.subheader(f"Processing PDF file: {uploaded_file.name}")
30
 
31
- if uploaded_file.size == 0:
32
- st.error(f"Error: File '{uploaded_file.name}' is empty.")
33
- return
 
34
 
35
- with uploaded_file:
36
- pdf_reader = PdfReader(uploaded_file)
37
  pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
38
 
39
- # Get questions from the user (allow for multiple questions separated by newlines)
40
- user_input = st.text_area("Enter your question(s) separated by newlines:")
41
- questions = user_input.strip().split("\n")
42
 
43
- if not questions:
44
- st.warning("No questions entered.")
45
- return
46
 
47
- if st.button("Get Answers"):
48
- # Perform question-answering
49
- answers = question_answering(questions, pdf_text)
50
 
51
- st.subheader("Questions and Answers:")
52
- for i, (question, answer) in enumerate(zip(questions, answers)):
53
- st.write(f"Question {i + 1}: '{question}'")
54
- st.write("Answer:", answer['answer']) # Access the answer directly
55
- st.write(f"Score: {answer['score']:.2f}") # Format the score to 2 decimal places
56
- st.write("") # Add a new line after each answer
57
 
58
  if __name__ == "__main__":
59
  main()
 
1
+ import os
2
  import streamlit as st
3
+ from transformers import pipeline
4
  from PyPDF2 import PdfReader
5
+ import tempfile
6
 
7
  # Function to perform question-answering
8
+ @st.cache_data(show_spinner=False)
9
  def question_answering(questions, pdf_text):
10
+ # Perform question-answering using Hugging Face's Transformers
11
+ question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
 
 
 
 
 
 
12
  answers = question_answerer(question=questions, context=pdf_text)
13
 
14
  return answers
 
16
  def main():
17
  st.title("Question Answering on PDF Files")
18
 
19
+ uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
20
+
21
+ st.write("Enter your question(s) below (separate multiple questions with new lines):")
22
+ question = st.text_area("Question(s)")
 
 
 
 
23
 
24
+ if st.button("Answer") and uploaded_file is not None:
25
+ pdf_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
26
+ with open(pdf_path, "wb") as f:
27
+ f.write(uploaded_file.read())
28
 
29
+ # Read PDF text once and cache it for batch processing
30
+ pdf_reader = PdfReader(pdf_path)
31
  pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
32
 
33
+ # Get a list of questions (assuming the user enters multiple questions separated by newlines)
34
+ questions = question.split('\n')
 
35
 
36
+ # Perform question-answering in batches
37
+ answers = question_answering(questions, pdf_text)
 
38
 
39
+ # Delete the uploaded file after processing
40
+ #os.remove(pdf_path)
 
41
 
42
+ st.write("Questions and Answers:")
43
+ for i, (question, answer) in enumerate(zip(questions, answers)):
44
+ st.write(f"Question {i + 1}: '{question}'")
45
+ st.write("Answer:", answer['answer'])
46
+ st.write("Score:", answer['score'])
 
47
 
48
  if __name__ == "__main__":
49
  main()