kanneboinakumar committed on
Commit
07fafa2
·
verified ·
1 Parent(s): a2bdbc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -64,8 +64,15 @@ mcq_chain = LLMChain(llm=llm, prompt=prompt)
64
  # Extract text from PDF or Word
65
  def extract_text(file):
66
  if file.name.endswith(".pdf"):
67
- doc = fitz.open(stream=file.read(), filetype="pdf")
68
- return "\n".join([page.get_text() for page in doc])
 
 
 
 
 
 
 
69
  elif file.name.endswith(".docx"):
70
  doc = docx.Document(file)
71
  return "\n".join([para.text for para in doc.paragraphs])
 
64
  # Extract text from PDF or Word
65
  def extract_text(file):
66
  if file.name.endswith(".pdf"):
67
+ # Read the entire file content into memory
68
+ file_bytes = file.read()
69
+ # Open the PDF from the byte stream
70
+ doc = fitz.open(stream=file_bytes, filetype="pdf")
71
+ # Extract text from all pages
72
+ text = ""
73
+ for page in doc:
74
+ text += page.get_text()
75
+ return text
76
  elif file.name.endswith(".docx"):
77
  doc = docx.Document(file)
78
  return "\n".join([para.text for para in doc.paragraphs])