ikraamkb committed on
Commit
7acae8b
·
verified ·
1 Parent(s): 3aa8146

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -100,7 +100,7 @@ async def question_answering_image(question: str = Form(...), image_file: Upload
100
  async def get_docs(request: Request):
101
  return templates.TemplateResponse("index.html", {"request": request})
102
  """
103
- from fastapi import FastAPI, Form, File, UploadFile
104
  from fastapi.responses import RedirectResponse
105
  from fastapi.staticfiles import StaticFiles
106
  from pydantic import BaseModel
@@ -137,6 +137,9 @@ templates = Jinja2Templates(directory="templates")
137
  temp_dir = "temp_files"
138
  os.makedirs(temp_dir, exist_ok=True)
139
 
 
 
 
140
  # Function to process PDFs
141
  def extract_pdf_text(file_path: str):
142
  with pdfplumber.open(file_path) as pdf:
@@ -170,30 +173,49 @@ def home():
170
  # Function to answer questions based on document content
171
  @app.post("/question-answering-doc")
172
  async def question_answering_doc(request: Request, question: str = Form(...), file: UploadFile = File(...)):
 
 
 
 
173
  file_path = os.path.join(temp_dir, file.filename)
174
  with open(file_path, "wb") as f:
175
  f.write(await file.read())
176
 
177
- if file.filename.endswith(".pdf"):
178
- text = extract_pdf_text(file_path)
179
- elif file.filename.endswith(".docx"):
180
- text = extract_docx_text(file_path)
181
- elif file.filename.endswith(".pptx"):
182
- text = extract_pptx_text(file_path)
183
- else:
184
- return {"error": "Unsupported file format"}
 
 
 
 
185
 
186
  qa_result = qa_pipeline(question=question, context=text)
 
 
 
 
187
  return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
188
 
189
  # Function to answer questions based on images
190
  @app.post("/question-answering-image")
191
  async def question_answering_image(request: Request, question: str = Form(...), image_file: UploadFile = File(...)):
 
 
 
 
192
  image = Image.open(BytesIO(await image_file.read()))
193
  image_text = extract_text_from_image(image)
194
 
195
  image_qa_result = image_qa_pipeline({"image": image, "question": question})
196
 
 
 
 
197
  return templates.TemplateResponse("index.html", {"request": request, "answer": image_qa_result[0]['answer'], "image_text": image_text})
198
 
199
  # Serve the application in Hugging Face space
 
100
  async def get_docs(request: Request):
101
  return templates.TemplateResponse("index.html", {"request": request})
102
  """
103
+ from fastapi import FastAPI, Form, File, UploadFile, HTTPException
104
  from fastapi.responses import RedirectResponse
105
  from fastapi.staticfiles import StaticFiles
106
  from pydantic import BaseModel
 
137
  temp_dir = "temp_files"
138
  os.makedirs(temp_dir, exist_ok=True)
139
 
140
+ # Maximum allowed file size in bytes (e.g., 5 MB)
141
+ MAX_FILE_SIZE = 5 * 1024 * 1024 # 5 MB
142
+
143
  # Function to process PDFs
144
  def extract_pdf_text(file_path: str):
145
  with pdfplumber.open(file_path) as pdf:
 
173
  # Function to answer questions based on document content
174
  @app.post("/question-answering-doc")
175
  async def question_answering_doc(request: Request, question: str = Form(...), file: UploadFile = File(...)):
176
+ # Validate file size
177
+ if file.spool_max_size > MAX_FILE_SIZE:
178
+ raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")
179
+
180
  file_path = os.path.join(temp_dir, file.filename)
181
  with open(file_path, "wb") as f:
182
  f.write(await file.read())
183
 
184
+ try:
185
+ # Extract text based on the file type
186
+ if file.filename.endswith(".pdf"):
187
+ text = extract_pdf_text(file_path)
188
+ elif file.filename.endswith(".docx"):
189
+ text = extract_docx_text(file_path)
190
+ elif file.filename.endswith(".pptx"):
191
+ text = extract_pptx_text(file_path)
192
+ else:
193
+ raise HTTPException(status_code=400, detail="Unsupported file format")
194
+ except Exception as e:
195
+ raise HTTPException(status_code=500, detail=f"An error occurred while processing the file: {str(e)}")
196
 
197
  qa_result = qa_pipeline(question=question, context=text)
198
+
199
+ # Clean up the temporary file
200
+ os.remove(file_path)
201
+
202
  return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
203
 
204
  # Function to answer questions based on images
205
  @app.post("/question-answering-image")
206
  async def question_answering_image(request: Request, question: str = Form(...), image_file: UploadFile = File(...)):
207
+ # Validate file size
208
+ if image_file.spool_max_size > MAX_FILE_SIZE:
209
+ raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")
210
+
211
  image = Image.open(BytesIO(await image_file.read()))
212
  image_text = extract_text_from_image(image)
213
 
214
  image_qa_result = image_qa_pipeline({"image": image, "question": question})
215
 
216
+ # Clean up the temporary image file
217
+ del image
218
+
219
  return templates.TemplateResponse("index.html", {"request": request, "answer": image_qa_result[0]['answer'], "image_text": image_text})
220
 
221
  # Serve the application in Hugging Face space