Spaces:

ikraamkb
/

qtAnswering

Running

App Files Files Community

ikraamkb committed on Apr 3

Commit

7acae8b

verified ·

1 Parent(s): 3aa8146

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -9

app.py CHANGED Viewed

@@ -100,7 +100,7 @@ async def question_answering_image(question: str = Form(...), image_file: Upload
 async def get_docs(request: Request):
     return templates.TemplateResponse("index.html", {"request": request})
 """
-from fastapi import FastAPI, Form, File, UploadFile
 from fastapi.responses import RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
@@ -137,6 +137,9 @@ templates = Jinja2Templates(directory="templates")
 temp_dir = "temp_files"
 os.makedirs(temp_dir, exist_ok=True)
 # Function to process PDFs
 def extract_pdf_text(file_path: str):
     with pdfplumber.open(file_path) as pdf:
@@ -170,30 +173,49 @@ def home():
 # Function to answer questions based on document content
 @app.post("/question-answering-doc")
 async def question_answering_doc(request: Request, question: str = Form(...), file: UploadFile = File(...)):
     file_path = os.path.join(temp_dir, file.filename)
     with open(file_path, "wb") as f:
         f.write(await file.read())
-    if file.filename.endswith(".pdf"):
-        text = extract_pdf_text(file_path)
-    elif file.filename.endswith(".docx"):
-        text = extract_docx_text(file_path)
-    elif file.filename.endswith(".pptx"):
-        text = extract_pptx_text(file_path)
-    else:
-        return {"error": "Unsupported file format"}
     qa_result = qa_pipeline(question=question, context=text)
     return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
 # Function to answer questions based on images
 @app.post("/question-answering-image")
 async def question_answering_image(request: Request, question: str = Form(...), image_file: UploadFile = File(...)):
     image = Image.open(BytesIO(await image_file.read()))
     image_text = extract_text_from_image(image)
     image_qa_result = image_qa_pipeline({"image": image, "question": question})
     return templates.TemplateResponse("index.html", {"request": request, "answer": image_qa_result[0]['answer'], "image_text": image_text})
 # Serve the application in Hugging Face space

 async def get_docs(request: Request):
     return templates.TemplateResponse("index.html", {"request": request})
 """
+from fastapi import FastAPI, Form, File, UploadFile, HTTPException
 from fastapi.responses import RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 temp_dir = "temp_files"
 os.makedirs(temp_dir, exist_ok=True)
+# Maximum allowed file size in bytes (e.g., 5 MB)
+MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB
 # Function to process PDFs
 def extract_pdf_text(file_path: str):
     with pdfplumber.open(file_path) as pdf:
 # Function to answer questions based on document content
 @app.post("/question-answering-doc")
 async def question_answering_doc(request: Request, question: str = Form(...), file: UploadFile = File(...)):
     """Answer a question using the text of an uploaded PDF, DOCX or PPTX file.

     The upload is size-checked, written under ``temp_dir``, converted to text
     by the extractor matching its extension, and passed to ``qa_pipeline`` as
     the context. The temporary file is removed whether or not processing
     succeeds.

     Raises:
         HTTPException: 400 when the upload exceeds ``MAX_FILE_SIZE``, has no
             usable filename, or has an unsupported extension; 500 when text
             extraction fails.
     """
     # Measure the bytes actually received. ``spool_max_size`` is only the
     # threshold at which the upload is spooled to disk, not the upload's size,
     # so comparing it against the limit never rejected anything.
     content = await file.read()
     if len(content) > MAX_FILE_SIZE:
         raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")
     # Keep only the final path component so a crafted filename such as
     # "../../app.py" cannot write outside temp_dir.
     safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
     if not safe_name:
         raise HTTPException(status_code=400, detail="Uploaded file has no filename")
     # Pick the extractor before touching the disk; doing this outside the
     # try/except keeps the 400 from being re-reported as a 500.
     lowered_name = safe_name.lower()
     if lowered_name.endswith(".pdf"):
         extract_text = extract_pdf_text
     elif lowered_name.endswith(".docx"):
         extract_text = extract_docx_text
     elif lowered_name.endswith(".pptx"):
         extract_text = extract_pptx_text
     else:
         raise HTTPException(status_code=400, detail="Unsupported file format")
     file_path = os.path.join(temp_dir, safe_name)
     try:
         with open(file_path, "wb") as f:
             f.write(content)
         try:
             text = extract_text(file_path)
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"An error occurred while processing the file: {str(e)}") from e
         qa_result = qa_pipeline(question=question, context=text)
     finally:
         # Clean up the temporary file even when extraction or QA fails.
         if os.path.exists(file_path):
             os.remove(file_path)
     return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
 # Function to answer questions based on images
 @app.post("/question-answering-image")
 async def question_answering_image(request: Request, question: str = Form(...), image_file: UploadFile = File(...)):
     """Answer a question about an uploaded image.

     The upload is size-checked and decoded in memory (nothing is written to
     disk), its text is extracted with ``extract_text_from_image``, and the
     image plus question are passed to ``image_qa_pipeline``. The first
     result's answer and the extracted text are rendered into ``index.html``.

     Raises:
         HTTPException: 400 when the upload exceeds ``MAX_FILE_SIZE`` or cannot
             be opened as an image.
     """
     # Measure the bytes actually received. ``spool_max_size`` is only the
     # threshold at which the upload is spooled to disk, not the upload's size.
     image_bytes = await image_file.read()
     if len(image_bytes) > MAX_FILE_SIZE:
         raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")
     try:
         image = Image.open(BytesIO(image_bytes))
     except OSError as e:
         # NOTE(review): assumes ``Image`` is PIL.Image, whose open() raises an
         # OSError subclass for unreadable data -- confirm against the imports.
         raise HTTPException(status_code=400, detail="Uploaded file is not a valid image") from e
     image_text = extract_text_from_image(image)
     image_qa_result = image_qa_pipeline({"image": image, "question": question})
     return templates.TemplateResponse("index.html", {"request": request, "answer": image_qa_result[0]['answer'], "image_text": image_text})
 # Serve the application in Hugging Face space