Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -100,7 +100,7 @@ async def question_answering_image(question: str = Form(...), image_file: Upload
|
|
100 |
async def get_docs(request: Request):
|
101 |
return templates.TemplateResponse("index.html", {"request": request})
|
102 |
"""
|
103 |
-
from fastapi import FastAPI, Form, File, UploadFile
|
104 |
from fastapi.responses import RedirectResponse
|
105 |
from fastapi.staticfiles import StaticFiles
|
106 |
from pydantic import BaseModel
|
@@ -137,6 +137,9 @@ templates = Jinja2Templates(directory="templates")
|
|
137 |
temp_dir = "temp_files"
|
138 |
os.makedirs(temp_dir, exist_ok=True)
|
139 |
|
|
|
|
|
|
|
140 |
# Function to process PDFs
|
141 |
def extract_pdf_text(file_path: str):
|
142 |
with pdfplumber.open(file_path) as pdf:
|
@@ -170,30 +173,49 @@ def home():
|
|
170 |
# Function to answer questions based on document content
|
171 |
@app.post("/question-answering-doc")
|
172 |
async def question_answering_doc(request: Request, question: str = Form(...), file: UploadFile = File(...)):
|
|
|
|
|
|
|
|
|
173 |
file_path = os.path.join(temp_dir, file.filename)
|
174 |
with open(file_path, "wb") as f:
|
175 |
f.write(await file.read())
|
176 |
|
177 |
-
|
178 |
-
text
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
185 |
|
186 |
qa_result = qa_pipeline(question=question, context=text)
|
|
|
|
|
|
|
|
|
187 |
return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
|
188 |
|
189 |
# Function to answer questions based on images
|
190 |
@app.post("/question-answering-image")
|
191 |
async def question_answering_image(request: Request, question: str = Form(...), image_file: UploadFile = File(...)):
|
|
|
|
|
|
|
|
|
192 |
image = Image.open(BytesIO(await image_file.read()))
|
193 |
image_text = extract_text_from_image(image)
|
194 |
|
195 |
image_qa_result = image_qa_pipeline({"image": image, "question": question})
|
196 |
|
|
|
|
|
|
|
197 |
return templates.TemplateResponse("index.html", {"request": request, "answer": image_qa_result[0]['answer'], "image_text": image_text})
|
198 |
|
199 |
# Serve the application in Hugging Face space
|
|
|
100 |
async def get_docs(request: Request):
|
101 |
return templates.TemplateResponse("index.html", {"request": request})
|
102 |
"""
|
103 |
+
from fastapi import FastAPI, Form, File, UploadFile, HTTPException
|
104 |
from fastapi.responses import RedirectResponse
|
105 |
from fastapi.staticfiles import StaticFiles
|
106 |
from pydantic import BaseModel
|
|
|
137 |
temp_dir = "temp_files"
|
138 |
os.makedirs(temp_dir, exist_ok=True)
|
139 |
|
140 |
+
# Maximum allowed file size in bytes (e.g., 5 MB)
|
141 |
+
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5 MB
|
142 |
+
|
143 |
# Function to process PDFs
|
144 |
def extract_pdf_text(file_path: str):
|
145 |
with pdfplumber.open(file_path) as pdf:
|
|
|
173 |
# Function to answer questions based on document content
|
174 |
@app.post("/question-answering-doc")
async def question_answering_doc(request: Request, question: str = Form(...), file: UploadFile = File(...)):
    """Answer a question using the text of an uploaded PDF, DOCX or PPTX file.

    Args:
        request: Incoming request, passed through to the template context.
        question: The question submitted with the form.
        file: The uploaded document.

    Returns:
        The rendered "index.html" template with the pipeline's answer under
        the "answer" key.

    Raises:
        HTTPException: 400 if the upload is larger than MAX_FILE_SIZE or its
            extension is not .pdf/.docx/.pptx; 500 if storing the upload or
            extracting its text fails.
    """
    import tempfile  # local import so this handler does not depend on the file's import block

    # Enforce the limit on the bytes actually received. Reading at most one
    # byte past the limit is enough to detect an oversized upload without
    # buffering all of it.
    contents = await file.read(MAX_FILE_SIZE + 1)
    if len(contents) > MAX_FILE_SIZE:
        raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")

    # Decide on the extractor before touching the disk, and outside the
    # try-block below so this 400 is not turned into a 500.
    extension = os.path.splitext(file.filename or "")[1].lower()
    if extension not in (".pdf", ".docx", ".pptx"):
        raise HTTPException(status_code=400, detail="Unsupported file format")

    # Use a generated name inside temp_dir instead of the client-supplied
    # filename: this avoids path traversal ("../../x.pdf") and collisions
    # between concurrent uploads that share a name.
    fd, file_path = tempfile.mkstemp(suffix=extension, dir=temp_dir)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(contents)

        # Extract text based on the file type
        if extension == ".pdf":
            text = extract_pdf_text(file_path)
        elif extension == ".docx":
            text = extract_docx_text(file_path)
        else:
            text = extract_pptx_text(file_path)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred while processing the file: {str(e)}") from e
    finally:
        # Clean up the temporary file whether or not extraction succeeded
        os.remove(file_path)

    qa_result = qa_pipeline(question=question, context=text)

    return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
|
203 |
|
204 |
# Function to answer questions based on images
|
205 |
@app.post("/question-answering-image")
async def question_answering_image(request: Request, question: str = Form(...), image_file: UploadFile = File(...)):
    """Answer a question about an uploaded image.

    Args:
        request: Incoming request, passed through to the template context.
        question: The question submitted with the form.
        image_file: The uploaded image.

    Returns:
        The rendered "index.html" template with the first pipeline answer
        under "answer" and the text extracted from the image under
        "image_text".

    Raises:
        HTTPException: 400 if the upload is larger than MAX_FILE_SIZE or
            cannot be opened as an image.
    """
    # Enforce the limit on the bytes actually received. Reading at most one
    # byte past the limit is enough to detect an oversized upload without
    # buffering all of it.
    image_bytes = await image_file.read(MAX_FILE_SIZE + 1)
    if len(image_bytes) > MAX_FILE_SIZE:
        raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")

    # The image is decoded from memory; nothing is written to disk, so there
    # is no temporary file to clean up afterwards.
    try:
        image = Image.open(BytesIO(image_bytes))
    except OSError as e:
        # NOTE(review): assumes `Image` is PIL.Image, whose open() reports
        # unreadable input with an OSError subclass - confirm against the
        # file's imports.
        raise HTTPException(status_code=400, detail="Uploaded file is not a readable image") from e

    image_text = extract_text_from_image(image)

    image_qa_result = image_qa_pipeline({"image": image, "question": question})

    return templates.TemplateResponse("index.html", {"request": request, "answer": image_qa_result[0]['answer'], "image_text": image_text})
|
220 |
|
221 |
# Serve the application in Hugging Face space
|