Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Apr 3

Commit

4fa0b0a

verified ·

1 Parent(s): 7acae8b

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -17

app.py CHANGED Viewed

@@ -123,6 +123,9 @@ app = FastAPI()
 # Mount the static directory to serve HTML, CSS, JS files
 app.mount("/static", StaticFiles(directory="static"), name="static")
 # Initialize transformers pipelines
 qa_pipeline = pipeline("question-answering", model="microsoft/phi-2", tokenizer="microsoft/phi-2")
 image_qa_pipeline = pipeline("vqa", model="Salesforce/blip-vqa-base")
@@ -130,13 +133,6 @@ image_qa_pipeline = pipeline("vqa", model="Salesforce/blip-vqa-base")
 # Initialize EasyOCR for image-based text extraction
 reader = easyocr.Reader(['en'])
-# Define a template for rendering HTML
-templates = Jinja2Templates(directory="templates")
-# Ensure temp_files directory exists
-temp_dir = "temp_files"
-os.makedirs(temp_dir, exist_ok=True)
 # Maximum allowed file size in bytes (e.g., 5 MB)
 MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB
@@ -165,10 +161,10 @@ def extract_pptx_text(file_path: str):
 def extract_text_from_image(image: Image):
     return pytesseract.image_to_string(image)
-# Home route
 @app.get("/")
-def home():
-    return RedirectResponse(url="/docs")
 # Function to answer questions based on document content
 @app.post("/question-answering-doc")
@@ -177,18 +173,29 @@ async def question_answering_doc(request: Request, question: str = Form(...), fi
     if file.spool_max_size > MAX_FILE_SIZE:
         raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")
-    file_path = os.path.join(temp_dir, file.filename)
-    with open(file_path, "wb") as f:
-        f.write(await file.read())
     try:
         # Extract text based on the file type
         if file.filename.endswith(".pdf"):
             text = extract_pdf_text(file_path)
         elif file.filename.endswith(".docx"):
             text = extract_docx_text(file_path)
         elif file.filename.endswith(".pptx"):
             text = extract_pptx_text(file_path)
         else:
             raise HTTPException(status_code=400, detail="Unsupported file format")
     except Exception as e:
@@ -196,9 +203,6 @@ async def question_answering_doc(request: Request, question: str = Form(...), fi
     qa_result = qa_pipeline(question=question, context=text)
-    # Clean up the temporary file
-    os.remove(file_path)
     return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
 # Function to answer questions based on images

 # Mount the static directory to serve HTML, CSS, JS files
 app.mount("/static", StaticFiles(directory="static"), name="static")
+# Define a template for rendering HTML
+templates = Jinja2Templates(directory="templates")
 # Initialize transformers pipelines
 qa_pipeline = pipeline("question-answering", model="microsoft/phi-2", tokenizer="microsoft/phi-2")
 image_qa_pipeline = pipeline("vqa", model="Salesforce/blip-vqa-base")
 # Initialize EasyOCR for image-based text extraction
 reader = easyocr.Reader(['en'])
 # Maximum allowed file size in bytes (e.g., 5 MB)
 MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB
 def extract_text_from_image(image: Image):
     return pytesseract.image_to_string(image)
+# Home route - Render the index page
 @app.get("/")
+async def home(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request})
 # Function to answer questions based on document content
 @app.post("/question-answering-doc")
     if file.spool_max_size > MAX_FILE_SIZE:
         raise HTTPException(status_code=400, detail=f"File size exceeds the {MAX_FILE_SIZE / (1024 * 1024)} MB limit.")
     try:
+        # Read the file content into memory
+        file_content = await file.read()
         # Extract text based on the file type
         if file.filename.endswith(".pdf"):
+            file_path = "/tmp/tempfile.pdf"
+            with open(file_path, "wb") as f:
+                f.write(file_content)
             text = extract_pdf_text(file_path)
+            os.remove(file_path)
         elif file.filename.endswith(".docx"):
+            file_path = "/tmp/tempfile.docx"
+            with open(file_path, "wb") as f:
+                f.write(file_content)
             text = extract_docx_text(file_path)
+            os.remove(file_path)
         elif file.filename.endswith(".pptx"):
+            file_path = "/tmp/tempfile.pptx"
+            with open(file_path, "wb") as f:
+                f.write(file_content)
             text = extract_pptx_text(file_path)
+            os.remove(file_path)
         else:
             raise HTTPException(status_code=400, detail="Unsupported file format")
     except Exception as e:
     qa_result = qa_pipeline(question=question, context=text)
     return templates.TemplateResponse("index.html", {"request": request, "answer": qa_result['answer']})
 # Function to answer questions based on images