"""app.py — Document QA backend.

Extracts text from PDF, DOCX, PPTX and XLSX documents, answers a question
about that text with an extractive question-answering pipeline, and saves the
answer as spoken audio (MP3).
"""
from fastapi import FastAPI
from fastapi.responses import FileResponse, JSONResponse
import fitz  # PyMuPDF
import easyocr
import openpyxl
import pptx
import docx
from transformers import pipeline
from gtts import gTTS
import tempfile
import os

app = FastAPI()

# Built once at import time so every call reuses the same objects.
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
# NOTE: the OCR reader is created here but is not referenced by the functions
# in this section of the file.
reader = easyocr.Reader(['en', 'fr'])


def _read_pdf(path):
    """Return the text of every page of the PDF at *path*; raises on failure."""
    with fitz.open(path) as doc:
        return "\n".join(page.get_text("text") for page in doc)


def _read_docx(path):
    """Return the non-blank paragraphs of the DOCX at *path*; raises on failure."""
    document = docx.Document(path)
    return "\n".join(p.text for p in document.paragraphs if p.text.strip())


def _read_pptx(path):
    """Return the text of every text-bearing shape in the PPTX at *path*."""
    prs = pptx.Presentation(path)
    return "\n".join(
        shape.text
        for slide in prs.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )


def _read_xlsx(path):
    """Return the cell values of every sheet in the XLSX at *path*, one row per line."""
    wb = openpyxl.load_workbook(path)
    return "\n".join(
        # Skip only genuinely empty cells: a plain truthiness test would also
        # drop legitimate values such as 0, 0.0 and False.
        " ".join(str(cell) for cell in row if cell not in (None, ""))
        for sheet in wb.sheetnames
        for row in wb[sheet].iter_rows(values_only=True)
    )


# Extension -> (label used in error messages, reader that raises on failure).
_READERS = {
    "pdf": ("PDF", _read_pdf),
    "docx": ("DOCX", _read_docx),
    "pptx": ("PPTX", _read_pptx),
    "xlsx": ("XLSX", _read_xlsx),
}


def extract_text_from_pdf(pdf_file):
    """Return the text of *pdf_file*, or an ``"Error reading PDF: ..."`` string."""
    try:
        return _read_pdf(pdf_file)
    except Exception as e:
        return f"Error reading PDF: {e}"


def extract_text_from_docx(docx_file):
    """Return the text of *docx_file*, or an ``"Error reading DOCX: ..."`` string.

    Failures are reported as a string, matching the PDF/PPTX/XLSX extractors,
    instead of propagating the exception.
    """
    try:
        return _read_docx(docx_file)
    except Exception as e:
        return f"Error reading DOCX: {e}"


def extract_text_from_pptx(pptx_file):
    """Return the text of *pptx_file*, or an ``"Error reading PPTX: ..."`` string."""
    try:
        return _read_pptx(pptx_file)
    except Exception as e:
        return f"Error reading PPTX: {e}"


def extract_text_from_xlsx(xlsx_file):
    """Return the text of *xlsx_file*, or an ``"Error reading XLSX: ..."`` string."""
    try:
        return _read_xlsx(xlsx_file)
    except Exception as e:
        return f"Error reading XLSX: {e}"


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it is already gone or locked."""
    try:
        os.remove(path)
    except OSError:
        pass  # best-effort cleanup of a temporary file


def answer_question_from_doc(file, question):
    """Answer *question* from an uploaded document and synthesize the answer.

    Args:
        file: Upload object exposing ``filename`` and either a synchronous
            ``file`` attribute (as FastAPI's ``UploadFile`` does) or a
            synchronous ``read()`` method.
        question: The question to answer from the document text.

    Returns:
        ``(answer, mp3_path)`` on success, where ``mp3_path`` is a temporary
        file the caller is responsible for deleting; ``(message, None)`` when
        the format is unsupported, the document cannot be read or contains no
        text, or answering/synthesis fails.
    """
    # Only the extension of the client-supplied name is used; the name itself
    # never becomes part of a filesystem path.
    ext = (file.filename or "").split(".")[-1].lower()
    entry = _READERS.get(ext)
    if entry is None:
        return "Unsupported file format.", None
    label, read_text = entry

    # UploadFile.read() is a coroutine; its underlying ``.file`` object can be
    # read synchronously. Objects without ``.file`` are read directly.
    source = getattr(file, "file", file)
    data = source.read()

    # A uniquely named temp file avoids collisions between concurrent uploads
    # and path traversal via a crafted filename.
    upload = tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}")
    try:
        with upload:
            upload.write(data)
        try:
            context = read_text(upload.name)
        except Exception as e:
            # Report the failure instead of feeding an error string to the
            # QA model as if it were document text.
            return f"Error reading {label}: {e}", None
    finally:
        _remove_quietly(upload.name)

    if not context.strip():
        return "No text found in the document.", None

    audio_path = None
    try:
        result = qa_model({"question": question, "context": context})
        answer = result["answer"]
        tts = gTTS(answer)
        # Reserve a path, then close the handle before gTTS writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
        tts.save(audio_path)
        return answer, audio_path
    except Exception as e:
        if audio_path is not None:
            _remove_quietly(audio_path)
        return f"Error generating answer: {e}", None