Spaces:
Running
Running
File size: 3,349 Bytes
e540abd 5b863e3 2644d07 e540abd 2a21b95 e540abd 50195a6 e540abd 04626e2 e540abd 04626e2 5b863e3 04626e2 e540abd 04626e2 5b863e3 e540abd 5b863e3 04626e2 5b863e3 04626e2 e540abd 5b863e3 e540abd 811b0b3 5b863e3 e540abd 5b863e3 e540abd 5b863e3 e540abd 5b863e3 e540abd 5b863e3 2644d07 04626e2 5b863e3 2644d07 04626e2 5f2bd1b 2644d07 e540abd 2644d07 5f2bd1b 2644d07 e540abd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
from fastapi import FastAPI, UploadFile, Form
from fastapi.responses import JSONResponse, FileResponse
import fitz # PyMuPDF for PDFs
import easyocr # OCR for images
import openpyxl # XLSX processing
import pptx # PPTX processing
import docx # DOCX processing
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
# ASGI application object; the route decorators in this file register on it.
app = FastAPI()
# Load AI models
# Both objects are constructed at import time, i.e. once per process, and are
# then reused as module-level globals.
# Extractive question-answering pipeline (a RoBERTa SQuAD 2.0 checkpoint,
# judging by the model name).
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
# EasyOCR reader configured for English and French.
# NOTE(review): no code visible in this part of the file uses `reader` --
# confirm whether image OCR is still needed before paying its load cost.
reader = easyocr.Reader(['en', 'fr'])
# Text Extraction
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF, one page per chunk.

    Args:
        pdf_file: Value handed straight to ``fitz.open`` (the caller in this
            file passes a filesystem path).

    Returns:
        The page texts joined with newlines, or a string starting with
        ``"Error reading PDF: "`` if opening or reading the document fails.
    """
    try:
        with fitz.open(pdf_file) as document:
            page_texts = [page.get_text("text") for page in document]
    except Exception as e:
        # Failures are reported in-band as a message, not raised.
        return f"Error reading PDF: {e}"
    return "\n".join(page_texts)
def extract_text_from_docx(docx_file):
    """Return the text of all non-blank paragraphs of a DOCX document.

    Args:
        docx_file: Value handed straight to ``docx.Document`` (the caller in
            this file passes a filesystem path).

    Returns:
        The paragraph texts joined with newlines, or a string starting with
        ``"Error reading DOCX: "`` if the document cannot be opened or read.
        Reporting failures in-band matches the PDF/PPTX/XLSX extractors in
        this file, so an unreadable upload no longer escapes as an unhandled
        exception.
    """
    try:
        document = docx.Document(docx_file)
        # Skip paragraphs that are empty or whitespace-only.
        paragraphs = [p.text for p in document.paragraphs if p.text.strip()]
    except Exception as e:
        return f"Error reading DOCX: {e}"
    return "\n".join(paragraphs)
def extract_text_from_pptx(pptx_file):
    """Return the text of every shape that exposes a ``text`` attribute.

    Slides are visited in presentation order and shapes in slide order.

    Args:
        pptx_file: Value handed straight to ``pptx.Presentation`` (the caller
            in this file passes a filesystem path).

    Returns:
        The shape texts joined with newlines, or a string starting with
        ``"Error reading PPTX: "`` if the presentation cannot be read.
    """
    pieces = []
    try:
        deck = pptx.Presentation(pptx_file)
        for slide in deck.slides:
            # Only shapes carrying a ``text`` attribute contribute.
            pieces.extend(
                shape.text for shape in slide.shapes if hasattr(shape, "text")
            )
    except Exception as e:
        return f"Error reading PPTX: {e}"
    return "\n".join(pieces)
def extract_text_from_xlsx(xlsx_file):
    """Return the cell values of every worksheet of an XLSX workbook as text.

    Each row becomes one line: its non-empty cells are converted with ``str``
    and joined by single spaces. Rows of all sheets are emitted in workbook
    order and separated by newlines.

    Args:
        xlsx_file: Value handed straight to ``openpyxl.load_workbook`` (the
            caller in this file passes a filesystem path).

    Returns:
        The extracted text, or a string starting with
        ``"Error reading XLSX: "`` if the workbook cannot be read.
    """
    lines = []
    try:
        workbook = openpyxl.load_workbook(xlsx_file)
        for sheet_name in workbook.sheetnames:
            worksheet = workbook[sheet_name]
            for row in worksheet.iter_rows(values_only=True):
                # Filter on "is empty" rather than truthiness so legitimate
                # falsy values (0, 0.0, False) are not dropped from the text.
                lines.append(
                    " ".join(
                        str(cell)
                        for cell in row
                        if cell is not None and cell != ""
                    )
                )
    except Exception as e:
        return f"Error reading XLSX: {e}"
    return "\n".join(lines)
# Main QA logic
def _remove_quietly(path):
    """Delete *path* if it is set, ignoring filesystem errors (best-effort cleanup)."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never replace the result being returned to the caller.
        pass


def answer_question_from_doc(file, question):
    """Answer *question* from an uploaded document and synthesize the answer as speech.

    Args:
        file: Upload object exposing ``filename`` (may be ``None``) and a
            binary stream ``file`` (e.g. a FastAPI ``UploadFile``).
        question: Question to ask about the document's text.

    Returns:
        A ``(message, audio_path)`` tuple. On success ``message`` is the
        answer and ``audio_path`` is the path of an MP3 file created in the
        default temporary directory. On any failure ``message`` describes the
        problem and ``audio_path`` is ``None``.
    """
    supported = ("pdf", "docx", "pptx", "xlsx")
    # The filename is client-controlled and optional: it is used ONLY to pick
    # an extractor, never to build a filesystem path.
    filename = file.filename or ""
    ext = filename.rsplit(".", 1)[-1].lower()
    if ext not in supported:
        return "Unsupported file format.", None

    extract = {
        "pdf": extract_text_from_pdf,
        "docx": extract_text_from_docx,
        "pptx": extract_text_from_pptx,
        "xlsx": extract_text_from_xlsx,
    }[ext]

    upload_path = None
    try:
        # A uniquely named temp file avoids path traversal and collisions
        # between concurrent uploads. The suffix is kept because some readers
        # look at the file extension. delete=False lets the extractor reopen
        # the file by path after this handle is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as upload:
            upload_path = upload.name
            upload.write(file.file.read())
        context = extract(upload_path)
    except Exception as e:
        return f"Error reading {ext.upper()}: {e}", None
    finally:
        # The uploaded copy is only needed during extraction.
        _remove_quietly(upload_path)

    # The extractors in this file report failures in-band as
    # "Error reading <EXT>: ..."; surface that message instead of passing it
    # to the QA model as if it were document text.
    if context.startswith(f"Error reading {ext.upper()}: "):
        return context, None
    if not context.strip():
        return "No text found in the document.", None

    try:
        result = qa_model({"question": question, "context": context})
        answer = result["answer"]
    except Exception as e:
        return f"Error generating answer: {e}", None

    # There is nothing to speak for a blank answer; report it explicitly
    # rather than letting speech synthesis fail on empty text.
    if not answer.strip():
        return "No answer found in the document.", None

    audio_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
            # Write through the open handle instead of reopening by name.
            gTTS(answer).write_to_fp(tmp)
    except Exception as e:
        # Do not leave an empty or partial MP3 behind when synthesis fails.
        _remove_quietly(audio_path)
        return f"Error generating answer: {e}", None
    return answer, audio_path
# API route for prediction
@app.post("/predict")
async def predict(file: UploadFile, question: str = Form(...)):
    """Answer a question about an uploaded document.

    Args:
        file: The uploaded document (multipart form field).
        question: The question to answer (form field, required).

    Returns:
        A JSON object with ``"answer"`` and, when speech was produced,
        ``"audio"``: the URL path under ``/audio/`` of the generated file.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    # Text extraction, model inference and speech synthesis are blocking
    # calls; running them in the thread pool keeps the event loop free to
    # serve other requests in the meantime.
    answer, audio_path = await run_in_threadpool(
        answer_question_from_doc, file, question
    )
    payload = {"answer": answer}
    if audio_path:
        # Expose only the file name; the audio route resolves it against the
        # temporary directory.
        payload["audio"] = f"/audio/{os.path.basename(audio_path)}"
    return JSONResponse(content=payload)
# Route to serve audio
@app.get("/audio/{filename}")
async def get_audio(filename: str):
    """Serve a generated MP3 file from the temporary directory.

    Args:
        filename: Bare file name of the audio file, as returned in the
            ``"audio"`` field of the prediction response.

    Returns:
        The file as ``audio/mpeg``, or a 404 JSON response when the name is
        not a plain ``.mp3`` file name or no such file exists.
    """
    # The name comes from the URL, so treat it as untrusted: accept only a
    # bare ".mp3" file name so other files in the temp directory (or outside
    # it) cannot be requested.
    safe_name = os.path.basename(filename)
    file_path = os.path.join(tempfile.gettempdir(), safe_name)
    if (
        safe_name != filename
        or not safe_name.lower().endswith(".mp3")
        or not os.path.isfile(file_path)
    ):
        return JSONResponse(
            status_code=404, content={"detail": "Audio file not found."}
        )
    return FileResponse(path=file_path, media_type="audio/mpeg")
|