Spaces:
Running
Running
File size: 1,983 Bytes
09a2e2c 0000b07 09a2e2c 5e640af 09a2e2c 0000b07 09a2e2c 5e640af 09a2e2c 5e640af 09a2e2c 5e640af 09a2e2c 5e640af 09a2e2c 5e640af 297dd8a 09a2e2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
from fastapi import FastAPI
from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
import os
from PIL import Image
from transformers import ViltProcessor, ViltForQuestionAnswering, pipeline
from gtts import gTTS
import easyocr
import torch
import tempfile
import numpy as np
from io import BytesIO
# Module-level singletons: everything below is constructed once at import time
# and reused by the functions and route handlers defined in this module.
# FastAPI application instance that the route decorators register on.
app = FastAPI()
# ViLT processor and model, both loaded from the same VQA checkpoint; used
# together for the default "vqa" path of answer_question_from_image.
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
# Image-to-text pipeline used for the "caption" path.
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
# EasyOCR reader configured for English and French; used for the "ocr" path.
reader = easyocr.Reader(['en', 'fr'])
def classify_question(question: str):
    """Decide which handler should answer *question*.

    Args:
        question: The user's question about the image.

    Returns:
        ``"ocr"`` when the question contains a text-reading keyword as a
        whole word, ``"caption"`` when it contains a captioning phrase, and
        ``"vqa"`` otherwise. The OCR keywords are checked first.
    """
    q = question.lower()

    # Break the question into alphanumeric words so OCR keywords are matched
    # as whole words. A plain substring test sends questions containing
    # "bread", "already", "texture" or "essay" to OCR by accident.
    words = set("".join(ch if ch.isalnum() else " " for ch in q).split())
    ocr_words = {
        "text", "texts",
        "say", "says",
        "written",
        "read", "reads", "reading",
    }
    if not words.isdisjoint(ocr_words):
        return "ocr"

    # Caption triggers include a multi-word phrase, so they stay substring
    # matches (this also keeps "captions" / "described" working).
    if any(w in q for w in ("caption", "describe", "what is in the image")):
        return "caption"

    return "vqa"
def _save_speech_to_tempfile(text):
    """Synthesize *text* with gTTS into a new temporary MP3 and return its path."""
    tts = gTTS(text=text)
    # Only the unique path is needed; close our own handle before gTTS
    # writes to the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        audio_path = tmp.name
    try:
        tts.save(audio_path)
    except Exception:
        # The file was created with delete=False, so nothing else will remove
        # it; without this every failed synthesis leaves an orphan on disk.
        try:
            os.remove(audio_path)
        except OSError:
            pass  # best-effort cleanup; the original error is what matters
        raise
    return audio_path


def answer_question_from_image(image, question):
    """Answer *question* about *image* and render the answer as speech.

    The question is routed by ``classify_question`` to one of three paths:
    OCR via ``reader``, captioning via ``captioner``, or visual question
    answering via the ViLT processor/model pair.

    Args:
        image: The image to inspect. It is converted with ``np.array`` for
            OCR and passed as-is to the captioner and the ViLT processor
            (presumably a PIL image; confirm against the caller).
        question: The question text. ``None``, empty and whitespace-only
            values are treated as missing.

    Returns:
        A ``(answer, audio_path)`` tuple, where ``audio_path`` is a temporary
        ``.mp3`` file the caller is responsible for deleting. If an input is
        missing, or any step raises, the tuple is ``(message, None)``.
    """
    # Guard against question=None as well as blank strings; calling .strip()
    # on None would raise before the error handling below is reached.
    if image is None or not question or not question.strip():
        return "Please upload an image and ask a question.", None

    mode = classify_question(question)
    try:
        if mode == "ocr":
            detections = reader.readtext(np.array(image))
            # The second element of each detection is joined as the text.
            answer = " ".join(entry[1] for entry in detections) or "No readable text found."
        elif mode == "caption":
            answer = captioner(image)[0]['generated_text']
        else:
            inputs = vqa_processor(image, question, return_tensors="pt")
            with torch.no_grad():
                outputs = vqa_model(**inputs)
            predicted_id = outputs.logits.argmax(-1).item()
            answer = vqa_model.config.id2label[predicted_id]

        return answer, _save_speech_to_tempfile(answer)
    except Exception as e:
        # Boundary handler: report the failure as the answer text, no audio.
        return f"Error: {e}", None
@app.get("/")
def home():
    """Redirect requests for the root path to ``/templates/home.html``."""
    landing_page = "/templates/home.html"
    return RedirectResponse(url=landing_page)