"""from fastapi import FastAPI, UploadFile, File | |
from fastapi.responses import RedirectResponse, JSONResponse | |
from transformers import AutoProcessor, AutoModelForCausalLM | |
from PIL import Image | |
import tempfile | |
import torch | |
app = FastAPI() | |
# Load model | |
try: | |
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco") | |
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco") | |
USE_GIT = True | |
except Exception: | |
from transformers import pipeline | |
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning") | |
USE_GIT = False | |
def generate_caption(image_path): | |
try: | |
if USE_GIT: | |
image = Image.open(image_path) | |
inputs = processor(images=image, return_tensors="pt") | |
outputs = model.generate(**inputs, max_length=50) | |
return processor.batch_decode(outputs, skip_special_tokens=True)[0] | |
else: | |
result = captioner(image_path) | |
return result[0]['generated_text'] | |
except Exception as e: | |
return f"Error generating caption: {str(e)}" | |
@app.post("/imagecaption/") | |
async def caption_from_frontend(file: UploadFile = File(...)): | |
contents = await file.read() | |
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp: | |
tmp.write(contents) | |
image_path = tmp.name | |
caption = generate_caption(image_path) | |
return JSONResponse({"caption": caption}) | |
@app.get("/") | |
def home(): | |
    # Send the bare root path to the interactive API docs.
    return RedirectResponse(url="/docs")"""
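# For reference, a minimal client-side sketch of how the "/imagecaption/" endpoint
# above could be exercised; the localhost URL and file name are assumptions, not
# part of the original app:
#
#   import requests
#   with open("photo.png", "rb") as f:
#       resp = requests.post("http://localhost:8000/imagecaption/",
#                            files={"file": ("photo.png", f, "image/png")})
#   print(resp.json()["caption"])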
# appImage.py
from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
import tempfile, os
from PIL import Image
from gtts import gTTS
import torch

# Prefer the GIT captioning model; fall back to the ViT-GPT2 pipeline if loading fails.
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    USE_GIT = True
except Exception:
    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    USE_GIT = False
async def caption_image(file):
    """Caption an uploaded image and synthesize the caption as speech."""
    contents = await file.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(contents)
        image_path = tmp.name
    try:
        if USE_GIT:
            image = Image.open(image_path).convert('RGB')
            pixel_values = processor(images=image, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values, max_length=500)
            caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        else:
            captions = captioner(image_path)
            caption = captions[0]['generated_text'] if captions else "No caption generated."
    finally:
        os.remove(image_path)  # clean up the temporary image
    audio_path = text_to_speech(caption)
    result = {"caption": caption}
    if audio_path:
        result["audio"] = f"/files/{os.path.basename(audio_path)}"
    return result
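# A minimal sketch of how caption_image might be exposed from the main FastAPI app.
# The module import, `app` object, and route path are assumptions; the actual
# front-end wiring is not shown in this file:
#
#   from fastapi import FastAPI, UploadFile, File
#   from fastapi.responses import JSONResponse
#   import appImage
#
#   app = FastAPI()
#
#   @app.post("/imagecaption/")
#   async def imagecaption(file: UploadFile = File(...)):
#       return JSONResponse(await appImage.caption_image(file))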
def text_to_speech(text: str):
    """Convert caption text to an MP3 with gTTS; return the file path, or "" on failure."""
    try:
        tts = gTTS(text)
        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        tts.save(temp_audio.name)
        return temp_audio.name
    except Exception:
        return ""