Spaces:
Sleeping
Sleeping
File size: 3,032 Bytes
85abd3d 32dd4d2 7cab805 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 7cab805 d5d3aa6 7cab805 d5d3aa6 32dd4d2 7cab805 d5d3aa6 7cab805 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 7cab805 32dd4d2 85abd3d 8d67b19 6852c86 8d67b19 85abd3d 8d67b19 6852c86 85abd3d 6852c86 85abd3d 8d67b19 6852c86 cc49774 6852c86 8d67b19 6852c86 8d67b19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
"""from fastapi import FastAPI, UploadFile, File
from fastapi.responses import RedirectResponse, JSONResponse
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import tempfile
import torch
app = FastAPI()
# Load model
try:
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
USE_GIT = True
except Exception:
from transformers import pipeline
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
USE_GIT = False
def generate_caption(image_path):
try:
if USE_GIT:
image = Image.open(image_path)
inputs = processor(images=image, return_tensors="pt")
outputs = model.generate(**inputs, max_length=50)
return processor.batch_decode(outputs, skip_special_tokens=True)[0]
else:
result = captioner(image_path)
return result[0]['generated_text']
except Exception as e:
return f"Error generating caption: {str(e)}"
@app.post("/imagecaption/")
async def caption_from_frontend(file: UploadFile = File(...)):
contents = await file.read()
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
tmp.write(contents)
image_path = tmp.name
caption = generate_caption(image_path)
return JSONResponse({"caption": caption})
@app.get("/")
def home():
return RedirectResponse(url="/")"""
# appImage.py
import logging
import os
import tempfile

import torch
from gtts import gTTS
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, pipeline
# Load the captioning backend once at import time. The GIT checkpoint
# (microsoft/git-large-coco) is preferred; if loading it raises, fall back to
# the ViT-GPT2 image-to-text pipeline. USE_GIT records which backend is active
# so callers know whether to use `processor`/`model` or `captioner`.
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
except Exception:
    # Deliberately broad: any load failure degrades to the fallback model,
    # but the reason is logged instead of being silently discarded.
    logging.getLogger(__name__).warning(
        "Could not load microsoft/git-large-coco; falling back to "
        "nlpconnect/vit-gpt2-image-captioning",
        exc_info=True,
    )
    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    USE_GIT = False
else:
    USE_GIT = True
async def caption_image(file):
    """Caption an uploaded image and synthesize the caption as speech.

    The upload is written to a temporary ``.png`` file, captioned with the GIT
    model when ``USE_GIT`` is true (otherwise with the fallback ``captioner``
    pipeline), and the caption is then passed to ``text_to_speech``. The
    temporary image file is removed before returning, whether or not
    captioning succeeded.

    Args:
        file: Object exposing an awaitable ``read()`` that returns the image
            bytes (presumably a FastAPI ``UploadFile`` -- confirm with caller).

    Returns:
        A dict with a ``"caption"`` key and, when ``text_to_speech`` returned a
        non-empty path, an ``"audio"`` key of the form ``/files/<basename>``.

    Raises:
        Any exception raised while reading, decoding, or captioning the image
        propagates to the caller unchanged.
    """
    contents = await file.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(contents)
        image_path = tmp.name

    try:
        if USE_GIT:
            # convert() yields an independent, fully loaded image, so the
            # file handle can be released as soon as the `with` exits.
            with Image.open(image_path) as opened:
                image = opened.convert('RGB')
            pixel_values = processor(images=image, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values, max_length=500)
            caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        else:
            captions = captioner(image_path)
            caption = captions[0]['generated_text'] if captions else "No caption generated."
    finally:
        # The temp file was created with delete=False, so it must be removed
        # explicitly or every request leaves a file behind.
        try:
            os.remove(image_path)
        except OSError:
            # Best-effort cleanup; never mask the captioning result or error.
            pass

    audio_path = text_to_speech(caption)
    result = {"caption": caption}
    if audio_path:
        result["audio"] = f"/files/{os.path.basename(audio_path)}"
    return result
def text_to_speech(text: str):
    """Synthesize *text* to an MP3 file with gTTS and return the file's path.

    Args:
        text: The sentence to speak.

    Returns:
        Path of the generated ``.mp3`` temporary file, or ``""`` when there is
        nothing to speak or synthesis fails. Callers treat the empty string as
        "no audio available".
    """
    # Nothing to speak: skip synthesis and avoid creating an empty temp file.
    if not text or not text.strip():
        return ""

    audio_path = ""
    try:
        tts = gTTS(text)
        # Reserve a unique path, then close the handle before gTTS writes to
        # it so the descriptor is not leaked and the file is not held open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
        tts.save(audio_path)
        return audio_path
    except Exception:
        # Audio is optional for callers, so failures stay non-fatal, but they
        # are logged rather than silently swallowed.
        logging.getLogger(__name__).exception("Text-to-speech synthesis failed")
        if audio_path:
            # Do not leave a partial or empty MP3 behind.
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return ""
|