"""from fastapi import FastAPI, UploadFile, File from fastapi.responses import RedirectResponse, JSONResponse from transformers import AutoProcessor, AutoModelForCausalLM from PIL import Image import tempfile import torch app = FastAPI() # Load model try: processor = AutoProcessor.from_pretrained("microsoft/git-large-coco") model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco") USE_GIT = True except Exception: from transformers import pipeline captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning") USE_GIT = False def generate_caption(image_path): try: if USE_GIT: image = Image.open(image_path) inputs = processor(images=image, return_tensors="pt") outputs = model.generate(**inputs, max_length=50) return processor.batch_decode(outputs, skip_special_tokens=True)[0] else: result = captioner(image_path) return result[0]['generated_text'] except Exception as e: return f"Error generating caption: {str(e)}" @app.post("/imagecaption/") async def caption_from_frontend(file: UploadFile = File(...)): contents = await file.read() with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp: tmp.write(contents) image_path = tmp.name caption = generate_caption(image_path) return JSONResponse({"caption": caption}) @app.get("/") def home(): return RedirectResponse(url="/")""" # appImage.py from transformers import pipeline import tempfile, os from PIL import Image from gtts import gTTS captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning") async def caption_image(file): contents = await file.read() with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp: tmp.write(contents) image_path = tmp.name captions = captioner(image_path) caption = captions[0]['generated_text'] if captions else "No caption generated." audio_path = text_to_speech(caption) result = {"caption": caption} if audio_path: result["audioUrl"] = f"/files/{os.path.basename(audio_path)}" return result def text_to_speech(text: str): try: tts = gTTS(text) temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") tts.save(temp_audio.name) return temp_audio.name except: return ""