"""Image captioning service: a FastAPI JSON endpoint plus a Gradio demo UI.

Routes defined in this module:

* ``POST /imagecaption/``    -- caption an uploaded image and synthesize the
  caption to an MP3 file.
* ``GET /files/{file_name}`` -- download a previously generated MP3 file.
* ``/``                      -- Gradio demo mounted on the FastAPI app.

The captioning model is loaded once at import time:
``microsoft/git-large-coco`` is tried first, and the
``nlpconnect/vit-gpt2-image-captioning`` pipeline is used as a fallback.
"""

import logging
import os
import tempfile
import uuid

import gradio as gr
import torch
from fastapi import FastAPI, Form, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse, RedirectResponse
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, pipeline

logger = logging.getLogger(__name__)

# Generated audio lives in its own directory so that /files/ can only ever
# expose files this service created, not arbitrary content of the shared
# system temp directory.
MEDIA_DIR = os.path.join(tempfile.gettempdir(), "imagecaption_media")
os.makedirs(MEDIA_DIR, mode=0o700, exist_ok=True)

# Initialize FastAPI
app = FastAPI()

# Enable CORS so that frontend JavaScript can call the backend.
# NOTE(review): a wildcard origin combined with allow_credentials=True is
# very permissive -- confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the caption model. USE_GIT records which backend is active:
# True  -> `processor` / `git_model` are defined,
# False -> `captioner` is defined.
USE_GIT = False
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    git_model.eval()
    USE_GIT = True
except Exception as e:
    print(f"[INFO] Falling back to ViT: {e}")
    captioner = pipeline(
        "image-to-text", model="nlpconnect/vit-gpt2-image-captioning"
    )


def generate_caption(image_path: str) -> str:
    """Return a caption for the image stored at *image_path*.

    Uses the GIT model when it was loaded successfully, otherwise the
    fallback pipeline.

    Args:
        image_path: Filesystem path of the image to caption.

    Returns:
        The generated caption. On any failure the exception is not raised;
        a string of the form ``"Error: <message>"`` is returned instead, so
        callers always receive text they can display.
    """
    try:
        if USE_GIT:
            # convert() produces an independent in-memory copy, so the file
            # handle can be released as soon as the `with` block exits.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = git_model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]
        result = captioner(image_path)
        return result[0]["generated_text"]
    except Exception as e:
        return f"Error: {str(e)}"


def process_image(file_path: str):
    """Gradio callback: caption the uploaded image, or prompt for an upload.

    Args:
        file_path: Path supplied by the ``gr.Image(type="filepath")`` input;
            falsy when nothing has been uploaded.

    Returns:
        Text for the result textbox.
    """
    if not file_path:
        return "Please upload an image."
    return f"📷 Image Caption:\n{generate_caption(file_path)}"


def _safe_suffix(filename) -> str:
    """Return a short alphanumeric extension taken from *filename*, or ``""``.

    Only the extension of the client-supplied name is reused; the rest of
    the name never reaches the filesystem.
    """
    ext = os.path.splitext(os.path.basename(filename or ""))[1]
    if 1 < len(ext) <= 10 and ext[1:].isalnum():
        return ext
    return ""


@app.post("/imagecaption/")
async def caption_from_frontend(file: UploadFile, question: str = Form("")):
    """Caption an uploaded image and synthesize the caption to speech.

    Args:
        file: The uploaded image.
        question: Form field accepted for frontend compatibility; it is not
            used by this handler.

    Returns:
        ``{"answer": <caption>, "audio": "/files/<name>.mp3"}`` on success,
        or a 500 JSON response ``{"error": <message>}`` on failure.
    """
    tmp_path = None
    try:
        contents = await file.read()

        # The client-controlled filename is never used as a path: mkstemp
        # picks a unique name, which rules out path traversal, collisions
        # between concurrent uploads, and a missing filename.
        fd, tmp_path = tempfile.mkstemp(
            prefix="upload_", suffix=_safe_suffix(file.filename)
        )
        with os.fdopen(fd, "wb") as f:
            f.write(contents)

        caption = generate_caption(tmp_path)

        # Imported lazily, as in the original, so the module can be imported
        # without gTTS installed.
        from gtts import gTTS

        audio_name = f"{uuid.uuid4().hex}.mp3"
        tts = gTTS(text=caption)
        tts.save(os.path.join(MEDIA_DIR, audio_name))

        return {
            "answer": caption,
            "audio": f"/files/{audio_name}",
        }
    except Exception as e:
        logger.exception("Image captioning request failed")
        return JSONResponse({"error": str(e)}, status_code=500)
    finally:
        # The uploaded image is only needed while captioning; remove it so
        # the temp directory does not grow with every request.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass


@app.get("/files/{file_name}")
async def serve_file(file_name: str):
    """Serve a generated audio file from the service's media directory.

    Args:
        file_name: Plain file name as returned in the ``audio`` field of
            ``POST /imagecaption/`` (without the ``/files/`` prefix).

    Returns:
        The file, or a 404 JSON response when the name is not a plain file
        name or no such file exists.
    """
    not_found = JSONResponse({"error": "File not found"}, status_code=404)

    # Accept bare file names only; anything carrying a directory component
    # or a dot-directory could escape MEDIA_DIR.
    if file_name in ("", ".", "..") or file_name != os.path.basename(file_name):
        return not_found

    path = os.path.join(MEDIA_DIR, file_name)
    if os.path.isfile(path):
        return FileResponse(path)
    return not_found


# Gradio demo for manual testing
with gr.Blocks(title="🖼️ Image Captioning") as demo:
    gr.Markdown("# 🖼️ Image Captioning Demo")
    image_input = gr.Image(type="filepath", label="Upload Image")
    result_box = gr.Textbox(label="Caption")
    btn = gr.Button("Generate Caption")
    btn.click(fn=process_image, inputs=[image_input], outputs=[result_box])

app = gr.mount_gradio_app(app, demo, path="/")


# Optional root redirect to the frontend.
# NOTE(review): this route is registered after the Gradio app was mounted at
# "/", so it appears to be shadowed by that mount and never reached --
# confirm, and either drop it or mount Gradio on a sub-path.
@app.get("/")
def redirect_to_frontend():
    """Redirect the root URL to the static frontend page."""
    return RedirectResponse(url="/templates/home.html")