Spaces:

ikraamkb
/

Summarization

Sleeping

App Files Files Community

ikraamkb committed on Apr 25

Commit

7cab805

verified ·

1 Parent(s): c48e937

Update appImage.py

Browse files

Files changed (1) hide show

appImage.py +47 -66

appImage.py CHANGED Viewed

@@ -1,92 +1,73 @@
-from fastapi import FastAPI, UploadFile, File, HTTPException
-from fastapi.responses import JSONResponse, FileResponse
-from fastapi.middleware.cors import CORSMiddleware
-from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
 from PIL import Image
 import torch
-import os
-import tempfile
-from gtts import gTTS
 app = FastAPI()
-# CORS Configuration
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# Initialize models
 try:
     processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
     git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
-    git_model.eval()
     USE_GIT = True
 except Exception as e:
-    print(f"[INFO] Falling back to ViT: {e}")
     captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
     USE_GIT = False
-def generate_caption(image_path: str) -> str:
     try:
         if USE_GIT:
-            image = Image.open(image_path).convert("RGB")
             inputs = processor(images=image, return_tensors="pt")
             outputs = git_model.generate(**inputs, max_length=50)
-            caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]
         else:
             result = captioner(image_path)
-            caption = result[0]['generated_text']
-        return caption
     except Exception as e:
-        raise Exception(f"Caption generation failed: {str(e)}")
-@app.post("/imagecaption/")
-async def caption_image(file: UploadFile = File(...)):
-    # Validate file type
-    valid_types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
-    if file.content_type not in valid_types:
-        raise HTTPException(
-            status_code=400,
-            detail="Please upload a valid image (JPEG, PNG, GIF, or WEBP)"
-        )
     try:
-        # Save temp file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp:
-            shutil.copyfileobj(file.file, temp)
-            temp_path = temp.name
-        # Generate caption
-        caption = generate_caption(temp_path)
-        # Generate audio
-        audio_path = os.path.join(tempfile.gettempdir(), f"caption_{os.path.basename(temp_path)}.mp3")
-        tts = gTTS(text=caption)
-        tts.save(audio_path)
-        return {
-            "answer": caption,
-            "audio": f"/files/{os.path.basename(audio_path)}"
-        }
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(
-            status_code=500,
-            detail=str(e)
-        )
-    finally:
-        if 'temp_path' in locals() and os.path.exists(temp_path):
-            os.unlink(temp_path)
-@app.get("/files/{filename}")
-async def get_file(filename: str):
-    file_path = os.path.join(tempfile.gettempdir(), filename)
-    if os.path.exists(file_path):
-        return FileResponse(file_path)
-    raise HTTPException(status_code=404, detail="File not found")

+import gradio as gr
+from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import torch
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+# Initialize FastAPI
 app = FastAPI()
+# Load models - prefer microsoft/git-large-coco, fall back to the smaller ViT-GPT2 captioner
 try:
+    # Load the better model
     processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
     git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+    print("Successfully loaded microsoft/git-large-coco model")
     USE_GIT = True
 except Exception as e:
+    print(f"Failed to load GIT model: {e}. Falling back to smaller model")
+    # `pipeline` is not part of this revision's module-level transformers
+    # import, so import it here; without it the fallback raises NameError.
+    from transformers import pipeline
     captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
     USE_GIT = False
+def generate_caption(image_path):
+    """Generate a caption for the image at image_path using the best available model.
+
+    Uses the GIT processor/model pair when USE_GIT is true, otherwise the
+    fallback `captioner` pipeline. Never raises: on any failure the error is
+    printed and the string "Could not generate caption" is returned.
+    """
     try:
         if USE_GIT:
+            # Normalize to 3-channel RGB (as the previous revision did) so
+            # palette, grayscale and RGBA uploads are handled consistently.
+            image = Image.open(image_path).convert("RGB")
             inputs = processor(images=image, return_tensors="pt")
             outputs = git_model.generate(**inputs, max_length=50)
+            return processor.batch_decode(outputs, skip_special_tokens=True)[0]
         else:
             result = captioner(image_path)
+            return result[0]['generated_text']
     except Exception as e:
+        print(f"Caption generation error: {e}")
+        return "Could not generate caption"
+def process_image(file_path: str):
+    """Build the text shown in the Gradio result box for an uploaded image.
+
+    Returns a prompt to upload when file_path is empty or None, the caption
+    under a heading on success, or an error message if captioning raises.
+    """
+    if file_path:
+        try:
+            return f"📷 Image Caption:\n{generate_caption(file_path)}"
+        except Exception as err:
+            return f"Error processing image: {str(err)}"
+    return "Please upload an image first"
+# Gradio Interface
+# One row with two columns: the first holds the image upload and the trigger
+# button, the second holds the textbox that displays the caption.
+with gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🖼️ Image Captioning Service")
+    gr.Markdown("Upload an image to get automatic captioning")
+    with gr.Row():
+        with gr.Column():
+            # type="filepath": process_image is written to receive a path
+            # (its parameter is annotated file_path: str).
+            image_input = gr.Image(label="Upload Image", type="filepath")
+            analyze_btn = gr.Button("Generate Caption", variant="primary")
+        with gr.Column():
+            output = gr.Textbox(label="Caption Result", lines=5)
+    # Clicking the button runs process_image on the uploaded image and
+    # writes its return value into the result textbox.
+    analyze_btn.click(
+        fn=process_image,
+        inputs=[image_input],
+        outputs=[output]
+    )
+# Mount Gradio app to FastAPI
+app = gr.mount_gradio_app(app, demo, path="/")
+# NOTE(review): this handler redirects "/" to "/" itself, and the Gradio app is
+# already mounted at "/" just above. Presumably the mount serves the root and
+# this route is never reached; confirm, then consider removing it or mounting
+# Gradio at a sub-path and redirecting there.
+@app.get("/")
+def redirect_to_interface():
+    """Return a redirect response pointing at "/"."""
+    return RedirectResponse(url="/")