Spaces:
Running
Running
Update appImage.py
Browse files- appImage.py +47 -66
appImage.py
CHANGED
@@ -1,92 +1,73 @@
|
|
1 |
-
|
2 |
-
from
|
3 |
-
from fastapi.middleware.cors import CORSMiddleware
|
4 |
-
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
|
5 |
from PIL import Image
|
6 |
import torch
|
7 |
-
import
|
8 |
-
import
|
9 |
-
from gtts import gTTS
|
10 |
|
|
|
11 |
app = FastAPI()
|
12 |
|
13 |
-
#
|
14 |
-
app.add_middleware(
|
15 |
-
CORSMiddleware,
|
16 |
-
allow_origins=["*"],
|
17 |
-
allow_credentials=True,
|
18 |
-
allow_methods=["*"],
|
19 |
-
allow_headers=["*"],
|
20 |
-
)
|
21 |
-
|
22 |
-
# Initialize models
|
23 |
try:
|
|
|
24 |
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
|
25 |
git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
|
26 |
-
|
27 |
USE_GIT = True
|
28 |
except Exception as e:
|
29 |
-
print(f"
|
30 |
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
31 |
USE_GIT = False
|
32 |
|
33 |
-
def generate_caption(image_path
|
|
|
34 |
try:
|
35 |
if USE_GIT:
|
36 |
-
image = Image.open(image_path)
|
37 |
inputs = processor(images=image, return_tensors="pt")
|
38 |
outputs = git_model.generate(**inputs, max_length=50)
|
39 |
-
|
40 |
else:
|
41 |
result = captioner(image_path)
|
42 |
-
|
43 |
-
return caption
|
44 |
except Exception as e:
|
45 |
-
|
46 |
-
|
47 |
-
@app.post("/imagecaption/")
|
48 |
-
async def caption_image(file: UploadFile = File(...)):
|
49 |
-
# Validate file type
|
50 |
-
valid_types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
|
51 |
-
if file.content_type not in valid_types:
|
52 |
-
raise HTTPException(
|
53 |
-
status_code=400,
|
54 |
-
detail="Please upload a valid image (JPEG, PNG, GIF, or WEBP)"
|
55 |
-
)
|
56 |
|
|
|
|
|
|
|
|
|
|
|
57 |
try:
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
"audio": f"/files/{os.path.basename(audio_path)}"
|
74 |
-
}
|
75 |
-
|
76 |
-
except HTTPException:
|
77 |
-
raise
|
78 |
-
except Exception as e:
|
79 |
-
raise HTTPException(
|
80 |
-
status_code=500,
|
81 |
-
detail=str(e)
|
82 |
-
)
|
83 |
-
finally:
|
84 |
-
if 'temp_path' in locals() and os.path.exists(temp_path):
|
85 |
-
os.unlink(temp_path)
|
86 |
|
87 |
-
@app.get("/
|
88 |
-
|
89 |
-
|
90 |
-
if os.path.exists(file_path):
|
91 |
-
return FileResponse(file_path)
|
92 |
-
raise HTTPException(status_code=404, detail="File not found")
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
|
|
|
|
3 |
from PIL import Image
|
4 |
import torch
|
5 |
+
from fastapi import FastAPI
|
6 |
+
from fastapi.responses import RedirectResponse
|
|
|
7 |
|
8 |
+
# Initialize FastAPI
|
9 |
app = FastAPI()
|
10 |
|
11 |
+
# Load models - Using microsoft/git-large-coco
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
try:
    # Preferred captioning model.
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    print("Successfully loaded microsoft/git-large-coco model")
    USE_GIT = True
except Exception as e:
    print(f"Failed to load GIT model: {e}. Falling back to smaller model")
    # `pipeline` is not part of this file's top-level transformers import,
    # so bring it in here; without it the fallback raises NameError.
    from transformers import pipeline

    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    USE_GIT = False
|
22 |
|
23 |
+
def generate_caption(image_path):
    """Generate a caption for the image at ``image_path``.

    Uses the GIT model when ``USE_GIT`` is true, otherwise the fallback
    ``captioner`` pipeline. Any ``Exception`` raised along the way is
    printed and replaced by the string "Could not generate caption".
    """
    try:
        if USE_GIT:
            # Release the file handle once the processor has consumed the image.
            with Image.open(image_path) as image:
                inputs = processor(images=image, return_tensors="pt")
            outputs = git_model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]
        result = captioner(image_path)
        return result[0]['generated_text']
    except Exception as e:
        print(f"Caption generation error: {e}")
        return "Could not generate caption"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
def process_image(file_path: str):
    """Build the text shown in the Gradio output box for an uploaded image.

    Returns a prompt to upload when ``file_path`` is empty, the formatted
    caption when captioning succeeds, or an error message if it raises.
    """
    if file_path:
        try:
            caption = generate_caption(file_path)
            message = f"📷 Image Caption:\n{caption}"
        except Exception as e:
            message = f"Error processing image: {str(e)}"
        return message
    return "Please upload an image first"
|
48 |
|
49 |
+
# Gradio Interface
|
50 |
+
with gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🖼️ Image Captioning Service")
    gr.Markdown("Upload an image to get automatic captioning")

    # One row, two columns: upload and button first, caption text second.
    with gr.Row():
        with gr.Column():
            # type="filepath" passes process_image a path string.
            image_input = gr.Image(type="filepath", label="Upload Image")
            analyze_btn = gr.Button("Generate Caption", variant="primary")
        with gr.Column():
            output = gr.Textbox(lines=5, label="Caption Result")

    # Clicking the button runs process_image on the upload and shows its result.
    analyze_btn.click(process_image, [image_input], [output])
|
67 |
|
68 |
+
# Mount Gradio app to FastAPI
|
69 |
+
app = gr.mount_gradio_app(app=app, blocks=demo, path="/")


# NOTE(review): this route is registered after the Gradio app is mounted at
# the same path "/", and it redirects to "/" itself. It is presumably
# shadowed by the mount (and would loop if it were ever reached) - confirm
# whether the UI should move to a sub-path or this route should be dropped.
@app.get("/")
def redirect_to_interface():
    """Answer GET "/" with a redirect to "/"."""
    return RedirectResponse("/")
|
|
|
|
|
|