Spaces:
Running
Running
Update appImage.py
Browse files- appImage.py +85 -9
appImage.py
CHANGED
@@ -1,16 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
@app.post("/imagecaption/")
|
2 |
-
async def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
try:
|
4 |
-
#
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
|
7 |
|
|
|
|
|
|
|
|
|
|
|
8 |
return {
|
9 |
"answer": caption,
|
10 |
-
"audio": audio_path
|
11 |
}
|
|
|
|
|
|
|
12 |
except Exception as e:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import shutil
import tempfile

import torch
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
from gtts import gTTS
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
|
10 |
+
|
11 |
+
app = FastAPI()

# CORS configuration.
# Any origin may call this API. Credentials (cookies / Authorization headers)
# are disabled because a wildcard origin must not be combined with
# allow_credentials=True: FastAPI's CORS documentation requires explicit
# origins in that case, and browsers reject "*" on credentialed responses.
# The endpoints in this file use no cookies or auth, so nothing is lost.
# If credentialed requests are ever needed, list the allowed origins
# explicitly and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
21 |
+
|
22 |
+
# Initialize models
# The GIT checkpoint is tried first. If loading it raises for any reason, the
# ViT-GPT2 image-to-text pipeline is created instead, and USE_GIT records
# which backend is available.
_GIT_CHECKPOINT = "microsoft/git-large-coco"
_FALLBACK_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

try:
    processor = AutoProcessor.from_pretrained(_GIT_CHECKPOINT)
    git_model = AutoModelForCausalLM.from_pretrained(_GIT_CHECKPOINT)
    git_model.eval()
except Exception as load_error:
    print(f"[INFO] Falling back to ViT: {load_error}")
    captioner = pipeline("image-to-text", model=_FALLBACK_CHECKPOINT)
    USE_GIT = False
else:
    USE_GIT = True
|
32 |
+
|
33 |
+
def generate_caption(image_path: str) -> str:
    """Return a caption for the image stored at *image_path*.

    When ``USE_GIT`` is true the image is opened with PIL, converted to RGB
    and captioned with ``processor`` / ``git_model``; otherwise the path is
    handed to the ``captioner`` pipeline.

    Args:
        image_path: Filesystem path of the image to describe.

    Returns:
        The generated caption text.

    Raises:
        RuntimeError: If any step fails. The message is prefixed with
            "Caption generation failed: " and the original exception is
            chained as ``__cause__`` so the real traceback is preserved.
    """
    try:
        if USE_GIT:
            # Close the file handle deterministically; convert() returns an
            # independent in-memory copy, so it stays usable after the block.
            with Image.open(image_path) as source:
                image = source.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = git_model.generate(**inputs, max_length=50)
            caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        else:
            result = captioner(image_path)
            caption = result[0]['generated_text']
        return caption
    except Exception as e:
        raise RuntimeError(f"Caption generation failed: {e}") from e
|
46 |
+
|
47 |
@app.post("/imagecaption/")
async def caption_image(file: UploadFile = File(...)):
    """Caption an uploaded image and synthesize the caption as speech.

    The upload is written to a temporary file, captioned with
    ``generate_caption``, and the caption is saved as an MP3 in the system
    temp directory via gTTS.

    Args:
        file: The uploaded image; its content type must be JPEG, PNG, GIF
            or WEBP.

    Returns:
        A dict with ``"answer"`` (the caption) and ``"audio"`` (a
        ``/files/<name>`` URL path for the generated MP3).

    Raises:
        HTTPException: 400 for an unsupported content type; 500 with the
            error text if saving, captioning or speech synthesis fails.
    """
    # Validate file type
    valid_types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
    if file.content_type not in valid_types:
        raise HTTPException(
            status_code=400,
            detail="Please upload a valid image (JPEG, PNG, GIF, or WEBP)"
        )

    # Bound before the try block so the cleanup in `finally` can always test it.
    temp_path = None
    try:
        # Save temp file. The client may omit the filename, so fall back to
        # an empty name (no suffix) instead of failing in splitext(None).
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp:
            # Record the path before copying: the file is created with
            # delete=False, so a failed copy must still be cleaned up.
            temp_path = temp.name
            shutil.copyfileobj(file.file, temp)

        # Generate caption
        # NOTE(review): captioning and gTTS are blocking calls inside an
        # async handler; consider offloading them to a worker thread.
        caption = generate_caption(temp_path)

        # Generate audio
        audio_path = os.path.join(tempfile.gettempdir(), f"caption_{os.path.basename(temp_path)}.mp3")
        tts = gTTS(text=caption)
        tts.save(audio_path)

        return {
            "answer": caption,
            "audio": f"/files/{os.path.basename(audio_path)}"
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=str(e)
        ) from e
    finally:
        # Only the uploaded image is removed here; the MP3 is left in place
        # so it can be fetched afterwards.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
|
86 |
+
|
87 |
+
@app.get("/files/{filename}")
async def get_file(filename: str):
    """Serve a generated caption MP3 from the system temp directory.

    Args:
        filename: Name of the audio file, as returned in the ``"audio"``
            field of the ``/imagecaption/`` response.

    Returns:
        A ``FileResponse`` streaming the requested file.

    Raises:
        HTTPException: 404 if the name is not a plain ``caption_*.mp3``
            file name or no such regular file exists.
    """
    # The temp directory is shared with other programs, so only names this
    # service produces are served: a bare file name (no directory parts)
    # of the form caption_<something>.mp3.
    safe_name = os.path.basename(filename)
    is_generated_audio = (
        safe_name == filename
        and safe_name.startswith("caption_")
        and safe_name.endswith(".mp3")
    )
    if not is_generated_audio:
        raise HTTPException(status_code=404, detail="File not found")

    file_path = os.path.join(tempfile.gettempdir(), safe_name)
    # isfile() rather than exists(): a directory must not reach FileResponse.
    if os.path.isfile(file_path):
        return FileResponse(file_path)
    raise HTTPException(status_code=404, detail="File not found")
|