ikraamkb commited on
Commit
7cab805
·
verified ·
1 Parent(s): c48e937

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +47 -66
appImage.py CHANGED
@@ -1,92 +1,73 @@
1
- from fastapi import FastAPI, UploadFile, File, HTTPException
2
- from fastapi.responses import JSONResponse, FileResponse
3
- from fastapi.middleware.cors import CORSMiddleware
4
- from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
5
  from PIL import Image
6
  import torch
7
- import os
8
- import tempfile
9
- from gtts import gTTS
10
 
 
11
  app = FastAPI()
12
 
13
- # CORS Configuration
14
- app.add_middleware(
15
- CORSMiddleware,
16
- allow_origins=["*"],
17
- allow_credentials=True,
18
- allow_methods=["*"],
19
- allow_headers=["*"],
20
- )
21
-
22
- # Initialize models
23
  try:
 
24
  processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
25
  git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
26
- git_model.eval()
27
  USE_GIT = True
28
  except Exception as e:
29
- print(f"[INFO] Falling back to ViT: {e}")
30
  captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
31
  USE_GIT = False
32
 
33
- def generate_caption(image_path: str) -> str:
 
34
  try:
35
  if USE_GIT:
36
- image = Image.open(image_path).convert("RGB")
37
  inputs = processor(images=image, return_tensors="pt")
38
  outputs = git_model.generate(**inputs, max_length=50)
39
- caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]
40
  else:
41
  result = captioner(image_path)
42
- caption = result[0]['generated_text']
43
- return caption
44
  except Exception as e:
45
- raise Exception(f"Caption generation failed: {str(e)}")
46
-
47
- @app.post("/imagecaption/")
48
- async def caption_image(file: UploadFile = File(...)):
49
- # Validate file type
50
- valid_types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
51
- if file.content_type not in valid_types:
52
- raise HTTPException(
53
- status_code=400,
54
- detail="Please upload a valid image (JPEG, PNG, GIF, or WEBP)"
55
- )
56
 
 
 
 
 
 
57
  try:
58
- # Save temp file
59
- with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp:
60
- shutil.copyfileobj(file.file, temp)
61
- temp_path = temp.name
62
 
63
- # Generate caption
64
- caption = generate_caption(temp_path)
 
 
 
 
 
 
 
65
 
66
- # Generate audio
67
- audio_path = os.path.join(tempfile.gettempdir(), f"caption_{os.path.basename(temp_path)}.mp3")
68
- tts = gTTS(text=caption)
69
- tts.save(audio_path)
 
 
 
 
70
 
71
- return {
72
- "answer": caption,
73
- "audio": f"/files/{os.path.basename(audio_path)}"
74
- }
75
-
76
- except HTTPException:
77
- raise
78
- except Exception as e:
79
- raise HTTPException(
80
- status_code=500,
81
- detail=str(e)
82
- )
83
- finally:
84
- if 'temp_path' in locals() and os.path.exists(temp_path):
85
- os.unlink(temp_path)
86
 
87
- @app.get("/files/{filename}")
88
- async def get_file(filename: str):
89
- file_path = os.path.join(tempfile.gettempdir(), filename)
90
- if os.path.exists(file_path):
91
- return FileResponse(file_path)
92
- raise HTTPException(status_code=404, detail="File not found")
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForCausalLM
 
 
3
  from PIL import Image
4
  import torch
5
+ from fastapi import FastAPI
6
+ from fastapi.responses import RedirectResponse
 
7
 
8
+ # Initialize FastAPI
9
  app = FastAPI()
10
 
11
# Load the captioning model once, at import time.
# Preferred model: microsoft/git-large-coco (processor + causal LM).
# If loading it fails for any reason, fall back to the smaller
# nlpconnect/vit-gpt2-image-captioning pipeline. USE_GIT records which
# path is active so generate_caption() can pick the matching code path.
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    print("Successfully loaded microsoft/git-large-coco model")
    USE_GIT = True
except Exception as e:
    print(f"Failed to load GIT model: {e}. Falling back to smaller model")
    # `pipeline` is not among the names imported from transformers at the
    # top of this module, so import it here; otherwise the fallback itself
    # would die with a NameError.
    from transformers import pipeline

    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    USE_GIT = False
22
 
23
def generate_caption(image_path: str) -> str:
    """Generate a caption for the image stored at *image_path*.

    Uses the GIT model when ``USE_GIT`` is true, otherwise the fallback
    ``captioner`` pipeline.

    Args:
        image_path: Filesystem path of the image to caption.

    Returns:
        The generated caption, or the literal string
        ``"Could not generate caption"`` if anything goes wrong. This
        function is deliberately best-effort and never raises.
    """
    try:
        if USE_GIT:
            # Image.open is lazy and keeps the file handle open; convert()
            # forces the pixel data to load, so the handle can be released
            # as soon as the with-block exits.
            with Image.open(image_path) as img:
                image = img.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = git_model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]

        result = captioner(image_path)
        return result[0]['generated_text']
    except Exception as e:
        # Best-effort contract: log the failure and hand back a placeholder.
        print(f"Caption generation error: {e}")
        return "Could not generate caption"
 
 
 
 
 
 
 
 
 
37
 
38
def process_image(file_path: str):
    """Gradio callback: turn an uploaded image path into display text.

    Args:
        file_path: Path of the uploaded image; falsy when nothing was
            uploaded.

    Returns:
        A prompt to upload an image when *file_path* is falsy, the caption
        prefixed with a header on success, or an error message if
        captioning raises.
    """
    # Guard clause: nothing to caption.
    if not file_path:
        return "Please upload an image first"

    try:
        return f"📷 Image Caption:\n{generate_caption(file_path)}"
    except Exception as e:
        return f"Error processing image: {str(e)}"
48
 
49
# Gradio interface: a two-column layout with the image upload and trigger
# button on the left and the caption text box on the right.
demo = gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft())

with demo:
    gr.Markdown("# 🖼️ Image Captioning Service")
    gr.Markdown("Upload an image to get automatic captioning")

    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio pass the upload to the callback
            # as a path string, which is what process_image() expects.
            image_input = gr.Image(label="Upload Image", type="filepath")
            analyze_btn = gr.Button("Generate Caption", variant="primary")

        with gr.Column():
            output = gr.Textbox(label="Caption Result", lines=5)

    # Clicking the button runs process_image on the uploaded file and
    # writes its return value into the result text box.
    analyze_btn.click(fn=process_image, inputs=[image_input], outputs=[output])
67
 
68
# Mount the Gradio UI on the FastAPI app at the root path, so the interface
# itself is what answers requests to "/".
#
# No separate GET "/" handler is registered. A handler that redirects "/"
# to "/" can only ever be a self-redirect loop, and with the UI mounted at
# the root there is nothing else to redirect to.
app = gr.mount_gradio_app(app, demo, path="/")