ikraamkb committed on
Commit
d5d3aa6
·
verified ·
1 Parent(s): 7839da1

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +85 -9
appImage.py CHANGED
@@ -1,16 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  @app.post("/imagecaption/")
2
- async def caption_from_frontend(file: UploadFile = File(...)):
 
 
 
 
 
 
 
 
3
  try:
4
- # Process image and generate caption
5
- caption = "This would be your generated image caption"
6
- audio_path = "/files/caption_audio.mp3" # Generated audio path
 
 
 
 
7
 
 
 
 
 
 
8
  return {
9
  "answer": caption,
10
- "audio": audio_path
11
  }
 
 
 
12
  except Exception as e:
13
- return JSONResponse(
14
- {"detail": str(e)},
15
- status_code=500
16
- )
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException
2
+ from fastapi.responses import JSONResponse, FileResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
5
+ from PIL import Image
6
+ import torch
7
+ import os
8
+ import tempfile
9
+ from gtts import gTTS
10
+
11
# Application object; the route decorators in this file register on it.
app = FastAPI()

# CORS: every origin, method and header is accepted, with credentials enabled.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard lists with allow_credentials=True -- confirm whether credentials
# are actually needed by the frontend.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Captioning backend: try the GIT checkpoint first; if anything in its setup
# raises, log the reason and load the ViT-GPT2 pipeline instead. USE_GIT
# records which of the two backends ended up being loaded.
_GIT_CHECKPOINT = "microsoft/git-large-coco"
try:
    processor = AutoProcessor.from_pretrained(_GIT_CHECKPOINT)
    git_model = AutoModelForCausalLM.from_pretrained(_GIT_CHECKPOINT)
    git_model.eval()
    USE_GIT = True
except Exception as e:
    print(f"[INFO] Falling back to ViT: {e}")
    captioner = pipeline(
        "image-to-text",
        model="nlpconnect/vit-gpt2-image-captioning",
    )
    USE_GIT = False
32
+
33
def generate_caption(image_path: str) -> str:
    """Generate a caption for the image stored at *image_path*.

    Uses the GIT model when ``USE_GIT`` is true, otherwise the fallback
    ``captioner`` pipeline.

    Args:
        image_path: Filesystem path of the image to describe.

    Returns:
        The generated caption text.

    Raises:
        Exception: If opening the image or running the model fails. The
            message is prefixed with "Caption generation failed: " and the
            original error is attached as ``__cause__``.
    """
    try:
        if USE_GIT:
            # Close the file handle deterministically. Pillow can keep the
            # underlying file open for multi-frame images (e.g. GIF), and the
            # caller deletes this file right after captioning.
            with Image.open(image_path) as source:
                image = source.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = git_model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]

        result = captioner(image_path)
        return result[0]["generated_text"]
    except Exception as e:
        # Same exception type and message as before; chaining keeps the
        # original traceback available for debugging.
        raise Exception(f"Caption generation failed: {e}") from e
46
+
47
@app.post("/imagecaption/")
async def caption_image(file: UploadFile = File(...)):
    """Caption an uploaded image and synthesize the caption as speech.

    The upload is written to a temporary file, captioned with
    ``generate_caption``, and the caption is saved as an MP3 in the system
    temp directory via gTTS.

    Args:
        file: The uploaded image; its content type must be JPEG, PNG, GIF
            or WEBP.

    Returns:
        A dict with "answer" (the caption text) and "audio" (a
        ``/files/<name>`` URL path for the generated MP3).

    Raises:
        HTTPException: 400 when the content type is not an accepted image
            type; 500 when saving, captioning or speech synthesis fails.
    """
    # Imported here because the module's import list does not include shutil.
    import shutil

    # Validate file type
    valid_types = ('image/jpeg', 'image/png', 'image/gif', 'image/webp')
    if file.content_type not in valid_types:
        raise HTTPException(
            status_code=400,
            detail="Please upload a valid image (JPEG, PNG, GIF, or WEBP)"
        )

    temp_path = None
    try:
        # Save the upload to a temp file, keeping the original extension.
        # The filename may be absent, hence the empty-string fallback.
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp:
            # Record the path before copying so the finally-clause can remove
            # the file even if the copy itself fails.
            temp_path = temp.name
            shutil.copyfileobj(file.file, temp)

        # Generate caption
        caption = generate_caption(temp_path)

        # Generate audio next to the temp image, named after it.
        # The MP3 is left in place so it can be fetched via the returned URL.
        audio_path = os.path.join(
            tempfile.gettempdir(),
            f"caption_{os.path.basename(temp_path)}.mp3",
        )
        gTTS(text=caption).save(audio_path)

        return {
            "answer": caption,
            "audio": f"/files/{os.path.basename(audio_path)}"
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=str(e)
        ) from e
    finally:
        # The uploaded image is only needed for captioning; always remove it.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
86
+
87
@app.get("/files/{filename}")
async def get_file(filename: str):
    """Serve a generated caption audio file from the system temp directory.

    Args:
        filename: Name of the file to serve, taken from the URL path.

    Returns:
        A ``FileResponse`` for the matching file.

    Raises:
        HTTPException: 404 when the name is not a plain ``caption_*.mp3``
            file name or no such regular file exists.
    """
    # The name comes straight from the URL, so treat it as untrusted: accept
    # only a bare file name (no path separators) that matches the naming
    # scheme of the generated audio. This keeps the endpoint from exposing
    # unrelated files that happen to live in the shared temp directory.
    is_plain_name = (
        filename == os.path.basename(filename) and "\\" not in filename
    )
    is_caption_audio = (
        filename.startswith("caption_") and filename.endswith(".mp3")
    )
    if is_plain_name and is_caption_audio:
        file_path = os.path.join(tempfile.gettempdir(), filename)
        # isfile (not exists) so a directory never reaches FileResponse.
        if os.path.isfile(file_path):
            return FileResponse(file_path)
    raise HTTPException(status_code=404, detail="File not found")