ikraamkb committed
Commit 6852c86 · verified · 1 Parent(s): 82d4697

Update appImage.py

Files changed (1):
  1. appImage.py (+18 −5)
appImage.py CHANGED
@@ -44,12 +44,19 @@ async def caption_from_frontend(file: UploadFile = File(...)):
 def home():
     return RedirectResponse(url="/")"""
 # appImage.py
-from transformers import pipeline
+from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
 import tempfile, os
 from PIL import Image
 from gtts import gTTS
+import torch
 
-captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+try:
+    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+    model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+    USE_GIT = True
+except Exception:
+    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+    USE_GIT = False
 
 async def caption_image(file):
     contents = await file.read()
@@ -57,14 +64,20 @@ async def caption_image(file):
         tmp.write(contents)
         image_path = tmp.name
 
-    captions = captioner(image_path)
-    caption = captions[0]['generated_text'] if captions else "No caption generated."
+    if USE_GIT:
+        image = Image.open(image_path).convert('RGB')
+        pixel_values = processor(images=image, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values, max_length=50)
+        caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    else:
+        captions = captioner(image_path)
+        caption = captions[0]['generated_text'] if captions else "No caption generated."
 
     audio_path = text_to_speech(caption)
 
     result = {"caption": caption}
     if audio_path:
-        result["audioUrl"] = f"/files/{os.path.basename(audio_path)}"
+        result["audio"] = f"/files/{os.path.basename(audio_path)}"
     return result
 
 def text_to_speech(text: str):
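For context: the new code path tries the GIT checkpoint microsoft/git-large-coco first and falls back to the nlpconnect/vit-gpt2-image-captioning pipeline only if loading fails, and the hunk header shows caption_image being awaited from a FastAPI upload endpoint (caption_from_frontend) whose response points clients at /files/<name> for the audio. As a usage illustration only, minimal wiring for that endpoint might look like the sketch below; the route path, the /files static mount, and the temp-directory location are assumptions and not part of this commit.

# Hypothetical FastAPI wiring around the caption_image coroutine updated in this commit.
# The route path, the /files mount, and the MP3 output directory are assumptions.
import tempfile

from fastapi import FastAPI, File, UploadFile
from fastapi.staticfiles import StaticFiles

from appImage import caption_image  # module changed by this commit

app = FastAPI()
# Serve generated MP3s so the returned "audio" value ("/files/<name>") resolves;
# this assumes text_to_speech writes its files into the system temp directory.
app.mount("/files", StaticFiles(directory=tempfile.gettempdir()), name="files")

@app.post("/caption")
async def caption_from_frontend(file: UploadFile = File(...)):
    # caption_image returns {"caption": ...} plus an "audio" key when TTS succeeds.
    return await caption_image(file)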