ikraamkb committed
Commit 6852c86 · verified · 1 Parent(s): 82d4697

Update appImage.py

Files changed (1):
  1. appImage.py (+18 −5)
appImage.py CHANGED
@@ -44,12 +44,19 @@ async def caption_from_frontend(file: UploadFile = File(...)):
 def home():
     return RedirectResponse(url="/")"""
 # appImage.py
-from transformers import pipeline
+from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
 import tempfile, os
 from PIL import Image
 from gtts import gTTS
+import torch
 
-captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+try:
+    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+    model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+    USE_GIT = True
+except Exception:
+    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+    USE_GIT = False
 
 async def caption_image(file):
     contents = await file.read()
@@ -57,14 +64,20 @@ async def caption_image(file):
         tmp.write(contents)
         image_path = tmp.name
 
-    captions = captioner(image_path)
-    caption = captions[0]['generated_text'] if captions else "No caption generated."
+    if USE_GIT:
+        image = Image.open(image_path).convert('RGB')
+        pixel_values = processor(images=image, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values, max_length=50)
+        caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    else:
+        captions = captioner(image_path)
+        caption = captions[0]['generated_text'] if captions else "No caption generated."
 
     audio_path = text_to_speech(caption)
 
     result = {"caption": caption}
     if audio_path:
-        result["audioUrl"] = f"/files/{os.path.basename(audio_path)}"
+        result["audio"] = f"/files/{os.path.basename(audio_path)}"
     return result
 
 def text_to_speech(text: str):
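For context: the new code path tries the GIT checkpoint microsoft/git-large-coco first and falls back to the nlpconnect/vit-gpt2-image-captioning pipeline only if loading fails, and the hunk header shows caption_image being awaited from a FastAPI upload endpoint (caption_from_frontend) whose response points clients at /files/<name> for the audio. As a usage illustration only, minimal wiring for that endpoint might look like the sketch below; the route path, the /files static mount, and the temp-directory location are assumptions and not part of this commit.

# Hypothetical FastAPI wiring around the caption_image coroutine updated in this commit.
# The route path, the /files mount, and the MP3 output directory are assumptions.
import tempfile

from fastapi import FastAPI, File, UploadFile
from fastapi.staticfiles import StaticFiles

from appImage import caption_image  # module changed by this commit

app = FastAPI()
# Serve generated MP3s so the returned "audio" value ("/files/<name>") resolves;
# this assumes text_to_speech writes its files into the system temp directory.
app.mount("/files", StaticFiles(directory=tempfile.gettempdir()), name="files")

@app.post("/caption")
async def caption_from_frontend(file: UploadFile = File(...)):
    # caption_image returns {"caption": ...} plus an "audio" key when TTS succeeds.
    return await caption_image(file)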