ImageCaptioning

Sleeping

krishnv committed on Aug 9, 2024

Commit

9633d94

verified ·

1 Parent(s): 31e8f8b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,41 +1,38 @@
 from PIL import Image
-from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast
 import gradio as gr
-# Load the model and processor
-model = VisionEncoderDecoderModel.from_pretrained("microsoft/git-base")
-feature_extractor = ViTFeatureExtractor.from_pretrained("microsoft/git-base")
-tokenizer = PreTrainedTokenizerFast.from_pretrained("microsoft/git-base")
 # Define the captioning function
-def caption_images(image):
-    # Preprocess the image
-    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
     # Generate captions
-    encoder_outputs = model.generate(pixel_values.to('cpu'), num_beams=5)
-    generated_sentence = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)
-    return generated_sentence[0].strip()
 # Define Gradio interface components
 inputs = [
-    gr.inputs.Image(type='pil', label='Original Image')
 ]
 outputs = [
-    gr.outputs.Textbox(label='Caption')
 ]
 # Define Gradio app properties
-title = "Simple Image Captioning Application"
-description = "Upload an image to see the caption generated"
-example = ['messi.jpg']  # Replace with a valid path to an example image
 # Create and launch the Gradio interface
 gr.Interface(
-    fn=caption_images,
     inputs=inputs,
     outputs=outputs,
     title=title,
     description=description,
-    examples=example,
 ).launch(debug=True)

+from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import gradio as gr
+# Load the processor and model
+processor = AutoProcessor.from_pretrained("microsoft/git-base-coco")
+model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")
 # Define the captioning function
+def caption_image(image):
+    # Process the image
+    pixel_values = processor(images=image, return_tensors="pt").pixel_values
     # Generate captions
+    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
+    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_caption
 # Define Gradio interface components
 inputs = [
+    gr.inputs.Image(type='pil', label='Upload Image')
 ]
 outputs = [
+    gr.outputs.Textbox(label='Generated Caption')
 ]
 # Define Gradio app properties
+title = "Image Captioning Application"
+description = "Upload an image to see the caption generated by the model"
 # Create and launch the Gradio interface
 gr.Interface(
+    fn=caption_image,
     inputs=inputs,
     outputs=outputs,
     title=title,
     description=description,
 ).launch(debug=True)