Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import BlipProcessor, BlipForConditionalGeneration | |
from PIL import Image | |
# Load an advanced image captioning model.
# The preprocessor and the weights are fetched from the same checkpoint, so
# the identifier is spelled once and shared by both loaders.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"

processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
# Function to generate a descriptive caption
def generate_caption(image):
    """Return a text caption describing ``image``.

    Args:
        image: The picture to caption, in a form the module-level
            ``processor`` accepts (the Gradio input in this file is
            configured with ``type="pil"``). ``None`` means "no image".

    Returns:
        The decoded caption with special tokens stripped, or an empty
        string when ``image`` is ``None``.
    """
    # The interface runs in live mode, so this callback fires on every input
    # change. NOTE(review): Gradio is expected to pass None when the image is
    # cleared or not yet captured; bail out instead of letting the processor
    # raise on a missing image.
    if image is None:
        return ""

    inputs = processor(image, return_tensors="pt")
    # Beam search with 5 beams, output capped at max_length=50; the
    # repetition penalty discourages the decoder from repeating itself.
    out = model.generate(
        **inputs,
        max_length=50,
        num_beams=5,
        repetition_penalty=1.5,
    )
    # A single image was passed in, so the first generated sequence is the
    # caption for it.
    return processor.decode(out[0], skip_special_tokens=True)
# Gradio interface with webcam support.
# NOTE(review): `source="webcam"` is the Gradio 3.x keyword; Gradio 4+ renamed
# it to `sources=[...]`. Confirm against the Gradio version this app pins
# before changing it.
iface = gr.Interface(
    fn=generate_caption,
    inputs=[gr.Image(type="pil", source="webcam", label="Capture an image")],
    outputs=gr.Textbox(label="Image Description"),
    # The title begins with the camera emoji (U+1F4F7). It is written as an
    # escape so the literal survives being saved or copied through a
    # non-UTF-8 encoding, which had previously garbled it.
    title="\U0001F4F7 Image Capture & Description App",
    description="Capture an image using your webcam and let AI describe what's happening in the image!",
    # Live mode: the caption is recomputed whenever the input changes rather
    # than waiting for an explicit submit.
    live=True,
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()