"""Gradio demo: caption an uploaded image with Salesforce BLIP (large)."""

import torch
from transformers import pipeline
from PIL import Image
import gradio as gr
import os

# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the image-to-text pipeline once at module level so the model is not
# reloaded on every request.
caption_image = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)

# Local image paths shown as clickable examples in the UI.
# NOTE(review): these files must exist next to this script — confirm.
example_images = ["flower.jpg"]


def process_image(image):
    """Return a generated caption for *image*.

    Parameters
    ----------
    image : PIL.Image.Image
        Image supplied by the Gradio input component (``type="pil"``).

    Returns
    -------
    str
        The caption text produced by the BLIP pipeline.
    """
    # The pipeline returns a list of dicts; take the first candidate's text.
    caption = caption_image(image)[0]['generated_text']
    return caption


# Build the Gradio interface with the local example images.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(label="Generated Caption"),
    examples=example_images,  # Use local images as examples
)

# Launch only when run as a script, so importing this module has no
# server-starting side effect.
if __name__ == "__main__":
    iface.launch()