"""Gradio demo: caption an uploaded image with Salesforce BLIP (large)."""

import torch
from transformers import pipeline
from PIL import Image
import gradio as gr
import os

# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the image-to-text pipeline once at module level so the model is not
# reloaded on every request.
caption_image = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)

# Local image paths shown as clickable examples in the UI.
# NOTE(review): these files must exist next to this script — confirm.
example_images = ["flower.jpg"]


def process_image(image):
    """Return a generated caption for *image*.

    Parameters
    ----------
    image : PIL.Image.Image
        Image supplied by the Gradio input component (``type="pil"``).

    Returns
    -------
    str
        The caption text produced by the BLIP pipeline.
    """
    # The pipeline returns a list of dicts; take the first candidate's text.
    caption = caption_image(image)[0]['generated_text']
    return caption


# Build the Gradio interface with the local example images.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(label="Generated Caption"),
    examples=example_images,  # Use local images as examples
)

# Launch only when run as a script, so importing this module has no
# server-starting side effect.
if __name__ == "__main__":
    iface.launch()