"""Gradio app: caption an image with BLIP, then translate the caption to Arabic."""

import torch
from transformers import pipeline
from PIL import Image
import gradio as gr
import os

# Run the models on GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Image -> English caption (BLIP large).
caption_image = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)

# English -> Arabic translation. Placed on the same device as the captioner;
# the original left this pipeline on CPU even when a GPU was present.
translate = pipeline(
    "translation_en_to_ar",
    model="Helsinki-NLP/opus-mt-en-ar",
    device=device,
)

# Local example images shown in the Gradio UI (paths relative to the app's CWD).
example_images = ["flower.jpg"]


def Arabic_Image_Captioning(image):
    """Generate an Arabic caption for *image* and wrap it in RTL HTML.

    Args:
        image: A PIL image (Gradio supplies one because ``inputs`` uses
            ``type="pil"``).

    Returns:
        str: An HTML snippet containing the Arabic caption, rendered
        right-to-left for correct Arabic display.
    """
    # BLIP returns a list of dicts; the caption text is under 'generated_text'.
    caption = caption_image(image)[0]["generated_text"]
    # The translator likewise returns a list; take 'translation_text'.
    arabic_caption = translate(caption)[0]["translation_text"]
    # NOTE(review): the original HTML markup was lost (the f-string was split
    # across lines with its tags stripped, which is a syntax error as given).
    # Reconstructed as a right-to-left styled <div>; confirm styling against
    # the intended original if available.
    html_result = (
        '<div dir="rtl" style="text-align: right; font-size: 1.25em;">'
        f"{arabic_caption}</div>"
    )
    return html_result


demo = gr.Interface(
    fn=Arabic_Image_Captioning,
    inputs=gr.Image(type="pil"),
    outputs=gr.HTML(label="Caption in Arabic"),
    title="Arabic Image Captioning",
    description="Upload an image to generate an arabic caption.",
    examples=example_images,
)

# Launch the interface (kept at module level to preserve the original
# run-on-import behavior of this script).
demo.launch()