"""Gradio app: text-to-image (Redshift Diffusion) + text-to-video (Zeroscope v2 XL)."""
import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, DiffusionPipeline
from diffusers.utils import export_to_video
import os

# Use the GPU when available; fp16 is only valid/fast on CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Text-to-Image model (Redshift Diffusion).
image_pipe = StableDiffusionPipeline.from_pretrained(
    "nitrosocke/redshift-diffusion", torch_dtype=dtype
).to(device)

# Text-to-Video model. NOTE(review): Zeroscope v2 XL is a *text*-to-video
# pipeline — it consumes a prompt, not an image — so the video step below is
# wired to the text prompt instead of the generated image.
video_model = DiffusionPipeline.from_pretrained(
    "cerspense/zeroscope_v2_XL", torch_dtype=dtype
).to(device)


def generate_image(prompt):
    """Generate an image from *prompt*, save it, and return its path.

    Returns the path twice — once for the ``gr.Image`` preview and once for
    the ``gr.File`` download component. (The original returned a single value
    while the click handler declared two outputs, which mis-maps the results.)
    """
    image = image_pipe(prompt).images[0]
    image_path = "generated_image.png"
    image.save(image_path)
    return image_path, image_path


def generate_video(prompt):
    """Generate a short video from *prompt* and return the saved MP4 path.

    Fixes over the original:
    - The original accepted ``image_path`` but used an undefined name
      ``prompt`` (NameError at runtime); Zeroscope is text-to-video, so the
      function now takes the text prompt directly.
    - Pipeline outputs expose frame arrays, not a ``.save()`` method; frames
      are written to disk with ``export_to_video``.
    - Returns the path twice, matching the two declared Gradio outputs.
    """
    # .frames[0]: first (and only) video in the batch — presumably a list of
    # per-frame arrays; TODO confirm against the installed diffusers version.
    video_frames = video_model(prompt, num_frames=24).frames[0]
    video_path = export_to_video(video_frames, output_video_path="generated_video.mp4")
    return video_path, video_path


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 AI Cartoon Image & Video Generator")
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Enter Text Prompt",
            placeholder="A 3D cartoon cat playing in a park",
        )
        generate_image_btn = gr.Button("Generate Image")
    image_output = gr.Image(label="Generated Image")
    with gr.Row():
        generate_video_btn = gr.Button("Convert to Video")
    video_output = gr.Video(label="Generated Video")
    download_image = gr.File(label="Download Image")
    download_video = gr.File(label="Download Video")

    # Both steps are driven by the text prompt (Zeroscope is text-to-video),
    # so the video button now takes prompt_input rather than image_output.
    generate_image_btn.click(
        generate_image,
        inputs=[prompt_input],
        outputs=[image_output, download_image],
    )
    generate_video_btn.click(
        generate_video,
        inputs=[prompt_input],
        outputs=[video_output, download_video],
    )

if __name__ == "__main__":
    demo.launch()