import gradio as gr
import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video
import spaces  # ZeroGPU integration

def load_pipeline_on_cpu(model_id="Wan-AI/Wan2.1-T2V-1.3B-Diffusers"):
    """Load a Wan text-to-video pipeline without moving it to an accelerator.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for both the VAE
            (read from its ``vae`` subfolder) and the pipeline. Defaults to
            the Wan2.1 T2V 1.3B Diffusers checkpoint, so existing zero-argument
            callers behave exactly as before.

    Returns:
        The ``WanPipeline`` instance. No ``.to(...)`` call is made here, so
        the weights stay wherever ``from_pretrained`` places them (CPU by
        default).
    """
    print("Preloading model on CPU...")
    # The VAE is loaded separately in float32 and handed to the pipeline,
    # while the remaining components are requested in bfloat16.
    # NOTE(review): presumably float32 is kept for decode quality — confirm.
    vae = AutoencoderKLWan.from_pretrained(
        model_id, subfolder="vae", torch_dtype=torch.float32
    )
    pipeline_cpu = WanPipeline.from_pretrained(
        model_id, vae=vae, torch_dtype=torch.bfloat16
    )
    print("Model preloaded on CPU.")
    return pipeline_cpu

# Module-level pipeline cache.
# PIPELINE_CPU is built eagerly when this module is imported, so the
# (slow) model load happens once at startup rather than per request.
PIPELINE_CPU = load_pipeline_on_cpu()
# PIPELINE_GPU starts empty; generate_video() assigns it the result of
# PIPELINE_CPU.to("cuda") the first time it runs and reuses it afterwards.
PIPELINE_GPU = None  # Will hold the GPU-loaded pipeline after the first request

@spaces.GPU  # This ensures GPU is only initialized in the request worker process
def generate_video(prompt, negative_prompt=""):
    """Generate a video from a text prompt and return the path to the MP4.

    Args:
        prompt: Text description of the video to generate.
        negative_prompt: Optional text describing what to avoid.

    Returns:
        Path (str) of a newly created ``.mp4`` file containing the result.
        Each call writes to its own uniquely named temporary file so that
        overlapping requests cannot overwrite one another's output.
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    global PIPELINE_GPU
    # Move to GPU on first request if not already done; later calls that see
    # the cached global skip the transfer.
    # NOTE(review): under ZeroGPU the decorated function may run in a separate
    # worker process, in which case this cache would not persist — confirm.
    if PIPELINE_GPU is None:
        print("Moving model to GPU...")
        PIPELINE_GPU = PIPELINE_CPU.to("cuda")
        print("Model moved to GPU.")
    pipeline_gpu = PIPELINE_GPU

    # Generate video frames at 480p resolution
    output = pipeline_gpu(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=480,        # 480p height
        width=832,         # Suitable width for 480p videos
        num_frames=81,     # Adjust for desired video length
        guidance_scale=5.0 # Recommended guidance scale for the 1.3B model
    ).frames[0]

    # Reserve a unique file name per request. The previous fixed
    # "output.mp4" was shared by every caller, so concurrent requests could
    # clobber each other's video. The handle is closed before export so the
    # writer can reopen the path; delete=False because the caller (Gradio)
    # still needs the file after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    export_to_video(output, video_path, fps=15)
    return video_path

# Assemble the Gradio UI: two text inputs feeding generate_video, one video
# output. Components are created in the order prompt -> negative -> video.
_prompt_box = gr.Textbox(
    label="Prompt",
    placeholder="Enter your video prompt here",
)
_negative_prompt_box = gr.Textbox(
    label="Negative Prompt",
    placeholder="Optional negative prompt",
    value="",
)
_video_output = gr.Video(label="Generated Video")

iface = gr.Interface(
    title="Wan2.1-T2V-1.3B Video Generator",
    description="No signing in required, all inference run locally on space. Generate 480p videos using the Wan2.1-T2V-1.3B diffusers pipeline, ZeroGPU would queue up for wayyy to long so doesn't work. Takes 3 minutes on L40S per 5 seconds of video. Copy and change HW. Everything takes 10 to 15 minutes to load up. support by giving a like or add to discussion. Please help improve this.",
    fn=generate_video,
    inputs=[_prompt_box, _negative_prompt_box],
    outputs=_video_output,
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()