import os

import gradio as gr
import torch
import ftfy  # noqa: F401 -- intentionally unused import kept from the original requirements
import spaces
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

# Read token and optional model override from environment
token = os.environ.get("HUGGINGFACE_TOKEN")
if not token:
    raise ValueError("Environment variable HUGGINGFACE_TOKEN is not set.")

# Use the Diffusers-ready model repository by default
model_id = os.environ.get("WAN_MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")


@spaces.GPU  # GPU is only allocated while this function runs
def generate_video(image, prompt, num_frames=16, steps=25, guidance_scale=7.5):
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the pipeline inside the GPU-allocated function
    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        trust_remote_code=True,
        token=token,
    ).to(device)
    pipe.enable_attention_slicing()

    # Generate the video frames
    output = pipe(
        prompt=prompt,
        image=image,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        num_frames=num_frames,
    )

    # Export the first (and only) generated clip to an MP4 file so gr.Video can display it
    return export_to_video(output.frames[0], "output.mp4", fps=16)


# Gradio UI
def main():
    with gr.Blocks() as demo:
        gr.Markdown("# Wan2.1 Image-to-Video Demo (ZeroGPU Edition)")
        with gr.Row():
            img_in = gr.Image(type="pil", label="Input Image")
            txt_p = gr.Textbox(label="Prompt")
        btn = gr.Button("Generate Video")
        out = gr.Video(label="Generated Video")
        btn.click(fn=generate_video, inputs=[img_in, txt_p], outputs=out)
    return demo


if __name__ == "__main__":
    main().launch()
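
# --- Illustrative usage sketch (not part of the app) ----------------------
# A minimal, hedged example of calling generate_video() directly, e.g. for a
# quick smoke test outside the Gradio UI. The image path "example.jpg" is a
# placeholder assumption; any RGB image opened with PIL should work.
#
#   from PIL import Image
#   source = Image.open("example.jpg").convert("RGB")
#   video_path = generate_video(source, "a cat walking through tall grass")
#   print(video_path)  # path to the exported MP4, e.g. "output.mp4"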