Spaces:

SeedOfEvil
/

Wan2.1-T2V-1.3B-Local

Running

File size: 2,436 Bytes

7d2f6d5
 
 
 
 
 
57ac0c8
 
 
 
 
 
 
 
50d19ca
57ac0c8
 
 
50d19ca
57ac0c8
50d19ca
57ac0c8
 
 
 
 
 
 
 
 
 
7d2f6d5
 
 
50d19ca
57ac0c8
50d19ca
7d2f6d5
 
 
 
 
 
50d19ca
7d2f6d5
 
 
 
 
 
 
 
caf8340
7d2f6d5
 
 
50d19ca

import gradio as gr
import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video
import spaces  # ZeroGPU integration

def load_pipeline_on_cpu():
    """Load the Wan2.1 T2V 1.3B diffusers pipeline without moving it to a GPU.

    The VAE is loaded separately from the ``vae`` subfolder with
    ``torch_dtype=torch.float32`` and injected into the pipeline, which is
    itself loaded with ``torch_dtype=torch.bfloat16``. No ``.to(...)`` call is
    made here, so the device move is left to the caller.

    Returns:
        The ``WanPipeline`` instance produced by ``from_pretrained``.
    """
    repo = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
    print("Preloading model on CPU...")
    pipe = WanPipeline.from_pretrained(
        repo,
        # The VAE is requested in float32 while the pipeline is requested in bfloat16.
        vae=AutoencoderKLWan.from_pretrained(
            repo, subfolder="vae", torch_dtype=torch.float32
        ),
        torch_dtype=torch.bfloat16,
    )
    print("Model preloaded on CPU.")
    return pipe

# Load the model on CPU once, at import time (container initialization), so the
# loading cost is paid before the first request is handled.
PIPELINE_CPU = load_pipeline_on_cpu()
# Cache slot for the pipeline after generate_video has moved it to CUDA.
# It stays None until the first request, then is reused by later requests.
PIPELINE_GPU = None  # Will hold the GPU-loaded pipeline after the first request

@spaces.GPU  # This ensures GPU is only initialized in the request worker process
def generate_video(prompt, negative_prompt=""):
    """Generate a short video from a text prompt and return the MP4 file path.

    On the first call the preloaded pipeline is moved to CUDA and cached in the
    module-level ``PIPELINE_GPU``; later calls reuse the cached pipeline.

    Args:
        prompt: Text description of the video to generate.
        negative_prompt: Optional text describing what to avoid. Defaults to
            an empty string.

    Returns:
        Path (str) of a newly created ``.mp4`` file containing the result.
        Each call writes to its own unique temporary file. The file is not
        deleted here because the caller needs it after this function returns.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    global PIPELINE_GPU
    # Move to GPU on first request if not already done.
    if PIPELINE_GPU is None:
        print("Moving model to GPU...")
        PIPELINE_GPU = PIPELINE_CPU.to("cuda")
        print("Model moved to GPU.")
    pipeline_gpu = PIPELINE_GPU

    # Generate video frames at 480p resolution
    frames = pipeline_gpu(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=480,          # 480p height
        width=832,           # Suitable width for 480p videos
        num_frames=81,       # Adjust for desired video length
        guidance_scale=5.0,  # Recommended guidance scale for the 1.3B model
    ).frames[0]

    # Reserve a unique output path per request. A fixed "output.mp4" would be
    # shared by every request, letting one request overwrite another's video,
    # and would also depend on the working directory being writable.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name

    # 81 frames at 15 fps is roughly 5.4 seconds of video.
    export_to_video(frames, video_path, fps=15)
    return video_path

# Create the Gradio interface
def _build_interface():
    """Assemble the Gradio UI: two text inputs feeding generate_video, one video output."""
    prompt_box = gr.Textbox(label="Prompt", placeholder="Enter your video prompt here")
    negative_box = gr.Textbox(label="Negative Prompt", placeholder="Optional negative prompt", value="")
    video_out = gr.Video(label="Generated Video")
    return gr.Interface(
        fn=generate_video,
        inputs=[prompt_box, negative_box],
        outputs=video_out,
        title="Wan2.1-T2V-1.3B Video Generator",
        description="No signing in required, all inference run locally on space. Generate 480p videos using the Wan2.1-T2V-1.3B diffusers pipeline, ZeroGPU would queue up for wayyy to long so doesn't work. Takes 3 minutes on L40S per 5 seconds of video. Copy and change HW. Everything takes 10 to 15 minutes to load up. support by giving a like or add to discussion. Please help improve this.",
    )


iface = _build_interface()

# Launch the Gradio app only when this file is run as a script, so importing
# the module does not start it.
if __name__ == "__main__":
    iface.launch()