Spaces:

Kidbea
/

Experiment

Running on Zero

Experiment / app.py

new

3c6bbec 7 days ago

1.64 kB

	import os
	import gradio as gr
	import torch
	import ftfy
	import spaces
	from diffusers import DiffusionPipeline

	# Configuration comes from the environment: a mandatory Hugging Face access
	# token, plus an optional override for the model repository.
	token = os.getenv("HUGGINGFACE_TOKEN")
	if token is None or token == "":
	    # Fail at import time so a misconfigured deployment is noticed immediately.
	    raise ValueError("Environment variable HUGGINGFACE_TOKEN is not set.")

	# Falls back to the Diffusers-format repository when WAN_MODEL_ID is unset.
	model_id = os.getenv("WAN_MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")

	@spaces.GPU  # GPU is only activated when this function is called
	def generate_video(image, prompt, num_frames=16, steps=25, guidance_scale=7.5):
	    """Generate a video from an input image and a text prompt.

	    Args:
	        image: Conditioning image; the UI supplies it as a PIL image.
	        prompt: Text prompt forwarded to the pipeline.
	        num_frames: Number of frames requested from the pipeline.
	        steps: Number of inference steps (``num_inference_steps``).
	        guidance_scale: Guidance scale forwarded to the pipeline.

	    Returns:
	        Path of the encoded video file, which is what ``gr.Video`` expects.

	    Raises:
	        gr.Error: If no input image was provided.
	    """
	    # Local import keeps this block self-contained; diffusers is already a
	    # dependency of this file.
	    from diffusers.utils import export_to_video

	    if image is None:
	        # Surface a readable message in the UI instead of failing deep inside
	        # the pipeline.
	        raise gr.Error("Please provide an input image.")

	    # Pick device and dtype together so they can never disagree.
	    has_cuda = torch.cuda.is_available()
	    device = "cuda" if has_cuda else "cpu"
	    torch_dtype = torch.float16 if has_cuda else torch.float32

	    # Load pipeline inside the GPU-allocated function
	    pipe = DiffusionPipeline.from_pretrained(
	        model_id,
	        torch_dtype=torch_dtype,
	        trust_remote_code=True,
	        token=token,  # `use_auth_token` is the deprecated spelling
	    ).to(device)

	    pipe.enable_attention_slicing()

	    # Generate video
	    output = pipe(
	        prompt=prompt,
	        image=image,
	        num_inference_steps=steps,
	        guidance_scale=guidance_scale,
	        num_frames=num_frames,
	    )

	    # The pipeline output exposes `frames` (one entry per prompt), not
	    # `videos`. Encode the first entry to a file so gr.Video can display it.
	    # NOTE(review): fps=16 follows the Wan2.1 examples in the diffusers docs;
	    # confirm it is the desired playback rate.
	    return export_to_video(output.frames[0], fps=16)

	# Gradio UI
	def main():
	    """Assemble the Gradio Blocks interface and return it without launching."""
	    with gr.Blocks() as demo:
	        gr.Markdown("# Wan2.1 Image-to-Video Demo (ZeroGPU Edition)")

	        # Inputs share one row: source image and text prompt.
	        with gr.Row():
	            image_input = gr.Image(type="pil", label="Input Image")
	            prompt_input = gr.Textbox(label="Prompt")

	        run_button = gr.Button("Generate Video")
	        video_output = gr.Video(label="Generated Video")

	        # Only image and prompt are wired in; the remaining generate_video
	        # parameters keep their defaults.
	        run_button.click(
	            fn=generate_video,
	            inputs=[image_input, prompt_input],
	            outputs=video_output,
	        )

	    return demo

	if __name__ == "__main__":
	    # Build the interface, then start the server (script entry point only).
	    demo = main()
	    demo.launch()