alexanderbaikal committed
Commit 554ee03 · Parent(s): ffc27f9

Initial commit

Files changed (2):
  1. app.py +60 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,60 @@
+ import gradio as gr
+ import torch
+ import numpy as np
+ import torchvision.transforms.functional as TF
+ from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
+ from diffusers.utils import export_to_video, load_image
+ from transformers import CLIPVisionModel
+
+ # Load the 14B FLF2V pipeline once at startup rather than on every request.
+ model_id = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"
+ image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
+ vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+ pipe = WanImageToVideoPipeline.from_pretrained(
+     model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
+ )
+ pipe.to("cuda")
+
+ def generate_video(first_frame_url, last_frame_url, prompt):
+     first_frame = load_image(first_frame_url)
+     last_frame = load_image(last_frame_url)
+
+     def aspect_ratio_resize(image, pipe, max_area=720 * 1280):
+         # Pick the largest size under max_area that preserves the aspect
+         # ratio and is divisible by the VAE/transformer patch granularity.
+         aspect_ratio = image.height / image.width
+         mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
+         height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
+         width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
+         image = image.resize((width, height))
+         return image, height, width
+
+     def center_crop_resize(image, height, width):
+         # Scale the image up just enough to cover (height, width), then
+         # center-crop to the exact target size.
+         resize_ratio = max(width / image.width, height / image.height)
+         image = image.resize(
+             (round(image.width * resize_ratio), round(image.height * resize_ratio))
+         )
+         # torchvision's center_crop expects the output size as [height, width].
+         image = TF.center_crop(image, [height, width])
+         return image, height, width
+
+     first_frame, height, width = aspect_ratio_resize(first_frame, pipe)
+     if last_frame.size != first_frame.size:
+         # Make the last frame match the first frame's dimensions.
+         last_frame, _, _ = center_crop_resize(last_frame, height, width)
+
+     output = pipe(
+         image=first_frame,
+         last_image=last_frame,
+         prompt=prompt,
+         height=height,
+         width=width,
+         guidance_scale=5.5,
+     ).frames[0]
+     video_path = "wan_output.mp4"
+     export_to_video(output, video_path, fps=16)
+     return video_path
+
+ iface = gr.Interface(
+     fn=generate_video,
+     inputs=[
+         gr.Textbox(label="First Frame URL"),
+         gr.Textbox(label="Last Frame URL"),
+         gr.Textbox(label="Prompt"),
+     ],
+     outputs=gr.Video(label="Generated Video"),
+     title="Wan2.1 FLF2V Video Generator",
+ )
+
+ iface.launch()
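
As a sanity check on the sizing logic in aspect_ratio_resize, here is a worked example under assumed model constants: Wan's VAE downsamples 8x spatially and the transformer's spatial patch size is 2, giving mod_value = 16. These values come from the model config at runtime, not from this commit.

    import numpy as np

    max_area = 720 * 1280     # 921600-pixel budget
    aspect_ratio = 720 / 1280 # a 1280x720 landscape input
    mod_value = 8 * 2         # assumed: vae_scale_factor_spatial * patch_size[1]

    height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value  # 720
    width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value   # 1280

For this input the rounding is a no-op, but for an input whose dimensions are not multiples of 16 the floor division snaps both sides down to the nearest valid size.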
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ diffusers
+ transformers
+ torch
+ torchvision
+ gradio
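
To try the app outside of Spaces, the usual Gradio workflow applies: install the dependencies with pip install -r requirements.txt, then start the server with python app.py. Since the code calls pipe.to("cuda") at import time, a CUDA GPU with enough memory for the 14B model is assumed.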