Spaces: Runtime error

app.py CHANGED
@@ -16,36 +16,32 @@ from hyvideo.constants import NEGATIVE_PROMPT
 
 from huggingface_hub import snapshot_download
 
-if torch.cuda.device_count() > 0:
-    snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=False)
-    snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
-
-    class Args:
-        def __init__(self, input_dir, output_dir):
-            self.input_dir = input_dir
-            self.output_dir = output_dir
-
-    # Create the object
-    args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
-    preprocess_text_encoder_tokenizer(args)
-    snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
-
-def initialize_model(
-    …
-    print(f"`models_root` exists: {models_root_path}")
-    hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
-    print("Model initialized: " + model_path)
+# if torch.cuda.device_count() > 0:
+#     snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=False)
+#     snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
+
+# class Args:
+#     def __init__(self, input_dir, output_dir):
+#         self.input_dir = input_dir
+#         self.output_dir = output_dir
+
+# # Create the object
+# args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
+# preprocess_text_encoder_tokenizer(args)
+# snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
+
+def initialize_model():
+    model_id = "hunyuanvideo-community/HunyuanVideo"
+
+    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
+        model_id, subfolder="transformer", torch_dtype=torch.bfloat16
+    )
+    model = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.float16)
+    model.vae.enable_tiling()
+    model.to("cuda")
     return hunyuan_video_sampler
 
-model = initialize_model(
+model = initialize_model()
 
 def generate_video(
     prompt,
@@ -81,20 +77,6 @@ def generate_video_gpu(
     guidance_scale,
     flow_shift,
     embedded_guidance_scale
-):
-    return None
-
-@spaces.GPU(duration=120)
-def generate_video_gpu2(
-    model,
-    prompt,
-    resolution,
-    video_length,
-    seed,
-    num_inference_steps,
-    guidance_scale,
-    flow_shift,
-    embedded_guidance_scale
 ):
     print("generate_video_gpu (prompt: " + prompt + ")")
     if torch.cuda.device_count() == 0:
@@ -106,37 +88,21 @@ def generate_video_gpu2(
     width, height = int(width), int(height)
     negative_prompt = "" # not applicable in the inference
     print("Predicting video...")
-
-    outputs = model.predict(
+    frames: List[PIL.Image.Image] = model(
         prompt=prompt,
         height=height,
         width=width,
-        video_length=video_length,
+        num_frames=video_length,
         seed=seed,
-        negative_prompt=negative_prompt,
-        infer_steps=num_inference_steps,
+        num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
-        num_videos_per_prompt=1
-        …
-    )
-
-    print("Video predicted")
-    samples = outputs["samples"]
-    sample = samples[0].unsqueeze(0)
-
-    save_path = "./gradio_outputs"
-    os.makedirs(save_path, exist_ok=True)
-
-    time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
-    video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
-    save_videos_grid(sample, video_path, fps=24)
-    logger.info(f"Sample saved to: {video_path}")
-
-    print("Return the video")
+        num_videos_per_prompt=1
+    ).frames[0]
+
+    output_video = export_to_video(frames, fps=15)
     return video_path
 
+
 def create_demo(model_path):
     with gr.Blocks() as demo:
         if torch.cuda.device_count() == 0:
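
Notes on the change:

The commit stops downloading the original tencent/HunyuanVideo checkpoints and the llava/clip text encoders by hand (that whole block is now commented out) and instead loads everything through the diffusers pipeline, which fetches hunyuanvideo-community/HunyuanVideo from the Hub on the first from_pretrained() call. If a cold-start download inside a GPU-timed request is a concern, the cache could be warmed explicitly at startup; a minimal sketch (only the repo id comes from the diff, prefetching at all is an assumption):

from huggingface_hub import snapshot_download

# Sketch: warm the local HF cache at startup so from_pretrained() finds the
# weights already on disk instead of downloading them lazily.
snapshot_download(repo_id="hunyuanvideo-community/HunyuanVideo", repo_type="model")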
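
In hunk 1 the new initialize_model() builds a diffusers HunyuanVideoPipeline (bfloat16 transformer, float16 for the remaining components, tiled VAE decode), but the unchanged context line still ends the function with return hunyuan_video_sampler, a name that no longer exists; on its own that raises a NameError at startup and may well account for the Space's "Runtime error" status. A hedged sketch of what the function presumably intends, assuming the imports for the two diffusers classes are added outside the shown hunks and that the stale return should become return model:

import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel


def initialize_model():
    model_id = "hunyuanvideo-community/HunyuanVideo"

    # Transformer in bf16, remaining components in fp16, as the diff chooses.
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        model_id, subfolder="transformer", torch_dtype=torch.bfloat16
    )
    model = HunyuanVideoPipeline.from_pretrained(
        model_id, transformer=transformer, torch_dtype=torch.float16
    )
    model.vae.enable_tiling()  # decode video latents tile by tile to limit VRAM
    model.to("cuda")
    return model  # assumption: replaces the stale `return hunyuan_video_sampler`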
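
Hunk 2 deletes a generate_video_gpu stub that returned None plus the duplicated generate_video_gpu2 definition, leaving a single ZeroGPU entry point. For orientation, the shape that survives (the parameter list is copied from the diff; the body is elided):

import spaces

# Single @spaces.GPU entry point after the cleanup; ZeroGPU grants the call a
# GPU for at most `duration` seconds.
@spaces.GPU(duration=120)
def generate_video_gpu(model, prompt, resolution, video_length, seed,
                       num_inference_steps, guidance_scale, flow_shift,
                       embedded_guidance_scale):
    ...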
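
In hunk 3 two context lines survive that no longer fit the diffusers call: seed=seed, is passed straight into model(...), but HunyuanVideoPipeline.__call__ takes a torch.Generator rather than a seed kwarg, and the function still ends with return video_path even though video_path is no longer defined anywhere (export_to_video returns the path it wrote). The annotation frames: List[PIL.Image.Image] likewise assumes List, PIL, and export_to_video are imported outside the shown hunks. A hedged sketch of the corrected inference tail (argument names come from the diff; the generator handling and the helper name are assumptions):

from typing import List

import PIL.Image
import torch
from diffusers.utils import export_to_video


def run_inference(model, prompt, width, height, video_length, seed,
                  num_inference_steps, guidance_scale):
    # Hypothetical helper mirroring the diff's inference block.
    generator = torch.Generator(device="cpu").manual_seed(seed)
    frames: List[PIL.Image.Image] = model(
        prompt=prompt,
        height=height,
        width=width,
        num_frames=video_length,
        generator=generator,  # replaces the unsupported seed=seed kwarg
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_videos_per_prompt=1,
    ).frames[0]
    # export_to_video writes the frames to an .mp4 (a temp file when no path is
    # given) and returns that path, which is what the Gradio handler should return.
    output_video = export_to_video(frames, fps=15)
    return output_video  # assumption: replaces the undefined `video_path`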