jonluca committed on
Commit 566bd68 · verified · 1 Parent(s): da8972b

Use community version

Files changed (1)
  1. app.py +32 -66
app.py CHANGED
@@ -16,36 +16,32 @@ from hyvideo.constants import NEGATIVE_PROMPT
 
 from huggingface_hub import snapshot_download
 
-if torch.cuda.device_count() > 0:
-    snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
-    snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
-
-    class Args:
-        def __init__(self, input_dir, output_dir):
-            self.input_dir = input_dir
-            self.output_dir = output_dir
-
-    # Create the object
-    args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
-    preprocess_text_encoder_tokenizer(args)
-    snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
-
-def initialize_model(model_path):
-    print("initialize_model: " + model_path)
-    if torch.cuda.device_count() == 0:
-        return None
-
-    args = parse_args()
-    models_root_path = Path(model_path)
-    if not models_root_path.exists():
-        raise ValueError(f"`models_root` not exists: {models_root_path}")
-
-    print(f"`models_root` exists: {models_root_path}")
-    hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
-    print("Model initialized: " + model_path)
+# if torch.cuda.device_count() > 0:
+#     snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=False)
+#     snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
+
+#     class Args:
+#         def __init__(self, input_dir, output_dir):
+#             self.input_dir = input_dir
+#             self.output_dir = output_dir
+
+#     # Create the object
+#     args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
+#     preprocess_text_encoder_tokenizer(args)
+#     snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
+
+def initialize_model():
+    model_id = "hunyuanvideo-community/HunyuanVideo"
+
+    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
+        model_id, subfolder="transformer", torch_dtype=torch.bfloat16
+    )
+    model = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.float16)
+    model.vae.enable_tiling()
+    model.to("cuda")
     return hunyuan_video_sampler
 
-model = initialize_model("ckpts")
+model = initialize_model()
 
 def generate_video(
     prompt,
@@ -81,20 +77,6 @@ def generate_video_gpu(
     guidance_scale,
     flow_shift,
     embedded_guidance_scale
-):
-    return None
-
-@spaces.GPU(duration=120)
-def generate_video_gpu2(
-    model,
-    prompt,
-    resolution,
-    video_length,
-    seed,
-    num_inference_steps,
-    guidance_scale,
-    flow_shift,
-    embedded_guidance_scale
 ):
     print("generate_video_gpu (prompt: " + prompt + ")")
     if torch.cuda.device_count() == 0:
@@ -106,37 +88,21 @@ def generate_video_gpu2(
     width, height = int(width), int(height)
     negative_prompt = "" # not applicable in the inference
     print("Predicting video...")
-
-    outputs = model.predict(
+    frames: List[PIL.Image.Image] = model(
         prompt=prompt,
         height=height,
         width=width,
-        video_length=video_length,
+        num_frames=video_length,
         seed=seed,
-        negative_prompt=negative_prompt,
-        infer_steps=num_inference_steps,
+        num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
-        num_videos_per_prompt=1,
-        flow_shift=flow_shift,
-        batch_size=1,
-        embedded_guidance_scale=embedded_guidance_scale
-    )
-
-    print("Video predicted")
-    samples = outputs["samples"]
-    sample = samples[0].unsqueeze(0)
-
-    save_path = "./gradio_outputs"
-    os.makedirs(save_path, exist_ok=True)
-
-    time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
-    video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
-    save_videos_grid(sample, video_path, fps=24)
-    logger.info(f"Sample saved to: {video_path}")
-
-    print("Return the video")
+        num_videos_per_prompt=1
+    ).frames[0]
+
+    output_video = export_to_video(frames, fps=15)
     return video_path
 
+
 def create_demo(model_path):
     with gr.Blocks() as demo:
         if torch.cuda.device_count() == 0:
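For context on what the "community version" amounts to, below is a minimal standalone sketch of the diffusers pipeline this commit switches to. It is not part of the commit: the prompt, resolution, frame count, seed, and output filename are placeholder values, seeding goes through a torch.Generator (diffusers pipelines take a generator rather than a bare seed argument), and it assumes a diffusers release with HunyuanVideo support plus a CUDA GPU.

# Standalone sketch (not part of the commit): load the community HunyuanVideo
# pipeline from diffusers and render a short clip.
import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video

model_id = "hunyuanvideo-community/HunyuanVideo"

# Transformer in bfloat16, remaining pipeline components in float16,
# matching the dtypes used in the diff above.
transformer = HunyuanVideoTransformer3DModel.from_pretrained(
    model_id, subfolder="transformer", torch_dtype=torch.bfloat16
)
pipe = HunyuanVideoPipeline.from_pretrained(
    model_id, transformer=transformer, torch_dtype=torch.float16
)
pipe.vae.enable_tiling()  # tile the VAE decode to lower peak VRAM usage
pipe.to("cuda")

# Placeholder prompt, resolution, and length (num_frames of the form 4k+1,
# height/width divisible by 16); seeding via a generator object.
frames = pipe(
    prompt="A cat walks on the grass, realistic style.",
    height=320,
    width=512,
    num_frames=61,
    num_inference_steps=30,
    guidance_scale=6.0,
    num_videos_per_prompt=1,
    generator=torch.Generator(device="cuda").manual_seed(42),
).frames[0]

# export_to_video writes an .mp4 and returns its path (a temp file when no
# explicit output path is given).
video_path = export_to_video(frames, "output.mp4", fps=15)
print(video_path)

As in the diff, flow_shift and embedded_guidance_scale from the Gradio UI are not forwarded to the pipeline, and export_to_video returns the path of the .mp4 it writes.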