MohamedRashad committed · verified
Commit 5f9a8a4 · 1 Parent(s): 432b86b

Update app.py

Files changed (1): app.py (+23, -17)
app.py CHANGED
@@ -56,8 +56,14 @@ text_encoder_2.requires_grad_(False)
 image_encoder.requires_grad_(False)
 transformer.requires_grad_(False)
 
-DynamicSwapInstaller.install_model(transformer, device=gpu)
-DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+# DynamicSwapInstaller.install_model(transformer, device=gpu)
+# DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+
+text_encoder.to(gpu)
+text_encoder_2.to(gpu)
+image_encoder.to(gpu)
+vae.to(gpu)
+transformer.to(gpu)
 
 stream = AsyncStream()
 
@@ -75,16 +81,16 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
     stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
 
     try:
-        unload_complete_models(
-            text_encoder, text_encoder_2, image_encoder, vae, transformer
-        )
+        # unload_complete_models(
+        #     text_encoder, text_encoder_2, image_encoder, vae, transformer
+        # )
 
         # Text encoding
 
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding ...'))))
 
-        fake_diffusers_current_device(text_encoder, gpu)  # since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode.
-        load_model_as_complete(text_encoder_2, target_device=gpu)
+        # fake_diffusers_current_device(text_encoder, gpu)  # since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode.
+        # load_model_as_complete(text_encoder_2, target_device=gpu)
 
         llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
 
@@ -113,7 +119,7 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
 
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
 
-        load_model_as_complete(vae, target_device=gpu)
+        # load_model_as_complete(vae, target_device=gpu)
 
         start_latent = vae_encode(input_image_pt, vae)
 
@@ -121,7 +127,7 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
 
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
 
-        load_model_as_complete(image_encoder, target_device=gpu)
+        # load_model_as_complete(image_encoder, target_device=gpu)
 
         image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
         image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
@@ -172,8 +178,8 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
             clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16, :, :].split([1, 2, 16], dim=2)
             clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2)
 
-            unload_complete_models()
-            move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)
+            # unload_complete_models()
+            # move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)
 
             if use_teacache:
                 transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
@@ -235,8 +241,8 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
             total_generated_latent_frames += int(generated_latents.shape[2])
             history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
 
-            offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
-            load_model_as_complete(vae, target_device=gpu)
+            # offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
+            # load_model_as_complete(vae, target_device=gpu)
 
             real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :]
 
@@ -249,7 +255,7 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
             current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu()
             history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames)
 
-            unload_complete_models()
+            # unload_complete_models()
 
             output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
 
@@ -264,9 +270,9 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
     except:
         traceback.print_exc()
 
-        unload_complete_models(
-            text_encoder, text_encoder_2, image_encoder, vae, transformer
-        )
+        # unload_complete_models(
+        #     text_encoder, text_encoder_2, image_encoder, vae, transformer
+        # )
 
     stream.output_queue.push(('end', None))
     return
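
For readers comparing the two memory strategies in this diff: the previous code streamed weights onto the GPU per stage (DynamicSwapInstaller plus the load_model_as_complete / unload_complete_models calls), while this commit keeps every model resident on the GPU for the whole run. Below is a minimal sketch of both paths; it assumes the helpers are importable from diffusers_helper.memory as in upstream FramePack, and the place_models() wrapper, the models dict, and the high_vram flag are hypothetical names used only for illustration.

import torch

# Assumption: these helpers live in diffusers_helper.memory (upstream FramePack layout).
from diffusers_helper.memory import DynamicSwapInstaller, unload_complete_models

gpu = torch.device('cuda')

def place_models(models, high_vram):
    """Hypothetical wrapper contrasting the two placement strategies."""
    if high_vram:
        # This commit's approach: pin all weights on the GPU once and skip
        # the per-stage load/offload calls inside worker().
        for m in models.values():
            m.to(gpu)
    else:
        # Previous low-VRAM approach: start with everything off the GPU and
        # install dynamic swapping so transformer/text_encoder modules are
        # moved to the GPU only while they are in use; the remaining models
        # are loaded per stage and released again.
        unload_complete_models(*models.values())
        DynamicSwapInstaller.install_model(models['transformer'], device=gpu)
        DynamicSwapInstaller.install_model(models['text_encoder'], device=gpu)

Pinning everything removes the repeated load/offload overhead during sampling, at the cost of needing enough VRAM to hold all five models (two text encoders, the image encoder, the VAE, and the transformer) at once; the commented-out calls keep the low-VRAM path easy to restore.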