Update app.py
app.py CHANGED

@@ -56,8 +56,14 @@ text_encoder_2.requires_grad_(False)
 image_encoder.requires_grad_(False)
 transformer.requires_grad_(False)
 
-DynamicSwapInstaller.install_model(transformer, device=gpu)
-DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+# DynamicSwapInstaller.install_model(transformer, device=gpu)
+# DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+
+text_encoder.to(gpu)
+text_encoder_2.to(gpu)
+image_encoder.to(gpu)
+vae.to(gpu)
+transformer.to(gpu)
 
 stream = AsyncStream()
 
@@ -75,16 +81,16 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
     stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
 
     try:
-        unload_complete_models(
-            text_encoder, text_encoder_2, image_encoder, vae, transformer
-        )
+        # unload_complete_models(
+        #     text_encoder, text_encoder_2, image_encoder, vae, transformer
+        # )
 
         # Text encoding
 
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding ...'))))
 
-        fake_diffusers_current_device(text_encoder, gpu) # since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode.
-        load_model_as_complete(text_encoder_2, target_device=gpu)
+        # fake_diffusers_current_device(text_encoder, gpu) # since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode.
+        # load_model_as_complete(text_encoder_2, target_device=gpu)
 
         llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
 
@@ -113,7 +119,7 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
 
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
 
-        load_model_as_complete(vae, target_device=gpu)
+        # load_model_as_complete(vae, target_device=gpu)
 
         start_latent = vae_encode(input_image_pt, vae)
 
@@ -121,7 +127,7 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
 
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
 
-        load_model_as_complete(image_encoder, target_device=gpu)
+        # load_model_as_complete(image_encoder, target_device=gpu)
 
         image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
         image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
@@ -172,8 +178,8 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
             clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16, :, :].split([1, 2, 16], dim=2)
             clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2)
 
-            unload_complete_models()
-            move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)
+            # unload_complete_models()
+            # move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)
 
             if use_teacache:
                 transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
@@ -235,8 +241,8 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
             total_generated_latent_frames += int(generated_latents.shape[2])
             history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
 
-            offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
-            load_model_as_complete(vae, target_device=gpu)
+            # offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
+            # load_model_as_complete(vae, target_device=gpu)
 
             real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :]
 
@@ -249,7 +255,7 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
                 current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu()
                 history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames)
 
-            unload_complete_models()
+            # unload_complete_models()
 
             output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
 
@@ -264,9 +270,9 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
     except:
         traceback.print_exc()
 
-        unload_complete_models(
-            text_encoder, text_encoder_2, image_encoder, vae, transformer
-        )
+        # unload_complete_models(
+        #     text_encoder, text_encoder_2, image_encoder, vae, transformer
+        # )
 
     stream.output_queue.push(('end', None))
     return
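Note: the change above disables the dynamic swap / offload path (DynamicSwapInstaller, load_model_as_complete, unload_complete_models, move_model_to_device_with_memory_preservation) and instead keeps all five pipeline components resident on the GPU for the whole worker run. A minimal sketch of that pattern, assuming the components are ordinary torch.nn.Module instances; the helper name keep_models_on_gpu is illustrative and not part of app.py:

import torch
from torch import nn

def keep_models_on_gpu(models: list[nn.Module], gpu: torch.device = torch.device('cuda')) -> None:
    # High-VRAM pattern used by this commit: move every component to the GPU once
    # at startup instead of loading/unloading each model around its pipeline stage.
    for m in models:
        m.eval()
        m.requires_grad_(False)  # inference only, matching requires_grad_(False) in app.py
        m.to(gpu)

# Hypothetical usage with the components named in app.py:
# keep_models_on_gpu([text_encoder, text_encoder_2, image_encoder, vae, transformer])

The trade-off is VRAM: all five models must fit on the GPU at the same time. The offloading calls are left commented out rather than deleted, so they can be restored for lower-memory setups.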