openfree committed
Commit 6dfa0f3 · verified · 1 Parent(s): b4189cf

Update app.py

Files changed (1)
  1. app.py +508 -708
app.py CHANGED
@@ -18,153 +18,227 @@ import safetensors.torch as sf
18
  import numpy as np
19
  import math
20
 
21
- # Hugging Face Space環境内かどうか確認
22
  IN_HF_SPACE = os.environ.get('SPACE_ID') is not None
23
 
24
- # GPU利用可能性を追跡する変数を追加
25
  GPU_AVAILABLE = False
26
  GPU_INITIALIZED = False
27
  last_update_time = time.time()
28
 
29
- # Hugging Face Space内の場合、spacesモジュールをインポート
30
  if IN_HF_SPACE:
31
  try:
32
  import spaces
33
- print("Hugging Face Space環境内で実行中、spacesモジュールをインポートしました")
34
 
35
- # GPU利用可能性をチェック
36
  try:
37
  GPU_AVAILABLE = torch.cuda.is_available()
38
- print(f"GPU利用可能: {GPU_AVAILABLE}")
39
  if GPU_AVAILABLE:
40
- print(f"GPUデバイス名: {torch.cuda.get_device_name(0)}")
41
- print(f"GPUメモリ: {torch.cuda.get_device_properties(0).total_memory / 1e9} GB")
42
 
43
- # 小規模なGPU操作を試行し、GPUが実際に使用可能か確認
44
- test_tensor = torch.zeros(1, device='cuda')
45
- test_tensor = test_tensor + 1
46
  del test_tensor
47
- print("GPUテスト操作に成功しました")
48
  else:
49
- print("警告: CUDAが利用可能と報告されていますが、GPUデバイスが検出されませんでした")
50
  except Exception as e:
51
  GPU_AVAILABLE = False
52
- print(f"GPU確認中にエラーが発生しました: {e}")
53
- print("CPUモードで実行します")
54
  except ImportError:
55
- print("spacesモジュールのインポートに失敗しました。Hugging Face Space環境外かもしれません")
56
  GPU_AVAILABLE = torch.cuda.is_available()
57
 
58
  from PIL import Image
59
  from diffusers import AutoencoderKLHunyuanVideo
60
- from transformers import LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPTokenizer
61
- from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode, vae_decode_fake
62
- from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, state_dict_weighted_merge, state_dict_offset_merge, generate_timestamp
 
63
  from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
64
  from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
65
- from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete, IN_HF_SPACE as MEMORY_IN_HF_SPACE
 
66
  from diffusers_helper.thread_utils import AsyncStream, async_run
67
  from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
68
- from transformers import SiglipImageProcessor, SiglipVisionModel
69
  from diffusers_helper.clip_vision import hf_clip_vision_encode
70
- from diffusers_helper.bucket_tools import find_nearest_bucket
71
 
72
  outputs_folder = './outputs/'
73
  os.makedirs(outputs_folder, exist_ok=True)
74
 
75
- # Spaces環境では、すべてのCUDA操作を遅延させる
76
  if not IN_HF_SPACE:
77
- # 非Spaces環境でのみCUDAメモリを取得
78
  try:
79
  if torch.cuda.is_available():
80
  free_mem_gb = get_cuda_free_memory_gb(gpu)
81
- print(f'空きVRAM {free_mem_gb} GB')
82
  else:
83
- free_mem_gb = 6.0 # デフォルト値
84
- print("CUDAが利用できません。デフォルトのメモリ設定を使用します")
85
  except Exception as e:
86
- free_mem_gb = 6.0 # デフォルト値
87
- print(f"CUDAメモリ取得中にエラーが発生しました: {e}、デフォルトのメモリ設定を使用します")
88
-
89
  high_vram = free_mem_gb > 60
90
- print(f'VRAM モード: {high_vram}')
91
  else:
92
- # Spaces環境ではデフォルト値を使用
93
- print("Spaces環境でデフォルトのメモリ設定を使用します")
94
  try:
95
  if GPU_AVAILABLE:
96
- free_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 * 0.9 # GPUメモリの90%を使用
97
- high_vram = free_mem_gb > 10 # より保守的な条件
98
  else:
99
- free_mem_gb = 6.0 # デフォルト値
100
  high_vram = False
101
  except Exception as e:
102
- print(f"GPUメモリ取得中にエラーが発生しました: {e}")
103
- free_mem_gb = 6.0 # デフォルト値
104
  high_vram = False
105
-
106
- print(f'GPUメモリ: {free_mem_gb:.2f} GB, 高VRAMモード: {high_vram}')
107
 
108
- # modelsグローバル変数でモデル参照を保存
109
  models = {}
110
- cpu_fallback_mode = not GPU_AVAILABLE # GPUが利用できない場合、CPU代替モードを使用
 
111
 
112
- # モデルロード関数を使用
113
  def load_models():
114
  global models, cpu_fallback_mode, GPU_INITIALIZED
115
 
116
  if GPU_INITIALIZED:
117
- print("モデルはすでに読み込まれています。重複読み込みをスキップします")
118
  return models
119
 
120
- print("モデルの読み込みを開始しています...")
121
-
122
  try:
123
- # GPU利用可能性に基づいてデバイスを設定
124
- device = 'cuda' if GPU_AVAILABLE and not cpu_fallback_mode else 'cpu'
125
- model_device = 'cpu' # 初期はCPUに読み込み
126
-
127
- # メモリ節約のために精度を下げる
128
  dtype = torch.float16 if GPU_AVAILABLE else torch.float32
129
  transformer_dtype = torch.bfloat16 if GPU_AVAILABLE else torch.float32
130
-
131
- print(f"使用デバイス: {device}, モデル精度: {dtype}, Transformer精度: {transformer_dtype}")
132
-
133
- # モデルを読み込み
134
- try:
135
- text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=dtype).to(model_device)
136
- text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=dtype).to(model_device)
137
- tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
138
- tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer_2')
139
- vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='vae', torch_dtype=dtype).to(model_device)
140
 
141
- feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
142
- image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=dtype).to(model_device)
143
 
144
- transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('tori29umai/FramePackI2V_HY_rotate_landscape', torch_dtype=transformer_dtype).to(model_device)
145
-
146
- print("すべてのモデルの読み込みに成功しました")
 
147
  except Exception as e:
148
- print(f"モデル読み込み中にエラーが発生しました: {e}")
149
- print("精度を下げて再試行します...")
150
-
151
- # 精度を下げて再試行
152
  dtype = torch.float32
153
  transformer_dtype = torch.float32
154
  cpu_fallback_mode = True
155
-
156
- text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=dtype).to('cpu')
157
- text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=dtype).to('cpu')
158
- tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
159
- tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer_2')
160
- vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='vae', torch_dtype=dtype).to('cpu')
161
 
162
- feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
163
- image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=dtype).to('cpu')
 
164
 
165
- transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('tori29umai/FramePackI2V_HY_rotate_landscape', torch_dtype=transformer_dtype).to('cpu')
166
-
167
- print("CPUモードですべてのモデルの読み込みに成功しました")
168
 
169
  vae.eval()
170
  text_encoder.eval()
@@ -177,9 +251,8 @@ def load_models():
177
  vae.enable_tiling()
178
 
179
  transformer.high_quality_fp32_output_for_inference = True
180
- print('transformer.high_quality_fp32_output_for_inference = True')
181
 
182
- # モデル精度を設定
183
  if not cpu_fallback_mode:
184
  transformer.to(dtype=transformer_dtype)
185
  vae.to(dtype=dtype)
@@ -196,7 +269,6 @@ def load_models():
196
  if torch.cuda.is_available() and not cpu_fallback_mode:
197
  try:
198
  if not high_vram:
199
- # DynamicSwapInstallerはhuggingfaceのenable_sequential_offloadと同じですが3倍高速です
200
  DynamicSwapInstaller.install_model(transformer, device=device)
201
  DynamicSwapInstaller.install_model(text_encoder, device=device)
202
  else:
@@ -205,14 +277,13 @@ def load_models():
205
  image_encoder.to(device)
206
  vae.to(device)
207
  transformer.to(device)
208
- print(f"モデルを{device}デバイスに移動することに成功しました")
209
  except Exception as e:
210
- print(f"モデルを{device}に移動中にエラーが発生しました: {e}")
211
- print("CPUモードにフォールバックします")
212
  cpu_fallback_mode = True
213
-
214
- # グローバル変数に保存
215
- models = {
216
  'text_encoder': text_encoder,
217
  'text_encoder_2': text_encoder_2,
218
  'tokenizer': tokenizer,
@@ -224,196 +295,168 @@ def load_models():
224
  }
225
 
226
  GPU_INITIALIZED = True
227
- print(f"モデルの読み込みが完了しました。実行モード: {'CPU' if cpu_fallback_mode else 'GPU'}")
 
228
  return models
229
  except Exception as e:
230
- print(f"モデル読み込みプロセスでエラーが発生しました: {e}")
231
  traceback.print_exc()
232
-
233
- # より詳細なエラー情報を記録
234
- error_info = {
235
- "error": str(e),
236
- "traceback": traceback.format_exc(),
237
- "cuda_available": torch.cuda.is_available(),
238
- "device": "cpu" if cpu_fallback_mode else "cuda",
239
- }
240
-
241
- # トラブルシューティングのためにエラー情報をファイルに保存
242
- try:
243
- with open(os.path.join(outputs_folder, "error_log.txt"), "w") as f:
244
- f.write(str(error_info))
245
- except:
246
- pass
247
-
248
- # アプリが引き続き実行を試みることができるよう空の辞書を返す
249
  cpu_fallback_mode = True
250
  return {}
251
 
252
 
253
- # Hugging Face Spaces GPU装飾子を使用
254
  if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE:
255
  try:
256
  @spaces.GPU
257
  def initialize_models():
258
- """@spaces.GPU装飾子内でモデルを初期化"""
259
  global GPU_INITIALIZED
260
  try:
261
  result = load_models()
262
  GPU_INITIALIZED = True
263
  return result
264
  except Exception as e:
265
- print(f"spaces.GPUを使用したモデル初期化中にエラーが発生しました: {e}")
266
- traceback.print_exc()
267
  global cpu_fallback_mode
268
  cpu_fallback_mode = True
269
- # 装飾子を使わずに再試行
270
  return load_models()
271
  except Exception as e:
272
- print(f"spaces.GPU装飾子の作成中にエラーが発生しました: {e}")
273
- # 装飾子がエラーの場合、非装飾子版を直接使用
274
  def initialize_models():
275
  return load_models()
276
 
277
 
278
- # 以下の関数内部でモデルの取得を遅延させる
279
  def get_models():
280
- """モデルを取得し、まだ読み込まれていない場合は読み込む"""
281
- global models, GPU_INITIALIZED
282
-
283
- # 並行読み込みを防ぐためのモデル読み込みロックを追加
284
  model_loading_key = "__model_loading__"
285
-
286
  if not models:
287
- # モデルが読み込み中かチェック
288
  if model_loading_key in globals():
289
- print("モデルは現在読み込み中です。お待ちください...")
290
- # モデル読み込み完了を待機
291
  import time
292
- start_wait = time.time()
293
- while not models and model_loading_key in globals():
294
  time.sleep(0.5)
295
- # 60秒以上待機したら読み込み失敗と判断
296
- if time.time() - start_wait > 60:
297
- print("モデル読み込み待機がタイムアウトしました")
298
  break
299
-
300
  if models:
301
  return models
302
-
303
  try:
304
- # 読み込みフラグを設定
305
  globals()[model_loading_key] = True
306
-
307
  if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE and not cpu_fallback_mode:
308
  try:
309
- print("@spaces.GPU装飾子を使用してモデルを読み込みます")
310
- models = initialize_models()
 
311
  except Exception as e:
312
- print(f"GPU装飾子を使用したモデル読み込みに失敗しました: {e}")
313
- print("直接モデルを読み込みます")
314
- models = load_models()
315
  else:
316
- print("モデルを直接読み込みます")
317
- models = load_models()
318
  except Exception as e:
319
- print(f"モデル読み込み中に予期しないエラーが発生しました: {e}")
320
- traceback.print_exc()
321
- # 空の辞書を確保
322
- models = {}
323
  finally:
324
- # 成功か失敗にかかわらず、読み込みフラグを削除
325
  if model_loading_key in globals():
326
  del globals()[model_loading_key]
327
-
328
  return models
329
 
330
 
331
- # 事前定義された解像度リスト(グローバル変数として追加)
332
  PREDEFINED_RESOLUTIONS = [
333
  (416, 960), (448, 864), (480, 832), (512, 768), (544, 704),
334
  (576, 672), (608, 640), (640, 608), (672, 576), (704, 544),
335
  (768, 512), (832, 480), (864, 448), (960, 416)
336
  ]
337
 
338
- # 最も近いアスペクト比を見つける関数
339
  def find_closest_aspect_ratio(width, height, target_resolutions):
340
  """
341
- 事前定義された解像度リストから、元の画像のアスペクト比に最も近い解像度を見つける
342
-
343
- 引数:
344
- width: 元の画像の幅
345
- height: 元の画像の高さ
346
- target_resolutions: 目標解像度のリスト(幅, 高さ)のタプル
347
-
348
- 戻り値:
349
- tuple: 最も近いアスペクト比の (target_width, target_height)
350
  """
351
  original_aspect = width / height
352
-
353
- # 各目標解像度に対してアスペクト比の差を計算
354
  min_diff = float('inf')
355
  closest_resolution = None
356
 
357
- for target_width, target_height in target_resolutions:
358
- target_aspect = target_width / target_height
359
  diff = abs(original_aspect - target_aspect)
360
-
361
  if diff < min_diff:
362
  min_diff = diff
363
- closest_resolution = (target_width, target_height)
364
-
365
  return closest_resolution
366
 
367
 
368
  stream = AsyncStream()
369
 
370
-
371
  @torch.no_grad()
372
- def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
 
373
  global last_update_time
374
  last_update_time = time.time()
375
-
376
- # 動画の長さを5秒以下に制限
377
  total_second_length = min(total_second_length, 3.0)
378
 
379
- # モデルを取得
380
  try:
381
- models = get_models()
382
- if not models:
383
- error_msg = "モデルの読み込みに失敗しました。詳細情報はログを確認してください"
384
- print(error_msg)
385
- stream.output_queue.push(('error', error_msg))
386
  stream.output_queue.push(('end', None))
387
  return
388
 
389
- text_encoder = models['text_encoder']
390
- text_encoder_2 = models['text_encoder_2']
391
- tokenizer = models['tokenizer']
392
- tokenizer_2 = models['tokenizer_2']
393
- vae = models['vae']
394
- feature_extractor = models['feature_extractor']
395
- image_encoder = models['image_encoder']
396
- transformer = models['transformer']
397
  except Exception as e:
398
- error_msg = f"モデル取得中にエラーが発生しました: {e}"
399
- print(error_msg)
400
  traceback.print_exc()
401
- stream.output_queue.push(('error', error_msg))
402
  stream.output_queue.push(('end', None))
403
  return
404
-
405
- # デバイスを決定
406
- device = 'cuda' if GPU_AVAILABLE and not cpu_fallback_mode else 'cpu'
407
- print(f"推論に使用するデバイス: {device}")
408
-
409
- # CPUモードに合わせてパラメータを調整
410
  if cpu_fallback_mode:
411
- print("CPUモードではより軽量なパラメータを使用します")
412
- # CPU処理を高速化するために処理サイズを小さくする
413
  latent_window_size = min(latent_window_size, 5)
414
- steps = min(steps, 15) # ステップ数を減らす
415
- total_second_length = min(total_second_length, 2.0) # CPUモードでは動画の長さをさらに制限
416
-
417
  total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
418
  total_latent_sections = int(max(round(total_latent_sections), 1))
419
 
@@ -423,22 +466,20 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
423
  history_latents = None
424
  total_generated_latent_frames = 0
425
 
426
- stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, '開始中 ...'))))
427
 
428
  try:
429
- # GPUをクリーン
430
  if not high_vram and not cpu_fallback_mode:
431
  try:
432
  unload_complete_models(
433
  text_encoder, text_encoder_2, image_encoder, vae, transformer
434
  )
435
  except Exception as e:
436
- print(f"モデルのアンロード中にエラーが発生しました: {e}")
437
- # 処理を中断せずに続行
438
 
439
- # テキストエンコーディング
440
  last_update_time = time.time()
441
- stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'テキストエンコーディング中 ...'))))
442
 
443
  try:
444
  if not high_vram and not cpu_fallback_mode:
@@ -446,7 +487,6 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
446
  load_model_as_complete(text_encoder_2, target_device=device)
447
 
448
  llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
449
-
450
  if cfg == 1:
451
  llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
452
  else:
@@ -455,85 +495,72 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
455
  llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
456
  llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)
457
  except Exception as e:
458
- error_msg = f"テキストエンコーディング中にエラーが発生しました: {e}"
459
- print(error_msg)
460
  traceback.print_exc()
461
- stream.output_queue.push(('error', error_msg))
462
  stream.output_queue.push(('end', None))
463
  return
464
 
465
- # 入力画像の処理
466
  try:
467
  H, W, C = input_image.shape
 
468
 
469
- # 事前定義された解像度から最も近いアスペクト比を見つける
470
- target_width, target_height = find_closest_aspect_ratio(W, H, PREDEFINED_RESOLUTIONS)
471
-
472
- # height, width変数も保持する(元のコードとの互換性のため)
473
- width = target_width
474
- height = target_height
475
-
476
- # CPUモードの場合、処理サイズを小さくする
477
  if cpu_fallback_mode:
478
- scale_factor = min(320 / target_height, 320 / target_width)
479
- target_height = int(target_height * scale_factor)
480
- target_width = int(target_width * scale_factor)
481
- # 縮小後の値も更新
482
- height = target_height
483
- width = target_width
484
-
485
- print(f'元の画像サイズ: {W}x{H}, リサイズ先: {target_width}x{target_height}')
486
 
487
- # 選択された解像度にリサイズ
488
- input_image_np = resize_and_center_crop(input_image, target_width=target_width, target_height=target_height)
489
  Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
490
 
491
  input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
492
  input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
493
  except Exception as e:
494
- error_msg = f"画像処理中にエラーが発生しました: {e}"
495
- print(error_msg)
496
  traceback.print_exc()
497
- stream.output_queue.push(('error', error_msg))
498
  stream.output_queue.push(('end', None))
499
  return
500
 
501
- # VAEエンコーディング
502
  last_update_time = time.time()
503
- stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAEエンコーディング中 ...'))))
504
 
505
  try:
506
  if not high_vram and not cpu_fallback_mode:
507
  load_model_as_complete(vae, target_device=device)
508
-
509
  start_latent = vae_encode(input_image_pt, vae)
510
  except Exception as e:
511
- error_msg = f"VAEエンコーディング中にエラーが発生しました: {e}"
512
- print(error_msg)
513
  traceback.print_exc()
514
- stream.output_queue.push(('error', error_msg))
515
  stream.output_queue.push(('end', None))
516
  return
517
 
518
  # CLIP Vision
519
  last_update_time = time.time()
520
- stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Visionエンコーディング中 ...'))))
521
 
522
  try:
523
  if not high_vram and not cpu_fallback_mode:
524
  load_model_as_complete(image_encoder, target_device=device)
525
-
526
  image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
527
  image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
528
  except Exception as e:
529
- error_msg = f"CLIP Visionエンコーディング中にエラーが発生しました: {e}"
530
- print(error_msg)
531
  traceback.print_exc()
532
- stream.output_queue.push(('error', error_msg))
533
  stream.output_queue.push(('end', None))
534
  return
535
 
536
- # データ型
537
  try:
538
  llama_vec = llama_vec.to(transformer.dtype)
539
  llama_vec_n = llama_vec_n.to(transformer.dtype)
@@ -541,75 +568,76 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
541
  clip_l_pooler_n = clip_l_pooler_n.to(transformer.dtype)
542
  image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
543
  except Exception as e:
544
- error_msg = f"データ型変換中にエラーが発生しました: {e}"
545
- print(error_msg)
546
  traceback.print_exc()
547
- stream.output_queue.push(('error', error_msg))
548
  stream.output_queue.push(('end', None))
549
  return
550
 
551
- # サンプリング
552
  last_update_time = time.time()
553
- stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'サンプリング開始 ...'))))
554
 
555
  rnd = torch.Generator("cpu").manual_seed(seed)
556
  num_frames = latent_window_size * 4 - 3
557
 
558
  try:
559
- history_latents = torch.zeros(size=(1, 16, 1 + 2 + 16, height // 8, width // 8), dtype=torch.float32).cpu()
560
  history_pixels = None
561
  total_generated_latent_frames = 0
562
  except Exception as e:
563
- error_msg = f"履歴状態の初期化中にエラーが発生しました: {e}"
564
- print(error_msg)
565
  traceback.print_exc()
566
- stream.output_queue.push(('error', error_msg))
567
  stream.output_queue.push(('end', None))
568
  return
569
 
570
- latent_paddings = reversed(range(total_latent_sections))
571
-
572
  if total_latent_sections > 4:
573
- # 理論的にはlatent_paddingsは上記のシーケンスに従うべきですが、
574
- # total_latent_sections > 4の場合、展開するよりもいくつかの項目を複製する方が
575
- # 良い結果になるようです
576
- # 比較するために、latent_paddings = list(reversed(range(total_latent_sections)))を
577
- # 使用して下記のトリックを削除することもできます
578
- latent_paddings = [3] + [2] * (total_latent_sections - 3) + [1, 0]
579
 
580
  for latent_padding in latent_paddings:
581
  last_update_time = time.time()
582
- is_last_section = latent_padding == 0
583
  latent_padding_size = latent_padding * latent_window_size
584
 
585
  if stream.input_queue.top() == 'end':
586
- # 終了時に現在の動画を保存することを確認
587
  if history_pixels is not None and total_generated_latent_frames > 0:
588
  try:
589
- output_filename = os.path.join(outputs_folder, f'{job_id}_final_{total_generated_latent_frames}.mp4')
590
- save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=18)
591
- stream.output_queue.push(('file', output_filename))
592
  except Exception as e:
593
- print(f"最終動画保存中にエラーが発生しました: {e}")
594
-
595
  stream.output_queue.push(('end', None))
596
  return
597
 
598
- print(f'latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}')
599
 
600
  try:
601
  indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0)
602
- clean_latent_indices_pre, blank_indices, latent_indices, clean_latent_indices_post, clean_latent_2x_indices, clean_latent_4x_indices = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1)
603
- clean_latent_indices = torch.cat([clean_latent_indices_pre, clean_latent_indices_post], dim=1)
 
604
 
605
  clean_latents_pre = start_latent.to(history_latents)
606
- clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16, :, :].split([1, 2, 16], dim=2)
607
- clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2)
608
  except Exception as e:
609
- error_msg = f"サンプリングデータ準備中にエラーが発生しました: {e}"
610
- print(error_msg)
611
  traceback.print_exc()
612
- # 完全に終了せずに次のイテレーションを試みる
613
  if last_output_filename:
614
  stream.output_queue.push(('file', last_output_filename))
615
  continue
@@ -617,17 +645,17 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
617
  if not high_vram and not cpu_fallback_mode:
618
  try:
619
  unload_complete_models()
620
- move_model_to_device_with_memory_preservation(transformer, target_device=device, preserved_memory_gb=gpu_memory_preservation)
 
 
621
  except Exception as e:
622
- print(f"transformerGPUに移動中にエラーが発生しました: {e}")
623
- # パフォーマンスに影響する可能性はありますが、終了する必要はないので続行
624
 
625
  if use_teacache and not cpu_fallback_mode:
626
  try:
627
  transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
628
  except Exception as e:
629
- print(f"teacache初期化中にエラーが発生しました: {e}")
630
- # teacacheを無効にして続行
631
  transformer.initialize_teacache(enable_teacache=False)
632
  else:
633
  transformer.initialize_teacache(enable_teacache=False)
@@ -635,65 +663,39 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
635
  def callback(d):
636
  global last_update_time
637
  last_update_time = time.time()
638
-
639
  try:
640
- # まず停止信号があるかチェック
641
- print(f"【デバッグ】コールバック関数: ステップ {d['i']}, 停止信号のチェック")
642
- try:
643
- queue_top = stream.input_queue.top()
644
- print(f"【デバッグ】コールバック関数: キュー先頭信号 = {queue_top}")
645
-
646
- if queue_top == 'end':
647
- print("【デバッグ】コールバック関数: 停止信号を検出、中断準備中...")
648
- try:
649
- stream.output_queue.push(('end', None))
650
- print("【デバッグ】コールバック関数: 出力キューにend信号を正常に送信")
651
- except Exception as e:
652
- print(f"【デバッグ】コールバック関数: 出力キューにend信号送信中にエラー: {e}")
653
-
654
- print("【デバッグ】コールバック関数: KeyboardInterrupt例外を投げる準備")
655
- raise KeyboardInterrupt('ユーザーによるタスク停止')
656
- except Exception as e:
657
- print(f"【デバッグ】コールバック関数: キュー先頭信号チェック中にエラー: {e}")
658
-
659
- preview = d['denoised']
660
- preview = vae_decode_fake(preview)
661
-
662
- preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
663
- preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')
664
-
665
- current_step = d['i'] + 1
666
- percentage = int(100.0 * current_step / steps)
667
- hint = f'サンプリング中 {current_step}/{steps}'
668
- desc = f'総生成フレーム数: {int(max(0, total_generated_latent_frames * 4 - 3))}, 動画長: {max(0, (total_generated_latent_frames * 4 - 3) / 30) :.2f} 秒 (FPS-30). 動画を現在拡張中...'
669
- stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
670
- except KeyboardInterrupt as e:
671
- # 中断例外をキャッチして再スローし、サンプリング関数に伝播されるようにする
672
- print(f"【デバッグ】コールバック関数: KeyboardInterruptをキャッチ: {e}")
673
- print("【デバッグ】コールバック関数: 中断例外を再スロー、サンプリング関数に伝播")
674
  raise
675
- except Exception as e:
676
- print(f"【デバッグ】コールバック関数でエラー: {e}")
677
- # サンプリングプロセスを中断しない
678
- print(f"【デバッグ】コールバック関数: ステップ {d['i']} 完了")
679
  return
680
 
681
  try:
682
- sampling_start_time = time.time()
683
- print(f"サンプリング開始、デバイス: {device}, データ型: {transformer.dtype}, TeaCache使用: {use_teacache and not cpu_fallback_mode}")
684
-
685
  try:
686
- print("【デバッグ】sample_hunyuanサンプリングプロセス開始")
687
  generated_latents = sample_hunyuan(
688
  transformer=transformer,
689
  sampler='unipc',
690
- width=width,
691
- height=height,
692
  frames=num_frames,
693
  real_guidance_scale=cfg,
694
  distilled_guidance_scale=gs,
695
  guidance_rescale=rs,
696
- # shift=3.0,
697
  num_inference_steps=steps,
698
  generator=rnd,
699
  prompt_embeds=llama_vec,
@@ -708,181 +710,119 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
708
  latent_indices=latent_indices,
709
  clean_latents=clean_latents,
710
  clean_latent_indices=clean_latent_indices,
711
- clean_latents_2x=clean_latents_2x,
712
- clean_latent_2x_indices=clean_latent_2x_indices,
713
- clean_latents_4x=clean_latents_4x,
714
- clean_latent_4x_indices=clean_latent_4x_indices,
715
- callback=callback,
716
  )
717
-
718
- print(f"【デバッグ】サンプリング完了、所要時間: {time.time() - sampling_start_time:.2f}秒")
719
  except KeyboardInterrupt as e:
720
- # ユーザーによる中断
721
- print(f"【デバッグ】KeyboardInterruptをキャッチ: {e}")
722
- print("【デバッグ】ユーザーによるサンプリングプロセス中断、中断ロジック処理中")
723
-
724
- # 既に生成された動画がある場合、最後に生成された動画を返す
725
  if last_output_filename:
726
- print(f"【デバッグ】部分的に生成された動画あり: {last_output_filename}、この動画を返します")
727
  stream.output_queue.push(('file', last_output_filename))
728
- error_msg = "ユーザーにより生成プロセスが中断されましたが、部分的な動画は生成されています"
729
  else:
730
- print("【デバッグ】部分的に生成された動画なし、中断メッセージを返します")
731
- error_msg = "ユーザーにより生成プロセスが中断され、動画は生成されていません"
732
-
733
- print(f"【デバッグ】エラーメッセージを送信: {error_msg}")
734
- stream.output_queue.push(('error', error_msg))
735
- print("【デバッグ】end信号を送信")
736
  stream.output_queue.push(('end', None))
737
- print("【デバッグ】中断処理完了、リターン")
738
  return
739
  except Exception as e:
740
- print(f"サンプリングプロセス中にエラーが発生しました: {e}")
741
  traceback.print_exc()
742
-
743
- # 既に生成された動画がある場合、最後に生成された動画を返す
744
  if last_output_filename:
745
  stream.output_queue.push(('file', last_output_filename))
746
-
747
- # エラーメッセージを作成
748
- error_msg = f"サンプリングプロセス中にエラーが発生しましたが、部分的に生成された動画を返します: {e}"
749
- stream.output_queue.push(('error', error_msg))
750
  else:
751
- # 生成された動画がない場合、エラーメッセージを返す
752
- error_msg = f"サンプリングプロセス中にエラーが発生し、動画を生成できませんでした: {e}"
753
- stream.output_queue.push(('error', error_msg))
754
-
755
  stream.output_queue.push(('end', None))
756
  return
757
 
758
  try:
759
  if is_last_section:
760
  generated_latents = torch.cat([start_latent.to(generated_latents), generated_latents], dim=2)
761
-
762
  total_generated_latent_frames += int(generated_latents.shape[2])
763
  history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
764
  except Exception as e:
765
- error_msg = f"生成された潜在変数の処理中にエラーが発生しました: {e}"
766
- print(error_msg)
767
  traceback.print_exc()
768
-
769
  if last_output_filename:
770
  stream.output_queue.push(('file', last_output_filename))
771
- stream.output_queue.push(('error', error_msg))
772
  stream.output_queue.push(('end', None))
773
  return
774
 
775
  if not high_vram and not cpu_fallback_mode:
776
  try:
777
- offload_model_from_device_for_memory_preservation(transformer, target_device=device, preserved_memory_gb=8)
 
 
778
  load_model_as_complete(vae, target_device=device)
779
  except Exception as e:
780
- print(f"モデルメモリ管理中にエラーが発生しました: {e}")
781
- # 続行
782
 
783
  try:
784
- real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :]
785
  except Exception as e:
786
- error_msg = f"履歴潜在変数の処理中にエラーが発生しました: {e}"
787
- print(error_msg)
788
-
789
  if last_output_filename:
790
  stream.output_queue.push(('file', last_output_filename))
791
  continue
792
 
793
  try:
794
- vae_start_time = time.time()
795
- print(f"VAEデコード開始、潜在変数形状: {real_history_latents.shape}")
796
-
797
  if history_pixels is None:
798
  history_pixels = vae_decode(real_history_latents, vae).cpu()
799
  else:
800
  section_latent_frames = (latent_window_size * 2 + 1) if is_last_section else (latent_window_size * 2)
801
  overlapped_frames = latent_window_size * 4 - 3
802
-
803
  current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu()
804
  history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames)
805
 
806
- print(f"VAEデコード完了、所要時間: {time.time() - vae_start_time:.2f}秒")
807
-
808
- if not high_vram and not cpu_fallback_mode:
809
- try:
810
- unload_complete_models()
811
- except Exception as e:
812
- print(f"モデルのアンロード中にエラーが発生しました: {e}")
813
-
814
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
815
-
816
- save_start_time = time.time()
817
  save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=18)
818
- print(f"動画保存完了、所要時間: {time.time() - save_start_time:.2f}秒")
819
-
820
- print(f'デコード完了。現在の潜在変数形状 {real_history_latents.shape}; ピクセル形状 {history_pixels.shape}')
821
-
822
  last_output_filename = output_filename
823
  stream.output_queue.push(('file', output_filename))
824
  except Exception as e:
825
- print(f"動画のデコードまたは保存中にエラーが発生しました: {e}")
826
  traceback.print_exc()
827
-
828
- # 既に生成された動画がある場合、最後に生成された動画を返す
829
  if last_output_filename:
830
  stream.output_queue.push(('file', last_output_filename))
831
-
832
- # エラー情報を記録
833
- error_msg = f"動画のデコードまたは保存中にエラーが発生しました: {e}"
834
- stream.output_queue.push(('error', error_msg))
835
-
836
- # 次のイテレーションを試みる
837
  continue
838
 
839
  if is_last_section:
840
  break
841
  except Exception as e:
842
- print(f"【デバッグ】処理中にエラーが発生しました: {e}, タイプ: {type(e)}")
843
- print(f"【デバッグ】エラー詳細:")
844
  traceback.print_exc()
845
-
846
- # 中断型例外かチェック
847
- if isinstance(e, KeyboardInterrupt):
848
- print("【デバッグ】外部KeyboardInterrupt例外を検出")
849
-
850
  if not high_vram and not cpu_fallback_mode:
851
  try:
852
- print("【デバッグ】リソース解放のためモデルをアンロード")
853
  unload_complete_models(
854
  text_encoder, text_encoder_2, image_encoder, vae, transformer
855
  )
856
- print("【デバッグ】モデルのアンロードに成功")
857
- except Exception as unload_error:
858
- print(f"【デバッグ】モデルのアンロード中にエラー: {unload_error}")
859
- pass
860
-
861
- # 既に生成された動画がある場合、最後に生成された動画を返す
862
  if last_output_filename:
863
- print(f"【デバッグ】外部例外処理: 生成済み部分動画を返す {last_output_filename}")
864
  stream.output_queue.push(('file', last_output_filename))
865
- else:
866
- print("【デバッグ】外部例外処理: 生成済み動画が見つかりません")
867
-
868
- # エラーメッセージを返す
869
- error_msg = f"処理中にエラーが発生しました: {e}"
870
- print(f"【デバッグ】外部例外処理: エラーメッセージを送信: {error_msg}")
871
- stream.output_queue.push(('error', error_msg))
872
 
873
- # 常にend信号を返すことを確認
874
- print("【デバッグ】ワーカー関数終了、end信号を送信")
875
  stream.output_queue.push(('end', None))
876
- return
877
 
878
 
879
- # Hugging Face Spaces GPU装飾子を使用してプロセス関数を処理
880
  if IN_HF_SPACE and 'spaces' in globals():
881
  @spaces.GPU
882
  def process_with_gpu(input_image, prompt, n_prompt, seed, total_second_length, use_teacache):
883
  global stream
884
- assert input_image is not None, '入力画像がありません!'
885
 
 
886
  latent_window_size = 9
887
  steps = 25
888
  cfg = 1.0
@@ -890,79 +830,60 @@ if IN_HF_SPACE and 'spaces' in globals():
890
  rs = 0.0
891
  gpu_memory_preservation = 6
892
 
893
-
894
- # UI状態の初期化
895
  yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
896
-
897
  try:
898
  stream = AsyncStream()
899
-
900
- # ワーカーを非同期で起動
901
- async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache)
902
 
903
  output_filename = None
904
  prev_output_filename = None
905
  error_message = None
906
 
907
- # ワーカーの出力を継続的にチェック
908
  while True:
909
  try:
910
  flag, data = stream.output_queue.next()
911
-
912
  if flag == 'file':
913
  output_filename = data
914
  prev_output_filename = output_filename
915
- # ファイル成功時にエラー表示をクリア
916
  yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
917
-
918
- if flag == 'progress':
919
  preview, desc, html = data
920
- # 進捗更新時にエラーメッセージを変更せず、停止ボタンがインタラクティブであることを確認
921
  yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
922
-
923
- if flag == 'error':
924
  error_message = data
925
- print(f"エラーメッセージを受信: {error_message}")
926
- # 即時表示せず、end信号を待機
927
-
928
- if flag == 'end':
929
- # 最後の動画ファイルがある場合、確実に返す
930
  if output_filename is None and prev_output_filename is not None:
931
  output_filename = prev_output_filename
932
-
933
- # エラーメッセージがある場合、わかりやすいエラー表示を作成
934
  if error_message:
935
  yield output_filename, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
936
  else:
937
- # 成功時にエラー表示をしない
938
  yield output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
939
  break
940
  except Exception as e:
941
- print(f"出力処理中にエラーが発生しました: {e}")
942
- # 長時間更新がないか確認
943
- current_time = time.time()
944
- if current_time - last_update_time > 60: # 60秒間更新がない場合、処理がフリーズした可能性
945
- print(f"処理がフリーズした可能性があります。{current_time - last_update_time:.1f}秒間更新がありません")
946
-
947
- # 部分的に生成された動画がある場合、それを返す
948
  if prev_output_filename:
949
  yield prev_output_filename, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
950
  else:
951
  yield None, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
952
  break
953
-
954
  except Exception as e:
955
- print(f"処理の開始中にエラーが発生しました: {e}")
956
  traceback.print_exc()
957
- error_msg = str(e)
958
-
959
  yield None, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
960
 
961
  process = process_with_gpu
962
  else:
963
  def process(input_image, prompt, n_prompt, seed, total_second_length, use_teacache):
964
  global stream
965
- assert input_image is not None, '入力画像がありません!'
966
 
967
  latent_window_size = 9
968
  steps = 25
@@ -971,373 +892,252 @@ else:
971
  rs = 0.0
972
  gpu_memory_preservation = 6
973
 
974
- # UI状態の初期化
975
  yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
976
-
977
  try:
978
  stream = AsyncStream()
979
-
980
- # ワーカーを非同期で起動
981
- async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache)
982
 
983
  output_filename = None
984
  prev_output_filename = None
985
  error_message = None
986
 
987
- # ワーカーの出力を継続的にチェック
988
  while True:
989
  try:
990
  flag, data = stream.output_queue.next()
991
-
992
  if flag == 'file':
993
  output_filename = data
994
  prev_output_filename = output_filename
995
- # ファイル成功時にエラー表示をクリア
996
  yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
997
-
998
- if flag == 'progress':
999
  preview, desc, html = data
1000
- # 進捗更新時にエラーメッセージを変更せず、停止ボタンがインタラクティブであることを確認
1001
  yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
1002
-
1003
- if flag == 'error':
1004
  error_message = data
1005
- print(f"エラーメッセージを受信: {error_message}")
1006
- # 即時表示せず、end信号を待機
1007
-
1008
- if flag == 'end':
1009
- # 最後の動画ファイルがある場合、確実に返す
1010
  if output_filename is None and prev_output_filename is not None:
1011
  output_filename = prev_output_filename
1012
-
1013
- # エラーメッセージがある場合、わかりやすいエラー表示を作成
1014
  if error_message:
1015
  yield output_filename, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
1016
  else:
1017
- # 成功時にエラー表示をしない
1018
  yield output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
1019
  break
1020
  except Exception as e:
1021
- print(f"出力処理中にエラーが発生しました: {e}")
1022
- # 長時間更新がないか確認
1023
- current_time = time.time()
1024
- if current_time - last_update_time > 60: # 60秒間更新がない場合、処理がフリーズした可能性
1025
- print(f"処理がフリーズした可能性があります。{current_time - last_update_time:.1f}秒間更新がありません")
1026
-
1027
- # 部分的に生成された動画がある場合、それを返す
1028
  if prev_output_filename:
1029
  yield prev_output_filename, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
1030
  else:
1031
  yield None, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
1032
  break
1033
-
1034
  except Exception as e:
1035
- print(f"処理の開始中にエラーが発生しました: {e}")
1036
  traceback.print_exc()
1037
- error_msg = str(e)
1038
-
1039
  yield None, gr.update(visible=False), gr.update(), gr.update(interactive=True), gr.update(interactive=False)
1040
-
1041
 
1042
  def end_process():
1043
- """生成プロセスを停止する関数 - キューに'end'信号を送信して生成を中断します"""
1044
- print("【デバッグ】ユーザーが停止ボタンをクリックしました。停止信号を送信中...")
1045
- # streamが初期化されていることを確認
 
 
1046
  if 'stream' in globals() and stream is not None:
1047
- # 送信前にキューの状態を確認
1048
  try:
1049
  current_top = stream.input_queue.top()
1050
- print(f"【デバッグ】現在のキュー先頭信号: {current_top}")
1051
  except Exception as e:
1052
- print(f"【デバッグ】キュー状態確認中にエラー: {e}")
1053
-
1054
- # end信号を送信
1055
  try:
1056
  stream.input_queue.push('end')
1057
- print("【デバッグ】キューにend信号を正常に送信しました")
1058
-
1059
- # 信号が正常に送信されたか確認
1060
- try:
1061
- current_top_after = stream.input_queue.top()
1062
- print(f"【デバッグ】送信後のキュー先頭信号: {current_top_after}")
1063
- except Exception as e:
1064
- print(f"【デバッグ】送信後のキュー状態確認中にエラー: {e}")
1065
-
1066
  except Exception as e:
1067
- print(f"【デバッグ】キューへのend信号送信に失敗: {e}")
1068
  else:
1069
- print("【デバッグ】警告: streamが初期化されていないため、停止信号を送信できません")
1070
  return None
1071
 
1072
 
1073
  quick_prompts = [
1074
- 'The camera smoothly orbits around the center of the scene, keeping the center point fixed and always in view',
1075
  ]
1076
- quick_prompts = [[x] for x in quick_prompts]
1077
 
1078
-
1079
- # カスタムCSSを作成し、レスポンシブレイアウトのサポートを追加
1080
  def make_custom_css():
1081
- progress_bar_css = make_progress_bar_css()
1082
-
1083
- responsive_css = """
1084
- /* 基本レスポンシブ設定 */
 
 
1085
  #app-container {
1086
- max-width: 100%;
1087
  margin: 0 auto;
 
 
1088
  }
1089
-
1090
-
1091
- /* ページタイトルのスタイル */
1092
  h1 {
1093
  font-size: 2rem;
1094
  text-align: center;
1095
  margin-bottom: 1rem;
 
 
1096
  }
1097
-
1098
- /* ボタンのスタイル */
1099
  .start-btn, .stop-btn {
1100
  min-height: 45px;
1101
  font-size: 1rem;
 
1102
  }
1103
-
1104
- /* モバイルデバイスのスタイル - 小画面 */
1105
- @media (max-width: 768px) {
1106
- h1 {
1107
- font-size: 1.5rem;
1108
- margin-bottom: 0.5rem;
1109
- }
1110
-
1111
- /* 単一カラムレイアウト */
1112
- .mobile-full-width {
1113
- flex-direction: column !important;
1114
- }
1115
-
1116
- .mobile-full-width > .gr-block {
1117
- min-width: 100% !important;
1118
- flex-grow: 1;
1119
- }
1120
-
1121
- /* 動画サイズの調整 */
1122
- .video-container {
1123
- height: auto !important;
1124
- }
1125
-
1126
- /* ボタンサイズの調整 */
1127
- .button-container button {
1128
- min-height: 50px;
1129
- font-size: 1rem;
1130
- touch-action: manipulation;
1131
- }
1132
-
1133
- /* スライダーの調整 */
1134
- .slider-container input[type="range"] {
1135
- height: 30px;
1136
- }
1137
- }
1138
-
1139
- /* タブレットデバイスのスタイル */
1140
- @media (min-width: 769px) and (max-width: 1024px) {
1141
- .tablet-adjust {
1142
- width: 48% !important;
1143
- }
1144
  }
1145
-
1146
- /* ダークモードサポート */
1147
- @media (prefers-color-scheme: dark) {
1148
- .dark-mode-text {
1149
- color: #f0f0f0;
1150
- }
1151
-
1152
- .dark-mode-bg {
1153
- background-color: #2a2a2a;
1154
- }
1155
  }
1156
-
1157
- /* アクセシビリティの向上 */
1158
- button, input, select, textarea {
1159
- font-size: 16px; /* iOSでの拡大を防止 */
1160
- }
1161
-
1162
- /* タッチ操作の最適化 */
1163
- button, .interactive-element {
1164
- min-height: 44px;
1165
- min-width: 44px;
1166
  }
1167
-
1168
- /* コントラストの向上 */
1169
- .high-contrast {
1170
- color: #fff;
1171
- background-color: #000;
1172
  }
1173
-
1174
- /* プログレスバーのスタイル強化 */
1175
  .progress-container {
1176
- margin-top: 10px;
1177
- margin-bottom: 10px;
1178
  }
1179
-
1180
- /* エラーメッセージのスタイル */
1181
- #error-message {
1182
- color: #ff4444;
1183
- font-weight: bold;
1184
- padding: 10px;
1185
- border-radius: 4px;
1186
- margin-top: 10px;
1187
- }
1188
-
1189
- /* エラーコンテナの正しい表示 */
1190
  .error-message {
1191
- background-color: rgba(255, 0, 0, 0.1);
 
 
1192
  padding: 10px;
1193
  border-radius: 4px;
1194
  margin-top: 10px;
1195
- border: 1px solid #ffcccc;
1196
- }
1197
-
1198
- /* 多言語エラーメッセージの処理 */
1199
- .error-msg-en, .error-msg-ja {
1200
- font-weight: bold;
1201
  }
1202
-
1203
- /* エラーアイコン */
1204
  .error-icon {
1205
- color: #ff4444;
1206
- font-size: 18px;
1207
  margin-right: 8px;
1208
  }
1209
-
1210
- /* 空のエラーメッセージが背景とボーダーを表示しないことを確認 */
1211
- #error-message:empty {
1212
- background-color: transparent;
1213
- border: none;
1214
- padding: 0;
1215
- margin: 0;
1216
  }
1217
-
1218
- /* Gradioのデフォルトエラー表示の修正 */
1219
- .error {
1220
- display: none !important;
 
1221
  }
1222
  """
1223
-
1224
- # CSSを結合
1225
- combined_css = progress_bar_css + responsive_css
1226
- return combined_css
1227
-
1228
 
1229
  css = make_custom_css()
 
1230
  block = gr.Blocks(css=css).queue()
1231
  with block:
1232
- gr.HTML("<h1 id='title'>FramePack_rotate_landscape - 風景画像回転動画生成</h1>")
1233
 
1234
- # mobile-full-widthクラスを持つレスポンシブ行を使用
1235
  with gr.Row(elem_classes="mobile-full-width"):
1236
- with gr.Column(scale=1, elem_classes="mobile-full-width"):
1237
- # 二言語ラベルを追加 - 画像アップロード
1238
  input_image = gr.Image(
1239
- sources='upload',
1240
- type="numpy",
1241
- label="画像をアップロード / Upload Image",
1242
- elem_id="input-image",
1243
  height=320
1244
  )
1245
-
1246
  prompt = gr.Textbox(
1247
- label="プロンプト / Prompt",
1248
- value='The camera smoothly orbits around the center of the scene, keeping the center point fixed and always in view',
1249
- elem_id="prompt-input"
1250
  )
1251
-
1252
  example_quick_prompts = gr.Dataset(
1253
- samples=quick_prompts,
1254
- label='クイックプロンプト一覧 / Quick Prompts',
1255
- samples_per_page=1000,
1256
  components=[prompt]
1257
  )
1258
- example_quick_prompts.click(lambda x: x[0], inputs=[example_quick_prompts], outputs=prompt, show_progress=False, queue=False)
 
1259
 
1260
- # スタイルと二言語ラベルを追加したボタン
1261
  with gr.Row(elem_classes="button-container"):
1262
  start_button = gr.Button(
1263
- value="生成開始 / Generate",
1264
- elem_classes="start-btn",
1265
- elem_id="start-button",
1266
  variant="primary"
1267
  )
1268
-
1269
  end_button = gr.Button(
1270
- value="停止 / Stop",
1271
- elem_classes="stop-btn",
1272
- elem_id="stop-button",
1273
  interactive=False
1274
  )
1275
 
1276
- # パラメータ設定エリア
1277
- with gr.Group():
1278
- use_teacache = gr.Checkbox(
1279
- label='TeaCacheを使用 / Use TeaCache',
1280
- value=True,
1281
- info='処理速度が速くなりますが、指や手の生成品質が若干低下する可能性があります。 / Faster speed, but may result in slightly worse finger and hand generation.'
1282
- )
1283
-
1284
- n_prompt = gr.Textbox(label="ネガティブプロンプト / Negative Prompt", value="", visible=False) # 使用しない
1285
-
1286
- seed = gr.Number(
1287
- label="シード値 / Seed",
1288
- value=31337,
1289
- precision=0
1290
- )
1291
 
1292
- # タッチ操作を最適化するためにslider-containerクラスを追加
1293
- with gr.Group(elem_classes="slider-container"):
1294
- total_second_length = gr.Slider(
1295
- label="動画の長さ(最大3秒) / Video Length (max 3 seconds)",
1296
- minimum=0.5,
1297
- maximum=3,
1298
- value=1,
1299
- step=0.1
1300
- )
1301
-
1302
- # 右側のプレビューと結果カラム
1303
- with gr.Column(scale=1, elem_classes="mobile-full-width"):
1304
- # プレビュー画像
1305
  preview_image = gr.Image(
1306
- label="プレビュー / Preview",
1307
- height=200,
1308
  visible=False,
1309
  elem_classes="preview-container"
1310
  )
1311
-
1312
- # 動画結果コンテナ
1313
  result_video = gr.Video(
1314
- label="生成された動画 / Generated Video",
1315
- autoplay=True,
1316
- show_share_button=True, # 共有ボタンを追加
1317
- height=512,
1318
  loop=True,
1319
- elem_classes="video-container",
1320
- elem_id="result-video"
 
1321
  )
1322
-
1323
- gr.HTML("<div id='sampling_note' class='note'>注意:逆順サンプリングのため、終了動作が開始動作より先に生成されます。開始動作が動画に表示されていない場合は、しばらくお待ちください。後で生成されます。</div>")
1324
- gr.HTML("<div id='sampling_note' class='note'>Note that the ending actions will be generated before the starting actions due to the inverted sampling. If the starting action is not in the video, you just need to wait, and it will be generated later.</div>")
 
 
1325
 
1326
- # 進捗インジケーター
1327
  with gr.Group(elem_classes="progress-container"):
1328
- progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
1329
- progress_bar = gr.HTML('', elem_classes='no-generating-animation')
1330
-
1331
- # エラーメッセージエリア - カスタムエラーメッセージ形式をサポートするHTMLコンポーネントを使用
1332
  error_message = gr.HTML('', elem_id='error-message', visible=True)
1333
 
1334
- # 処理関数
1335
  ips = [input_image, prompt, n_prompt, seed, total_second_length, use_teacache]
1336
-
1337
- # 開始と終了ボタンのイベント
1338
- start_button.click(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button])
 
 
1339
  end_button.click(fn=end_process)
1340
 
1341
-
1342
- block.launch()
1343
-
 
18
  import numpy as np
19
  import math
20
 
21
+ # Check if running in Hugging Face Space
22
  IN_HF_SPACE = os.environ.get('SPACE_ID') is not None
23
 
24
+ # Track GPU availability
25
  GPU_AVAILABLE = False
26
  GPU_INITIALIZED = False
27
  last_update_time = time.time()
28
 
29
+ # If running in a HF Space, import spaces
30
  if IN_HF_SPACE:
31
  try:
32
  import spaces
33
+ print("Running inside a Hugging Face Space, 'spaces' module imported.")
34
 
 
35
  try:
36
  GPU_AVAILABLE = torch.cuda.is_available()
37
+ print(f"GPU available: {GPU_AVAILABLE}")
38
  if GPU_AVAILABLE:
39
+ print(f"GPU device name: {torch.cuda.get_device_name(0)}")
40
+ print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9} GB")
41
 
42
+ # Small GPU operation test
43
+ test_tensor = torch.zeros(1, device='cuda') + 1
 
44
  del test_tensor
45
+ print("GPU test operation succeeded.")
46
  else:
47
+ print("Warning: CUDA says it's available, but no GPU device was detected.")
48
  except Exception as e:
49
  GPU_AVAILABLE = False
50
+ print(f"Error checking GPU: {e}")
51
+ print("Falling back to CPU mode.")
52
  except ImportError:
53
+ print("Could not import 'spaces' module. Possibly not in a HF Space.")
54
  GPU_AVAILABLE = torch.cuda.is_available()
55
 
56
  from PIL import Image
57
  from diffusers import AutoencoderKLHunyuanVideo
58
+ from transformers import (
59
+ LlamaModel,
60
+ CLIPTextModel,
61
+ LlamaTokenizerFast,
62
+ CLIPTokenizer,
63
+ SiglipImageProcessor,
64
+ SiglipVisionModel
65
+ )
66
+ from diffusers_helper.hunyuan import (
67
+ encode_prompt_conds,
68
+ vae_decode,
69
+ vae_encode,
70
+ vae_decode_fake
71
+ )
72
+ from diffusers_helper.utils import (
73
+ save_bcthw_as_mp4,
74
+ crop_or_pad_yield_mask,
75
+ soft_append_bcthw,
76
+ resize_and_center_crop,
77
+ generate_timestamp
78
+ )
79
  from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
80
  from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
81
+ from diffusers_helper.memory import (
82
+ cpu,
83
+ gpu,
84
+ get_cuda_free_memory_gb,
85
+ move_model_to_device_with_memory_preservation,
86
+ offload_model_from_device_for_memory_preservation,
87
+ fake_diffusers_current_device,
88
+ DynamicSwapInstaller,
89
+ unload_complete_models,
90
+ load_model_as_complete
91
+ )
92
  from diffusers_helper.thread_utils import AsyncStream, async_run
93
  from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
 
94
  from diffusers_helper.clip_vision import hf_clip_vision_encode
 
95
 
96
  outputs_folder = './outputs/'
97
  os.makedirs(outputs_folder, exist_ok=True)
98
 
99
+ # Manage GPU memory if not in HF Space
100
  if not IN_HF_SPACE:
 
101
  try:
102
  if torch.cuda.is_available():
103
  free_mem_gb = get_cuda_free_memory_gb(gpu)
104
+ print(f'Free VRAM: {free_mem_gb} GB')
105
  else:
106
+ free_mem_gb = 6.0
107
+ print("CUDA not available, using default memory setting.")
108
  except Exception as e:
109
+ free_mem_gb = 6.0
110
+ print(f"Error getting CUDA memory: {e}, using default=6GB")
 
111
  high_vram = free_mem_gb > 60
112
+ print(f'High-VRAM mode: {high_vram}')
113
  else:
114
+ print("Using default memory settings in a HF Space.")
 
115
  try:
116
  if GPU_AVAILABLE:
117
+ free_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 * 0.9
118
+ high_vram = free_mem_gb > 10
119
  else:
120
+ free_mem_gb = 6.0
121
  high_vram = False
122
  except Exception as e:
123
+ print(f"Error retrieving GPU memory: {e}")
124
+ free_mem_gb = 6.0
125
  high_vram = False
126
+ print(f'GPU mem: {free_mem_gb:.2f} GB, high_vram={high_vram}')
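For example, on this Space branch a GPU reporting 24 GB of total memory gives free_mem_gb ≈ 24 × 0.9 = 21.6 and enables high_vram, while an 8 GB card gives 7.2 and leaves high_vram False; outside a Space, the stricter free-memory check above (more than 60 GB of free VRAM) is used instead.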
 
127
 
 
128
  models = {}
129
+ cpu_fallback_mode = not GPU_AVAILABLE
130
+
131
 
 
132
  def load_models():
133
+ """
134
+ Load the entire pipeline models (VAE, text encoders, image encoder, transformer).
135
+ """
136
  global models, cpu_fallback_mode, GPU_INITIALIZED
137
 
138
  if GPU_INITIALIZED:
139
+ print("Models are already loaded. Skipping duplicate loading.")
140
  return models
141
 
142
+ print("Starting model load...")
143
+
144
  try:
145
+ device = 'cuda' if (GPU_AVAILABLE and not cpu_fallback_mode) else 'cpu'
146
+ model_device = 'cpu'
147
+
 
 
148
  dtype = torch.float16 if GPU_AVAILABLE else torch.float32
149
  transformer_dtype = torch.bfloat16 if GPU_AVAILABLE else torch.float32
150
 
151
+ print(f"Device: {device}, VAE/encoders dtype={dtype}, transformer dtype={transformer_dtype}")
 
152
 
153
+ try:
154
+ text_encoder = LlamaModel.from_pretrained(
155
+ "hunyuanvideo-community/HunyuanVideo",
156
+ subfolder='text_encoder',
157
+ torch_dtype=dtype
158
+ ).to(model_device)
159
+ text_encoder_2 = CLIPTextModel.from_pretrained(
160
+ "hunyuanvideo-community/HunyuanVideo",
161
+ subfolder='text_encoder_2',
162
+ torch_dtype=dtype
163
+ ).to(model_device)
164
+ tokenizer = LlamaTokenizerFast.from_pretrained(
165
+ "hunyuanvideo-community/HunyuanVideo",
166
+ subfolder='tokenizer'
167
+ )
168
+ tokenizer_2 = CLIPTokenizer.from_pretrained(
169
+ "hunyuanvideo-community/HunyuanVideo",
170
+ subfolder='tokenizer_2'
171
+ )
172
+ vae = AutoencoderKLHunyuanVideo.from_pretrained(
173
+ "hunyuanvideo-community/HunyuanVideo",
174
+ subfolder='vae',
175
+ torch_dtype=dtype
176
+ ).to(model_device)
177
+
178
+ feature_extractor = SiglipImageProcessor.from_pretrained(
179
+ "lllyasviel/flux_redux_bfl",
180
+ subfolder='feature_extractor'
181
+ )
182
+ image_encoder = SiglipVisionModel.from_pretrained(
183
+ "lllyasviel/flux_redux_bfl",
184
+ subfolder='image_encoder',
185
+ torch_dtype=dtype
186
+ ).to(model_device)
187
+
188
+ # Use a custom rotating-landscape model (for example)
189
+ transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
190
+ "tori29umai/FramePackI2V_HY_rotate_landscape",
191
+ torch_dtype=transformer_dtype
192
+ ).to(model_device)
193
+
194
+ print("All models loaded successfully.")
195
  except Exception as e:
196
+ print(f"Error loading models: {e}")
197
+ print("Retry with float32 on CPU.")
 
 
198
  dtype = torch.float32
199
  transformer_dtype = torch.float32
200
  cpu_fallback_mode = True
 
201
 
202
+ text_encoder = LlamaModel.from_pretrained(
203
+ "hunyuanvideo-community/HunyuanVideo",
204
+ subfolder='text_encoder',
205
+ torch_dtype=dtype
206
+ ).to('cpu')
207
+ text_encoder_2 = CLIPTextModel.from_pretrained(
208
+ "hunyuanvideo-community/HunyuanVideo",
209
+ subfolder='text_encoder_2',
210
+ torch_dtype=dtype
211
+ ).to('cpu')
212
+ tokenizer = LlamaTokenizerFast.from_pretrained(
213
+ "hunyuanvideo-community/HunyuanVideo",
214
+ subfolder='tokenizer'
215
+ )
216
+ tokenizer_2 = CLIPTokenizer.from_pretrained(
217
+ "hunyuanvideo-community/HunyuanVideo",
218
+ subfolder='tokenizer_2'
219
+ )
220
+ vae = AutoencoderKLHunyuanVideo.from_pretrained(
221
+ "hunyuanvideo-community/HunyuanVideo",
222
+ subfolder='vae',
223
+ torch_dtype=dtype
224
+ ).to('cpu')
225
+
226
+ feature_extractor = SiglipImageProcessor.from_pretrained(
227
+ "lllyasviel/flux_redux_bfl",
228
+ subfolder='feature_extractor'
229
+ )
230
+ image_encoder = SiglipVisionModel.from_pretrained(
231
+ "lllyasviel/flux_redux_bfl",
232
+ subfolder='image_encoder',
233
+ torch_dtype=dtype
234
+ ).to('cpu')
235
 
236
+ transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
237
+ "tori29umai/FramePackI2V_HY_rotate_landscape",
238
+ torch_dtype=transformer_dtype
239
+ ).to('cpu')
240
+
241
+ print("Models loaded in CPU-only fallback mode.")
242
 
243
  vae.eval()
244
  text_encoder.eval()
 
251
  vae.enable_tiling()
252
 
253
  transformer.high_quality_fp32_output_for_inference = True
254
+ print("transformer.high_quality_fp32_output_for_inference = True")
255
 
 
256
  if not cpu_fallback_mode:
257
  transformer.to(dtype=transformer_dtype)
258
  vae.to(dtype=dtype)
 
269
  if torch.cuda.is_available() and not cpu_fallback_mode:
270
  try:
271
  if not high_vram:
 
272
  DynamicSwapInstaller.install_model(transformer, device=device)
273
  DynamicSwapInstaller.install_model(text_encoder, device=device)
274
  else:
 
277
  image_encoder.to(device)
278
  vae.to(device)
279
  transformer.to(device)
280
+ print(f"Successfully moved models to {device}")
281
  except Exception as e:
282
+ print(f"Error moving models to {device}: {e}")
283
+ print("Falling back to CPU.")
284
  cpu_fallback_mode = True
285
+
286
+ models_local = {
 
287
  'text_encoder': text_encoder,
288
  'text_encoder_2': text_encoder_2,
289
  'tokenizer': tokenizer,
 
295
  }
296
 
297
  GPU_INITIALIZED = True
298
+ models.update(models_local)
299
+ print(f"Model load complete. Mode: {'CPU' if cpu_fallback_mode else 'GPU'}")
300
  return models
301
  except Exception as e:
302
+ print(f"Unexpected error in load_models(): {e}")
303
  traceback.print_exc()
 
 
304
  cpu_fallback_mode = True
305
  return {}
306
 
307
 
308
+ # Use GPU decorator if in HF Space
309
  if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE:
310
  try:
311
  @spaces.GPU
312
  def initialize_models():
 
313
  global GPU_INITIALIZED
314
  try:
315
  result = load_models()
316
  GPU_INITIALIZED = True
317
  return result
318
  except Exception as e:
319
+ print(f"Error in @spaces.GPU model init: {e}")
 
320
  global cpu_fallback_mode
321
  cpu_fallback_mode = True
 
322
  return load_models()
323
  except Exception as e:
324
+ print(f"Error creating spaces.GPU decorator: {e}")
 
325
  def initialize_models():
326
  return load_models()
327
+ else:
328
+ def initialize_models():
329
+ return load_models()
330
 
331
 
 
332
  def get_models():
333
+ """
334
+ Retrieve the global models or load them if not yet loaded.
335
+ """
336
+ global models
337
  model_loading_key = "__model_loading__"
338
+
339
  if not models:
 
340
  if model_loading_key in globals():
341
+ print("Models are loading. Please wait.")
 
342
  import time
343
+ start_time = time.time()
344
+ while (not models) and (model_loading_key in globals()):
345
  time.sleep(0.5)
346
+ if time.time() - start_time > 60:
347
+ print("Timed out waiting for model load.")
 
348
  break
 
349
  if models:
350
  return models
 
351
  try:
 
352
  globals()[model_loading_key] = True
 
353
  if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE and not cpu_fallback_mode:
354
  try:
355
+ print("Loading models via @spaces.GPU")
356
+ models_local = initialize_models()
357
+ models.update(models_local)
358
  except Exception as e:
359
+ print(f"GPU decorator load error: {e}, fallback to direct load.")
360
+ models_local = load_models()
361
+ models.update(models_local)
362
  else:
363
+ models_local = load_models()
364
+ models.update(models_local)
365
  except Exception as e:
366
+ print(f"Unexpected error while loading models: {e}")
367
+ models.clear()
 
 
368
  finally:
 
369
  if model_loading_key in globals():
370
  del globals()[model_loading_key]
 
371
  return models
372
 
373
 
374
+ # Predefined resolutions for a rotating-landscape scenario
375
  PREDEFINED_RESOLUTIONS = [
376
  (416, 960), (448, 864), (480, 832), (512, 768), (544, 704),
377
  (576, 672), (608, 640), (640, 608), (672, 576), (704, 544),
378
  (768, 512), (832, 480), (864, 448), (960, 416)
379
  ]
380
 
 
381
  def find_closest_aspect_ratio(width, height, target_resolutions):
382
  """
383
+ Find the resolution in 'target_resolutions' whose aspect ratio
384
+ is closest to the original image aspect ratio (width/height).
385
  """
386
  original_aspect = width / height
 
 
387
  min_diff = float('inf')
388
  closest_resolution = None
389
 
390
+ for tw, th in target_resolutions:
391
+ target_aspect = tw / th
392
  diff = abs(original_aspect - target_aspect)
 
393
  if diff < min_diff:
394
  min_diff = diff
395
+ closest_resolution = (tw, th)
 
396
  return closest_resolution
397
 
398
 
399
  stream = AsyncStream()
400
 
 
401
  @torch.no_grad()
402
+ def worker(
403
+ input_image,
404
+ prompt,
405
+ n_prompt,
406
+ seed,
407
+ total_second_length,
408
+ latent_window_size,
409
+ steps,
410
+ cfg,
411
+ gs,
412
+ rs,
413
+ gpu_memory_preservation,
414
+ use_teacache
415
+ ):
416
+ """
417
+ Background worker that performs the actual generation.
418
+ """
419
  global last_update_time
420
  last_update_time = time.time()
421
+
422
+ # For demonstration, limit max length to 3 seconds
423
  total_second_length = min(total_second_length, 3.0)
424
 
 
425
  try:
426
+ models_local = get_models()
427
+ if not models_local:
428
+ err_msg = "Failed to load models. Check logs for details."
429
+ print(err_msg)
430
+ stream.output_queue.push(('error', err_msg))
431
  stream.output_queue.push(('end', None))
432
  return
433
 
434
+ text_encoder = models_local['text_encoder']
435
+ text_encoder_2 = models_local['text_encoder_2']
436
+ tokenizer = models_local['tokenizer']
437
+ tokenizer_2 = models_local['tokenizer_2']
438
+ vae = models_local['vae']
439
+ feature_extractor = models_local['feature_extractor']
440
+ image_encoder = models_local['image_encoder']
441
+ transformer = models_local['transformer']
442
  except Exception as e:
443
+ err = f"Error retrieving models: {e}"
444
+ print(err)
445
  traceback.print_exc()
446
+ stream.output_queue.push(('error', err))
447
  stream.output_queue.push(('end', None))
448
  return
449
+
450
+ device = 'cuda' if (GPU_AVAILABLE and not cpu_fallback_mode) else 'cpu'
451
+ print(f"Inference device: {device}")
452
+
453
+ # Adjust parameters if in CPU fallback
 
454
  if cpu_fallback_mode:
455
+ print("CPU fallback mode: using smaller parameters for performance.")
 
456
  latent_window_size = min(latent_window_size, 5)
457
+ steps = min(steps, 15)
458
+ total_second_length = min(total_second_length, 2.0)
459
+
460
  total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
461
  total_latent_sections = int(max(round(total_latent_sections), 1))
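# Worked example (editorial): with the UI defaults total_second_length=1.0 and
# latent_window_size=9, (1.0 * 30) / (9 * 4) ≈ 0.83, which rounds to 1 section
# (the minimum is also 1); each section later uses num_frames = 9 * 4 - 3 = 33
# frames, i.e. about 1.1 s at 30 fps.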
462
 
 
466
  history_latents = None
467
  total_generated_latent_frames = 0
468
 
469
+ stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
470
 
471
  try:
 
472
  if not high_vram and not cpu_fallback_mode:
473
  try:
474
  unload_complete_models(
475
  text_encoder, text_encoder_2, image_encoder, vae, transformer
476
  )
477
  except Exception as e:
478
+ print(f"Error unloading models: {e}")
 
479
 
480
+ # Text encode
481
  last_update_time = time.time()
482
+ stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Encoding text ...'))))
483
 
484
  try:
485
  if not high_vram and not cpu_fallback_mode:
 
487
  load_model_as_complete(text_encoder_2, target_device=device)
488
 
489
  llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
 
490
  if cfg == 1:
491
  llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
492
  else:
 
495
  llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
496
  llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)
497
  except Exception as e:
498
+ err = f"Text encoding error: {e}"
499
+ print(err)
500
  traceback.print_exc()
501
+ stream.output_queue.push(('error', err))
502
  stream.output_queue.push(('end', None))
503
  return
504
 
505
+ # Process input image
506
  try:
507
  H, W, C = input_image.shape
508
+ target_w, target_h = find_closest_aspect_ratio(W, H, PREDEFINED_RESOLUTIONS)
509
 
510
+ # If CPU fallback, scale down
511
  if cpu_fallback_mode:
512
+ # Keep the downscaled sides multiples of 16 (editorial assumption: this keeps the
+ # 8x VAE downsampling and the transformer's spatial patching shape-compatible).
+ scale_factor = min(320 / target_h, 320 / target_w)
+ target_h = max(16, int(target_h * scale_factor) // 16 * 16)
+ target_w = max(16, int(target_w * scale_factor) // 16 * 16)
515
 
516
+ print(f"Original image: {W}x{H}, resizing to: {target_w}x{target_h}")
517
+ input_image_np = resize_and_center_crop(input_image, target_width=target_w, target_height=target_h)
518
  Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
519
 
520
  input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
521
  input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
522
  except Exception as e:
523
+ err = f"Image processing error: {e}"
524
+ print(err)
525
  traceback.print_exc()
526
+ stream.output_queue.push(('error', err))
527
  stream.output_queue.push(('end', None))
528
  return
529
 
530
+ # VAE encode
531
  last_update_time = time.time()
532
+ stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
533
 
534
  try:
535
  if not high_vram and not cpu_fallback_mode:
536
  load_model_as_complete(vae, target_device=device)
 
537
  start_latent = vae_encode(input_image_pt, vae)
538
  except Exception as e:
539
+ err = f"VAE encode error: {e}"
540
+ print(err)
541
  traceback.print_exc()
542
+ stream.output_queue.push(('error', err))
543
  stream.output_queue.push(('end', None))
544
  return
545
 
546
  # CLIP Vision
547
  last_update_time = time.time()
548
+ stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
549
 
550
  try:
551
  if not high_vram and not cpu_fallback_mode:
552
  load_model_as_complete(image_encoder, target_device=device)
 
553
  image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
554
  image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
555
  except Exception as e:
556
+ err = f"CLIP Vision encode error: {e}"
557
+ print(err)
558
  traceback.print_exc()
559
+ stream.output_queue.push(('error', err))
560
  stream.output_queue.push(('end', None))
561
  return
562
 
563
+ # Convert dtype
564
  try:
565
  llama_vec = llama_vec.to(transformer.dtype)
566
  llama_vec_n = llama_vec_n.to(transformer.dtype)
 
568
  clip_l_pooler_n = clip_l_pooler_n.to(transformer.dtype)
569
  image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
570
  except Exception as e:
571
+ err = f"Data type conversion error: {e}"
572
+ print(err)
573
  traceback.print_exc()
574
+ stream.output_queue.push(('error', err))
575
  stream.output_queue.push(('end', None))
576
  return
577
 
578
+ # Sampling
579
  last_update_time = time.time()
580
+ stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting sampling...'))))
581
 
582
  rnd = torch.Generator("cpu").manual_seed(seed)
583
  num_frames = latent_window_size * 4 - 3
584
 
585
  try:
586
+ history_latents = torch.zeros(
587
+ size=(1, 16, 1 + 2 + 16, target_h // 8, target_w // 8),
588
+ dtype=torch.float32
589
+ ).cpu()
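# Editorial note: the 1 + 2 + 16 slots along dim=2 reserve the clean-latent context
# used during sampling: one "post" frame plus the 2x and 4x history frames, matching
# the split([1, 2, 16], dim=2) applied to history_latents further below.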
590
  history_pixels = None
591
  total_generated_latent_frames = 0
592
  except Exception as e:
593
+ err = f"Error initializing history latents: {e}"
594
+ print(err)
595
  traceback.print_exc()
596
+ stream.output_queue.push(('error', err))
597
  stream.output_queue.push(('end', None))
598
  return
599
 
600
+ latent_paddings = list(reversed(range(total_latent_sections)))
 
601
  if total_latent_sections > 4:
602
+ latent_paddings = [3] + [2]*(total_latent_sections - 3) + [1, 0]
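# Illustrative schedule (editorial): with 6 sections this yields [3, 2, 2, 2, 1, 0];
# with 4 or fewer sections the plain reversed(range(n)) list is used, e.g. [2, 1, 0].
# The final section (padding 0) is the one that gets the start-image latent prepended.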
603
 
604
  for latent_padding in latent_paddings:
605
  last_update_time = time.time()
606
+ is_last_section = (latent_padding == 0)
607
  latent_padding_size = latent_padding * latent_window_size
608
 
609
  if stream.input_queue.top() == 'end':
 
610
  if history_pixels is not None and total_generated_latent_frames > 0:
611
  try:
612
+ final_name = os.path.join(outputs_folder, f'{job_id}_final_{total_generated_latent_frames}.mp4')
613
+ save_bcthw_as_mp4(history_pixels, final_name, fps=30, crf=18)
614
+ stream.output_queue.push(('file', final_name))
615
  except Exception as e:
616
+ print(f"Error saving final partial video: {e}")
 
617
  stream.output_queue.push(('end', None))
618
  return
619
 
620
+ print(f'latent_padding_size = {latent_padding_size}, is_last_section={is_last_section}')
621
 
622
  try:
623
  indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0)
624
+ (
625
+ cidx_pre,
626
+ blank_indices,
627
+ latent_indices,
628
+ cidx_post,
629
+ cidx_2x,
630
+ cidx_4x
631
+ ) = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1)
632
+ clean_latent_indices = torch.cat([cidx_pre, cidx_post], dim=1)
633
 
634
  clean_latents_pre = start_latent.to(history_latents)
635
+ c_latents_post, c_latents_2x, c_latents_4x = history_latents[:, :, :1 + 2 + 16].split([1, 2, 16], dim=2)
636
+ clean_latents = torch.cat([clean_latents_pre, c_latents_post], dim=2)
637
  except Exception as e:
638
+ err = f"Error preparing sampling data: {e}"
639
+ print(err)
640
  traceback.print_exc()
 
641
  if last_output_filename:
642
  stream.output_queue.push(('file', last_output_filename))
643
  continue
 
645
  if not high_vram and not cpu_fallback_mode:
646
  try:
647
  unload_complete_models()
648
+ move_model_to_device_with_memory_preservation(
649
+ transformer, target_device=device, preserved_memory_gb=gpu_memory_preservation
650
+ )
651
  except Exception as e:
652
+ print(f"Error moving transformer to GPU: {e}")
 
653
 
654
  if use_teacache and not cpu_fallback_mode:
655
  try:
656
  transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
657
  except Exception as e:
658
+ print(f"Error initializing TeaCache: {e}")
 
659
  transformer.initialize_teacache(enable_teacache=False)
660
  else:
661
  transformer.initialize_teacache(enable_teacache=False)
 
663
  def callback(d):
664
  global last_update_time
665
  last_update_time = time.time()
 
666
  try:
667
+ if stream.input_queue.top() == 'end':
668
+ stream.output_queue.push(('end', None))
669
+ raise KeyboardInterrupt('User requested stop.')
670
+ preview_latents = d['denoised']
671
+ preview_latents = vae_decode_fake(preview_latents)
672
+ preview_img = (preview_latents * 255.0).cpu().numpy().clip(0,255).astype(np.uint8)
673
+ preview_img = einops.rearrange(preview_img, 'b c t h w -> (b h) (t w) c')
674
+
675
+ curr_step = d['i'] + 1
676
+ percentage = int(100.0 * curr_step / steps)
677
+ hint = f'Sampling {curr_step}/{steps}'
678
+ desc = f'Generated frames so far: {int(max(0, total_generated_latent_frames * 4 - 3))}'
679
+ bar_html = make_progress_bar_html(percentage, hint)
680
+ stream.output_queue.push(('progress', (preview_img, desc, bar_html)))
681
+ except KeyboardInterrupt:
682
  raise
683
+ except Exception as exc:
684
+ print(f"Error in sampling callback: {exc}")
 
 
685
  return
686
 
687
  try:
688
+ print(f"Sampling: device={device}, dtype={transformer.dtype}, teacache={use_teacache}")
 
 
689
  try:
 
690
  generated_latents = sample_hunyuan(
691
  transformer=transformer,
692
  sampler='unipc',
693
+ width=target_w,
694
+ height=target_h,
695
  frames=num_frames,
696
  real_guidance_scale=cfg,
697
  distilled_guidance_scale=gs,
698
  guidance_rescale=rs,
 
699
  num_inference_steps=steps,
700
  generator=rnd,
701
  prompt_embeds=llama_vec,
 
710
  latent_indices=latent_indices,
711
  clean_latents=clean_latents,
712
  clean_latent_indices=clean_latent_indices,
713
+ clean_latents_2x=c_latents_2x,
714
+ clean_latent_2x_indices=cidx_2x,
715
+ clean_latents_4x=c_latents_4x,
716
+ clean_latent_4x_indices=cidx_4x,
717
+ callback=callback
718
  )
 
 
719
  except KeyboardInterrupt as e:
720
+ print(f"User interrupt: {e}")
721
  if last_output_filename:
 
722
  stream.output_queue.push(('file', last_output_filename))
723
+ err_msg = "User stopped generation; partial video returned."
724
  else:
725
+ err_msg = "User stopped generation; no video produced."
726
+ stream.output_queue.push(('error', err_msg))
727
  stream.output_queue.push(('end', None))
 
728
  return
729
  except Exception as e:
730
+ print(f"Error during sampling: {e}")
731
  traceback.print_exc()
 
 
732
  if last_output_filename:
733
  stream.output_queue.push(('file', last_output_filename))
734
+ err_msg = f"Sampling error; partial video returned: {e}"
735
+ stream.output_queue.push(('error', err_msg))
 
 
736
  else:
737
+ err_msg = f"Sampling error; no video produced: {e}"
738
+ stream.output_queue.push(('error', err_msg))
 
 
739
  stream.output_queue.push(('end', None))
740
  return
741
 
742
  try:
743
  if is_last_section:
744
  generated_latents = torch.cat([start_latent.to(generated_latents), generated_latents], dim=2)
 
745
  total_generated_latent_frames += int(generated_latents.shape[2])
746
  history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
747
  except Exception as e:
748
+ err = f"Error merging latent outputs: {e}"
749
+ print(err)
750
  traceback.print_exc()
 
751
  if last_output_filename:
752
  stream.output_queue.push(('file', last_output_filename))
753
+ stream.output_queue.push(('error', err))
754
  stream.output_queue.push(('end', None))
755
  return
756
 
757
  if not high_vram and not cpu_fallback_mode:
758
  try:
759
+ offload_model_from_device_for_memory_preservation(
760
+ transformer, target_device=device, preserved_memory_gb=8
761
+ )
762
  load_model_as_complete(vae, target_device=device)
763
  except Exception as e:
764
+ print(f"Error managing model memory: {e}")
 
765
 
766
  try:
767
+ real_history_latents = history_latents[:, :, :total_generated_latent_frames]
768
  except Exception as e:
769
+ err = f"Error slicing latents history: {e}"
770
+ print(err)
 
771
  if last_output_filename:
772
  stream.output_queue.push(('file', last_output_filename))
773
  continue
774
 
775
  try:
 
 
 
776
  if history_pixels is None:
777
  history_pixels = vae_decode(real_history_latents, vae).cpu()
778
  else:
779
  section_latent_frames = (latent_window_size * 2 + 1) if is_last_section else (latent_window_size * 2)
780
  overlapped_frames = latent_window_size * 4 - 3
 
781
  current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu()
782
  history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames)
783
 
784
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
 
 
785
  save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=18)
786
  last_output_filename = output_filename
787
  stream.output_queue.push(('file', output_filename))
788
  except Exception as e:
789
+ print(f"Error decoding/saving video: {e}")
790
  traceback.print_exc()
 
 
791
  if last_output_filename:
792
  stream.output_queue.push(('file', last_output_filename))
793
+ err = f"Error decoding/saving video: {e}"
794
+ stream.output_queue.push(('error', err))
795
  continue
796
 
797
  if is_last_section:
798
  break
799
  except Exception as e:
800
+ print(f"Outer error: {e}, type={type(e)}")
 
801
  traceback.print_exc()
802
  if not high_vram and not cpu_fallback_mode:
803
  try:
 
804
  unload_complete_models(
805
  text_encoder, text_encoder_2, image_encoder, vae, transformer
806
  )
807
+ except Exception as ue:
808
+ print(f"Unload error: {ue}")
809
  if last_output_filename:
 
810
  stream.output_queue.push(('file', last_output_filename))
811
+ err = f"Error in worker: {e}"
812
+ stream.output_queue.push(('error', err))
813
 
814
+ print("Worker finished, pushing end.")
 
815
  stream.output_queue.push(('end', None))
 
816
 
817
 
818
+ # Create a processing function with or without the HF Spaces GPU decorator
819
  if IN_HF_SPACE and 'spaces' in globals():
820
  @spaces.GPU
821
  def process_with_gpu(input_image, prompt, n_prompt, seed, total_second_length, use_teacache):
822
  global stream
823
+ assert input_image is not None, "No input image provided."
824
 
825
+ # Fix certain parameters for simplicity
826
  latent_window_size = 9
827
  steps = 25
828
  cfg = 1.0
 
830
  rs = 0.0
831
  gpu_memory_preservation = 6
832
 
 
 
833
  yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
 
834
  try:
835
  stream = AsyncStream()
836
+ async_run(
837
+ worker,
838
+ input_image, prompt, n_prompt, seed,
839
+ total_second_length, latent_window_size, steps,
840
+ cfg, gs, rs, gpu_memory_preservation, use_teacache
841
+ )
842
 
843
  output_filename = None
844
  prev_output_filename = None
845
  error_message = None
846
 
 
847
  while True:
848
  try:
849
  flag, data = stream.output_queue.next()
 
850
  if flag == 'file':
851
  output_filename = data
852
  prev_output_filename = output_filename
 
853
  yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
854
+ elif flag == 'progress':
 
855
  preview, desc, html = data
 
856
  yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
857
+ elif flag == 'error':
 
858
  error_message = data
859
+ print(f"Received error: {error_message}")
860
+ elif flag == 'end':
 
 
 
861
  if output_filename is None and prev_output_filename is not None:
862
  output_filename = prev_output_filename
 
 
863
  if error_message:
864
  yield output_filename, gr.update(visible=False), gr.update(), f'<div class="error-message">{error_message}</div>', gr.update(interactive=True), gr.update(interactive=False)
865
  else:
 
866
  yield output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
867
  break
868
  except Exception as e:
869
+ print(f"Error processing output: {e}")
870
+ if (time.time() - last_update_time) > 60:
871
+ print(f"No updates for {(time.time()-last_update_time):.1f}s, likely hung.")
872
  if prev_output_filename:
873
  yield prev_output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
874
  else:
875
  yield None, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
876
  break
 
877
  except Exception as e:
878
+ print(f"Error starting process: {e}")
879
  traceback.print_exc()
 
 
880
  yield None, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
881
 
882
  process = process_with_gpu
883
  else:
884
  def process(input_image, prompt, n_prompt, seed, total_second_length, use_teacache):
885
  global stream
886
+ assert input_image is not None, "No input image provided."
887
 
888
  latent_window_size = 9
889
  steps = 25
 
892
  rs = 0.0
893
  gpu_memory_preservation = 6
894
 
 
895
  yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
 
896
  try:
897
  stream = AsyncStream()
898
+ async_run(
899
+ worker,
900
+ input_image, prompt, n_prompt, seed,
901
+ total_second_length, latent_window_size, steps,
902
+ cfg, gs, rs, gpu_memory_preservation, use_teacache
903
+ )
904
 
905
  output_filename = None
906
  prev_output_filename = None
907
  error_message = None
908
 
 
909
  while True:
910
  try:
911
  flag, data = stream.output_queue.next()
 
912
  if flag == 'file':
913
  output_filename = data
914
  prev_output_filename = output_filename
 
915
  yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
916
+ elif flag == 'progress':
 
917
  preview, desc, html = data
 
918
  yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
919
+ elif flag == 'error':
 
920
  error_message = data
921
+ print(f"Received error: {error_message}")
922
+ elif flag == 'end':
 
 
 
923
  if output_filename is None and prev_output_filename is not None:
924
  output_filename = prev_output_filename
 
 
925
  if error_message:
926
  yield output_filename, gr.update(visible=False), gr.update(), f'<div class="error-message">{error_message}</div>', gr.update(interactive=True), gr.update(interactive=False)
927
  else:
 
928
  yield output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
929
  break
930
  except Exception as e:
931
+ print(f"Error processing output: {e}")
932
+ if (time.time() - last_update_time) > 60:
933
+ print(f"No updates for {(time.time()-last_update_time):.1f}s, likely hung.")
934
  if prev_output_filename:
935
  yield prev_output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
936
  else:
937
  yield None, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
938
  break
 
939
  except Exception as e:
940
+ print(f"Error starting process: {e}")
941
  traceback.print_exc()
 
 
942
  yield None, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
943
+
944
 
945
  def end_process():
946
+ """
947
+ Stop generation by pushing 'end' signal into the queue.
948
+ """
949
+ print("User clicked the stop button, sending 'end' signal...")
950
+ global stream
951
  if 'stream' in globals() and stream is not None:
 
952
  try:
953
  current_top = stream.input_queue.top()
954
+ print(f"Queue top signal: {current_top}")
955
  except Exception as e:
956
+ print(f"Error checking queue status: {e}")
 
 
957
  try:
958
  stream.input_queue.push('end')
959
+ print("Successfully pushed 'end' signal.")
960
  except Exception as e:
961
+ print(f"Error pushing 'end' signal: {e}")
962
  else:
963
+ print("Warning: 'stream' is not initialized; cannot stop.")
964
  return None
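# Editorial note: the 'end' signal pushed above is polled in two places inside worker():
# at the top of each latent-section loop and inside the per-step sampling callback, so a
# stop request takes effect at the next section boundary or denoising step.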
965
 
966
 
967
  quick_prompts = [
968
+ ["The camera smoothly orbits around the center of the scene, keeping the center point fixed and always in view"]
969
  ]
 
970
 
 
 
971
  def make_custom_css():
972
+ base_progress_css = make_progress_bar_css()
973
+ enhanced_css = """
974
+ body {
975
+ background: #f9fafb !important;
976
+ font-family: "Noto Sans", sans-serif;
977
+ }
978
  #app-container {
979
+ max-width: 1200px;
980
  margin: 0 auto;
981
+ padding: 1rem;
982
+ position: relative;
983
  }
 
 
 
984
  h1 {
985
  font-size: 2rem;
986
  text-align: center;
987
  margin-bottom: 1rem;
988
+ color: #2d3748;
989
+ font-weight: 700;
990
  }
 
 
991
  .start-btn, .stop-btn {
992
  min-height: 45px;
993
  font-size: 1rem;
994
+ font-weight: 600;
995
  }
996
+ .start-btn {
997
+ background-color: #3182ce !important;
998
+ color: #fff !important;
999
  }
1000
+ .stop-btn {
1001
+ background-color: #e53e3e !important;
1002
+ color: #fff !important;
1003
  }
1004
+ .button-container button:hover {
1005
+ filter: brightness(0.95);
1006
  }
1007
+ .preview-container, .video-container {
1008
+ border: 1px solid #cbd5e0;
1009
+ border-radius: 8px;
1010
+ overflow: hidden;
 
1011
  }
 
 
1012
  .progress-container {
1013
+ margin-top: 15px;
1014
+ margin-bottom: 15px;
1015
  }
1016
  .error-message {
1017
+ background-color: #fff5f5;
1018
+ border: 1px solid #fed7d7;
1019
+ color: #e53e3e;
1020
  padding: 10px;
1021
  border-radius: 4px;
1022
  margin-top: 10px;
1023
  }
 
 
1024
  .error-icon {
1025
+ color: #e53e3e;
 
1026
  margin-right: 8px;
1027
  }
1028
+ #error-message {
1029
+ color: #ff4444;
1030
+ font-weight: bold;
1031
+ padding: 10px;
1032
+ border-radius: 4px;
1033
+ margin-top: 10px;
 
1034
  }
1035
+ @media (max-width: 768px) {
1036
+ #app-container {
1037
+ padding: 0.5rem;
1038
+ }
1039
+ .mobile-full-width {
1040
+ flex-direction: column !important;
1041
+ }
1042
+ .mobile-full-width > .gr-block {
1043
+ width: 100% !important;
1044
+ }
1045
  }
1046
  """
1047
+ return base_progress_css + enhanced_css
1048
 
1049
  css = make_custom_css()
1050
+
1051
  block = gr.Blocks(css=css).queue()
1052
  with block:
1053
+ gr.HTML("<h1>FramePack Rotate-Landscape - Generate Rotating Landscape Video</h1>")
1054
 
 
1055
  with gr.Row(elem_classes="mobile-full-width"):
1056
+ with gr.Column(scale=1):
 
1057
  input_image = gr.Image(
1058
+ sources='upload',
1059
+ type="numpy",
1060
+ label="Upload Image",
 
1061
  height=320
1062
  )
1063
+
1064
  prompt = gr.Textbox(
1065
+ label="Prompt",
1066
+ value='The camera smoothly orbits around the center of the scene...',
 
1067
  )
1068
+
1069
  example_quick_prompts = gr.Dataset(
1070
+ samples=quick_prompts,
1071
+ label="Quick Prompts",
1072
+ samples_per_page=1000,
1073
  components=[prompt]
1074
  )
1075
+ example_quick_prompts.click(
1076
+ lambda x: x[0],
1077
+ inputs=[example_quick_prompts],
1078
+ outputs=prompt,
1079
+ show_progress=False,
1080
+ queue=False
1081
+ )
1082
 
 
1083
  with gr.Row(elem_classes="button-container"):
1084
  start_button = gr.Button(
1085
+ value="Generate",
1086
+ elem_classes="start-btn",
 
1087
  variant="primary"
1088
  )
 
1089
  end_button = gr.Button(
1090
+ value="Stop",
1091
+ elem_classes="stop-btn",
 
1092
  interactive=False
1093
  )
1094
 
1095
+ use_teacache = gr.Checkbox(
1096
+ label="Use TeaCache",
1097
+ value=True,
1098
+ info="Faster speed, but possibly worse finger/hand generation."
1099
+ )
1100
+ n_prompt = gr.Textbox(label="Negative Prompt", value="", visible=False)
1101
+ seed = gr.Number(label="Seed", value=31337, precision=0)
1102
+ total_second_length = gr.Slider(
1103
+ label="Video length (max 3 seconds)",
1104
+ minimum=0.5, maximum=3, value=1.0, step=0.1
1105
+ )
1106
 
1107
+ with gr.Column(scale=1):
1108
  preview_image = gr.Image(
1109
+ label="Preview",
1110
+ height=200,
1111
  visible=False,
1112
  elem_classes="preview-container"
1113
  )
 
 
1114
  result_video = gr.Video(
1115
+ label="Generated Video",
1116
+ autoplay=True,
 
 
1117
  loop=True,
1118
+ show_share_button=True,
1119
+ height=512,
1120
+ elem_classes="video-container"
1121
  )
1122
+ gr.HTML("""
1123
+ <div>
1124
+ Note: Due to reversed sampling, ending actions may appear before starting actions. If the start action is missing, please wait for further frames.
1125
+ </div>
1126
+ """)
1127
 
 
1128
  with gr.Group(elem_classes="progress-container"):
1129
+ progress_desc = gr.Markdown('')
1130
+ progress_bar = gr.HTML('')
1131
+
 
1132
  error_message = gr.HTML('', elem_id='error-message', visible=True)
1133
 
1134
+ # Inputs
1135
  ips = [input_image, prompt, n_prompt, seed, total_second_length, use_teacache]
1136
+ start_button.click(
1137
+ fn=process,
1138
+ inputs=ips,
1139
+ outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button]
1140
+ )
1141
  end_button.click(fn=end_process)
1142
 
1143
+ block.launch()