ginipick committed on
Commit 1da4d19 · verified · 1 Parent(s): ac49e03

Update app.py

Files changed (1)
  1. app.py +226 -254
app.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import os
2
  import threading
3
  import time
@@ -23,7 +28,7 @@ translations = {
23
  "teacache_info": "Faster speed, but may result in slightly worse finger and hand generation.",
24
  "negative_prompt": "Negative Prompt",
25
  "seed": "Seed",
26
- "video_length": "Video Length (max 5 seconds)",
27
  "latent_window": "Latent Window Size",
28
  "steps": "Inference Steps",
29
  "steps_info": "Changing this value is not recommended.",
@@ -189,16 +194,19 @@ def load_models():
189
  print(f"Device: {device}, VAE/Encoders dtype={dtype}, Transformer dtype={transformer_dtype}")
190
 
191
  try:
 
192
  text_encoder = LlamaModel.from_pretrained(
193
  "hunyuanvideo-community/HunyuanVideo",
194
  subfolder='text_encoder',
195
  torch_dtype=dtype
196
  ).to(model_device)
 
197
  text_encoder_2 = CLIPTextModel.from_pretrained(
198
  "hunyuanvideo-community/HunyuanVideo",
199
  subfolder='text_encoder_2',
200
  torch_dtype=dtype
201
  ).to(model_device)
 
202
  tokenizer = LlamaTokenizerFast.from_pretrained(
203
  "hunyuanvideo-community/HunyuanVideo",
204
  subfolder='tokenizer'
@@ -207,12 +215,15 @@ def load_models():
207
  "hunyuanvideo-community/HunyuanVideo",
208
  subfolder='tokenizer_2'
209
  )
 
 
210
  vae = AutoencoderKLHunyuanVideo.from_pretrained(
211
  "hunyuanvideo-community/HunyuanVideo",
212
  subfolder='vae',
213
  torch_dtype=dtype
214
  ).to(model_device)
215
 
 
216
  feature_extractor = SiglipImageProcessor.from_pretrained(
217
  "lllyasviel/flux_redux_bfl", subfolder='feature_extractor'
218
  )
@@ -222,8 +233,13 @@ def load_models():
222
  torch_dtype=dtype
223
  ).to(model_device)
224
 
 
 
 
 
 
225
  transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
226
- "lllyasviel/FramePackI2V_HY",
227
  torch_dtype=transformer_dtype
228
  ).to(model_device)
229
 
@@ -269,7 +285,7 @@ def load_models():
269
  ).to('cpu')
270
 
271
  transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
272
- "lllyasviel/FramePackI2V_HY",
273
  torch_dtype=transformer_dtype
274
  ).to('cpu')
275
 
@@ -285,6 +301,7 @@ def load_models():
285
  vae.enable_slicing()
286
  vae.enable_tiling()
287
 
 
288
  transformer.high_quality_fp32_output_for_inference = True
289
  print("transformer.high_quality_fp32_output_for_inference = True")
290
 
@@ -304,6 +321,7 @@ def load_models():
304
  if torch.cuda.is_available() and not cpu_fallback_mode:
305
  try:
306
  if not high_vram:
 
307
  DynamicSwapInstaller.install_model(transformer, device=device)
308
  DynamicSwapInstaller.install_model(text_encoder, device=device)
309
  else:
@@ -338,7 +356,7 @@ def load_models():
338
  cpu_fallback_mode = True
339
  return {}
340
 
341
- # Whether to use the GPU decorator (Spaces only)
342
  if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE:
343
  try:
344
  @spaces.GPU
@@ -404,7 +422,6 @@ def get_models():
404
 
405
  stream = AsyncStream()
406
 
407
- # Error-message HTML generation function (English only)
408
  def create_error_html(error_msg, is_timeout=False):
409
  """
410
  Create a user-friendly error message in English only
@@ -461,15 +478,13 @@ def worker(
461
  use_teacache
462
  ):
463
  """
464
- Actual generation logic in background thread.
465
  """
466
  global last_update_time
467
  last_update_time = time.time()
468
 
469
- # Request: default to 2 seconds, with up to 5 seconds allowed.
470
- # The slider below is already set to min=1, max=5, and the default value was changed to 2.
471
- # The internal logic also prevents going past 5 seconds.
472
- total_second_length = min(total_second_length, 5.0)
473
 
474
  try:
475
  models_local = get_models()
@@ -499,47 +514,44 @@ def worker(
499
  device = 'cuda' if (GPU_AVAILABLE and not cpu_fallback_mode) else 'cpu'
500
  print(f"Inference device: {device}")
501
 
502
- if cpu_fallback_mode:
503
- print("CPU fallback mode: reducing some parameters for performance.")
504
- latent_window_size = min(latent_window_size, 5)
505
- steps = min(steps, 15)
506
- total_second_length = min(total_second_length, 2.0)
507
 
 
508
  total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
509
  total_latent_sections = int(max(round(total_latent_sections), 1))
510
 
511
  job_id = generate_timestamp()
512
  last_output_filename = None
513
- history_pixels = None
514
  history_latents = None
 
515
  total_generated_latent_frames = 0
516
 
517
- from diffusers_helper.memory import unload_complete_models
518
-
519
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
520
 
521
  try:
 
522
  if not high_vram and not cpu_fallback_mode:
523
  try:
524
- unload_complete_models(
525
- text_encoder, text_encoder_2, image_encoder, vae, transformer
526
- )
527
  except Exception as e:
528
  print(f"Error unloading models: {e}")
529
 
530
- # Text Encode
531
  last_update_time = time.time()
532
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding...'))))
533
 
534
  try:
535
  if not high_vram and not cpu_fallback_mode:
 
536
  fake_diffusers_current_device(text_encoder, device)
537
  load_model_as_complete(text_encoder_2, target_device=device)
538
 
539
  llama_vec, clip_l_pooler = encode_prompt_conds(
540
  prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2
541
  )
542
-
543
  if cfg == 1:
544
  llama_vec_n, clip_l_pooler_n = (
545
  torch.zeros_like(llama_vec),
@@ -549,7 +561,6 @@ def worker(
549
  llama_vec_n, clip_l_pooler_n = encode_prompt_conds(
550
  n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2
551
  )
552
-
553
  llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
554
  llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)
555
  except Exception as e:
@@ -560,14 +571,16 @@ def worker(
560
  stream.output_queue.push(('end', None))
561
  return
562
 
563
- # Image processing
564
  last_update_time = time.time()
565
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Image processing...'))))
566
 
567
  try:
568
  H, W, C = input_image.shape
 
569
  height, width = find_nearest_bucket(H, W, resolution=640)
570
 
 
571
  if cpu_fallback_mode:
572
  height = min(height, 320)
573
  width = min(width, 320)
@@ -585,7 +598,7 @@ def worker(
585
  stream.output_queue.push(('end', None))
586
  return
587
 
588
- # VAE encoding
589
  last_update_time = time.time()
590
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding...'))))
591
 
@@ -601,16 +614,14 @@ def worker(
601
  stream.output_queue.push(('end', None))
602
  return
603
 
604
- # CLIP Vision
605
  last_update_time = time.time()
606
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encode...'))))
607
 
608
  try:
609
  if not high_vram and not cpu_fallback_mode:
610
  load_model_as_complete(image_encoder, target_device=device)
611
- image_encoder_output = hf_clip_vision_encode(
612
- input_image_np, feature_extractor, image_encoder
613
- )
614
  image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
615
  except Exception as e:
616
  err = f"CLIP Vision encode error: {e}"
@@ -620,7 +631,7 @@ def worker(
620
  stream.output_queue.push(('end', None))
621
  return
622
 
623
- # Convert dtype
624
  try:
625
  llama_vec = llama_vec.to(transformer.dtype)
626
  llama_vec_n = llama_vec_n.to(transformer.dtype)
@@ -635,20 +646,18 @@ def worker(
635
  stream.output_queue.push(('end', None))
636
  return
637
 
638
- # Sampling
639
  last_update_time = time.time()
640
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Start sampling...'))))
641
 
642
  rnd = torch.Generator("cpu").manual_seed(seed)
643
- num_frames = latent_window_size * 4 - 3
644
 
 
 
645
  try:
646
- history_latents = torch.zeros(
647
- size=(1, 16, 1 + 2 + 16, height // 8, width // 8),
648
- dtype=torch.float32
649
- ).cpu()
650
  history_pixels = None
651
- total_generated_latent_frames = 0
652
  except Exception as e:
653
  err = f"Init history state error: {e}"
654
  print(err)
@@ -657,57 +666,27 @@ def worker(
657
  stream.output_queue.push(('end', None))
658
  return
659
 
660
- latent_paddings = list(reversed(range(total_latent_sections)))
661
- if total_latent_sections > 4:
662
- # Some heuristic to flatten out large steps
663
- latent_paddings = [3] + [2]*(total_latent_sections - 3) + [1, 0]
664
-
665
- for latent_padding in latent_paddings:
666
- last_update_time = time.time()
667
- is_last_section = (latent_padding == 0)
668
- latent_padding_size = latent_padding * latent_window_size
669
 
 
670
  if stream.input_queue.top() == 'end':
671
- # If user requests end, save partial video if possible
672
  if history_pixels is not None and total_generated_latent_frames > 0:
673
  try:
674
  outname = os.path.join(
675
  outputs_folder, f'{job_id}_final_{total_generated_latent_frames}.mp4'
676
  )
677
- save_bcthw_as_mp4(history_pixels, outname, fps=30)
678
  stream.output_queue.push(('file', outname))
679
  except Exception as e:
680
  print(f"Error saving final partial video: {e}")
681
  stream.output_queue.push(('end', None))
682
  return
683
 
684
- print(f"latent_padding_size={latent_padding_size}, last_section={is_last_section}")
685
-
686
- try:
687
- indices = torch.arange(
688
- 0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])
689
- ).unsqueeze(0)
690
- (
691
- clean_latent_indices_pre,
692
- blank_indices,
693
- latent_indices,
694
- clean_latent_indices_post,
695
- clean_latent_2x_indices,
696
- clean_latent_4x_indices
697
- ) = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1)
698
- clean_latent_indices = torch.cat([clean_latent_indices_pre, clean_latent_indices_post], dim=1)
699
-
700
- clean_latents_pre = start_latent.to(history_latents)
701
- clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16].split([1, 2, 16], dim=2)
702
- clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2)
703
- except Exception as e:
704
- err = f"Sampling data prep error: {e}"
705
- print(err)
706
- traceback.print_exc()
707
- if last_output_filename:
708
- stream.output_queue.push(('file', last_output_filename))
709
- continue
710
 
 
711
  if not high_vram and not cpu_fallback_mode:
712
  try:
713
  unload_complete_models()
@@ -726,6 +705,7 @@ def worker(
726
  else:
727
  transformer.initialize_teacache(enable_teacache=False)
728
 
 
729
  def callback(d):
730
  global last_update_time
731
  last_update_time = time.time()
@@ -741,7 +721,7 @@ def worker(
741
  curr_step = d['i'] + 1
742
  percentage = int(100.0 * curr_step / steps)
743
  hint = f'Sampling {curr_step}/{steps}'
744
- desc = f'Total frames so far: {int(max(0, total_generated_latent_frames * 4 - 3))}'
745
  barhtml = make_progress_bar_html(percentage, hint)
746
  stream.output_queue.push(('progress', (preview, desc, barhtml)))
747
  except KeyboardInterrupt:
@@ -750,113 +730,137 @@ def worker(
750
  print(f"Callback error: {e}")
751
  return
752
 
 
 
753
  try:
754
- print(f"Sampling with device={device}, dtype={transformer.dtype}, teacache={use_teacache}")
755
- from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
756
 
757
- try:
758
- generated_latents = sample_hunyuan(
759
- transformer=transformer,
760
- sampler='unipc',
761
- width=width,
762
- height=height,
763
- frames=num_frames,
764
- real_guidance_scale=cfg,
765
- distilled_guidance_scale=gs,
766
- guidance_rescale=rs,
767
- num_inference_steps=steps,
768
- generator=rnd,
769
- prompt_embeds=llama_vec,
770
- prompt_embeds_mask=llama_attention_mask,
771
- prompt_poolers=clip_l_pooler,
772
- negative_prompt_embeds=llama_vec_n,
773
- negative_prompt_embeds_mask=llama_attention_mask_n,
774
- negative_prompt_poolers=clip_l_pooler_n,
775
- device=device,
776
- dtype=transformer.dtype,
777
- image_embeddings=image_encoder_last_hidden_state,
778
- latent_indices=latent_indices,
779
- clean_latents=clean_latents,
780
- clean_latent_indices=clean_latent_indices,
781
- clean_latents_2x=clean_latents_2x,
782
- clean_latent_2x_indices=clean_latent_2x_indices,
783
- clean_latents_4x=clean_latents_4x,
784
- clean_latent_4x_indices=clean_latent_4x_indices,
785
- callback=callback
786
- )
787
- except KeyboardInterrupt as e:
788
- print(f"User interrupt: {e}")
789
- if last_output_filename:
790
- stream.output_queue.push(('file', last_output_filename))
791
- err = "User stopped generation, partial video returned."
792
- else:
793
- err = "User stopped generation, no video produced."
794
- stream.output_queue.push(('error', err))
795
- stream.output_queue.push(('end', None))
796
- return
797
  except Exception as e:
798
- print(f"Sampling error: {e}")
 
799
  traceback.print_exc()
 
800
  if last_output_filename:
801
  stream.output_queue.push(('file', last_output_filename))
 
 
 
 
 
 
 
802
  err = f"Error during sampling, partial video returned: {e}"
 
803
  stream.output_queue.push(('error', err))
804
  else:
805
- err = f"Error during sampling, no video produced: {e}"
806
  stream.output_queue.push(('error', err))
807
  stream.output_queue.push(('end', None))
808
  return
809
 
810
  try:
811
- if is_last_section:
812
- generated_latents = torch.cat([start_latent.to(generated_latents), generated_latents], dim=2)
813
- total_generated_latent_frames += int(generated_latents.shape[2])
814
- history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
815
  except Exception as e:
816
- err = f"Post-latent processing error: {e}"
817
  print(err)
818
  traceback.print_exc()
819
- if last_output_filename:
820
- stream.output_queue.push(('file', last_output_filename))
821
  stream.output_queue.push(('error', err))
822
  stream.output_queue.push(('end', None))
823
  return
824
 
 
825
  if not high_vram and not cpu_fallback_mode:
826
  try:
827
- offload_model_from_device_for_memory_preservation(
828
- transformer, target_device=device, preserved_memory_gb=8
829
- )
830
  load_model_as_complete(vae, target_device=device)
831
  except Exception as e:
832
  print(f"Model memory manage error: {e}")
833
 
 
834
  try:
835
- real_history_latents = history_latents[:, :, :total_generated_latent_frames]
836
- except Exception as e:
837
- err = f"History latents slice error: {e}"
838
- print(err)
839
- if last_output_filename:
840
- stream.output_queue.push(('file', last_output_filename))
841
- continue
842
 
843
- try:
844
- # VAE decode
845
  if history_pixels is None:
846
  history_pixels = vae_decode(real_history_latents, vae).cpu()
847
  else:
848
- # Overlap logic
849
- section_latent_frames = (
850
- (latent_window_size * 2 + 1) if is_last_section else (latent_window_size * 2)
851
- )
852
- overlapped_frames = latent_window_size * 4 - 3
853
- current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu()
854
- history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames)
855
-
 
856
  output_filename = os.path.join(
857
  outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4'
858
  )
859
- save_bcthw_as_mp4(history_pixels, output_filename, fps=30)
860
  last_output_filename = output_filename
861
  stream.output_queue.push(('file', output_filename))
862
  except Exception as e:
@@ -868,16 +872,13 @@ def worker(
868
  stream.output_queue.push(('error', err))
869
  continue
870
 
871
- if is_last_section:
872
- break
873
  except Exception as e:
874
  print(f"Outer error: {e}, type={type(e)}")
875
  traceback.print_exc()
876
  if not high_vram and not cpu_fallback_mode:
877
  try:
878
- unload_complete_models(
879
- text_encoder, text_encoder_2, image_encoder, vae, transformer
880
- )
881
  except Exception as ue:
882
  print(f"Unload error: {ue}")
883
 
@@ -889,7 +890,8 @@ def worker(
889
  print("Worker finished, pushing 'end'.")
890
  stream.output_queue.push(('end', None))
891
 
892
- # Final processing function (Spaces GPU decorator or normal)
 
893
  if IN_HF_SPACE and 'spaces' in globals():
894
  @spaces.GPU
895
  def process_with_gpu(
@@ -900,7 +902,7 @@ if IN_HF_SPACE and 'spaces' in globals():
900
  global stream
901
  assert input_image is not None, "No input image given."
902
 
903
- # Initialize UI state
904
  yield None, None, "", "", gr.update(interactive=False), gr.update(interactive=True)
905
  try:
906
  stream = AsyncStream()
@@ -916,50 +918,35 @@ if IN_HF_SPACE and 'spaces' in globals():
916
  error_message = None
917
 
918
  while True:
919
- try:
920
- flag, data = stream.output_queue.next()
921
- if flag == 'file':
922
- output_filename = data
923
- prev_output_filename = output_filename
924
- yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
925
- elif flag == 'progress':
926
- preview, desc, html = data
927
- yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
928
- elif flag == 'error':
929
- error_message = data
930
- print(f"Got error: {error_message}")
931
- elif flag == 'end':
932
- if output_filename is None and prev_output_filename:
933
- output_filename = prev_output_filename
934
- if error_message:
935
- err_html = create_error_html(error_message)
936
- yield (
937
- output_filename, gr.update(visible=False), gr.update(),
938
- err_html, gr.update(interactive=True), gr.update(interactive=False)
939
- )
940
- else:
941
- yield (
942
- output_filename, gr.update(visible=False), gr.update(),
943
- '', gr.update(interactive=True), gr.update(interactive=False)
944
- )
945
- break
946
- except Exception as e:
947
- print(f"Loop error: {e}")
948
- if (time.time() - last_update_time) > 60:
949
- print("No updates for 60 seconds, possible hang or timeout.")
950
- if prev_output_filename:
951
- err_html = create_error_html("partial video has been generated", is_timeout=True)
952
- yield (
953
- prev_output_filename, gr.update(visible=False), gr.update(),
954
- err_html, gr.update(interactive=True), gr.update(interactive=False)
955
- )
956
- else:
957
- err_html = create_error_html(f"Processing timed out: {e}", is_timeout=True)
958
- yield (
959
- None, gr.update(visible=False), gr.update(),
960
- err_html, gr.update(interactive=True), gr.update(interactive=False)
961
- )
962
- break
963
  except Exception as e:
964
  print(f"Start process error: {e}")
965
  traceback.print_exc()
@@ -991,56 +978,42 @@ else:
991
  error_message = None
992
 
993
  while True:
994
- try:
995
- flag, data = stream.output_queue.next()
996
- if flag == 'file':
997
- output_filename = data
998
- prev_output_filename = output_filename
999
- yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
1000
- elif flag == 'progress':
1001
- preview, desc, html = data
1002
- yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
1003
- elif flag == 'error':
1004
- error_message = data
1005
- print(f"Got error: {error_message}")
1006
- elif flag == 'end':
1007
- if output_filename is None and prev_output_filename:
1008
- output_filename = prev_output_filename
1009
- if error_message:
1010
- err_html = create_error_html(error_message)
1011
- yield (
1012
- output_filename, gr.update(visible=False), gr.update(),
1013
- err_html, gr.update(interactive=True), gr.update(interactive=False)
1014
- )
1015
- else:
1016
- yield (
1017
- output_filename, gr.update(visible=False), gr.update(),
1018
- '', gr.update(interactive=True), gr.update(interactive=False)
1019
- )
1020
- break
1021
- except Exception as e:
1022
- print(f"Loop error: {e}")
1023
- if (time.time() - last_update_time) > 60:
1024
- print("No update for 60 seconds, possible hang or timeout.")
1025
- if prev_output_filename:
1026
- err_html = create_error_html("partial video has been generated", is_timeout=True)
1027
- yield (
1028
- prev_output_filename, gr.update(visible=False), gr.update(),
1029
- err_html, gr.update(interactive=True), gr.update(interactive=False)
1030
- )
1031
- else:
1032
- err_html = create_error_html(f"Processing timed out: {e}", is_timeout=True)
1033
- yield (
1034
- None, gr.update(visible=False), gr.update(),
1035
- err_html, gr.update(interactive=True), gr.update(interactive=False)
1036
- )
1037
- break
1038
  except Exception as e:
1039
  print(f"Start process error: {e}")
1040
  traceback.print_exc()
1041
  err_html = create_error_html(str(e))
1042
  yield None, gr.update(visible=False), gr.update(), err_html, gr.update(interactive=True), gr.update(interactive=False)
1043
 
 
1044
  def end_process():
1045
  """
1046
  Stop generation by pushing 'end' to the worker queue
@@ -1068,7 +1041,6 @@ quick_prompts = [
1068
  ["A character doing some simple body movements."]
1069
  ]
1070
 
1071
- # CSS (pastel-tone styling)
1072
  def make_custom_css():
1073
  base_progress_css = make_progress_bar_css()
1074
  pastel_css = """
@@ -1169,17 +1141,17 @@ with block:
1169
  with gr.Row(elem_classes="mobile-full-width"):
1170
  with gr.Column(scale=1, elem_classes="gr-panel"):
1171
  input_image = gr.Image(
1172
- label="Upload Image",
1173
  sources='upload',
1174
  type="numpy",
1175
  elem_id="input-image",
1176
  height=320
1177
  )
1178
- prompt = gr.Textbox(label="Prompt", value='', elem_id="prompt-input")
1179
 
1180
  example_quick_prompts = gr.Dataset(
1181
  samples=quick_prompts,
1182
- label="Quick Prompts",
1183
  samples_per_page=1000,
1184
  components=[prompt]
1185
  )
@@ -1193,18 +1165,18 @@ with block:
1193
  with gr.Column(scale=1, elem_classes="gr-panel"):
1194
  with gr.Row(elem_classes="button-container"):
1195
  start_button = gr.Button(
1196
- value="Generate",
1197
  elem_id="start-button",
1198
  variant="primary"
1199
  )
1200
  end_button = gr.Button(
1201
- value="Stop",
1202
  elem_id="stop-button",
1203
  interactive=False
1204
  )
1205
 
1206
  result_video = gr.Video(
1207
- label="Generated Video",
1208
  autoplay=True,
1209
  loop=True,
1210
  height=320,
@@ -1212,7 +1184,7 @@ with block:
1212
  elem_id="result-video"
1213
  )
1214
  preview_image = gr.Image(
1215
- label="Preview",
1216
  visible=False,
1217
  height=150,
1218
  elem_classes="preview-container"
@@ -1239,11 +1211,11 @@ with block:
1239
  value=31337,
1240
  precision=0
1241
  )
1242
- # Changed the default value here to 2 (maximum of 5 kept as-is)
1243
  total_second_length = gr.Slider(
1244
  label=get_translation("video_length"),
1245
  minimum=1,
1246
- maximum=5,
1247
  value=2,
1248
  step=0.1
1249
  )
@@ -1296,7 +1268,7 @@ with block:
1296
  info=get_translation("gpu_memory_info")
1297
  )
1298
 
1299
- # Wire up the processing functions
1300
  ips = [
1301
  input_image, prompt, n_prompt, seed,
1302
  total_second_length, latent_window_size, steps,
 
1
+ ########################################
2
+ # from diffusers_helper.hf_login import login
3
+ # Use the login function if needed (after uncommenting)
4
+ ########################################
5
+
6
  import os
7
  import threading
8
  import time
 
28
  "teacache_info": "Faster speed, but may result in slightly worse finger and hand generation.",
29
  "negative_prompt": "Negative Prompt",
30
  "seed": "Seed",
31
+ "video_length": "Video Length (max 4 seconds)",
32
  "latent_window": "Latent Window Size",
33
  "steps": "Inference Steps",
34
  "steps_info": "Changing this value is not recommended.",
 
194
  print(f"Device: {device}, VAE/Encoders dtype={dtype}, Transformer dtype={transformer_dtype}")
195
 
196
  try:
197
+ # (1) Text encoders
198
  text_encoder = LlamaModel.from_pretrained(
199
  "hunyuanvideo-community/HunyuanVideo",
200
  subfolder='text_encoder',
201
  torch_dtype=dtype
202
  ).to(model_device)
203
+
204
  text_encoder_2 = CLIPTextModel.from_pretrained(
205
  "hunyuanvideo-community/HunyuanVideo",
206
  subfolder='text_encoder_2',
207
  torch_dtype=dtype
208
  ).to(model_device)
209
+
210
  tokenizer = LlamaTokenizerFast.from_pretrained(
211
  "hunyuanvideo-community/HunyuanVideo",
212
  subfolder='tokenizer'
 
215
  "hunyuanvideo-community/HunyuanVideo",
216
  subfolder='tokenizer_2'
217
  )
218
+
219
+ # (2) VAE
220
  vae = AutoencoderKLHunyuanVideo.from_pretrained(
221
  "hunyuanvideo-community/HunyuanVideo",
222
  subfolder='vae',
223
  torch_dtype=dtype
224
  ).to(model_device)
225
 
226
+ # (3) CLIP Vision
227
  feature_extractor = SiglipImageProcessor.from_pretrained(
228
  "lllyasviel/flux_redux_bfl", subfolder='feature_extractor'
229
  )
 
233
  torch_dtype=dtype
234
  ).to(model_device)
235
 
236
+ # (4) Transformer (FramePack_F1)
237
+ #
238
+ # Before: "lllyasviel/FramePackI2V_HY"
239
+ # After: "lllyasviel/FramePack_F1_I2V_HY_20250503" (as given in the second reference code)
240
+ #
241
  transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
242
+ "lllyasviel/FramePack_F1_I2V_HY_20250503",
243
  torch_dtype=transformer_dtype
244
  ).to(model_device)
245
 
 
285
  ).to('cpu')
286
 
287
  transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
288
+ "lllyasviel/FramePack_F1_I2V_HY_20250503",
289
  torch_dtype=transformer_dtype
290
  ).to('cpu')
291
 
 
301
  vae.enable_slicing()
302
  vae.enable_tiling()
303
 
304
+ # Required by the FramePack_F1 model
305
  transformer.high_quality_fp32_output_for_inference = True
306
  print("transformer.high_quality_fp32_output_for_inference = True")
307
 
 
321
  if torch.cuda.is_available() and not cpu_fallback_mode:
322
  try:
323
  if not high_vram:
324
+ # With low VRAM, use DynamicSwapInstaller to swap between GPU and CPU as needed
325
  DynamicSwapInstaller.install_model(transformer, device=device)
326
  DynamicSwapInstaller.install_model(text_encoder, device=device)
327
  else:
 
356
  cpu_fallback_mode = True
357
  return {}
358
 
359
+ # GPU decorator (Spaces only)
360
  if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE:
361
  try:
362
  @spaces.GPU
 
422
 
423
  stream = AsyncStream()
424
 
 
425
  def create_error_html(error_msg, is_timeout=False):
426
  """
427
  Create a user-friendly error message in English only
 
478
  use_teacache
479
  ):
480
  """
481
+ Final video generation logic (runs in a background thread)
482
  """
483
  global last_update_time
484
  last_update_time = time.time()
485
 
486
+ # Default 2 seconds, capped at 4 seconds
487
+ total_second_length = min(total_second_length, 4.0)
 
 
488
 
489
  try:
490
  models_local = get_models()
 
514
  device = 'cuda' if (GPU_AVAILABLE and not cpu_fallback_mode) else 'cpu'
515
  print(f"Inference device: {device}")
516
 
517
+ # To produce total_second_length seconds at 30 fps, several chunks of latent_window_size*4-3 frames must be chained together.
518
+ # Simply compute the number of iterations as (total seconds * fps) / (latent_window_size * 4).
519
+ # As in the second example code, this is implemented by iterating over sections.
 
 
520
 
521
+ # With the 'FramePack_F1' model, sampling extends the video little by little in the manner below
522
  total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
523
  total_latent_sections = int(max(round(total_latent_sections), 1))
524
 
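A quick numeric check of the section arithmetic above (an illustrative sketch, not part of the committed file; total_second_length=2.0 is the code's default, latent_window_size=9 is an assumed example value):

# Illustrative sketch only; example values, not taken from the commit.
total_second_length = 2.0          # default length in seconds
latent_window_size = 9             # assumed example value
total_latent_sections = int(max(round((total_second_length * 30) / (latent_window_size * 4)), 1))
frames_per_section = latent_window_size * 4 - 3
print(total_latent_sections, frames_per_section)   # -> 2 33   (60 / 36 ≈ 1.67, rounded to 2 sections)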
525
  job_id = generate_timestamp()
526
  last_output_filename = None
 
527
  history_latents = None
528
+ history_pixels = None
529
  total_generated_latent_frames = 0
530
 
531
+ # Initial progress message
 
532
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
533
 
534
  try:
535
+ # With low VRAM, unload models up front
536
  if not high_vram and not cpu_fallback_mode:
537
  try:
538
+ unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
 
 
539
  except Exception as e:
540
  print(f"Error unloading models: {e}")
541
 
542
+ # (1) Text Encode
543
  last_update_time = time.time()
544
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding...'))))
545
 
546
  try:
547
  if not high_vram and not cpu_fallback_mode:
548
+ # Dynamic offloading
549
  fake_diffusers_current_device(text_encoder, device)
550
  load_model_as_complete(text_encoder_2, target_device=device)
551
 
552
  llama_vec, clip_l_pooler = encode_prompt_conds(
553
  prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2
554
  )
 
555
  if cfg == 1:
556
  llama_vec_n, clip_l_pooler_n = (
557
  torch.zeros_like(llama_vec),
 
561
  llama_vec_n, clip_l_pooler_n = encode_prompt_conds(
562
  n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2
563
  )
 
564
  llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
565
  llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)
566
  except Exception as e:
 
571
  stream.output_queue.push(('end', None))
572
  return
573
 
574
+ # (2) Image processing
575
  last_update_time = time.time()
576
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Image processing...'))))
577
 
578
  try:
579
  H, W, C = input_image.shape
580
+ # Resolution bucketing
581
  height, width = find_nearest_bucket(H, W, resolution=640)
582
 
583
+ # In CPU mode, keep the resolution from getting too large
584
  if cpu_fallback_mode:
585
  height = min(height, 320)
586
  width = min(width, 320)
 
598
  stream.output_queue.push(('end', None))
599
  return
600
 
601
+ # (3) VAE Encoding
602
  last_update_time = time.time()
603
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding...'))))
604
 
 
614
  stream.output_queue.push(('end', None))
615
  return
616
 
617
+ # (4) CLIP Vision
618
  last_update_time = time.time()
619
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encode...'))))
620
 
621
  try:
622
  if not high_vram and not cpu_fallback_mode:
623
  load_model_as_complete(image_encoder, target_device=device)
624
+ image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
 
 
625
  image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
626
  except Exception as e:
627
  err = f"CLIP Vision encode error: {e}"
 
631
  stream.output_queue.push(('end', None))
632
  return
633
 
634
+ # (5) dtype conversion
635
  try:
636
  llama_vec = llama_vec.to(transformer.dtype)
637
  llama_vec_n = llama_vec_n.to(transformer.dtype)
 
646
  stream.output_queue.push(('end', None))
647
  return
648
 
649
+ # (6) Sampling loop
650
  last_update_time = time.time()
651
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Start sampling...'))))
652
 
653
  rnd = torch.Generator("cpu").manual_seed(seed)
 
654
 
655
+ # With the FramePack_F1 model, history_latents initially holds roughly just [start_latent]
656
+ # As in the second reference code, seed history_latents with start_latent, then extend it section by section
657
  try:
658
+ history_latents = start_latent.cpu()
 
 
 
659
  history_pixels = None
660
+ total_generated_latent_frames = start_latent.shape[2]  # usually 1
661
  except Exception as e:
662
  err = f"Init history state error: {e}"
663
  print(err)
 
666
  stream.output_queue.push(('end', None))
667
  return
668
 
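For intuition on how the history seeded above grows in the section loop below, here is a minimal sketch with assumed shapes (not part of the committed file; the real per-section frame count comes from generated_latents.shape[2] at runtime):

# Illustrative sketch (assumed shapes): forward-growing history, one concat per section.
import torch

start_latent = torch.zeros(1, 16, 1, 80, 80)             # assumed (B, C, T, H//8, W//8) start latent
history_latents = start_latent.cpu()
total_generated_latent_frames = start_latent.shape[2]    # usually 1

for section_index in range(2):                           # e.g. two sections for a 2-second clip
    generated_latents = torch.zeros(1, 16, 9, 80, 80)    # stand-in for the sample_hunyuan() output
    total_generated_latent_frames += generated_latents.shape[2]
    history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
    print(section_index, history_latents.shape[2])       # 0 -> 10, 1 -> 19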
669
+ # The mp4 CRF (quality) can be fixed (e.g. 16); here we simply use CRF=16
670
+ mp4_crf = 16
 
 
 
 
 
 
 
671
 
672
+ for section_index in range(total_latent_sections):
673
  if stream.input_queue.top() == 'end':
674
+ # User requested stop
675
  if history_pixels is not None and total_generated_latent_frames > 0:
676
  try:
677
  outname = os.path.join(
678
  outputs_folder, f'{job_id}_final_{total_generated_latent_frames}.mp4'
679
  )
680
+ save_bcthw_as_mp4(history_pixels, outname, fps=30, crf=mp4_crf)
681
  stream.output_queue.push(('file', outname))
682
  except Exception as e:
683
  print(f"Error saving final partial video: {e}")
684
  stream.output_queue.push(('end', None))
685
  return
686
 
687
+ print(f"Section {section_index+1}/{total_latent_sections}")
 
688
 
689
+ # Model swapping
690
  if not high_vram and not cpu_fallback_mode:
691
  try:
692
  unload_complete_models()
 
705
  else:
706
  transformer.initialize_teacache(enable_teacache=False)
707
 
708
+ # Callback
709
  def callback(d):
710
  global last_update_time
711
  last_update_time = time.time()
 
721
  curr_step = d['i'] + 1
722
  percentage = int(100.0 * curr_step / steps)
723
  hint = f'Sampling {curr_step}/{steps}'
724
+ desc = f'Section {section_index+1}/{total_latent_sections}'
725
  barhtml = make_progress_bar_html(percentage, hint)
726
  stream.output_queue.push(('progress', (preview, desc, barhtml)))
727
  except KeyboardInterrupt:
 
730
  print(f"Callback error: {e}")
731
  return
732
 
733
+ # Split the indices as in the second example
734
+ # FramePack_F1 uses the [1, 16, 2, 1, latent_window_size] layout
735
  try:
736
+ # Number of frames to sample
737
+ frames_per_section = latent_window_size * 4 - 3
738
 
739
+ # Prepare the indices
740
+ indices = torch.arange(0, sum([1, 16, 2, 1, latent_window_size])).unsqueeze(0)
741
+ (
742
+ clean_latent_indices_start,
743
+ clean_latent_4x_indices,
744
+ clean_latent_2x_indices,
745
+ clean_latent_1x_indices,
746
+ latent_indices
747
+ ) = indices.split([1, 16, 2, 1, latent_window_size], dim=1)
748
+
749
+ # Take the trailing 16+2+1 = 19 frames of history_latents and split them into clean_latents_xx
750
+ if history_latents.shape[2] < 19:
751
+ # The initial state may have fewer than 19 frames, so pad
752
+ # Here we simply pad history_latents out to 19 frames
753
+ needed = 19 - history_latents.shape[2]
754
+ if needed > 0:
755
+ pad_shape = list(history_latents.shape)
756
+ pad_shape[2] = needed
757
+ pad_zeros = torch.zeros(pad_shape, dtype=history_latents.dtype)
758
+ history_latents = torch.cat([pad_zeros, history_latents], dim=2)
759
+
760
+ clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -19:, :, :].split([16, 2, 1], dim=2)
761
+ # clean_latents is [start_latent + clean_latents_1x], i.e. only about one frame is appended
762
+ clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)
 
763
  except Exception as e:
764
+ err = f"Indices prep error: {e}"
765
+ print(err)
766
  traceback.print_exc()
767
+ stream.output_queue.push(('error', err))
768
+ stream.output_queue.push(('end', None))
769
+ return
770
+
771
+ # 진짜 샘플링
772
+ try:
773
+ generated_latents = sample_hunyuan(
774
+ transformer=transformer,
775
+ sampler='unipc',
776
+ width=width,
777
+ height=height,
778
+ frames=frames_per_section,
779
+ real_guidance_scale=cfg,
780
+ distilled_guidance_scale=gs,
781
+ guidance_rescale=rs,
782
+ num_inference_steps=steps,
783
+ generator=rnd,
784
+ prompt_embeds=llama_vec,
785
+ prompt_embeds_mask=llama_attention_mask,
786
+ prompt_poolers=clip_l_pooler,
787
+ negative_prompt_embeds=llama_vec_n,
788
+ negative_prompt_embeds_mask=llama_attention_mask_n,
789
+ negative_prompt_poolers=clip_l_pooler_n,
790
+ device=device,
791
+ dtype=transformer.dtype,
792
+ image_embeddings=image_encoder_last_hidden_state,
793
+ latent_indices=latent_indices,
794
+ clean_latents=clean_latents,
795
+ clean_latent_indices=torch.cat([clean_latent_indices_start, clean_latent_1x_indices], dim=1),
796
+ clean_latents_2x=clean_latents_2x,
797
+ clean_latent_2x_indices=clean_latent_2x_indices,
798
+ clean_latents_4x=clean_latents_4x,
799
+ clean_latent_4x_indices=clean_latent_4x_indices,
800
+ callback=callback
801
+ )
802
+ except KeyboardInterrupt:
803
+ print("User stopped generation.")
804
+ err = "User stopped generation, partial video returned."
805
  if last_output_filename:
806
  stream.output_queue.push(('file', last_output_filename))
807
+ stream.output_queue.push(('error', err))
808
+ stream.output_queue.push(('end', None))
809
+ return
810
+ except Exception as e:
811
+ print(f"Sampling error: {e}")
812
+ traceback.print_exc()
813
+ if last_output_filename:
814
  err = f"Error during sampling, partial video returned: {e}"
815
+ stream.output_queue.push(('file', last_output_filename))
816
  stream.output_queue.push(('error', err))
817
  else:
818
+ err = f"Error during sampling: {e}"
819
  stream.output_queue.push(('error', err))
820
  stream.output_queue.push(('end', None))
821
  return
822
 
823
  try:
824
+ # Append to the end of history_latents
825
+ total_generated_latent_frames += generated_latents.shape[2]
826
+ history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
 
827
  except Exception as e:
828
+ err = f"Concat history_latents error: {e}"
829
  print(err)
830
  traceback.print_exc()
 
 
831
  stream.output_queue.push(('error', err))
832
  stream.output_queue.push(('end', None))
833
  return
834
 
835
+ # Model offloading / VAE loading
836
  if not high_vram and not cpu_fallback_mode:
837
  try:
838
+ offload_model_from_device_for_memory_preservation(transformer, target_device=device, preserved_memory_gb=8)
 
 
839
  load_model_as_complete(vae, target_device=device)
840
  except Exception as e:
841
  print(f"Model memory manage error: {e}")
842
 
843
+ # VAE decode & save the result
844
  try:
845
+ real_history_latents = history_latents  # all frames
 
 
 
 
 
 
846
 
847
+ # On the first decode
 
848
  if history_pixels is None:
849
  history_pixels = vae_decode(real_history_latents, vae).cpu()
850
  else:
851
+ # Join the overlapping frames at the section boundary (simple append).
852
+ # Here we reuse the soft_append_bcthw approach from the second example as-is
853
+ # frames_per_section = latent_window_size*4 - 3
854
+ # The overlap (overlapped_frames) is the same: frames_per_section
855
+ # In practice a section may have little actual overlap, so clamping with min would be safer
856
+ overlapped_frames = frames_per_section
857
+ current_pixels = vae_decode(real_history_latents[:, :, -frames_per_section:], vae).cpu()
858
+ history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
859
+
860
  output_filename = os.path.join(
861
  outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4'
862
  )
863
+ save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=mp4_crf)
864
  last_output_filename = output_filename
865
  stream.output_queue.push(('file', output_filename))
866
  except Exception as e:
 
872
  stream.output_queue.push(('error', err))
873
  continue
874
 
875
+ # End of the for loop
 
876
  except Exception as e:
877
  print(f"Outer error: {e}, type={type(e)}")
878
  traceback.print_exc()
879
  if not high_vram and not cpu_fallback_mode:
880
  try:
881
+ unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
 
 
882
  except Exception as ue:
883
  print(f"Unload error: {ue}")
884
 
 
890
  print("Worker finished, pushing 'end'.")
891
  stream.output_queue.push(('end', None))
892
 
893
+
894
+ # Wrap the process function depending on whether Spaces GPU is used inside Gradio
895
  if IN_HF_SPACE and 'spaces' in globals():
896
  @spaces.GPU
897
  def process_with_gpu(
 
902
  global stream
903
  assert input_image is not None, "No input image given."
904
 
905
+ # Initialize UI state
906
  yield None, None, "", "", gr.update(interactive=False), gr.update(interactive=True)
907
  try:
908
  stream = AsyncStream()
 
918
  error_message = None
919
 
920
  while True:
921
+ flag, data = stream.output_queue.next()
922
+ if flag == 'file':
923
+ output_filename = data
924
+ prev_output_filename = output_filename
925
+ yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
926
+
927
+ elif flag == 'progress':
928
+ preview, desc, html = data
929
+ yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
930
+
931
+ elif flag == 'error':
932
+ error_message = data
933
+ print(f"Got error: {error_message}")
934
+
935
+ elif flag == 'end':
936
+ if output_filename is None and prev_output_filename:
937
+ output_filename = prev_output_filename
938
+ if error_message:
939
+ err_html = create_error_html(error_message)
940
+ yield (
941
+ output_filename, gr.update(visible=False), gr.update(),
942
+ err_html, gr.update(interactive=True), gr.update(interactive=False)
943
+ )
944
+ else:
945
+ yield (
946
+ output_filename, gr.update(visible=False), gr.update(),
947
+ '', gr.update(interactive=True), gr.update(interactive=False)
948
+ )
949
+ break
 
950
  except Exception as e:
951
  print(f"Start process error: {e}")
952
  traceback.print_exc()
 
978
  error_message = None
979
 
980
  while True:
981
+ flag, data = stream.output_queue.next()
982
+ if flag == 'file':
983
+ output_filename = data
984
+ prev_output_filename = output_filename
985
+ yield output_filename, gr.update(), gr.update(), '', gr.update(interactive=False), gr.update(interactive=True)
986
+
987
+ elif flag == 'progress':
988
+ preview, desc, html = data
989
+ yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
990
+
991
+ elif flag == 'error':
992
+ error_message = data
993
+ print(f"Got error: {error_message}")
994
+
995
+ elif flag == 'end':
996
+ if output_filename is None and prev_output_filename:
997
+ output_filename = prev_output_filename
998
+ if error_message:
999
+ err_html = create_error_html(error_message)
1000
+ yield (
1001
+ output_filename, gr.update(visible=False), gr.update(),
1002
+ err_html, gr.update(interactive=True), gr.update(interactive=False)
1003
+ )
1004
+ else:
1005
+ yield (
1006
+ output_filename, gr.update(visible=False), gr.update(),
1007
+ '', gr.update(interactive=True), gr.update(interactive=False)
1008
+ )
1009
+ break
 
1010
  except Exception as e:
1011
  print(f"Start process error: {e}")
1012
  traceback.print_exc()
1013
  err_html = create_error_html(str(e))
1014
  yield None, gr.update(visible=False), gr.update(), err_html, gr.update(interactive=True), gr.update(interactive=False)
1015
 
1016
+
1017
  def end_process():
1018
  """
1019
  Stop generation by pushing 'end' to the worker queue
 
1041
  ["A character doing some simple body movements."]
1042
  ]
1043
 
 
1044
  def make_custom_css():
1045
  base_progress_css = make_progress_bar_css()
1046
  pastel_css = """
 
1141
  with gr.Row(elem_classes="mobile-full-width"):
1142
  with gr.Column(scale=1, elem_classes="gr-panel"):
1143
  input_image = gr.Image(
1144
+ label=get_translation("upload_image"),
1145
  sources='upload',
1146
  type="numpy",
1147
  elem_id="input-image",
1148
  height=320
1149
  )
1150
+ prompt = gr.Textbox(label=get_translation("prompt"), value='', elem_id="prompt-input")
1151
 
1152
  example_quick_prompts = gr.Dataset(
1153
  samples=quick_prompts,
1154
+ label=get_translation("quick_prompts"),
1155
  samples_per_page=1000,
1156
  components=[prompt]
1157
  )
 
1165
  with gr.Column(scale=1, elem_classes="gr-panel"):
1166
  with gr.Row(elem_classes="button-container"):
1167
  start_button = gr.Button(
1168
+ value=get_translation("start_generation"),
1169
  elem_id="start-button",
1170
  variant="primary"
1171
  )
1172
  end_button = gr.Button(
1173
+ value=get_translation("stop_generation"),
1174
  elem_id="stop-button",
1175
  interactive=False
1176
  )
1177
 
1178
  result_video = gr.Video(
1179
+ label=get_translation("generated_video"),
1180
  autoplay=True,
1181
  loop=True,
1182
  height=320,
 
1184
  elem_id="result-video"
1185
  )
1186
  preview_image = gr.Image(
1187
+ label=get_translation("next_latents"),
1188
  visible=False,
1189
  height=150,
1190
  elem_classes="preview-container"
 
1211
  value=31337,
1212
  precision=0
1213
  )
1214
+ # Default value = 2, maximum = 4
1215
  total_second_length = gr.Slider(
1216
  label=get_translation("video_length"),
1217
  minimum=1,
1218
+ maximum=4,
1219
  value=2,
1220
  step=0.1
1221
  )
 
1268
  info=get_translation("gpu_memory_info")
1269
  )
1270
 
1271
+ # Button actions
1272
  ips = [
1273
  input_image, prompt, n_prompt, seed,
1274
  total_second_length, latent_window_size, steps,