framepack-i2v

Running on Zero

App Files Files Community

framepack-i2v / app.py

ginipick

Update app.py

4418d0f verified 8 days ago

raw

history blame

55.9 kB

	#############################################
	# from diffusers_helper.hf_login import login
	# Use HF login if needed (after uncommenting the import)
	#############################################

	import os

	# Point the Hugging Face cache (HF_HOME) at an `hf_download` directory that
	# sits next to this script; the path is resolved to an absolute real path.
	_script_dir = os.path.dirname(__file__)
	_hf_cache_dir = os.path.realpath(os.path.join(_script_dir, './hf_download'))
	os.environ['HF_HOME'] = os.path.abspath(_hf_cache_dir)

	import gradio as gr
	import torch
	import traceback
	import einops
	import safetensors.torch as sf
	import numpy as np
	import math
	import time

	# True when the SPACE_ID environment variable is set, which this app uses as
	# the marker for running inside a Hugging Face Space.
	IN_HF_SPACE = 'SPACE_ID' in os.environ

	# --------- Translation dictionary (English only) ---------
	_ENGLISH_STRINGS = {
	    "title": "FramePack - Image to Video Generation",
	    "upload_image": "Upload Image",
	    "prompt": "Prompt",
	    "quick_prompts": "Quick Prompts",
	    "start_generation": "Generate",
	    "stop_generation": "Stop",
	    "use_teacache": "Use TeaCache",
	    "teacache_info": "Faster speed, but may result in slightly worse finger and hand generation.",
	    "negative_prompt": "Negative Prompt",
	    "seed": "Seed",
	    # UI label states the 4-second maximum
	    "video_length": "Video Length (max 4 seconds)",
	    "latent_window": "Latent Window Size",
	    "steps": "Inference Steps",
	    "steps_info": "Changing this value is not recommended.",
	    "cfg_scale": "CFG Scale",
	    "distilled_cfg": "Distilled CFG Scale",
	    "distilled_cfg_info": "Changing this value is not recommended.",
	    "cfg_rescale": "CFG Rescale",
	    "gpu_memory": "GPU Memory Preservation (GB) (larger means slower)",
	    "gpu_memory_info": "Set this to a larger value if you encounter OOM errors. Larger values cause slower speed.",
	    "next_latents": "Next Latents",
	    "generated_video": "Generated Video",
	    "sampling_note": "Note: The model predicts future frames from past frames. If the start action isn't immediately visible, please wait for more frames.",
	    "error_message": "Error",
	    "processing_error": "Processing error",
	    "network_error": "Network connection is unstable, model download timed out. Please try again later.",
	    "memory_error": "GPU memory insufficient, please try increasing GPU memory preservation value or reduce video length.",
	    "model_error": "Failed to load model, possibly due to network issues or high server load. Please try again later.",
	    "partial_video": "Processing error, but partial video has been generated",
	    "processing_interrupt": "Processing was interrupted, but partial video has been generated",
	}

	# Language code -> table of UI strings; only "en" is defined.
	translations = {"en": _ENGLISH_STRINGS}

	def get_translation(key):
	    """Look up a UI string by key in the English table.

	    A key that has no entry is returned unchanged, so a missing
	    translation shows up as the key itself instead of raising.
	    """
	    english = translations["en"]
	    if key in english:
	        return english[key]
	    return key

	#############################################
	# diffusers_helper imports
	#############################################
	from diffusers_helper.thread_utils import AsyncStream, async_run
	from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
	from diffusers_helper.memory import (
	cpu,
	gpu,
	get_cuda_free_memory_gb,
	move_model_to_device_with_memory_preservation,
	offload_model_from_device_for_memory_preservation,
	fake_diffusers_current_device,
	DynamicSwapInstaller,
	unload_complete_models,
	load_model_as_complete
	)
	from diffusers_helper.utils import (
	generate_timestamp,
	save_bcthw_as_mp4,
	resize_and_center_crop,
	crop_or_pad_yield_mask,
	soft_append_bcthw
	)
	from diffusers_helper.bucket_tools import find_nearest_bucket
	from diffusers_helper.hunyuan import (
	encode_prompt_conds, vae_encode, vae_decode, vae_decode_fake
	)
	from diffusers_helper.clip_vision import hf_clip_vision_encode
	from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
	from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan

	from diffusers import AutoencoderKLHunyuanVideo
	from transformers import (
	LlamaModel, CLIPTextModel,
	LlamaTokenizerFast, CLIPTokenizer,
	SiglipVisionModel, SiglipImageProcessor
	)

	#############################################
	# GPU check
	#############################################
	GPU_AVAILABLE = torch.cuda.is_available()
	# NOTE: despite its name, `free_mem_gb` holds the *total* memory of device 0
	# (bytes / 1e9), not the memory that is currently free.
	free_mem_gb = 0.0
	high_vram = False
	if GPU_AVAILABLE:
	    try:
	        free_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
	        high_vram = free_mem_gb > 60
	    except Exception as exc:
	        # Best effort: keep the low-VRAM defaults, but say why instead of
	        # silently swallowing the failure (and never trap KeyboardInterrupt
	        # or SystemExit, which a bare `except:` would).
	        print(f"Could not query GPU properties, assuming low VRAM: {exc}")
	print(f"GPU Available: {GPU_AVAILABLE}, free_mem_gb={free_mem_gb}, high_vram={high_vram}")

	# Without CUDA the app runs in CPU fallback mode.
	cpu_fallback_mode = not GPU_AVAILABLE
	last_update_time = time.time()

	#############################################
	# Model handles (module globals, filled in by load_global_models)
	#############################################
	text_encoder = None
	text_encoder_2 = None
	tokenizer = None
	tokenizer_2 = None
	vae = None
	feature_extractor = None
	image_encoder = None
	transformer = None

	# The logic below reuses, almost verbatim, the model-loading part of the
	# "second code" reference implementation.
	def load_global_models():
	    """Load all models once and publish them through the module globals.

	    Does nothing when `transformer` is already set. Every model is first
	    loaded into a local variable and the globals are assigned only at the
	    very end, so a failure part-way through leaves the globals untouched
	    (still None) and a later call starts over.

	    Placement after loading:
	      * GPU with high VRAM: every model is moved to the GPU.
	      * GPU with low VRAM: models stay on the CPU; DynamicSwapInstaller is
	        installed on the transformer and the Llama text encoder.
	      * No GPU: models stay on the CPU and `cpu_fallback_mode` is set.
	    """
	    global text_encoder, text_encoder_2, tokenizer, tokenizer_2
	    global vae, feature_extractor, image_encoder, transformer
	    global cpu_fallback_mode

	    # Already loaded: nothing to do.
	    if transformer is not None:
	        return

	    print("Loading models...")

	    hunyuan_repo = "hunyuanvideo-community/HunyuanVideo"
	    redux_repo = "lllyasviel/flux_redux_bfl"

	    # Text encoders, VAE and image encoder are loaded as float16; the
	    # transformer as bfloat16. Everything starts on the CPU.
	    text_encoder_local = LlamaModel.from_pretrained(
	        hunyuan_repo,
	        subfolder='text_encoder',
	        torch_dtype=torch.float16
	    ).cpu()

	    text_encoder_2_local = CLIPTextModel.from_pretrained(
	        hunyuan_repo,
	        subfolder='text_encoder_2',
	        torch_dtype=torch.float16
	    ).cpu()

	    tokenizer_local = LlamaTokenizerFast.from_pretrained(
	        hunyuan_repo,
	        subfolder='tokenizer'
	    )
	    tokenizer_2_local = CLIPTokenizer.from_pretrained(
	        hunyuan_repo,
	        subfolder='tokenizer_2'
	    )

	    vae_local = AutoencoderKLHunyuanVideo.from_pretrained(
	        hunyuan_repo,
	        subfolder='vae',
	        torch_dtype=torch.float16
	    ).cpu()

	    feature_extractor_local = SiglipImageProcessor.from_pretrained(
	        redux_repo, subfolder='feature_extractor'
	    )
	    image_encoder_local = SiglipVisionModel.from_pretrained(
	        redux_repo,
	        subfolder='image_encoder',
	        torch_dtype=torch.float16
	    ).cpu()

	    # FramePack_F1_I2V_HY_20250503 (bfloat16)
	    transformer_local = HunyuanVideoTransformer3DModelPacked.from_pretrained(
	        'lllyasviel/FramePack_F1_I2V_HY_20250503',
	        torch_dtype=torch.bfloat16
	    ).cpu()

	    # Inference only: switch every model to eval mode.
	    vae_local.eval()
	    text_encoder_local.eval()
	    text_encoder_2_local.eval()
	    image_encoder_local.eval()
	    transformer_local.eval()

	    # VAE slicing and tiling when VRAM is limited.
	    if not high_vram:
	        vae_local.enable_slicing()
	        vae_local.enable_tiling()

	    # Pin the dtypes explicitly (same dtypes the models were loaded with).
	    transformer_local.high_quality_fp32_output_for_inference = True
	    transformer_local.to(dtype=torch.bfloat16)
	    vae_local.to(dtype=torch.float16)
	    image_encoder_local.to(dtype=torch.float16)
	    text_encoder_local.to(dtype=torch.float16)
	    text_encoder_2_local.to(dtype=torch.float16)

	    # No gradients are needed for any of the models.
	    for m in [vae_local, text_encoder_local, text_encoder_2_local, image_encoder_local, transformer_local]:
	        m.requires_grad_(False)

	    # With a GPU and plenty of VRAM everything goes to the GPU; with a GPU
	    # but limited VRAM, DynamicSwap is installed instead.
	    if GPU_AVAILABLE:
	        if not high_vram:
	            DynamicSwapInstaller.install_model(transformer_local, device=gpu)
	            DynamicSwapInstaller.install_model(text_encoder_local, device=gpu)
	        else:
	            text_encoder_local.to(gpu)
	            text_encoder_2_local.to(gpu)
	            image_encoder_local.to(gpu)
	            vae_local.to(gpu)
	            transformer_local.to(gpu)
	    else:
	        cpu_fallback_mode = True

	    # Publish to the module globals only now that everything succeeded.
	    print("Model loaded.")
	    text_encoder = text_encoder_local
	    text_encoder_2 = text_encoder_2_local
	    tokenizer = tokenizer_local
	    tokenizer_2 = tokenizer_2_local
	    vae = vae_local
	    feature_extractor = feature_extractor_local
	    image_encoder = image_encoder_local
	    transformer = transformer_local

	#############################################
	# Worker logic (taken as-is from the second reference code)
	#############################################
	# Folder that receives the saved input image and the rendered videos.
	outputs_folder = './outputs/'
	os.makedirs(outputs_folder, exist_ok=True)

	# Module-level stream; `process` replaces it with a fresh one per request.
	stream = AsyncStream()

	@torch.no_grad()
	def worker(
	    input_image, prompt, n_prompt, seed,
	    total_second_length, latent_window_size, steps,
	    cfg, gs, rs, gpu_memory_preservation, use_teacache
	):
	    """Generate a video from one image, reporting through the stream queues.

	    Started by `process` via `async_run`. Messages pushed to the output
	    queue of the module-level `stream`:

	      * ``('progress', (preview, description, html))`` - status updates
	      * ``('file', path)`` - an .mp4 written after each finished section
	      * ``('error', message)`` - a failure description
	      * ``('end', None)`` - pushed exactly once, always last

	    A pending ``'end'`` entry on the stream's input queue cancels the job.

	    Args:
	        input_image: image array unpacked as (height, width, channels).
	        prompt: positive text prompt.
	        n_prompt: negative prompt; only encoded when ``cfg != 1.0``.
	        seed: seed for the CPU ``torch.Generator``.
	        total_second_length: requested length in seconds (capped at 4.0).
	        latent_window_size: latent window size of one section.
	        steps: number of sampling steps per section.
	        cfg: passed to the sampler as ``real_guidance_scale``.
	        gs: passed to the sampler as ``distilled_guidance_scale``.
	        rs: passed to the sampler as ``guidance_rescale``.
	        gpu_memory_preservation: GB to preserve when the transformer is
	            moved to the GPU in low-VRAM mode.
	        use_teacache: whether TeaCache is enabled on the transformer.
	    """
	    # Bind the stream once so every message of this job goes to the same
	    # stream object even if the module-level name is rebound later.
	    job_stream = stream
	    low_vram_gpu = GPU_AVAILABLE and not high_vram

	    def report(text):
	        # Progress message without preview image or description.
	        job_stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, text))))

	    report('Starting ...')

	    try:
	        # `Image` is used below but is not imported at module level, so it
	        # is imported here; inside the try so a failure reaches the UI.
	        from PIL import Image

	        # Loading happens inside the try: if it fails, the UI still
	        # receives 'error' and 'end' instead of waiting forever.
	        load_global_models()

	        # UI widgets may hand over floats; these values are used as a seed,
	        # as split sizes and as a step count.
	        seed = int(seed)
	        latent_window_size = int(latent_window_size)
	        steps = int(steps)

	        # Hard cap of 4 seconds.
	        total_second_length = min(total_second_length, 4.0)

	        total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
	        total_latent_sections = int(max(round(total_latent_sections), 1))

	        job_id = generate_timestamp()

	        # Low-VRAM mode: unload everything first.
	        if low_vram_gpu:
	            unload_complete_models(
	                text_encoder, text_encoder_2, image_encoder, vae, transformer
	            )

	        # Text encoding
	        report('Text encoding ...')

	        if low_vram_gpu:
	            fake_diffusers_current_device(text_encoder, gpu)
	            load_model_as_complete(text_encoder_2, target_device=gpu)

	        llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
	        if cfg == 1.0:
	            # With cfg == 1.0 the negative prompt is not encoded; zeros are used.
	            llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
	        else:
	            llama_vec_n, clip_l_pooler_n = encode_prompt_conds(n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)

	        llama_vec, llama_mask = crop_or_pad_yield_mask(llama_vec, length=512)
	        llama_vec_n, llama_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)

	        # Image processing
	        report('Image processing ...')

	        H, W, _ = input_image.shape
	        height, width = find_nearest_bucket(H, W, resolution=640)

	        if cpu_fallback_mode:
	            height = min(height, 320)
	            width = min(width, 320)

	        input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)

	        # Keep a copy of the resized input next to the generated videos.
	        Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))

	        input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
	        input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]

	        # VAE encode
	        report('VAE encoding ...')

	        if low_vram_gpu:
	            load_model_as_complete(vae, target_device=gpu)
	        start_latent = vae_encode(input_image_pt, vae)

	        # CLIP Vision
	        report('CLIP Vision encoding ...')

	        if low_vram_gpu:
	            load_model_as_complete(image_encoder, target_device=gpu)
	        image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
	        image_encoder_last_hidden_state = image_encoder_output.last_hidden_state

	        # Cast all conditioning tensors to the transformer's dtype.
	        llama_vec = llama_vec.to(transformer.dtype)
	        llama_vec_n = llama_vec_n.to(transformer.dtype)
	        clip_l_pooler = clip_l_pooler.to(transformer.dtype)
	        clip_l_pooler_n = clip_l_pooler_n.to(transformer.dtype)
	        image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)

	        # Start sampling
	        report('Start sampling ...')

	        rnd = torch.Generator("cpu").manual_seed(seed)

	        # Initial history latents: 16 + 2 + 1 zero frames on the CPU ...
	        history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
	        history_pixels = None

	        # ... followed by the start latent.
	        history_latents = torch.cat([history_latents, start_latent.to(history_latents)], dim=2)
	        total_generated_latent_frames = 1

	        # These depend only on latent_window_size, so build them once.
	        frames_per_section = latent_window_size * 4 - 3
	        indices = torch.arange(0, sum([1, 16, 2, 1, latent_window_size])).unsqueeze(0)
	        (
	            clean_latent_indices_start,
	            clean_latent_4x_indices,
	            clean_latent_2x_indices,
	            clean_latent_1x_indices,
	            latent_indices
	        ) = indices.split([1, 16, 2, 1, latent_window_size], dim=1)
	        clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices], dim=1)

	        for section_index in range(total_latent_sections):
	            # Stop requested between sections; `finally` sends the 'end'.
	            if job_stream.input_queue.top() == 'end':
	                return

	            print(f'Section {section_index+1}/{total_latent_sections}')

	            if low_vram_gpu:
	                unload_complete_models()
	                move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)

	            # teacache
	            if use_teacache:
	                transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
	            else:
	                transformer.initialize_teacache(enable_teacache=False)

	            def callback(d):
	                # Stop requested during sampling: abort the sampler. The
	                # handler below reports it; 'end' is sent once in `finally`.
	                if job_stream.input_queue.top() == 'end':
	                    raise KeyboardInterrupt('User stops generation.')

	                preview = d['denoised']
	                preview = vae_decode_fake(preview)
	                preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
	                preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')

	                current_step = d['i'] + 1
	                percentage = int(100.0 * current_step / steps)
	                hint = f'Sampling {current_step}/{steps}'
	                desc = f'Section {section_index+1}/{total_latent_sections}'
	                job_stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
	                return

	            # The last 19 history frames are split into 16 / 2 / 1 frames
	            # and passed on as the 4x / 2x / 1x clean latents.
	            clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -19:, :, :].split([16, 2, 1], dim=2)
	            clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)

	            generated_latents = sample_hunyuan(
	                transformer=transformer,
	                sampler='unipc',
	                width=width,
	                height=height,
	                frames=frames_per_section,
	                real_guidance_scale=cfg,
	                distilled_guidance_scale=gs,
	                guidance_rescale=rs,
	                num_inference_steps=steps,
	                generator=rnd,
	                prompt_embeds=llama_vec,
	                prompt_embeds_mask=llama_mask,
	                prompt_poolers=clip_l_pooler,
	                negative_prompt_embeds=llama_vec_n,
	                negative_prompt_embeds_mask=llama_mask_n,
	                negative_prompt_poolers=clip_l_pooler_n,
	                device=gpu if GPU_AVAILABLE else cpu,
	                dtype=torch.bfloat16,
	                image_embeddings=image_encoder_last_hidden_state,
	                latent_indices=latent_indices,
	                clean_latents=clean_latents,
	                clean_latent_indices=clean_latent_indices,
	                clean_latents_2x=clean_latents_2x,
	                clean_latent_2x_indices=clean_latent_2x_indices,
	                clean_latents_4x=clean_latents_4x,
	                clean_latent_4x_indices=clean_latent_4x_indices,
	                callback=callback
	            )

	            total_generated_latent_frames += generated_latents.shape[2]
	            history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)

	            if low_vram_gpu:
	                offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
	                load_model_as_complete(vae, target_device=gpu)

	            real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]

	            if history_pixels is None:
	                # First section: decode everything generated so far.
	                history_pixels = vae_decode(real_history_latents, vae).cpu()
	            else:
	                # Later sections: decode only the tail and blend it onto
	                # the existing pixels over the overlapping frames.
	                section_latent_frames = latent_window_size * 2
	                overlapped_frames = frames_per_section
	                current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
	                history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)

	            if low_vram_gpu:
	                unload_complete_models()

	            output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
	            save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=16)  # CRF=16

	            job_stream.output_queue.push(('file', output_filename))

	    except KeyboardInterrupt:
	        # Raised by `callback` when the user pressed Stop.
	        print("User cancelled.")
	    except Exception as exc:
	        traceback.print_exc()
	        # Tell the UI what went wrong; `process` shows 'error' messages.
	        job_stream.output_queue.push(('error', f"{get_translation('processing_error')}: {exc}"))
	        # Free the GPU in low-VRAM mode, but only if the models exist.
	        if low_vram_gpu and transformer is not None:
	            try:
	                unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
	            except Exception:
	                traceback.print_exc()
	    finally:
	        # Always terminate the stream so `process` never blocks forever.
	        job_stream.output_queue.push(('end', None))

	def end_process():
	    """Request cancellation of the running job.

	    Pushes 'end' onto the input queue of the current module-level stream;
	    the worker checks that queue and stops when it sees the entry.
	    """
	    # Only reads the module-level `stream`, so no `global` is required.
	    request_queue = stream.input_queue
	    request_queue.push('end')

	# Called by Gradio; runs the worker function asynchronously.
	def process(
	    input_image, prompt, n_prompt, seed,
	    total_second_length, latent_window_size, steps,
	    cfg, gs, rs, gpu_memory_preservation, use_teacache
	):
	    """Gradio generator handler behind the Generate button.

	    Creates a fresh `AsyncStream`, starts `worker` through `async_run` and
	    relays the worker's messages to the UI until 'end' arrives.

	    Each yielded 6-tuple maps onto the outputs wired to the start button:
	    (result video, preview image, progress description, progress-bar HTML,
	    start button update, stop button update).

	    Raises:
	        ValueError: when no input image was provided.
	    """
	    global stream
	    # Imported locally under its own name; `html` alone would be an easy
	    # name to shadow inside this function.
	    from html import escape

	    if input_image is None:
	        raise ValueError("No input image provided.")

	    # Clear previous results; disable Generate and enable Stop.
	    yield None, None, "", "", gr.update(interactive=False), gr.update(interactive=True)

	    stream = AsyncStream()
	    async_run(
	        worker,
	        input_image, prompt, n_prompt, seed,
	        total_second_length, latent_window_size, steps,
	        cfg, gs, rs, gpu_memory_preservation, use_teacache
	    )

	    output_filename = None  # most recent video reported by the worker
	    error_message = None    # most recent error reported by the worker

	    while True:
	        flag, data = stream.output_queue.next()
	        if flag == 'file':
	            output_filename = data
	            yield output_filename, gr.update(), gr.update(), "", gr.update(interactive=False), gr.update(interactive=True)

	        elif flag == 'progress':
	            preview, desc, bar_html = data
	            yield gr.update(), gr.update(visible=True, value=preview), desc, bar_html, gr.update(interactive=False), gr.update(interactive=True)

	        elif flag == 'error':
	            # Remember the error; it is shown once the worker ends.
	            error_message = data
	            print(f"Error: {error_message}")

	        elif flag == 'end':
	            if error_message:
	                # The message is escaped because it is placed into HTML.
	                status_html = f"<div style='color:red;'>{escape(str(error_message))}</div>"
	            else:
	                status_html = ""
	            # Last file (or None), hide the preview, re-enable Generate
	            # and disable Stop.
	            yield (
	                output_filename,
	                gr.update(visible=False),
	                gr.update(),
	                status_html,
	                gr.update(interactive=True),
	                gr.update(interactive=False)
	            )
	            break

	# UI CSS
	def make_custom_css():
	    """Return the page stylesheet as one string.

	    The result is the CSS from `make_progress_bar_css()` followed by the
	    pastel theme rules defined below.
	    """
	    theme_rules = """
	body {
	background: #faf9ff !important;
	font-family: "Noto Sans", sans-serif;
	}
	#app-container {
	max-width: 1200px;
	margin: 0 auto;
	padding: 1rem;
	position: relative;
	}
	#app-container h1 {
	color: #5F5AA2;
	margin-bottom: 1.2rem;
	font-weight: 700;
	text-shadow: 1px 1px 2px #bbb;
	}
	.gr-panel {
	background: #ffffffcc;
	border: 1px solid #e1dff0;
	border-radius: 8px;
	padding: 1rem;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	}
	.button-container button {
	min-height: 45px;
	font-size: 1rem;
	font-weight: 600;
	border-radius: 6px;
	}
	.button-container button#start-button {
	background-color: #A289E3 !important;
	color: #fff !important;
	border: 1px solid #a58de2;
	}
	.button-container button#stop-button {
	background-color: #F48A9B !important;
	color: #fff !important;
	border: 1px solid #f18fa0;
	}
	.button-container button:hover {
	filter: brightness(0.95);
	}
	.preview-container, .video-container {
	border: 1px solid #ded9f2;
	border-radius: 8px;
	overflow: hidden;
	}
	.progress-container {
	margin-top: 15px;
	margin-bottom: 15px;
	}
	.error-message {
	background-color: #FFF5F5;
	border: 1px solid #FED7D7;
	color: #E53E3E;
	padding: 10px;
	border-radius: 4px;
	margin-top: 10px;
	font-weight: 500;
	}
	@media (max-width: 768px) {
	#app-container {
	padding: 0.5rem;
	}
	.mobile-full-width {
	flex-direction: column !important;
	}
	.mobile-full-width > .gr-block {
	width: 100% !important;
	}
	}
	"""
	    return make_progress_bar_css() + theme_rules

	# Stylesheet passed to gr.Blocks.
	css = make_custom_css()

	# Sample prompts; every prompt is wrapped in its own one-item list.
	quick_prompts = [
	    [sample_text]
	    for sample_text in (
	        "The girl dances gracefully, with clear movements, full of charm.",
	        "A character doing some simple body movements.",
	    )
	]

	# Gradio UI: builds the Blocks layout, wires the Generate/Stop buttons and
	# launches the app.
	block = gr.Blocks(css=css).queue()
	with block:
	gr.HTML("<div id='app-container'><h1>FramePack - Image to Video Generation</h1></div>")

	with gr.Row(elem_classes="mobile-full-width"):
	# Left column
	with gr.Column(scale=1, elem_classes="gr-panel"):
	input_image = gr.Image(
	label=get_translation("upload_image"),
	type="numpy",
	height=320
	)
	prompt = gr.Textbox(
	label=get_translation("prompt"),
	value=''
	)

	example_quick_prompts = gr.Dataset(
	samples=quick_prompts,
	label=get_translation("quick_prompts"),
	samples_per_page=1000,
	components=[prompt]
	)
	# Clicking a sample writes its first item into the prompt box.
	example_quick_prompts.click(
	fn=lambda x: x[0],
	inputs=[example_quick_prompts],
	outputs=prompt,
	show_progress=False,
	queue=False
	)

	# Right column
	with gr.Column(scale=1, elem_classes="gr-panel"):
	with gr.Row(elem_classes="button-container"):
	start_button = gr.Button(
	value=get_translation("start_generation"),
	elem_id="start-button",
	variant="primary"
	)
	# Stop starts out disabled; `process` enables it while a job runs.
	stop_button = gr.Button(
	value=get_translation("stop_generation"),
	elem_id="stop-button",
	interactive=False
	)

	result_video = gr.Video(
	label=get_translation("generated_video"),
	autoplay=True,
	loop=True,
	height=320,
	elem_classes="video-container"
	)
	# Hidden until `process` yields a progress update with a preview.
	preview_image = gr.Image(
	label=get_translation("next_latents"),
	visible=False,
	height=150,
	elem_classes="preview-container"
	)
	gr.Markdown(get_translation("sampling_note"))

	with gr.Group(elem_classes="progress-container"):
	progress_desc = gr.Markdown('')
	progress_bar = gr.HTML('')

	# NOTE(review): this component is not among the outputs of any handler
	# wired below - confirm whether it is still needed.
	error_message = gr.HTML('', visible=True)

	# Advanced settings. Controls created with visible=False are still part
	# of inputs_list and are passed to `process` with their default values.
	with gr.Accordion("Advanced Settings", open=False, elem_classes="gr-panel"):
	use_teacache = gr.Checkbox(
	label=get_translation("use_teacache"),
	value=True,
	info=get_translation("teacache_info")
	)
	n_prompt = gr.Textbox(label=get_translation("negative_prompt"), value="", visible=False)
	seed = gr.Number(
	label=get_translation("seed"),
	value=31337,
	precision=0
	)
	# Default 2 seconds, maximum 4 seconds
	total_second_length = gr.Slider(
	label=get_translation("video_length"),
	minimum=1,
	maximum=4,
	value=2,
	step=0.1
	)
	latent_window_size = gr.Slider(
	label=get_translation("latent_window"),
	minimum=1,
	maximum=33,
	value=9,
	step=1,
	visible=False
	)
	steps = gr.Slider(
	label=get_translation("steps"),
	minimum=1,
	maximum=100,
	value=25,
	step=1,
	info=get_translation("steps_info")
	)
	cfg = gr.Slider(
	label=get_translation("cfg_scale"),
	minimum=1.0,
	maximum=32.0,
	value=1.0,
	step=0.01,
	visible=False
	)
	gs = gr.Slider(
	label=get_translation("distilled_cfg"),
	minimum=1.0,
	maximum=32.0,
	value=10.0,
	step=0.01,
	info=get_translation("distilled_cfg_info")
	)
	rs = gr.Slider(
	label=get_translation("cfg_rescale"),
	minimum=0.0,
	maximum=1.0,
	value=0.0,
	step=0.01,
	visible=False
	)
	gpu_memory_preservation = gr.Slider(
	label=get_translation("gpu_memory"),
	minimum=6,
	maximum=128,
	value=6,
	step=0.1,
	info=get_translation("gpu_memory_info")
	)

	# Button handlers. The order of inputs_list matches the parameter order
	# of `process`.
	inputs_list = [
	input_image, prompt, n_prompt, seed,
	total_second_length, latent_window_size, steps,
	cfg, gs, rs, gpu_memory_preservation, use_teacache
	]
	# The six outputs correspond, in order, to the 6-tuples yielded by `process`.
	start_button.click(
	fn=process,
	inputs=inputs_list,
	outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, stop_button]
	)
	# Stop only queues an 'end' request via end_process.
	stop_button.click(fn=end_process)

	block.launch()
	#############################################
	# from diffusers_helper.hf_login import login
	# Use HF login if needed (after uncommenting the import)
	#############################################

	import os

	# Point the Hugging Face cache (HF_HOME) at an `hf_download` directory that
	# sits next to this script; the path is resolved to an absolute real path.
	_script_dir = os.path.dirname(__file__)
	_hf_cache_dir = os.path.realpath(os.path.join(_script_dir, './hf_download'))
	os.environ['HF_HOME'] = os.path.abspath(_hf_cache_dir)

	import gradio as gr
	import torch
	import traceback
	import einops
	import safetensors.torch as sf
	import numpy as np
	import math
	import time

	# True when the SPACE_ID environment variable is set, which this app uses as
	# the marker for running inside a Hugging Face Space.
	IN_HF_SPACE = 'SPACE_ID' in os.environ

	# --------- Translation dictionary (English only) ---------
	_ENGLISH_STRINGS = {
	    "title": "FramePack - Image to Video Generation",
	    "upload_image": "Upload Image",
	    "prompt": "Prompt",
	    "quick_prompts": "Quick Prompts",
	    "start_generation": "Generate",
	    "stop_generation": "Stop",
	    "use_teacache": "Use TeaCache",
	    "teacache_info": "Faster speed, but may result in slightly worse finger and hand generation.",
	    "negative_prompt": "Negative Prompt",
	    "seed": "Seed",
	    # UI label states the 4-second maximum
	    "video_length": "Video Length (max 4 seconds)",
	    "latent_window": "Latent Window Size",
	    "steps": "Inference Steps",
	    "steps_info": "Changing this value is not recommended.",
	    "cfg_scale": "CFG Scale",
	    "distilled_cfg": "Distilled CFG Scale",
	    "distilled_cfg_info": "Changing this value is not recommended.",
	    "cfg_rescale": "CFG Rescale",
	    "gpu_memory": "GPU Memory Preservation (GB) (larger means slower)",
	    "gpu_memory_info": "Set this to a larger value if you encounter OOM errors. Larger values cause slower speed.",
	    "next_latents": "Next Latents",
	    "generated_video": "Generated Video",
	    "sampling_note": "Note: The model predicts future frames from past frames. If the start action isn't immediately visible, please wait for more frames.",
	    "error_message": "Error",
	    "processing_error": "Processing error",
	    "network_error": "Network connection is unstable, model download timed out. Please try again later.",
	    "memory_error": "GPU memory insufficient, please try increasing GPU memory preservation value or reduce video length.",
	    "model_error": "Failed to load model, possibly due to network issues or high server load. Please try again later.",
	    "partial_video": "Processing error, but partial video has been generated",
	    "processing_interrupt": "Processing was interrupted, but partial video has been generated",
	}

	# Language code -> table of UI strings; only "en" is defined.
	translations = {"en": _ENGLISH_STRINGS}

	def get_translation(key):
	    """Look up a UI string by key in the English table.

	    A key that has no entry is returned unchanged, so a missing
	    translation shows up as the key itself instead of raising.
	    """
	    english = translations["en"]
	    if key in english:
	        return english[key]
	    return key

	#############################################
	# diffusers_helper imports
	#############################################
	from diffusers_helper.thread_utils import AsyncStream, async_run
	from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
	from diffusers_helper.memory import (
	cpu,
	gpu,
	get_cuda_free_memory_gb,
	move_model_to_device_with_memory_preservation,
	offload_model_from_device_for_memory_preservation,
	fake_diffusers_current_device,
	DynamicSwapInstaller,
	unload_complete_models,
	load_model_as_complete
	)
	from diffusers_helper.utils import (
	generate_timestamp,
	save_bcthw_as_mp4,
	resize_and_center_crop,
	crop_or_pad_yield_mask,
	soft_append_bcthw
	)
	from diffusers_helper.bucket_tools import find_nearest_bucket
	from diffusers_helper.hunyuan import (
	encode_prompt_conds, vae_encode, vae_decode, vae_decode_fake
	)
	from diffusers_helper.clip_vision import hf_clip_vision_encode
	from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
	from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan

	from diffusers import AutoencoderKLHunyuanVideo
	from transformers import (
	LlamaModel, CLIPTextModel,
	LlamaTokenizerFast, CLIPTokenizer,
	SiglipVisionModel, SiglipImageProcessor
	)

	#############################################
	# GPU check
	#############################################
	GPU_AVAILABLE = torch.cuda.is_available()
	# NOTE: despite its name, `free_mem_gb` holds the *total* memory of device 0
	# (bytes / 1e9), not the memory that is currently free.
	free_mem_gb = 0.0
	high_vram = False
	if GPU_AVAILABLE:
	    try:
	        free_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
	        high_vram = free_mem_gb > 60
	    except Exception as exc:
	        # Best effort: keep the low-VRAM defaults, but say why instead of
	        # silently swallowing the failure (and never trap KeyboardInterrupt
	        # or SystemExit, which a bare `except:` would).
	        print(f"Could not query GPU properties, assuming low VRAM: {exc}")
	print(f"GPU Available: {GPU_AVAILABLE}, free_mem_gb={free_mem_gb}, high_vram={high_vram}")

	# Without CUDA the app runs in CPU fallback mode.
	cpu_fallback_mode = not GPU_AVAILABLE
	last_update_time = time.time()

	#############################################
	# Model handles (module globals, filled in by load_global_models)
	#############################################
	text_encoder = None
	text_encoder_2 = None
	tokenizer = None
	tokenizer_2 = None
	vae = None
	feature_extractor = None
	image_encoder = None
	transformer = None

	# The logic below reuses, almost verbatim, the model-loading part of the
	# "second code" reference implementation.
	def load_global_models():
	    """Load all models once and publish them through the module globals.

	    Does nothing when `transformer` is already set. Every model is first
	    loaded into a local variable and the globals are assigned only at the
	    very end, so a failure part-way through leaves the globals untouched
	    (still None) and a later call starts over.

	    Placement after loading:
	      * GPU with high VRAM: every model is moved to the GPU.
	      * GPU with low VRAM: models stay on the CPU; DynamicSwapInstaller is
	        installed on the transformer and the Llama text encoder.
	      * No GPU: models stay on the CPU and `cpu_fallback_mode` is set.
	    """
	    global text_encoder, text_encoder_2, tokenizer, tokenizer_2
	    global vae, feature_extractor, image_encoder, transformer
	    global cpu_fallback_mode

	    # Already loaded: nothing to do.
	    if transformer is not None:
	        return

	    print("Loading models...")

	    hunyuan_repo = "hunyuanvideo-community/HunyuanVideo"
	    redux_repo = "lllyasviel/flux_redux_bfl"

	    # Text encoders, VAE and image encoder are loaded as float16; the
	    # transformer as bfloat16. Everything starts on the CPU.
	    text_encoder_local = LlamaModel.from_pretrained(
	        hunyuan_repo,
	        subfolder='text_encoder',
	        torch_dtype=torch.float16
	    ).cpu()

	    text_encoder_2_local = CLIPTextModel.from_pretrained(
	        hunyuan_repo,
	        subfolder='text_encoder_2',
	        torch_dtype=torch.float16
	    ).cpu()

	    tokenizer_local = LlamaTokenizerFast.from_pretrained(
	        hunyuan_repo,
	        subfolder='tokenizer'
	    )
	    tokenizer_2_local = CLIPTokenizer.from_pretrained(
	        hunyuan_repo,
	        subfolder='tokenizer_2'
	    )

	    vae_local = AutoencoderKLHunyuanVideo.from_pretrained(
	        hunyuan_repo,
	        subfolder='vae',
	        torch_dtype=torch.float16
	    ).cpu()

	    feature_extractor_local = SiglipImageProcessor.from_pretrained(
	        redux_repo, subfolder='feature_extractor'
	    )
	    image_encoder_local = SiglipVisionModel.from_pretrained(
	        redux_repo,
	        subfolder='image_encoder',
	        torch_dtype=torch.float16
	    ).cpu()

	    # FramePack_F1_I2V_HY_20250503 (bfloat16)
	    transformer_local = HunyuanVideoTransformer3DModelPacked.from_pretrained(
	        'lllyasviel/FramePack_F1_I2V_HY_20250503',
	        torch_dtype=torch.bfloat16
	    ).cpu()

	    # Inference only: switch every model to eval mode.
	    vae_local.eval()
	    text_encoder_local.eval()
	    text_encoder_2_local.eval()
	    image_encoder_local.eval()
	    transformer_local.eval()

	    # VAE slicing and tiling when VRAM is limited.
	    if not high_vram:
	        vae_local.enable_slicing()
	        vae_local.enable_tiling()

	    # Pin the dtypes explicitly (same dtypes the models were loaded with).
	    transformer_local.high_quality_fp32_output_for_inference = True
	    transformer_local.to(dtype=torch.bfloat16)
	    vae_local.to(dtype=torch.float16)
	    image_encoder_local.to(dtype=torch.float16)
	    text_encoder_local.to(dtype=torch.float16)
	    text_encoder_2_local.to(dtype=torch.float16)

	    # No gradients are needed for any of the models.
	    for m in [vae_local, text_encoder_local, text_encoder_2_local, image_encoder_local, transformer_local]:
	        m.requires_grad_(False)

	    # With a GPU and plenty of VRAM everything goes to the GPU; with a GPU
	    # but limited VRAM, DynamicSwap is installed instead.
	    if GPU_AVAILABLE:
	        if not high_vram:
	            DynamicSwapInstaller.install_model(transformer_local, device=gpu)
	            DynamicSwapInstaller.install_model(text_encoder_local, device=gpu)
	        else:
	            text_encoder_local.to(gpu)
	            text_encoder_2_local.to(gpu)
	            image_encoder_local.to(gpu)
	            vae_local.to(gpu)
	            transformer_local.to(gpu)
	    else:
	        cpu_fallback_mode = True

	    # Publish to the module globals only now that everything succeeded.
	    print("Model loaded.")
	    text_encoder = text_encoder_local
	    text_encoder_2 = text_encoder_2_local
	    tokenizer = tokenizer_local
	    tokenizer_2 = tokenizer_2_local
	    vae = vae_local
	    feature_extractor = feature_extractor_local
	    image_encoder = image_encoder_local
	    transformer = transformer_local

	#############################################
	# Worker logic (taken as-is from the second reference code)
	#############################################
	# Folder that receives the saved input image and the rendered videos.
	outputs_folder = './outputs/'
	os.makedirs(outputs_folder, exist_ok=True)

	# Module-level stream; `process` replaces it with a fresh one per request.
	stream = AsyncStream()

	@torch.no_grad()
	def worker(
	    input_image, prompt, n_prompt, seed,
	    total_second_length, latent_window_size, steps,
	    cfg, gs, rs, gpu_memory_preservation, use_teacache
	):
	    """
	    Run one image-to-video sampling job and report on the module-level ``stream``.

	    Messages pushed to ``stream.output_queue``:
	        ('progress', (preview, desc, html))  status text and optional preview image
	        ('file', path)                       mp4 written after each finished section
	        ('error', text)                      description of a failure, sent before 'end'
	        ('end', None)                        the job is over (success, cancel or error)

	    Args:
	        input_image: image array unpacked as ``H, W, C = input_image.shape``.
	        prompt: positive text prompt.
	        n_prompt: negative text prompt; only encoded when ``cfg`` is not 1.0.
	        seed: integer used to seed the CPU ``torch.Generator``.
	        total_second_length: requested video length in seconds, clamped to 4.0.
	        latent_window_size: latent frames per section; each section asks the
	            sampler for ``latent_window_size * 4 - 3`` frames.
	        steps: number of inference steps per section.
	        cfg: forwarded as ``real_guidance_scale``.
	        gs: forwarded as ``distilled_guidance_scale``.
	        rs: forwarded as ``guidance_rescale``.
	        gpu_memory_preservation: GB passed as ``preserved_memory_gb`` when the
	            transformer is moved to the GPU in low-VRAM mode.
	        use_teacache: whether TeaCache is enabled on the transformer.

	    Returns:
	        None. All results are delivered through ``stream.output_queue``.
	    """
	    load_global_models()  # make sure the models are loaded
	    global text_encoder, text_encoder_2, tokenizer, tokenizer_2
	    global vae, feature_extractor, image_encoder, transformer

	    # Hard cap: never generate more than 4 seconds.
	    total_second_length = min(total_second_length, 4.0)

	    total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
	    total_latent_sections = int(max(round(total_latent_sections), 1))

	    job_id = generate_timestamp()

	    stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))

	    try:
	        # Low-VRAM mode: start from a clean GPU.
	        if not high_vram and GPU_AVAILABLE:
	            unload_complete_models(
	                text_encoder, text_encoder_2, image_encoder, vae, transformer
	            )

	        # Text encoding
	        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding ...'))))

	        if not high_vram and GPU_AVAILABLE:
	            fake_diffusers_current_device(text_encoder, gpu)
	            load_model_as_complete(text_encoder_2, target_device=gpu)

	        llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
	        if cfg == 1.0:
	            # With cfg == 1.0 the negative prompt is not encoded; zero tensors stand in for it.
	            llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
	        else:
	            llama_vec_n, clip_l_pooler_n = encode_prompt_conds(n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)

	        llama_vec, llama_mask = crop_or_pad_yield_mask(llama_vec, length=512)
	        llama_vec_n, llama_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)

	        # Image processing
	        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Image processing ...'))))

	        H, W, C = input_image.shape
	        height, width = find_nearest_bucket(H, W, resolution=640)

	        # CPU fallback: cap the resolution.
	        if cpu_fallback_mode:
	            height = min(height, 320)
	            width = min(width, 320)

	        input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)

	        Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))

	        # Scale pixel values from [0, 255] to [-1, 1] and reshape HWC -> (1, C, 1, H, W).
	        input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
	        input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]

	        # VAE encode
	        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))

	        if not high_vram and GPU_AVAILABLE:
	            load_model_as_complete(vae, target_device=gpu)
	        start_latent = vae_encode(input_image_pt, vae)

	        # CLIP Vision
	        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))

	        if not high_vram and GPU_AVAILABLE:
	            load_model_as_complete(image_encoder, target_device=gpu)
	        image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
	        image_encoder_last_hidden_state = image_encoder_output.last_hidden_state

	        # Cast every conditioning tensor to the transformer's dtype.
	        llama_vec = llama_vec.to(transformer.dtype)
	        llama_vec_n = llama_vec_n.to(transformer.dtype)
	        clip_l_pooler = clip_l_pooler.to(transformer.dtype)
	        clip_l_pooler_n = clip_l_pooler_n.to(transformer.dtype)
	        image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)

	        # Start sampling
	        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Start sampling ...'))))

	        rnd = torch.Generator("cpu").manual_seed(seed)

	        # Initial history latents: 16 + 2 + 1 zero frames, matching the
	        # 4x / 2x / 1x context split taken from the history tail below.
	        history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
	        history_pixels = None

	        # Append the start latent to the history.
	        history_latents = torch.cat([history_latents, start_latent.to(history_latents)], dim=2)
	        total_generated_latent_frames = 1

	        for section_index in range(total_latent_sections):
	            if stream.input_queue.top() == 'end':
	                stream.output_queue.push(('end', None))
	                return

	            print(f'Section {section_index+1}/{total_latent_sections}')

	            if not high_vram and GPU_AVAILABLE:
	                unload_complete_models()
	                move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)

	            # TeaCache
	            if use_teacache:
	                transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
	            else:
	                transformer.initialize_teacache(enable_teacache=False)

	            def callback(d):
	                # Per-step sampler hook: publish a preview and honour stop requests.
	                preview = d['denoised']
	                preview = vae_decode_fake(preview)
	                preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
	                preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')

	                if stream.input_queue.top() == 'end':
	                    stream.output_queue.push(('end', None))
	                    # Raised to abort the sampler; handled right after sample_hunyuan below.
	                    raise KeyboardInterrupt('User stops generation.')

	                current_step = d['i'] + 1
	                percentage = int(100.0 * current_step / steps)
	                hint = f'Sampling {current_step}/{steps}'
	                desc = f'Section {section_index+1}/{total_latent_sections}'
	                stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
	                return

	            # Index layout: [start(1) | 4x context(16) | 2x context(2) | 1x context(1) | new window]
	            frames_per_section = latent_window_size * 4 - 3
	            indices = torch.arange(0, sum([1, 16, 2, 1, latent_window_size])).unsqueeze(0)
	            (
	                clean_latent_indices_start,
	                clean_latent_4x_indices,
	                clean_latent_2x_indices,
	                clean_latent_1x_indices,
	                latent_indices
	            ) = indices.split([1, 16, 2, 1, latent_window_size], dim=1)

	            clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices], dim=1)

	            # The last 19 history frames supply the 4x / 2x / 1x context.
	            clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -19:, :, :].split([16, 2, 1], dim=2)
	            clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)

	            try:
	                generated_latents = sample_hunyuan(
	                    transformer=transformer,
	                    sampler='unipc',
	                    width=width,
	                    height=height,
	                    frames=frames_per_section,
	                    real_guidance_scale=cfg,
	                    distilled_guidance_scale=gs,
	                    guidance_rescale=rs,
	                    num_inference_steps=steps,
	                    generator=rnd,
	                    prompt_embeds=llama_vec,
	                    prompt_embeds_mask=llama_mask,
	                    prompt_poolers=clip_l_pooler,
	                    negative_prompt_embeds=llama_vec_n,
	                    negative_prompt_embeds_mask=llama_mask_n,
	                    negative_prompt_poolers=clip_l_pooler_n,
	                    device=gpu if GPU_AVAILABLE else cpu,
	                    dtype=torch.bfloat16,
	                    image_embeddings=image_encoder_last_hidden_state,
	                    latent_indices=latent_indices,
	                    clean_latents=clean_latents,
	                    clean_latent_indices=clean_latent_indices,
	                    clean_latents_2x=clean_latents_2x,
	                    clean_latent_2x_indices=clean_latent_2x_indices,
	                    clean_latents_4x=clean_latents_4x,
	                    clean_latent_4x_indices=clean_latent_4x_indices,
	                    callback=callback
	                )
	            except KeyboardInterrupt:
	                # Raised by callback() when the user pressed Stop.
	                print("User cancelled.")
	                stream.output_queue.push(('end', None))
	                return
	            # Any other sampling failure propagates to the handler at the
	            # bottom, which reports it to the UI and releases the models.

	            total_generated_latent_frames += generated_latents.shape[2]
	            history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)

	            if not high_vram and GPU_AVAILABLE:
	                offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
	                load_model_as_complete(vae, target_device=gpu)

	            real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]

	            if history_pixels is None:
	                history_pixels = vae_decode(real_history_latents, vae).cpu()
	            else:
	                # Decode only the tail and blend it into the existing pixels.
	                section_latent_frames = latent_window_size * 2
	                overlapped_frames = frames_per_section
	                current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
	                history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)

	            if not high_vram and GPU_AVAILABLE:
	                unload_complete_models()

	            output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
	            save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=16)  # CRF=16

	            stream.output_queue.push(('file', output_filename))

	    except Exception as exc:
	        traceback.print_exc()
	        # Tell the UI what went wrong; process() displays 'error' payloads
	        # once the closing 'end' message arrives.
	        stream.output_queue.push(('error', f'{type(exc).__name__}: {exc}'))
	        if not high_vram and GPU_AVAILABLE:
	            unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)

	    stream.output_queue.push(('end', None))
	    return

	def end_process():
	    """Request cancellation by queueing 'end' on the current stream's input queue."""
	    stop_signal = 'end'
	    # Only reads the module-level ``stream``; worker() polls this queue.
	    stream.input_queue.push(stop_signal)

	# Gradio에서 이 worker 함수를 비동기로 호출
	def process(
	    input_image, prompt, n_prompt, seed,
	    total_second_length, latent_window_size, steps,
	    cfg, gs, rs, gpu_memory_preservation, use_teacache
	):
	    """
	    Gradio generator handler: start ``worker`` asynchronously and relay its messages.

	    Every yielded tuple has six items, in this order: video file, preview image
	    update, progress description, progress/status HTML, start-button update,
	    stop-button update.

	    Args:
	        input_image: uploaded image; must not be None.
	        Remaining arguments are forwarded to ``worker`` unchanged.

	    Raises:
	        ValueError: if no input image was provided.
	    """
	    # Imported locally so this block does not depend on the file's import header.
	    from html import escape

	    global stream
	    if input_image is None:
	        raise ValueError("No input image provided.")

	    # Clear the outputs and switch the buttons to "running" state.
	    yield None, None, "", "", gr.update(interactive=False), gr.update(interactive=True)

	    # Fresh stream per run so stale messages from an earlier job are never read.
	    stream = AsyncStream()
	    async_run(
	        worker,
	        input_image, prompt, n_prompt, seed,
	        total_second_length, latent_window_size, steps,
	        cfg, gs, rs, gpu_memory_preservation, use_teacache
	    )

	    output_filename = None
	    error_message = None

	    while True:
	        flag, data = stream.output_queue.next()
	        if flag == 'file':
	            output_filename = data
	            yield output_filename, gr.update(), gr.update(), "", gr.update(interactive=False), gr.update(interactive=True)

	        elif flag == 'progress':
	            preview, desc, bar_html = data
	            yield gr.update(), gr.update(visible=True, value=preview), desc, bar_html, gr.update(interactive=False), gr.update(interactive=True)

	        elif flag == 'error':
	            # Remember the error; it is shown when the closing 'end' arrives.
	            error_message = data
	            print(f"Error: {error_message}")

	        elif flag == 'end':
	            if error_message:
	                # Escape the text: exception messages can contain '<', '>' or '&',
	                # which would otherwise be interpreted as markup.
	                status_html = f"<div style='color:red;'>{escape(str(error_message))}</div>"
	            else:
	                status_html = ""
	            yield (
	                output_filename,  # last file produced, or None
	                gr.update(visible=False),
	                gr.update(),
	                status_html,
	                gr.update(interactive=True),
	                gr.update(interactive=False)
	            )
	            break

	# UI CSS
	def make_custom_css():
	    """Return the stylesheet for the UI: progress-bar CSS followed by the pastel theme."""
	    pastel_theme = """
	body {
	background: #faf9ff !important;
	font-family: "Noto Sans", sans-serif;
	}
	#app-container {
	max-width: 1200px;
	margin: 0 auto;
	padding: 1rem;
	position: relative;
	}
	#app-container h1 {
	color: #5F5AA2;
	margin-bottom: 1.2rem;
	font-weight: 700;
	text-shadow: 1px 1px 2px #bbb;
	}
	.gr-panel {
	background: #ffffffcc;
	border: 1px solid #e1dff0;
	border-radius: 8px;
	padding: 1rem;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	}
	.button-container button {
	min-height: 45px;
	font-size: 1rem;
	font-weight: 600;
	border-radius: 6px;
	}
	.button-container button#start-button {
	background-color: #A289E3 !important;
	color: #fff !important;
	border: 1px solid #a58de2;
	}
	.button-container button#stop-button {
	background-color: #F48A9B !important;
	color: #fff !important;
	border: 1px solid #f18fa0;
	}
	.button-container button:hover {
	filter: brightness(0.95);
	}
	.preview-container, .video-container {
	border: 1px solid #ded9f2;
	border-radius: 8px;
	overflow: hidden;
	}
	.progress-container {
	margin-top: 15px;
	margin-bottom: 15px;
	}
	.error-message {
	background-color: #FFF5F5;
	border: 1px solid #FED7D7;
	color: #E53E3E;
	padding: 10px;
	border-radius: 4px;
	margin-top: 10px;
	font-weight: 500;
	}
	@media (max-width: 768px) {
	#app-container {
	padding: 0.5rem;
	}
	.mobile-full-width {
	flex-direction: column !important;
	}
	.mobile-full-width > .gr-block {
	width: 100% !important;
	}
	}
	"""
	    # Progress-bar rules come first, theme rules second.
	    return make_progress_bar_css() + pastel_theme

	# Stylesheet handed to gr.Blocks below.
	css = make_custom_css()

	# Sample prompts: each entry is a one-element list (one value per Dataset component).
	quick_prompts = [
	["The girl dances gracefully, with clear movements, full of charm."],
	["A character doing some simple body movements."]
	]

	# Gradio UI
	# NOTE(review): every line in this section sits at the same indentation level,
	# so the nesting of the `with` blocks below cannot be read off the text —
	# confirm the intended layout nesting before editing.
	block = gr.Blocks(css=css).queue()
	with block:
	gr.HTML("<div id='app-container'><h1>FramePack - Image to Video Generation</h1></div>")

	with gr.Row(elem_classes="mobile-full-width"):
	# Left column
	with gr.Column(scale=1, elem_classes="gr-panel"):
	input_image = gr.Image(
	label=get_translation("upload_image"),
	type="numpy",
	height=320
	)
	prompt = gr.Textbox(
	label=get_translation("prompt"),
	value=''
	)

	# Clicking a sample copies its first (only) element into the prompt box.
	example_quick_prompts = gr.Dataset(
	samples=quick_prompts,
	label=get_translation("quick_prompts"),
	samples_per_page=1000,
	components=[prompt]
	)
	example_quick_prompts.click(
	fn=lambda x: x[0],
	inputs=[example_quick_prompts],
	outputs=prompt,
	show_progress=False,
	queue=False
	)

	# Right column
	with gr.Column(scale=1, elem_classes="gr-panel"):
	with gr.Row(elem_classes="button-container"):
	start_button = gr.Button(
	value=get_translation("start_generation"),
	elem_id="start-button",
	variant="primary"
	)
	# Starts disabled; process() toggles both buttons through its yielded updates.
	stop_button = gr.Button(
	value=get_translation("stop_generation"),
	elem_id="stop-button",
	interactive=False
	)

	result_video = gr.Video(
	label=get_translation("generated_video"),
	autoplay=True,
	loop=True,
	height=320,
	elem_classes="video-container"
	)
	# Hidden until process() yields a progress update with visible=True.
	preview_image = gr.Image(
	label=get_translation("next_latents"),
	visible=False,
	height=150,
	elem_classes="preview-container"
	)
	gr.Markdown(get_translation("sampling_note"))

	with gr.Group(elem_classes="progress-container"):
	progress_desc = gr.Markdown('')
	progress_bar = gr.HTML('')

	# NOTE(review): this component is not listed in the outputs of any event
	# handler in this section; process() writes its error HTML to progress_bar.
	error_message = gr.HTML('', visible=True)

	# Advanced
	with gr.Accordion("Advanced Settings", open=False, elem_classes="gr-panel"):
	use_teacache = gr.Checkbox(
	label=get_translation("use_teacache"),
	value=True,
	info=get_translation("teacache_info")
	)
	# Created with visible=False; its value is still passed to process().
	n_prompt = gr.Textbox(label=get_translation("negative_prompt"), value="", visible=False)
	seed = gr.Number(
	label=get_translation("seed"),
	value=31337,
	precision=0
	)
	# Default 2 seconds, maximum 4 seconds
	total_second_length = gr.Slider(
	label=get_translation("video_length"),
	minimum=1,
	maximum=4,
	value=2,
	step=0.1
	)
	# Created with visible=False; its value is still passed to process().
	latent_window_size = gr.Slider(
	label=get_translation("latent_window"),
	minimum=1,
	maximum=33,
	value=9,
	step=1,
	visible=False
	)
	steps = gr.Slider(
	label=get_translation("steps"),
	minimum=1,
	maximum=100,
	value=25,
	step=1,
	info=get_translation("steps_info")
	)
	# Created with visible=False; its value is still passed to process().
	cfg = gr.Slider(
	label=get_translation("cfg_scale"),
	minimum=1.0,
	maximum=32.0,
	value=1.0,
	step=0.01,
	visible=False
	)
	gs = gr.Slider(
	label=get_translation("distilled_cfg"),
	minimum=1.0,
	maximum=32.0,
	value=10.0,
	step=0.01,
	info=get_translation("distilled_cfg_info")
	)
	# Created with visible=False; its value is still passed to process().
	rs = gr.Slider(
	label=get_translation("cfg_rescale"),
	minimum=0.0,
	maximum=1.0,
	value=0.0,
	step=0.01,
	visible=False
	)
	gpu_memory_preservation = gr.Slider(
	label=get_translation("gpu_memory"),
	minimum=6,
	maximum=128,
	value=6,
	step=0.1,
	info=get_translation("gpu_memory_info")
	)

	# Button wiring
	# The order of inputs_list matches the positional parameters of process().
	inputs_list = [
	input_image, prompt, n_prompt, seed,
	total_second_length, latent_window_size, steps,
	cfg, gs, rs, gpu_memory_preservation, use_teacache
	]
	# The outputs correspond, in order, to the six values process() yields.
	start_button.click(
	fn=process,
	inputs=inputs_list,
	outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, stop_button]
	)
	# Stop only pushes the 'end' signal; it has no inputs or outputs.
	stop_button.click(fn=end_process)

	# Runs unconditionally when this file is executed or imported (no __main__ guard).
	block.launch()