刘虹雨 committed on
Commit b1ae546 · 1 Parent(s): 66886ce

update code

Files changed (1): app.py +219 -193
app.py CHANGED
@@ -3,7 +3,17 @@ import subprocess
 import sys
 import warnings
 import logging
-import spaces
+if os.environ.get("SPACES_ZERO_GPU") is not None:
+    import spaces
+else:
+    class spaces:
+        @staticmethod
+        def GPU(*decorator_args, **decorator_kwargs):
+            def decorator(func):
+                def wrapper(*args, **kwargs):
+                    return func(*args, **kwargs)
+                return wrapper
+            return decorator
 import difflib
 
 # Configure logging settings

@@ -448,6 +458,7 @@ def duplicate_batch(tensor, batch_size=2):
 @torch.no_grad()
 @spaces.GPU(duration=200)
 def avatar_generation(items, save_path_base, video_path_input, source_type, is_styled, styled_img, image_name_true):
+
     """
     Generate avatars from input images.
 

@@ -463,162 +474,165 @@ def avatar_generation(items, save_path_base, video_path_input, source_type, is_s
         mean (torch.Tensor): Mean normalization tensor.
         ws_avg (torch.Tensor): Latent average tensor.
     """
-    if is_styled:
-        items = [styled_img]
-    else:
-        items = [items]
-    video_folder = "./demo_data/target_video"
-    video_name = os.path.basename(video_path_input).split(".")[0]
-    target_path = os.path.join(video_folder, 'data_' + video_name)
-    exp_base_dir = os.path.join(target_path, 'coeffs')
-    exp_img_base_dir = os.path.join(target_path, 'images512x512')
-    motion_base_dir = os.path.join(target_path, 'motions')
-    label_file_test = os.path.join(target_path, 'images512x512/dataset_realcam.json')
-    # render_model.to(device)
-    # image_encoder.to(device)
-    # vae_triplane.to(device)
-    # dinov2.to(device)
-    # ws_avg.to(device)
-    # DiT_model.to(device)
-    # Set up FaceVerse for animation
-    if source_type == 'example':
-        input_img_fvid = './demo_data/source_img/img_generate_different_domain/coeffs/demo_imgs'
-        input_img_motion = './demo_data/source_img/img_generate_different_domain/motions/demo_imgs'
-    elif source_type == 'custom':
-        input_img_fvid = os.path.join(save_path_base, 'processed_img/dataset/coeffs/input_image')
-        input_img_motion = os.path.join(save_path_base, 'processed_img/dataset/motions/input_image')
-    else:
-        raise ValueError("Wrong type")
-    bs = 1
-    sample_steps = 20
-    cfg_scale = 4.5
-    pitch_range = 0.25
-    yaw_range = 0.35
-    triplane_size = (256 * 4, 256)
-    latent_size = (triplane_size[0] // 8, triplane_size[1] // 8)
-    for chunk in tqdm(list(get_chunks(items, 1)), unit='batch'):
-        if bs != 1:
-            raise ValueError("Batch size > 1 not implemented")
-
-        image_dir = chunk[0]
-        image_name = os.path.splitext(image_name_true)[0]
-
-        # # image_name = os.path.splitext(os.path.basename(image_dir))[0]
-        # if source_type == 'custom':
-        #     image_name = os.path.splitext(image_name_true)[0]
-        # else:
-        #     image_name = os.path.splitext(os.path.basename(image_dir))[0]
-
-
-
-
-        dino_img, clip_image = image_process(image_dir, clip_image_processor, dino_img_processor, device)
-
-        clip_feature = image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
-        uncond_clip_feature = image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[
-            -2]
-        dino_feature = dinov2(dino_img).last_hidden_state
-        uncond_dino_feature = dinov2(torch.zeros_like(dino_img)).last_hidden_state
-
-        samples = generate_samples(DiT_model, cfg_scale, sample_steps, clip_feature, dino_feature,
-                                   uncond_clip_feature, uncond_dino_feature, device, latent_size,
-                                   'dpm-solver')
-
-        samples = (samples / 0.3994218)
-        samples = rearrange(samples, "b c (f h) w -> b c f h w", f=4)
-        samples = vae_triplane.decode(samples)
-        samples = rearrange(samples, "b c f h w -> b f c h w")
-        samples = samples * std + mean
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
-        save_frames_path_out = os.path.join(save_path_base, image_name, video_name, 'out')
-        save_frames_path_outshow = os.path.join(save_path_base, image_name, video_name,'out_show')
-        save_frames_path_depth = os.path.join(save_path_base, image_name, video_name, 'depth')
-
-        os.makedirs(save_frames_path_out, exist_ok=True)
-        os.makedirs(save_frames_path_outshow, exist_ok=True)
-        os.makedirs(save_frames_path_depth, exist_ok=True)
-
-        img_ref = np.array(Image.open(image_dir))
-        img_ref_out = img_ref.copy()
-        img_ref = torch.from_numpy(img_ref.astype(np.float32) / 127.5 - 1).permute(2, 0, 1).unsqueeze(0).to(device)
-
-        motion_app_dir = os.path.join(input_img_motion, image_name + '.npy')
-        motion_app = torch.tensor(np.load(motion_app_dir), dtype=torch.float32).unsqueeze(0).to(device)
-
-        id_motions = os.path.join(input_img_fvid, image_name + '.npy')
-
-        all_pose = json.loads(open(label_file_test).read())['labels']
-        all_pose = dict(all_pose)
-        if os.path.exists(id_motions):
-            coeff = np.load(id_motions).astype(np.float32)
-            coeff = torch.from_numpy(coeff).to(device).float().unsqueeze(0)
-            Faceverse.id_coeff = Faceverse.recon_model.split_coeffs(coeff)[0]
-        motion_dir = os.path.join(motion_base_dir, video_name)
-        exp_dir = os.path.join(exp_base_dir, video_name)
-        for frame_index, motion_name in enumerate(
-                tqdm(natsorted(os.listdir(motion_dir), alg=ns.PATH), desc="Processing Frames")):
-            exp_each_dir_img = os.path.join(exp_img_base_dir, video_name, motion_name.replace('.npy', '.png'))
-            exp_each_dir = os.path.join(exp_dir, motion_name)
-            motion_each_dir = os.path.join(motion_dir, motion_name)
-
-            # Load pose data
-            pose_key = os.path.join(video_name, motion_name.replace('.npy', '.png'))
-
-            cam2world_pose = LookAtPoseSampler.sample(
-                3.14 / 2 + yaw_range * np.sin(2 * 3.14 * frame_index / len(os.listdir(motion_dir))),
-                3.14 / 2 - 0.05 + pitch_range * np.cos(2 * 3.14 * frame_index / len(os.listdir(motion_dir))),
-                torch.tensor([0, 0, 0], device=device), radius=2.7, device=device)
-            pose_show = torch.cat([cam2world_pose.reshape(-1, 16),
-                                   FOV_to_intrinsics(fov_degrees=18.837, device=device).reshape(-1, 9)], 1).to(device)
-
-            pose = torch.tensor(np.array(all_pose[pose_key]).astype(np.float32)).float().unsqueeze(0).to(device)
-
-            # Load and resize expression image
-            exp_img = np.array(Image.open(exp_each_dir_img).resize((512, 512)))
-
-            # Load expression coefficients
-            exp_coeff = torch.from_numpy(np.load(exp_each_dir).astype(np.float32)).to(device).float().unsqueeze(0)
-            exp_target = Faceverse.make_driven_rendering(exp_coeff, res=256)
-
-            # Load motion data
-            motion = torch.tensor(np.load(motion_each_dir)).float().unsqueeze(0).to(device)
-
-            # img_ref_double = duplicate_batch(img_ref, batch_size=2)
-            # motion_app_double = duplicate_batch(motion_app, batch_size=2)
-            # motion_double = duplicate_batch(motion, batch_size=2)
-            # pose_double = torch.cat([pose_show, pose], dim=0)
-            # exp_target_double = duplicate_batch(exp_target, batch_size=2)
-            # samples_double = duplicate_batch(samples, batch_size=2)
-            # Select refine_net processing method
-            final_out = render_model(
-                img_ref, None, motion_app, motion, c=pose, mesh=exp_target,
-                triplane_recon=samples,
-                ws_avg=ws_avg, motion_scale=1.
-            )
-
-            # Process output image
-            final_out_show = trans(final_out['image_sr'][0].unsqueeze(0))
-            final_out_notshow = trans(final_out['image_sr'][0].unsqueeze(0))
-            depth = final_out['image_depth'][0].unsqueeze(0)
-            depth = -depth
-            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 2 - 1
-            depth = trans(depth)
-
-            depth = np.repeat(depth[:, :, :], 3, axis=2)
-            # Save output images
-            frame_name = f'{str(frame_index).zfill(4)}.png'
-            Image.fromarray(depth, 'RGB').save(os.path.join(save_frames_path_depth, frame_name))
-            Image.fromarray(final_out_notshow, 'RGB').save(os.path.join(save_frames_path_out, frame_name))
-
-            Image.fromarray(final_out_show, 'RGB').save(os.path.join(save_frames_path_outshow, frame_name))
-
-        # Generate videos
-        images_to_video(save_frames_path_out, os.path.join(save_path_base, image_name + video_name+ '_out.mp4'))
-        images_to_video(save_frames_path_depth, os.path.join(save_path_base, image_name + video_name+ '_out.mp4'))
-
-        logging.info(f"✅ Video generation completed successfully!")
-        return os.path.join(save_path_base, image_name + video_name+ '_out.mp4'), os.path.join(save_path_base, image_name + video_name+'_depth.mp4')
+    try:
+        if is_styled:
+            items = [styled_img]
+        else:
+            items = [items]
+        video_folder = "./demo_data/target_video"
+        video_name = os.path.basename(video_path_input).split(".")[0]
+        target_path = os.path.join(video_folder, 'data_' + video_name)
+        exp_base_dir = os.path.join(target_path, 'coeffs')
+        exp_img_base_dir = os.path.join(target_path, 'images512x512')
+        motion_base_dir = os.path.join(target_path, 'motions')
+        label_file_test = os.path.join(target_path, 'images512x512/dataset_realcam.json')
+        # render_model.to(device)
+        # image_encoder.to(device)
+        # vae_triplane.to(device)
+        # dinov2.to(device)
+        # ws_avg.to(device)
+        # DiT_model.to(device)
+        # Set up FaceVerse for animation
+        if source_type == 'example':
+            input_img_fvid = './demo_data/source_img/img_generate_different_domain/coeffs/demo_imgs'
+            input_img_motion = './demo_data/source_img/img_generate_different_domain/motions/demo_imgs'
+        elif source_type == 'custom':
+            input_img_fvid = os.path.join(save_path_base, 'processed_img/dataset/coeffs/input_image')
+            input_img_motion = os.path.join(save_path_base, 'processed_img/dataset/motions/input_image')
+        else:
+            raise ValueError("Wrong type")
+        bs = 1
+        sample_steps = 20
+        cfg_scale = 4.5
+        pitch_range = 0.25
+        yaw_range = 0.35
+        triplane_size = (256 * 4, 256)
+        latent_size = (triplane_size[0] // 8, triplane_size[1] // 8)
+        for chunk in tqdm(list(get_chunks(items, 1)), unit='batch'):
+            if bs != 1:
+                raise ValueError("Batch size > 1 not implemented")
+
+            image_dir = chunk[0]
+            image_name = os.path.splitext(image_name_true)[0]
+
+            # # image_name = os.path.splitext(os.path.basename(image_dir))[0]
+            # if source_type == 'custom':
+            #     image_name = os.path.splitext(image_name_true)[0]
+            # else:
+            #     image_name = os.path.splitext(os.path.basename(image_dir))[0]
+
+
+
+
+            dino_img, clip_image = image_process(image_dir, clip_image_processor, dino_img_processor, device)
+
+            clip_feature = image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
+            uncond_clip_feature = image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[
+                -2]
+            dino_feature = dinov2(dino_img).last_hidden_state
+            uncond_dino_feature = dinov2(torch.zeros_like(dino_img)).last_hidden_state
+
+            samples = generate_samples(DiT_model, cfg_scale, sample_steps, clip_feature, dino_feature,
+                                       uncond_clip_feature, uncond_dino_feature, device, latent_size,
+                                       'dpm-solver')
+
+            samples = (samples / 0.3994218)
+            samples = rearrange(samples, "b c (f h) w -> b c f h w", f=4)
+            samples = vae_triplane.decode(samples)
+            samples = rearrange(samples, "b c f h w -> b f c h w")
+            samples = samples * std + mean
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            save_frames_path_out = os.path.join(save_path_base, image_name, video_name, 'out')
+            save_frames_path_outshow = os.path.join(save_path_base, image_name, video_name,'out_show')
+            save_frames_path_depth = os.path.join(save_path_base, image_name, video_name, 'depth')
+
+            os.makedirs(save_frames_path_out, exist_ok=True)
+            os.makedirs(save_frames_path_outshow, exist_ok=True)
+            os.makedirs(save_frames_path_depth, exist_ok=True)
+
+            img_ref = np.array(Image.open(image_dir))
+            img_ref_out = img_ref.copy()
+            img_ref = torch.from_numpy(img_ref.astype(np.float32) / 127.5 - 1).permute(2, 0, 1).unsqueeze(0).to(device)
+
+            motion_app_dir = os.path.join(input_img_motion, image_name + '.npy')
+            motion_app = torch.tensor(np.load(motion_app_dir), dtype=torch.float32).unsqueeze(0).to(device)
+
+            id_motions = os.path.join(input_img_fvid, image_name + '.npy')
+
+            all_pose = json.loads(open(label_file_test).read())['labels']
+            all_pose = dict(all_pose)
+            if os.path.exists(id_motions):
+                coeff = np.load(id_motions).astype(np.float32)
+                coeff = torch.from_numpy(coeff).to(device).float().unsqueeze(0)
+                Faceverse.id_coeff = Faceverse.recon_model.split_coeffs(coeff)[0]
+            motion_dir = os.path.join(motion_base_dir, video_name)
+            exp_dir = os.path.join(exp_base_dir, video_name)
+            for frame_index, motion_name in enumerate(
+                    tqdm(natsorted(os.listdir(motion_dir), alg=ns.PATH), desc="Processing Frames")):
+                exp_each_dir_img = os.path.join(exp_img_base_dir, video_name, motion_name.replace('.npy', '.png'))
+                exp_each_dir = os.path.join(exp_dir, motion_name)
+                motion_each_dir = os.path.join(motion_dir, motion_name)
+
+                # Load pose data
+                pose_key = os.path.join(video_name, motion_name.replace('.npy', '.png'))
+
+                cam2world_pose = LookAtPoseSampler.sample(
+                    3.14 / 2 + yaw_range * np.sin(2 * 3.14 * frame_index / len(os.listdir(motion_dir))),
+                    3.14 / 2 - 0.05 + pitch_range * np.cos(2 * 3.14 * frame_index / len(os.listdir(motion_dir))),
+                    torch.tensor([0, 0, 0], device=device), radius=2.7, device=device)
+                pose_show = torch.cat([cam2world_pose.reshape(-1, 16),
+                                       FOV_to_intrinsics(fov_degrees=18.837, device=device).reshape(-1, 9)], 1).to(device)
+
+                pose = torch.tensor(np.array(all_pose[pose_key]).astype(np.float32)).float().unsqueeze(0).to(device)
+
+                # Load and resize expression image
+                exp_img = np.array(Image.open(exp_each_dir_img).resize((512, 512)))
+
+                # Load expression coefficients
+                exp_coeff = torch.from_numpy(np.load(exp_each_dir).astype(np.float32)).to(device).float().unsqueeze(0)
+                exp_target = Faceverse.make_driven_rendering(exp_coeff, res=256)
+
+                # Load motion data
+                motion = torch.tensor(np.load(motion_each_dir)).float().unsqueeze(0).to(device)
+
+                # img_ref_double = duplicate_batch(img_ref, batch_size=2)
+                # motion_app_double = duplicate_batch(motion_app, batch_size=2)
+                # motion_double = duplicate_batch(motion, batch_size=2)
+                # pose_double = torch.cat([pose_show, pose], dim=0)
+                # exp_target_double = duplicate_batch(exp_target, batch_size=2)
+                # samples_double = duplicate_batch(samples, batch_size=2)
+                # Select refine_net processing method
+                final_out = render_model(
+                    img_ref, None, motion_app, motion, c=pose, mesh=exp_target,
+                    triplane_recon=samples,
+                    ws_avg=ws_avg, motion_scale=1.
+                )
+
+                # Process output image
+                final_out_show = trans(final_out['image_sr'][0].unsqueeze(0))
+                # final_out_notshow = trans(final_out['image_sr'][0].unsqueeze(0))
+                depth = final_out['image_depth'][0].unsqueeze(0)
+                depth = -depth
+                depth = (depth - depth.min()) / (depth.max() - depth.min()) * 2 - 1
+                depth = trans(depth)
+
+                depth = np.repeat(depth[:, :, :], 3, axis=2)
+                # Save output images
+                frame_name = f'{str(frame_index).zfill(4)}.png'
+                Image.fromarray(depth, 'RGB').save(os.path.join(save_frames_path_depth, frame_name))
+                # Image.fromarray(final_out_notshow, 'RGB').save(os.path.join(save_frames_path_out, frame_name))
+
+                Image.fromarray(final_out_show, 'RGB').save(os.path.join(save_frames_path_outshow, frame_name))
+
+            # Generate videos
+            images_to_video(save_frames_path_out, os.path.join(save_path_base, image_name + video_name+ '_out.mp4'))
+            images_to_video(save_frames_path_depth, os.path.join(save_path_base, image_name + video_name+ '_depth.mp4'))
+
+            logging.info(f"✅ Video generation completed successfully!")
+            return os.path.join(save_path_base, image_name + video_name+ '_out.mp4'), os.path.join(save_path_base, image_name + video_name+'_depth.mp4')
+    except Exception as e:
+        return None, None, f"❌ error:{str(e)}"
 
 
 def get_image_base64(path):

@@ -631,35 +645,38 @@ def assert_input_image(input_image):
     if input_image is None:
         raise gr.Error("No image selected or uploaded!")
 
-@spaces.GPU(duration=100)
+@spaces.GPU(duration=30)
 def process_image(input_image_dir, source_type, is_style, save_dir):
-    """ 🎯 Process input_image; branch depending on whether it is an example image """
-    process_img_input_dir = os.path.join(save_dir, 'input_image')
-    process_img_save_dir = os.path.join(save_dir, 'processed_img')
-    base_name = os.path.basename(input_image_dir)  # abc123.jpg
-    name_without_ext = os.path.splitext(base_name)[0]  # abc123
-    image_name_true = name_without_ext + ".png"
-    os.makedirs(process_img_save_dir, exist_ok=True)
-    os.makedirs(process_img_input_dir, exist_ok=True)
-    if source_type == "example":
-        image = Image.open(input_image_dir)
-        return image, source_type, image_name_true
-    else:
-        # input_process_model.inference(input_image, process_img_save_dir)
-        shutil.copy(input_image_dir, process_img_input_dir)
-        input_process_model.inference(process_img_input_dir, process_img_save_dir, is_img=True, is_video=False)
-
-        files = os.listdir(os.path.join(process_img_save_dir, 'dataset/images512x512/input_image'))
-        image_files = [f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.webp'))]
-        # Use difflib to find the closest matching filename
-        matches = difflib.get_close_matches(image_name_true, image_files, n=1, cutoff=0.1)
-        closest_match = matches[0]
-        imge_dir = os.path.join(process_img_save_dir, 'dataset/images512x512/input_image', closest_match)
-        image = Image.open(imge_dir)
-        image_name_true = closest_match
-        return image, source_type, image_name_true  # Replace this with the logic for handling a user-uploaded image
-
-@spaces.GPU(duration=100)
+
+    """ 🎯 Process input_image; branch depending on whether it is an example image """
+    process_img_input_dir = os.path.join(save_dir, 'input_image')
+    process_img_save_dir = os.path.join(save_dir, 'processed_img')
+    base_name = os.path.basename(input_image_dir)  # abc123.jpg
+    name_without_ext = os.path.splitext(base_name)[0]  # abc123
+    image_name_true = name_without_ext + ".png"
+    os.makedirs(process_img_save_dir, exist_ok=True)
+    os.makedirs(process_img_input_dir, exist_ok=True)
+    if source_type == "example":
+        image = Image.open(input_image_dir)
+        return image, source_type, image_name_true, ""
+    else:
+        # input_process_model.inference(input_image, process_img_save_dir)
+        shutil.copy(input_image_dir, process_img_input_dir)
+        input_process_model.inference(process_img_input_dir, process_img_save_dir, is_img=True, is_video=False)
+
+        files = os.listdir(os.path.join(process_img_save_dir, 'dataset/images512x512/input_image'))
+        image_files = [f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.webp'))]
+        # Use difflib to find the closest matching filename
+        matches = difflib.get_close_matches(image_name_true, image_files, n=1, cutoff=0.1)
+        closest_match = matches[0]
+        imge_dir = os.path.join(process_img_save_dir, 'dataset/images512x512/input_image', closest_match)
+        image = Image.open(imge_dir)
+        image_name_true = closest_match
+        return image, source_type, image_name_true, ""  # Replace this with the logic for handling a user-uploaded image
+
+
+
+@spaces.GPU(duration=30)
 @torch.no_grad()
 def style_transfer(processed_image, style_prompt, cfg, strength, save_base,image_name_true):
     """

@@ -682,7 +699,8 @@ def style_transfer(processed_image, style_prompt, cfg, strength, save_base,image
         controlnet_conditioning_scale=1.5
     )['images'][0]
     trg_img_pil.save(os.path.join(save_dir, image_name_true))
-    return trg_img_pil  # 🚨 Replace this with your style-transfer logic
+    return trg_img_pil, ""  # 🚨 Replace this with your style-transfer logic
+
 
 
 def reset_flag():

@@ -827,6 +845,7 @@ def launch_gradio_app():
         is_styled = gr.State(value=False)
         working_dir = gr.State()
        image_name_true = gr.State()
+        error_box = gr.Textbox(label="error hint", lines=3, interactive=False, visible=True)
 
 
         with gr.Row():

@@ -953,12 +972,19 @@ def launch_gradio_app():
                 autoplay=True
             )
         def apply_style_and_mark(processed_image, style_choice, cfg, strength, working_dir, image_name_true):
-            styled = style_transfer(processed_image, styles[style_choice], cfg, strength, working_dir, image_name_true)
-            return styled, True
+            try:
+                styled = style_transfer(processed_image, styles[style_choice], cfg, strength, working_dir, image_name_true)
+                return styled, True, ""
+            except Exception as e:
+                return None, True, f"❌ error:{str(e)}"
 
         def process_image_and_enable_style(input_image, source_type, is_styled, wd):
-            processed_result, updated_source_type, image_name_true = process_image(input_image, source_type, is_styled, wd)
-            return processed_result, updated_source_type, gr.update(interactive=True), gr.update(interactive=True), image_name_true
+            try:
+                processed_result, updated_source_type, image_name_true = process_image(input_image, source_type, is_styled, wd)
+
+                return processed_result, updated_source_type, gr.update(interactive=True), gr.update(interactive=True), image_name_true, ""
+            except Exception as e:
+                return None, updated_source_type, gr.update(interactive=False), gr.update(interactive=False), image_name_true, f"❌ error:{str(e)}"
 
         processed_image_button.click(
             fn=prepare_working_dir,

@@ -968,18 +994,18 @@ def launch_gradio_app():
         ).success(
             fn=process_image_and_enable_style,
             inputs=[input_image, source_type, is_styled, working_dir],
-            outputs=[processed_image, source_type, style_button, submit, image_name_true],
+            outputs=[processed_image, source_type, style_button, submit, image_name_true, error_box],
             queue=True
         )
         style_button.click(
            fn=apply_style_and_mark,
             inputs=[processed_image, style_choice, cfg_slider, strength_slider, working_dir, image_name_true],
-            outputs=[style_image, is_styled]
+            outputs=[style_image, is_styled, error_box]
         )
         submit.click(
             fn=avatar_generation,
             inputs=[processed_image, working_dir, video_input, source_type, is_styled, style_image, image_name_true],
-            outputs=[output_video, output_video_1],  # ⏳ Videos are shown later
+            outputs=[output_video, output_video_1, error_box],  # ⏳ Videos are shown later
             queue=True
         )
 