Spaces:

Hyathi
/

SoundImage-LipSync

Sleeping

samarth-ht committed on Jan 30

Commit

d64aa71

1 Parent(s): 36fe809

bug fixes

Files changed (2) hide show

soundimage/pipelines/lipsync_pipeline.py CHANGED Viewed

@@ -318,7 +318,7 @@ class LipsyncPipeline(DiffusionPipeline):
         # 0. Define call parameters
         batch_size = 1
         device = self._execution_device
-        self.image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_image=mask_path)
         self.set_progress_bar_config(desc=f"Sample frames: {num_frames}")
         video_frames, original_video_frames, boxes, affine_matrices = self.affine_transform_video(video_path)

         # 0. Define call parameters
         batch_size = 1
         device = self._execution_device
+        self.image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_path=mask_path)
         self.set_progress_bar_config(desc=f"Sample frames: {num_frames}")
         video_frames, original_video_frames, boxes, affine_matrices = self.affine_transform_video(video_path)

soundimage/utils/image_processor.py CHANGED Viewed

@@ -28,12 +28,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
 """
-def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
-    mask_image = cv2.imread(mask_path)
-    mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
-    mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
-    mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
-    return mask_image
 class ImageProcessor:
@@ -53,7 +48,7 @@ class ImageProcessor:
             self.restorer = AlignRestore()
             if mask_image is None:
-                self.mask_image = load_fixed_mask(resolution, mask_path)
             else:
                 self.mask_image = mask_image
@@ -66,6 +61,12 @@ class ImageProcessor:
                 # self.face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True)  # Process single image
                 self.face_mesh = None
                 self.fa = None
     def detect_facial_landmarks(self, image: np.ndarray):
         height, width, _ = image.shape

 """
 class ImageProcessor:
             self.restorer = AlignRestore()
             if mask_image is None:
+                self.mask_image = self.load_fixed_mask(resolution, mask_path)
             else:
                 self.mask_image = mask_image
                 # self.face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True)  # Process single image
                 self.face_mesh = None
                 self.fa = None
+    def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
+        mask_image = cv2.imread(mask_path)
+        mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
+        mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
+        mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
+        return mask_image
     def detect_facial_landmarks(self, image: np.ndarray):
         height, width, _ = image.shape