samarth-ht committed on
Commit
d64aa71
·
1 Parent(s): 36fe809
soundimage/pipelines/lipsync_pipeline.py CHANGED
@@ -318,7 +318,7 @@ class LipsyncPipeline(DiffusionPipeline):
318
  # 0. Define call parameters
319
  batch_size = 1
320
  device = self._execution_device
321
- self.image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_image=mask_path)
322
  self.set_progress_bar_config(desc=f"Sample frames: {num_frames}")
323
 
324
  video_frames, original_video_frames, boxes, affine_matrices = self.affine_transform_video(video_path)
 
318
  # 0. Define call parameters
319
  batch_size = 1
320
  device = self._execution_device
321
+ self.image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_path=mask_path)
322
  self.set_progress_bar_config(desc=f"Sample frames: {num_frames}")
323
 
324
  video_frames, original_video_frames, boxes, affine_matrices = self.affine_transform_video(video_path)
soundimage/utils/image_processor.py CHANGED
@@ -28,12 +28,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
28
  """
29
 
30
 
31
def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
    """Load the mask image at ``mask_path`` as a channel-first tensor.

    The file is read with OpenCV, converted from BGR to RGB channel order,
    resized to ``resolution`` x ``resolution`` using area interpolation,
    divided by 255.0, and finally rearranged from "h w c" to "c h w".

    Args:
        resolution: Side length, in pixels, of the square output mask.
        mask_path: Path of the mask image file to read.

    Returns:
        The mask as a ``torch.Tensor`` laid out as "c h w".
    """
    bgr_pixels = cv2.imread(mask_path)
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    target_size = (resolution, resolution)
    scaled_pixels = cv2.resize(rgb_pixels, target_size, interpolation=cv2.INTER_AREA) / 255.0
    # Move the channel axis to the front.
    return rearrange(torch.from_numpy(scaled_pixels), "h w c -> c h w")
37
 
38
 
39
  class ImageProcessor:
@@ -53,7 +48,7 @@ class ImageProcessor:
53
  self.restorer = AlignRestore()
54
 
55
  if mask_image is None:
56
- self.mask_image = load_fixed_mask(resolution, mask_path)
57
  else:
58
  self.mask_image = mask_image
59
 
@@ -66,6 +61,12 @@ class ImageProcessor:
66
  # self.face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True) # Process single image
67
  self.face_mesh = None
68
  self.fa = None
 
 
 
 
 
 
69
 
70
  def detect_facial_landmarks(self, image: np.ndarray):
71
  height, width, _ = image.shape
 
28
  """
29
 
30
 
31
+
 
 
 
 
 
32
 
33
 
34
  class ImageProcessor:
 
48
  self.restorer = AlignRestore()
49
 
50
  if mask_image is None:
51
+ self.mask_image = self.load_fixed_mask(resolution, mask_path)
52
  else:
53
  self.mask_image = mask_image
54
 
 
61
  # self.face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True) # Process single image
62
  self.face_mesh = None
63
  self.fa = None
64
@staticmethod
def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
    """Load the mask image at ``mask_path`` as a channel-first tensor.

    Declared as a static method because it reads no instance state and is
    invoked as ``self.load_fixed_mask(resolution, mask_path)``; without the
    decorator that bound call would pass the instance as ``resolution`` and
    fail with a ``TypeError``.

    The file is read with OpenCV, converted from BGR to RGB channel order,
    resized to ``resolution`` x ``resolution`` using area interpolation,
    divided by 255.0, and finally rearranged from "h w c" to "c h w".

    Args:
        resolution: Side length, in pixels, of the square output mask.
        mask_path: Path of the mask image file to read.

    Returns:
        The mask as a ``torch.Tensor`` laid out as "c h w".
    """
    # NOTE(review): cv2.imread is documented to return None for an unreadable
    # path, in which case the cvtColor call below raises -- confirm whether a
    # clearer error is wanted here.
    mask_image = cv2.imread(mask_path)
    mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
    mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
    # Move the channel axis to the front.
    mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
    return mask_image
70
 
71
  def detect_facial_landmarks(self, image: np.ndarray):
72
  height, width, _ = image.shape