Commit d64aa71
Parent(s): 36fe809
bug fixes
soundimage/pipelines/lipsync_pipeline.py
CHANGED
@@ -318,7 +318,7 @@ class LipsyncPipeline(DiffusionPipeline):
         # 0. Define call parameters
         batch_size = 1
         device = self._execution_device
-        self.image_processor = ImageProcessor(height, mask=mask, device="cuda",
+        self.image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_path=mask_path)
         self.set_progress_bar_config(desc=f"Sample frames: {num_frames}")

         video_frames, original_video_frames, boxes, affine_matrices = self.affine_transform_video(video_path)
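
For context, here is a minimal sketch of how the updated call site could be exercised. Only the ImageProcessor keyword arguments themselves come from the diff above; the concrete height, mask, and mask_path values are illustrative stand-ins for arguments that LipsyncPipeline.__call__ receives elsewhere.

# Hypothetical usage of the changed constructor call; the concrete values are
# assumptions, not taken from the commit.
from soundimage.utils.image_processor import ImageProcessor

height = 256                    # assumed working resolution for sampled frames
mask = "fix_mask"               # assumed masking mode forwarded by the pipeline
mask_path = "assets/mask.png"   # assumed path to the fixed mask image

# After this commit the pipeline forwards mask_path explicitly, so the
# processor receives the caller-supplied mask path.
image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_path=mask_path)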
soundimage/utils/image_processor.py
CHANGED
@@ -28,12 +28,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
 """


-def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
-    mask_image = cv2.imread(mask_path)
-    mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
-    mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
-    mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
-    return mask_image
+


 class ImageProcessor:
@@ -53,7 +48,7 @@ class ImageProcessor:
         self.restorer = AlignRestore()

         if mask_image is None:
-            self.mask_image = load_fixed_mask(resolution, mask_path)
+            self.mask_image = self.load_fixed_mask(resolution, mask_path)
         else:
             self.mask_image = mask_image

@@ -66,6 +61,12 @@ class ImageProcessor:
         # self.face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True) # Process single image
         self.face_mesh = None
         self.fa = None
+    def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
+        mask_image = cv2.imread(mask_path)
+        mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
+        mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
+        mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
+        return mask_image

     def detect_facial_landmarks(self, image: np.ndarray):
         height, width, _ = image.shape
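
For reference, the mask-loading logic moved into the class reads as the self-contained sketch below. It assumes the cv2, torch, and einops imports that image_processor.py already uses, and the mask path in the usage comment is hypothetical: read the image with OpenCV, convert BGR to RGB, resize to resolution x resolution with area interpolation, scale to [0, 1], and return a channels-first tensor.

import cv2
import torch
from einops import rearrange


def load_fixed_mask_sketch(resolution: int, mask_path: str) -> torch.Tensor:
    # OpenCV reads the mask as an H x W x 3 uint8 array in BGR channel order.
    mask_image = cv2.imread(mask_path)
    # Reorder channels from BGR to RGB.
    mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
    # Resize to the working resolution (area interpolation) and scale to [0, 1].
    mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
    # Convert to a channels-first (c, h, w) torch tensor for later masking ops.
    mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
    return mask_image


# Hypothetical usage; the path is an example, not taken from the commit:
# fixed_mask = load_fixed_mask_sketch(256, "assets/mask.png")

One detail worth noting about the hunk above: the method is added to the class without a self parameter or a @staticmethod decorator, so the new call self.load_fixed_mask(resolution, mask_path) passes three positional arguments (the instance plus the two explicit ones) to a two-parameter function and would raise a TypeError at runtime; the standalone sketch stays a plain function to sidestep that.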