Spaces:
Sleeping
Sleeping
Commit
·
ac802d5
1
Parent(s):
cd8c6a9
changes to work
Browse files
- .gitignore +1 -1
- configs/unet/first_stage.yaml +1 -3
- configs/unet/second_stage.yaml +1 -3
- soundimage/models/motion_module.py +0 -4
- soundimage/utils/image_processor.py +1 -1
- soundimage/utils/mask.png +3 -0
.gitignore
CHANGED
@@ -40,6 +40,6 @@ wandb/
|
|
40 |
*.jpeg
|
41 |
*.csv
|
42 |
|
43 |
-
!/
|
44 |
/checkpoints/
|
45 |
!/assets/*
|
|
|
40 |
*.jpeg
|
41 |
*.csv
|
42 |
|
43 |
+
!/soundimage/utils/mask.png
|
44 |
/checkpoints/
|
45 |
!/assets/*
|
configs/unet/first_stage.yaml
CHANGED
@@ -83,9 +83,7 @@ model:
|
|
83 |
unet_use_cross_frame_attention: false
|
84 |
unet_use_temporal_attention: false
|
85 |
|
86 |
-
|
87 |
-
# When we started the project, we used the codebase of AnimateDiff and tried motion module, the results are poor
|
88 |
-
# We decided to leave the code here for possible future usage
|
89 |
use_motion_module: false
|
90 |
motion_module_resolutions: [1, 2, 4, 8]
|
91 |
motion_module_mid_block: false
|
|
|
83 |
unet_use_cross_frame_attention: false
|
84 |
unet_use_temporal_attention: false
|
85 |
|
86 |
+
|
|
|
|
|
87 |
use_motion_module: false
|
88 |
motion_module_resolutions: [1, 2, 4, 8]
|
89 |
motion_module_mid_block: false
|
configs/unet/second_stage.yaml
CHANGED
@@ -83,9 +83,7 @@ model:
|
|
83 |
unet_use_cross_frame_attention: false
|
84 |
unet_use_temporal_attention: false
|
85 |
|
86 |
-
|
87 |
-
# When we started the project, we used the codebase of AnimateDiff and tried motion module, the results are poor
|
88 |
-
# We decided to leave the code here for possible future usage
|
89 |
use_motion_module: false
|
90 |
motion_module_resolutions: [1, 2, 4, 8]
|
91 |
motion_module_mid_block: false
|
|
|
83 |
unet_use_cross_frame_attention: false
|
84 |
unet_use_temporal_attention: false
|
85 |
|
86 |
+
|
|
|
|
|
87 |
use_motion_module: false
|
88 |
motion_module_resolutions: [1, 2, 4, 8]
|
89 |
motion_module_mid_block: false
|
soundimage/models/motion_module.py
CHANGED
@@ -1,8 +1,4 @@
|
|
1 |
-
# Adapted from https://github.com/guoyww/AnimateDiff/blob/main/animatediff/models/motion_module.py
|
2 |
|
3 |
-
# Actually we don't use the motion module in the final version of LatentSync
|
4 |
-
# When we started the project, we used the codebase of AnimateDiff and tried motion module
|
5 |
-
# But the results are poor, and we decided to leave the code here for possible future usage
|
6 |
|
7 |
from dataclasses import dataclass
|
8 |
|
|
|
|
|
1 |
|
|
|
|
|
|
|
2 |
|
3 |
from dataclasses import dataclass
|
4 |
|
soundimage/utils/image_processor.py
CHANGED
@@ -29,7 +29,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
|
|
29 |
|
30 |
|
31 |
def load_fixed_mask(resolution: int) -> torch.Tensor:
|
32 |
-
mask_image = cv2.imread("
|
33 |
mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
|
34 |
mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
|
35 |
mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
|
|
|
29 |
|
30 |
|
31 |
def load_fixed_mask(resolution: int) -> torch.Tensor:
|
32 |
+
mask_image = cv2.imread("soundimage/utils/mask.png")
|
33 |
mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
|
34 |
mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
|
35 |
mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
|
soundimage/utils/mask.png
ADDED
![]() |
Git LFS Details
|