samarth-ht committed on
Commit
ac802d5
·
1 Parent(s): cd8c6a9

changes to work

Browse files
.gitignore CHANGED
@@ -40,6 +40,6 @@ wandb/
40
  *.jpeg
41
  *.csv
42
 
43
- !/latentsync/utils/mask.png
44
  /checkpoints/
45
  !/assets/*
 
40
  *.jpeg
41
  *.csv
42
 
43
+ !/soundimage/utils/mask.png
44
  /checkpoints/
45
  !/assets/*
configs/unet/first_stage.yaml CHANGED
@@ -83,9 +83,7 @@ model:
83
  unet_use_cross_frame_attention: false
84
  unet_use_temporal_attention: false
85
 
86
- # Actually we don't use the motion module in the final version of LatentSync
87
- # When we started the project, we used the codebase of AnimateDiff and tried motion module, the results are poor
88
- # We decied to leave the code here for possible future usage
89
  use_motion_module: false
90
  motion_module_resolutions: [1, 2, 4, 8]
91
  motion_module_mid_block: false
 
83
  unet_use_cross_frame_attention: false
84
  unet_use_temporal_attention: false
85
 
86
+
 
 
87
  use_motion_module: false
88
  motion_module_resolutions: [1, 2, 4, 8]
89
  motion_module_mid_block: false
configs/unet/second_stage.yaml CHANGED
@@ -83,9 +83,7 @@ model:
83
  unet_use_cross_frame_attention: false
84
  unet_use_temporal_attention: false
85
 
86
- # Actually we don't use the motion module in the final version of LatentSync
87
- # When we started the project, we used the codebase of AnimateDiff and tried motion module, the results are poor
88
- # We decied to leave the code here for possible future usage
89
  use_motion_module: false
90
  motion_module_resolutions: [1, 2, 4, 8]
91
  motion_module_mid_block: false
 
83
  unet_use_cross_frame_attention: false
84
  unet_use_temporal_attention: false
85
 
86
+
 
 
87
  use_motion_module: false
88
  motion_module_resolutions: [1, 2, 4, 8]
89
  motion_module_mid_block: false
soundimage/models/motion_module.py CHANGED
@@ -1,8 +1,4 @@
1
- # Adapted from https://github.com/guoyww/AnimateDiff/blob/main/animatediff/models/motion_module.py
2
 
3
- # Actually we don't use the motion module in the final version of LatentSync
4
- # When we started the project, we used the codebase of AnimateDiff and tried motion module
5
- # But the results are poor, and we decied to leave the code here for possible future usage
6
 
7
  from dataclasses import dataclass
8
 
 
 
1
 
 
 
 
2
 
3
  from dataclasses import dataclass
4
 
soundimage/utils/image_processor.py CHANGED
@@ -29,7 +29,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
29
 
30
 
31
  def load_fixed_mask(resolution: int) -> torch.Tensor:
32
- mask_image = cv2.imread("latentsync/utils/mask.png")
33
  mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
34
  mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
35
  mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
 
29
 
30
 
31
  def load_fixed_mask(resolution: int) -> torch.Tensor:
32
+ mask_image = cv2.imread("soundimage/utils/mask.png")
33
  mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
34
  mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
35
  mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
soundimage/utils/mask.png ADDED

Git LFS Details

  • SHA256: aa233251b9ff5691a1565a4108f0910ab1e5e7ad79a7bb2b741ab4d92c81053c
  • Pointer size: 129 Bytes
  • Size of remote file: 1.87 kB