diff --git a/LICENSE b/LICENSE deleted file mode 100644 index f49a4e16e68b128803cc2dcea614603632b04eac..0000000000000000000000000000000000000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/app.py b/app.py index 36fb47b18be33f8e27f9b8f3fe076c84b4b2c4ff..e2e4ff9a1b33947f232aaf527411843b912aca55 100644 --- a/app.py +++ b/app.py @@ -10,8 +10,7 @@ import os CONFIG_PATH = Path("configs/unet/second_stage.yaml") CHECKPOINT_PATH = Path("checkpoints/latentsync_unet.pt") -# subprocess.run(["huggingface-cli", "download", "Hyathi/LatentSync", "--local-dir", "checkpoints", "--exclude", "*.git*", "README.md"]) -subprocess.run(["huggingface-cli", "download", "Hyathi/LatentSync", "--local-dir", "checkpoints", "--exclude", "*.git*", "README.md", "--token", os.environ["HF_TOKEN"]]) +subprocess.run(["huggingface-cli", "download", "Hyathi/SoundImage-LipSync", "--local-dir", "checkpoints", "--exclude", "*.git*", "README.md", "--token", os.environ["HF_TOKEN"]]) def process_video( video_path, diff --git a/data_processing_pipeline.sh b/data_processing_pipeline.sh deleted file mode 100644 index 73f551a4f6d73bd11576861fcf9818e010129499..0000000000000000000000000000000000000000 --- a/data_processing_pipeline.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -python -m preprocess.data_processing_pipeline \ - --total_num_workers 20 \ - --per_gpu_num_workers 10 \ - --resolution 256 \ - --sync_conf_threshold 3 \ - --temp_dir temp \ - --input_dir /mnt/bn/maliva-gen-ai-v2/chunyu.li/VoxCeleb2/raw diff --git a/eval/eval_sync_conf.py b/eval/eval_sync_conf.py index 741a0c3505c0206fe4e8076c98e5260c36afbf11..a5570328d21321f8b9c5400527c41a9c413b0814 100644 --- a/eval/eval_sync_conf.py +++ b/eval/eval_sync_conf.py @@ -18,7 +18,7 @@ import tqdm from statistics import fmean from eval.syncnet import SyncNetEval from eval.syncnet_detect import SyncNetDetector -from latentsync.utils.util import red_text +from soundimage.utils.util import red_text import torch diff --git a/eval/eval_syncnet_acc.py b/eval/eval_syncnet_acc.py index acde8d804389b70520eb4f3ccb26b860516a2c4c..636e6862324b7600d4dc1fd6337f96f93aec5741 100644 --- a/eval/eval_syncnet_acc.py +++ b/eval/eval_syncnet_acc.py @@ -17,8 +17,8 @@ from tqdm.auto import tqdm import torch import torch.nn as nn from einops import rearrange -from latentsync.models.syncnet import SyncNet -from latentsync.data.syncnet_dataset import SyncNetDataset +from soundimage.models.syncnet import SyncNet +from soundimage.data.syncnet_dataset import SyncNetDataset from diffusers import AutoencoderKL from omegaconf import OmegaConf from accelerate.utils import set_seed diff --git a/inference.sh b/inference.sh deleted file mode 100644 index d032fb2243b92b5ac8dce4dcc4679bfb1022f259..0000000000000000000000000000000000000000 --- a/inference.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -python -m scripts.inference \ - --unet_config_path "configs/unet/second_stage.yaml" \ - --inference_ckpt_path "checkpoints/latentsync_unet.pt" \ - --guidance_scale 1.0 \ - --video_path "assets/demo1_video.mp4" \ - --audio_path "assets/demo1_audio.wav" \ - --video_out_path "video_out.mp4" diff --git a/latentsync/utils/mask.png b/latentsync/utils/mask.png deleted file mode 100644 index eb8fdd1f4e1c59a55f9b3604a71946b9833f3ca2..0000000000000000000000000000000000000000 Binary files a/latentsync/utils/mask.png and /dev/null differ diff --git a/preprocess/affine_transform.py b/preprocess/affine_transform.py index a9e37fbca1469917788cd4ef33b4660fe17acfff..f389a1895a931edb8dd2fe03cf176d50468bf9e2 100644 --- a/preprocess/affine_transform.py +++ b/preprocess/affine_transform.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from latentsync.utils.util import read_video, write_video -from latentsync.utils.image_processor import ImageProcessor +from soundimage.utils.util import read_video, write_video +from soundimage.utils.image_processor import ImageProcessor import torch from einops import rearrange import os diff --git a/preprocess/filter_high_resolution.py b/preprocess/filter_high_resolution.py index 9c8f2b8873ff86ffe9be934a9bc3f9bd5adbc335..5367936c6f774cac9a9d3233b81cb86107566998 100644 --- a/preprocess/filter_high_resolution.py +++ b/preprocess/filter_high_resolution.py @@ -13,7 +13,7 @@ # limitations under the License. import mediapipe as mp -from latentsync.utils.util import read_video +from soundimage.utils.util import read_video import os import tqdm import shutil diff --git a/preprocess/remove_broken_videos.py b/preprocess/remove_broken_videos.py index fc58b38e9ea51ac5ac3efde911a8712885fef096..8f469700596e56999138b110264eb2a2789b24b5 100644 --- a/preprocess/remove_broken_videos.py +++ b/preprocess/remove_broken_videos.py @@ -16,8 +16,8 @@ import os from multiprocessing import Pool import tqdm -from latentsync.utils.av_reader import AVReader -from latentsync.utils.util import gather_video_paths_recursively +from soundimage.utils.av_reader import AVReader +from soundimage.utils.util import gather_video_paths_recursively def remove_broken_video(video_path): diff --git a/preprocess/remove_incorrect_affined.py b/preprocess/remove_incorrect_affined.py index baf4a85bbf682258f748312da9c109e60d410e53..8188f19a80d220c6f30428c07e7264aabd4add7a 100644 --- a/preprocess/remove_incorrect_affined.py +++ b/preprocess/remove_incorrect_affined.py @@ -13,7 +13,7 @@ # limitations under the License. import mediapipe as mp -from latentsync.utils.util import read_video, gather_video_paths_recursively +from soundimage.utils.util import read_video, gather_video_paths_recursively import os import tqdm from multiprocessing import Pool diff --git a/scripts/inference.py b/scripts/inference.py index 5f92690f14110a580cb147949501aa687367c663..76ca42e87562a66155e18a6357385a414f353663 100644 --- a/scripts/inference.py +++ b/scripts/inference.py @@ -16,11 +16,11 @@ import argparse from omegaconf import OmegaConf import torch from diffusers import AutoencoderKL, DDIMScheduler -from latentsync.models.unet import UNet3DConditionModel -from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline +from soundimage.models.unet import UNet3DConditionModel +from soundimage.pipelines.lipsync_pipeline import LipsyncPipeline from diffusers.utils.import_utils import is_xformers_available from accelerate.utils import set_seed -from latentsync.whisper.audio2feature import Audio2Feature +from soundimage.whisper.audio2feature import Audio2Feature def main(config, args): diff --git a/scripts/train_syncnet.py b/scripts/train_syncnet.py index 56b983a29764e45469d94dbe0e3c50357252ed9f..604b2b9c060b3d08f1a3031c2f5b274ac5051f53 100644 --- a/scripts/train_syncnet.py +++ b/scripts/train_syncnet.py @@ -18,10 +18,10 @@ import logging from omegaconf import OmegaConf import shutil -from latentsync.data.syncnet_dataset import SyncNetDataset -from latentsync.models.syncnet import SyncNet -from latentsync.models.syncnet_wav2lip import SyncNetWav2Lip -from latentsync.utils.util import gather_loss, plot_loss_chart +from soundimage.data.syncnet_dataset import SyncNetDataset +from soundimage.models.syncnet import SyncNet +from soundimage.models.syncnet_wav2lip import SyncNetWav2Lip +from soundimage.utils.util import gather_loss, plot_loss_chart from accelerate.utils import set_seed import torch @@ -31,7 +31,7 @@ from einops import rearrange import torch.distributed as dist from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.data.distributed import DistributedSampler -from latentsync.utils.util import init_dist, cosine_loss +from soundimage.utils.util import init_dist, cosine_loss logger = get_logger(__name__) diff --git a/scripts/train_unet.py b/scripts/train_unet.py index ee1a0a620700a0c19cc5bff4aada7f3276c0b5a8..5ed04ffbb9fc155eba6a7e566ea66009bc7d4009 100644 --- a/scripts/train_unet.py +++ b/scripts/train_unet.py @@ -36,18 +36,18 @@ from diffusers.optimization import get_scheduler from diffusers.utils.import_utils import is_xformers_available from accelerate.utils import set_seed -from latentsync.data.unet_dataset import UNetDataset -from latentsync.models.unet import UNet3DConditionModel -from latentsync.models.syncnet import SyncNet -from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline -from latentsync.utils.util import ( +from soundimage.data.unet_dataset import UNetDataset +from soundimage.models.unet import UNet3DConditionModel +from soundimage.models.syncnet import SyncNet +from soundimage.pipelines.lipsync_pipeline import LipsyncPipeline +from soundimage.utils.util import ( init_dist, cosine_loss, reversed_forward, ) -from latentsync.utils.util import plot_loss_chart, gather_loss -from latentsync.whisper.audio2feature import Audio2Feature -from latentsync.trepa import TREPALoss +from soundimage.utils.util import plot_loss_chart, gather_loss +from soundimage.whisper.audio2feature import Audio2Feature +from soundimage.trepa import TREPALoss from eval.syncnet import SyncNetEval from eval.syncnet_detect import SyncNetDetector from eval.eval_sync_conf import syncnet_eval diff --git a/setup_env.sh b/setup_env.sh deleted file mode 100644 index 487092c2759d2df3edafd4a02acf7bbdd20dd8c3..0000000000000000000000000000000000000000 --- a/setup_env.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# Create a new conda environment -conda create -y -n latentsync python=3.10.13 -conda activate latentsync - -# Install ffmpeg -conda install -y -c conda-forge ffmpeg - -# Python dependencies -pip install -r requirements.txt - -# OpenCV dependencies -sudo apt -y install libgl1 - -# Download all the checkpoints from HuggingFace -huggingface-cli download Hyathi/LatentSync --local-dir checkpoints --exclude "*.git*" "README.md" - -# Soft links for the auxiliary models -mkdir -p ~/.cache/torch/hub/checkpoints -ln -s $(pwd)/checkpoints/auxiliary/2DFAN4-cd938726ad.zip ~/.cache/torch/hub/checkpoints/2DFAN4-cd938726ad.zip -ln -s $(pwd)/checkpoints/auxiliary/s3fd-619a316812.pth ~/.cache/torch/hub/checkpoints/s3fd-619a316812.pth -ln -s $(pwd)/checkpoints/auxiliary/vgg16-397923af.pth ~/.cache/torch/hub/checkpoints/vgg16-397923af.pth diff --git a/latentsync/data/syncnet_dataset.py b/soundimage/data/syncnet_dataset.py similarity index 100% rename from latentsync/data/syncnet_dataset.py rename to soundimage/data/syncnet_dataset.py diff --git a/latentsync/data/unet_dataset.py b/soundimage/data/unet_dataset.py similarity index 100% rename from latentsync/data/unet_dataset.py rename to soundimage/data/unet_dataset.py diff --git a/latentsync/models/attention.py b/soundimage/models/attention.py similarity index 100% rename from latentsync/models/attention.py rename to soundimage/models/attention.py diff --git a/latentsync/models/motion_module.py b/soundimage/models/motion_module.py similarity index 100% rename from latentsync/models/motion_module.py rename to soundimage/models/motion_module.py diff --git a/latentsync/models/resnet.py b/soundimage/models/resnet.py similarity index 100% rename from latentsync/models/resnet.py rename to soundimage/models/resnet.py diff --git a/latentsync/models/syncnet.py b/soundimage/models/syncnet.py similarity index 100% rename from latentsync/models/syncnet.py rename to soundimage/models/syncnet.py diff --git a/latentsync/models/syncnet_wav2lip.py b/soundimage/models/syncnet_wav2lip.py similarity index 100% rename from latentsync/models/syncnet_wav2lip.py rename to soundimage/models/syncnet_wav2lip.py diff --git a/latentsync/models/unet.py b/soundimage/models/unet.py similarity index 100% rename from latentsync/models/unet.py rename to soundimage/models/unet.py diff --git a/latentsync/models/unet_blocks.py b/soundimage/models/unet_blocks.py similarity index 100% rename from latentsync/models/unet_blocks.py rename to soundimage/models/unet_blocks.py diff --git a/latentsync/models/utils.py b/soundimage/models/utils.py similarity index 100% rename from latentsync/models/utils.py rename to soundimage/models/utils.py diff --git a/latentsync/pipelines/lipsync_pipeline.py b/soundimage/pipelines/lipsync_pipeline.py similarity index 100% rename from latentsync/pipelines/lipsync_pipeline.py rename to soundimage/pipelines/lipsync_pipeline.py diff --git a/latentsync/trepa/__init__.py b/soundimage/trepa/__init__.py similarity index 100% rename from latentsync/trepa/__init__.py rename to soundimage/trepa/__init__.py diff --git a/latentsync/trepa/third_party/VideoMAEv2/__init__.py b/soundimage/trepa/third_party/VideoMAEv2/__init__.py similarity index 100% rename from latentsync/trepa/third_party/VideoMAEv2/__init__.py rename to soundimage/trepa/third_party/VideoMAEv2/__init__.py diff --git a/latentsync/trepa/third_party/VideoMAEv2/utils.py b/soundimage/trepa/third_party/VideoMAEv2/utils.py similarity index 100% rename from latentsync/trepa/third_party/VideoMAEv2/utils.py rename to soundimage/trepa/third_party/VideoMAEv2/utils.py diff --git a/latentsync/trepa/third_party/VideoMAEv2/videomaev2_finetune.py b/soundimage/trepa/third_party/VideoMAEv2/videomaev2_finetune.py similarity index 100% rename from latentsync/trepa/third_party/VideoMAEv2/videomaev2_finetune.py rename to soundimage/trepa/third_party/VideoMAEv2/videomaev2_finetune.py diff --git a/latentsync/trepa/third_party/VideoMAEv2/videomaev2_pretrain.py b/soundimage/trepa/third_party/VideoMAEv2/videomaev2_pretrain.py similarity index 100% rename from latentsync/trepa/third_party/VideoMAEv2/videomaev2_pretrain.py rename to soundimage/trepa/third_party/VideoMAEv2/videomaev2_pretrain.py diff --git a/latentsync/trepa/third_party/__init__.py b/soundimage/trepa/third_party/__init__.py similarity index 100% rename from latentsync/trepa/third_party/__init__.py rename to soundimage/trepa/third_party/__init__.py diff --git a/latentsync/trepa/utils/__init__.py b/soundimage/trepa/utils/__init__.py similarity index 100% rename from latentsync/trepa/utils/__init__.py rename to soundimage/trepa/utils/__init__.py diff --git a/latentsync/trepa/utils/data_utils.py b/soundimage/trepa/utils/data_utils.py similarity index 100% rename from latentsync/trepa/utils/data_utils.py rename to soundimage/trepa/utils/data_utils.py diff --git a/latentsync/trepa/utils/metric_utils.py b/soundimage/trepa/utils/metric_utils.py similarity index 100% rename from latentsync/trepa/utils/metric_utils.py rename to soundimage/trepa/utils/metric_utils.py diff --git a/latentsync/utils/affine_transform.py b/soundimage/utils/affine_transform.py similarity index 100% rename from latentsync/utils/affine_transform.py rename to soundimage/utils/affine_transform.py diff --git a/latentsync/utils/audio.py b/soundimage/utils/audio.py similarity index 100% rename from latentsync/utils/audio.py rename to soundimage/utils/audio.py diff --git a/latentsync/utils/av_reader.py b/soundimage/utils/av_reader.py similarity index 100% rename from latentsync/utils/av_reader.py rename to soundimage/utils/av_reader.py diff --git a/latentsync/utils/image_processor.py b/soundimage/utils/image_processor.py similarity index 100% rename from latentsync/utils/image_processor.py rename to soundimage/utils/image_processor.py diff --git a/latentsync/utils/util.py b/soundimage/utils/util.py similarity index 100% rename from latentsync/utils/util.py rename to soundimage/utils/util.py diff --git a/latentsync/whisper/audio2feature.py b/soundimage/whisper/audio2feature.py similarity index 100% rename from latentsync/whisper/audio2feature.py rename to soundimage/whisper/audio2feature.py diff --git a/latentsync/whisper/whisper/__init__.py b/soundimage/whisper/whisper/__init__.py similarity index 100% rename from latentsync/whisper/whisper/__init__.py rename to soundimage/whisper/whisper/__init__.py diff --git a/latentsync/whisper/whisper/__main__.py b/soundimage/whisper/whisper/__main__.py similarity index 100% rename from latentsync/whisper/whisper/__main__.py rename to soundimage/whisper/whisper/__main__.py diff --git a/latentsync/whisper/whisper/assets/gpt2/merges.txt b/soundimage/whisper/whisper/assets/gpt2/merges.txt similarity index 100% rename from latentsync/whisper/whisper/assets/gpt2/merges.txt rename to soundimage/whisper/whisper/assets/gpt2/merges.txt diff --git a/latentsync/whisper/whisper/assets/gpt2/special_tokens_map.json b/soundimage/whisper/whisper/assets/gpt2/special_tokens_map.json similarity index 100% rename from latentsync/whisper/whisper/assets/gpt2/special_tokens_map.json rename to soundimage/whisper/whisper/assets/gpt2/special_tokens_map.json diff --git a/latentsync/whisper/whisper/assets/gpt2/tokenizer_config.json b/soundimage/whisper/whisper/assets/gpt2/tokenizer_config.json similarity index 100% rename from latentsync/whisper/whisper/assets/gpt2/tokenizer_config.json rename to soundimage/whisper/whisper/assets/gpt2/tokenizer_config.json diff --git a/latentsync/whisper/whisper/assets/gpt2/vocab.json b/soundimage/whisper/whisper/assets/gpt2/vocab.json similarity index 100% rename from latentsync/whisper/whisper/assets/gpt2/vocab.json rename to soundimage/whisper/whisper/assets/gpt2/vocab.json diff --git a/latentsync/whisper/whisper/assets/mel_filters.npz b/soundimage/whisper/whisper/assets/mel_filters.npz similarity index 100% rename from latentsync/whisper/whisper/assets/mel_filters.npz rename to soundimage/whisper/whisper/assets/mel_filters.npz diff --git a/latentsync/whisper/whisper/assets/multilingual/added_tokens.json b/soundimage/whisper/whisper/assets/multilingual/added_tokens.json similarity index 100% rename from latentsync/whisper/whisper/assets/multilingual/added_tokens.json rename to soundimage/whisper/whisper/assets/multilingual/added_tokens.json diff --git a/latentsync/whisper/whisper/assets/multilingual/merges.txt b/soundimage/whisper/whisper/assets/multilingual/merges.txt similarity index 100% rename from latentsync/whisper/whisper/assets/multilingual/merges.txt rename to soundimage/whisper/whisper/assets/multilingual/merges.txt diff --git a/latentsync/whisper/whisper/assets/multilingual/special_tokens_map.json b/soundimage/whisper/whisper/assets/multilingual/special_tokens_map.json similarity index 100% rename from latentsync/whisper/whisper/assets/multilingual/special_tokens_map.json rename to soundimage/whisper/whisper/assets/multilingual/special_tokens_map.json diff --git a/latentsync/whisper/whisper/assets/multilingual/tokenizer_config.json b/soundimage/whisper/whisper/assets/multilingual/tokenizer_config.json similarity index 100% rename from latentsync/whisper/whisper/assets/multilingual/tokenizer_config.json rename to soundimage/whisper/whisper/assets/multilingual/tokenizer_config.json diff --git a/latentsync/whisper/whisper/assets/multilingual/vocab.json b/soundimage/whisper/whisper/assets/multilingual/vocab.json similarity index 100% rename from latentsync/whisper/whisper/assets/multilingual/vocab.json rename to soundimage/whisper/whisper/assets/multilingual/vocab.json diff --git a/latentsync/whisper/whisper/audio.py b/soundimage/whisper/whisper/audio.py similarity index 100% rename from latentsync/whisper/whisper/audio.py rename to soundimage/whisper/whisper/audio.py diff --git a/latentsync/whisper/whisper/decoding.py b/soundimage/whisper/whisper/decoding.py similarity index 100% rename from latentsync/whisper/whisper/decoding.py rename to soundimage/whisper/whisper/decoding.py diff --git a/latentsync/whisper/whisper/model.py b/soundimage/whisper/whisper/model.py similarity index 100% rename from latentsync/whisper/whisper/model.py rename to soundimage/whisper/whisper/model.py diff --git a/latentsync/whisper/whisper/normalizers/__init__.py b/soundimage/whisper/whisper/normalizers/__init__.py similarity index 100% rename from latentsync/whisper/whisper/normalizers/__init__.py rename to soundimage/whisper/whisper/normalizers/__init__.py diff --git a/latentsync/whisper/whisper/normalizers/basic.py b/soundimage/whisper/whisper/normalizers/basic.py similarity index 100% rename from latentsync/whisper/whisper/normalizers/basic.py rename to soundimage/whisper/whisper/normalizers/basic.py diff --git a/latentsync/whisper/whisper/normalizers/english.json b/soundimage/whisper/whisper/normalizers/english.json similarity index 100% rename from latentsync/whisper/whisper/normalizers/english.json rename to soundimage/whisper/whisper/normalizers/english.json diff --git a/latentsync/whisper/whisper/normalizers/english.py b/soundimage/whisper/whisper/normalizers/english.py similarity index 100% rename from latentsync/whisper/whisper/normalizers/english.py rename to soundimage/whisper/whisper/normalizers/english.py diff --git a/latentsync/whisper/whisper/tokenizer.py b/soundimage/whisper/whisper/tokenizer.py similarity index 100% rename from latentsync/whisper/whisper/tokenizer.py rename to soundimage/whisper/whisper/tokenizer.py diff --git a/latentsync/whisper/whisper/transcribe.py b/soundimage/whisper/whisper/transcribe.py similarity index 100% rename from latentsync/whisper/whisper/transcribe.py rename to soundimage/whisper/whisper/transcribe.py diff --git a/latentsync/whisper/whisper/utils.py b/soundimage/whisper/whisper/utils.py similarity index 100% rename from latentsync/whisper/whisper/utils.py rename to soundimage/whisper/whisper/utils.py diff --git a/tools/count_videos_time.py b/tools/count_videos_time.py index cb37842d23c13810a8a2905d20672c289b353ffb..6a04f610ec1ef7666383952bc9177a2016d03277 100644 --- a/tools/count_videos_time.py +++ b/tools/count_videos_time.py @@ -13,7 +13,7 @@ # limitations under the License. import matplotlib.pyplot as plt -from latentsync.utils.util import count_video_time, gather_video_paths_recursively +from soundimage.utils.util import count_video_time, gather_video_paths_recursively from tqdm import tqdm diff --git a/tools/write_fileslist.py b/tools/write_fileslist.py index 2132a2468a0a0f28cc841a408546547524e99ee1..849a34f74f07892a26e4537288b59dea5bffe81c 100644 --- a/tools/write_fileslist.py +++ b/tools/write_fileslist.py @@ -13,7 +13,7 @@ # limitations under the License. from tqdm import tqdm -from latentsync.utils.util import gather_video_paths_recursively +from soundimage.utils.util import gather_video_paths_recursively def write_fileslist(fileslist_path): diff --git a/train_syncnet.sh b/train_syncnet.sh deleted file mode 100644 index 7a99ab06efb7dbcf0ebd8d518fb284799b4d8ee7..0000000000000000000000000000000000000000 --- a/train_syncnet.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -torchrun --nnodes=1 --nproc_per_node=1 --master_port=25678 -m scripts.train_syncnet \ - --config_path "configs/syncnet/syncnet_16_pixel.yaml" diff --git a/train_unet.sh b/train_unet.sh deleted file mode 100644 index 28f63442c599064c5083f38287c6bcdfda660fc2..0000000000000000000000000000000000000000 --- a/train_unet.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -torchrun --nnodes=1 --nproc_per_node=1 --master_port=25678 -m scripts.train_unet \ - --unet_config_path "configs/unet/first_stage.yaml"