# voice-clone-app/src/data/download.py
# hengjie yang
# Initial commit: Voice Clone App with Gradio interface
# 9580089
import os
import requests
import tarfile
import zipfile
import shutil
from pathlib import Path
from tqdm import tqdm
import subprocess
def download_file(url: str, target_path: str, *, chunk_size: int = 64 * 1024,
                  timeout: float = 30.0) -> None:
    """Stream ``url`` to ``target_path`` while showing a tqdm progress bar.

    The body is first written to ``<target_path>.part`` and only renamed to
    ``target_path`` once the whole response has been received. An interrupted
    or failed download therefore never leaves a truncated file at
    ``target_path`` (callers in this module skip the download when that file
    already exists, so a truncated file would otherwise be reused).

    Args:
        url: Address of the file to fetch.
        target_path: Destination path of the finished download.
        chunk_size: Number of bytes requested per ``iter_content`` chunk.
        timeout: Value passed to ``requests.get(timeout=...)``.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``response.raise_for_status()``).
        OSError: If the destination cannot be written or renamed.
    """
    partial_path = f"{target_path}.part"
    try:
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail loudly instead of saving an HTML error page as the dataset.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            with open(partial_path, 'wb') as file, tqdm(
                desc="Downloading",
                total=total_size,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                for data in response.iter_content(chunk_size=chunk_size):
                    pbar.update(file.write(data))
        # Publish the file only after the full body has been written.
        os.replace(partial_path, target_path)
    finally:
        # After a successful replace the partial file is gone; on any failure
        # (including Ctrl-C) remove the leftover so it is not mistaken for data.
        if os.path.exists(partial_path):
            os.remove(partial_path)
def download_vctk(target_dir: str = "data/raw") -> None:
    """Download the VCTK corpus and flatten it to one directory per speaker.

    Steps, each skipped when its result is already present:

    1. Download the archive to ``<target_dir>/vctk.zip``.
    2. Extract it into ``target_dir`` (skipped if ``<target_dir>/VCTK-Corpus``
       exists).
    3. Move every sub-directory of ``VCTK-Corpus/wav48`` to
       ``<target_dir>/<speaker>`` unless that destination already exists.
    4. Delete the archive and whatever is left of ``VCTK-Corpus``.

    Because step 4 removes the archive, calling this function again downloads
    the corpus again.

    Args:
        target_dir: Directory that receives the per-speaker folders.

    Raises:
        FileNotFoundError: If ``VCTK-Corpus/wav48`` is missing after
            extraction.
    """
    url = "https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"
    target_dir = Path(target_dir)
    zip_path = target_dir / "vctk.zip"
    extracted_root = target_dir / "VCTK-Corpus"

    # Create the target directory.
    target_dir.mkdir(parents=True, exist_ok=True)

    # Download the dataset.
    if not zip_path.exists():
        print("Downloading VCTK dataset...")
        download_file(url, str(zip_path))

    # Extract the dataset.
    if not extracted_root.exists():
        print("\nExtracting VCTK dataset...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(target_dir)

    # Reorganize the file structure.
    # NOTE(review): the "VCTK-Corpus/wav48" layout is hard-coded; confirm it
    # matches what the 0.92 archive actually contains.
    vctk_dir = extracted_root / "wav48"
    speaker_dirs = sorted(entry for entry in vctk_dir.iterdir() if entry.is_dir())
    for src_dir in tqdm(speaker_dirs, desc="Organizing files"):
        dst_dir = target_dir / src_dir.name
        if not dst_dir.exists():
            # Move rather than copy: the source tree is deleted below anyway,
            # so copying only doubled the disk usage and the I/O.
            shutil.move(str(src_dir), str(dst_dir))

    # Clean up the downloaded archive and the extracted tree.
    if zip_path.exists():
        zip_path.unlink()
    if extracted_root.exists():
        shutil.rmtree(extracted_root)
def download_librispeech(target_dir: str = "data/raw", subset: str = "dev-clean") -> None:
    """Download one LibriSpeech subset and flatten it to one directory per speaker.

    Steps, each skipped when its result is already present:

    1. Download ``<subset>.tar.gz`` to
       ``<target_dir>/librispeech_<subset>.tar.gz``.
    2. Extract it into ``target_dir`` (skipped if ``<target_dir>/LibriSpeech``
       exists).
    3. Move every sub-directory of ``LibriSpeech/<subset>`` to
       ``<target_dir>/libri_<speaker>`` unless that destination already exists.
    4. Delete the archive and whatever is left of ``LibriSpeech``.

    Because step 4 removes the archive, calling this function again downloads
    the subset again.

    Args:
        target_dir: Directory that receives the ``libri_<speaker>`` folders.
        subset: Name of the subset; it is interpolated into the download URL
            and is the directory name looked up under ``LibriSpeech``.

    Raises:
        FileNotFoundError: If ``LibriSpeech/<subset>`` is missing after
            extraction.
    """
    url = f"https://www.openslr.org/resources/12/{subset}.tar.gz"
    target_dir = Path(target_dir)
    tar_path = target_dir / f"librispeech_{subset}.tar.gz"
    extracted_root = target_dir / "LibriSpeech"

    # Create the target directory.
    target_dir.mkdir(parents=True, exist_ok=True)

    # Download the dataset.
    if not tar_path.exists():
        print(f"Downloading LibriSpeech {subset} dataset...")
        download_file(url, str(tar_path))

    # Extract the dataset.
    if not extracted_root.exists():
        print(f"\nExtracting LibriSpeech {subset} dataset...")
        with tarfile.open(tar_path, 'r:gz') as tar:
            if hasattr(tarfile, "data_filter"):
                # The archive comes from the network: refuse members that
                # would escape target_dir (absolute paths, "..", bad links).
                tar.extractall(target_dir, filter="data")
            else:
                # Older interpreters without extraction filters.
                tar.extractall(target_dir)

    # Reorganize the file structure.
    libri_dir = extracted_root / subset
    speaker_dirs = sorted(entry for entry in libri_dir.iterdir() if entry.is_dir())
    for src_dir in tqdm(speaker_dirs, desc="Organizing files"):
        dst_dir = target_dir / f"libri_{src_dir.name}"
        if not dst_dir.exists():
            # Move rather than copy: the source tree is deleted below anyway,
            # so copying only doubled the disk usage and the I/O.
            shutil.move(str(src_dir), str(dst_dir))

    # Clean up the downloaded archive and the extracted tree.
    if tar_path.exists():
        tar_path.unlink()
    if extracted_root.exists():
        shutil.rmtree(extracted_root)
def download_aishell3(target_dir: str = "data/raw"):
    """Print manual-download instructions for the AISHELL-3 dataset.

    Nothing is downloaded; the function only tells the user where to get the
    corpus and where to extract it. (The original author notes that an OpenSLR
    account is needed - not verified here.)

    Args:
        target_dir: Directory the user is asked to extract the dataset into.
    """
    instructions = (
        "AISHELL-3 dataset needs to be downloaded manually from:",
        "https://www.openslr.org/93/",
        f"Please download and extract it to {target_dir}",
    )
    for line in instructions:
        print(line)
if __name__ == "__main__":
    import argparse

    # Command-line entry point: pick a dataset and the directory to put it in.
    cli = argparse.ArgumentParser(description="Download speech datasets")
    cli.add_argument(
        "--dataset",
        type=str,
        choices=["vctk", "librispeech", "aishell3"],
        required=True,
        help="Dataset to download",
    )
    cli.add_argument(
        "--target_dir",
        type=str,
        default="data/raw",
        help="Directory to save the dataset",
    )
    options = cli.parse_args()

    # Any choice other than the two automated datasets falls through to the
    # manual-instructions helper, exactly like the final ``else`` branch did.
    automated = {
        "vctk": download_vctk,
        "librispeech": download_librispeech,
    }
    handler = automated.get(options.dataset, download_aishell3)
    handler(options.target_dir)