积极的屁孩 committed on
Commit
4e8c834
·
1 Parent(s): d202deb
Files changed (1) hide show
  1. app.py +125 -107
app.py CHANGED
@@ -9,19 +9,6 @@ import shutil
9
  from huggingface_hub import snapshot_download, hf_hub_download
10
  import requests
11
  import subprocess
12
- import json
13
-
14
- # 获取当前工作目录的绝对路径
15
- BASE_DIR = os.path.abspath(os.getcwd())
16
-
17
- # 修复相对路径为绝对路径的函数
18
- def fix_path(path):
19
- if path is None:
20
- return None
21
- # 如果是相对路径(以./开头),转换为绝对路径
22
- if isinstance(path, str) and path.startswith('./'):
23
- return os.path.join(BASE_DIR, path[2:])
24
- return path
25
 
26
  # 检查并安装必要的依赖
27
  def install_dependencies():
@@ -113,33 +100,6 @@ def download_amphion_code():
113
  if not os.path.exists(local_path):
114
  with open(local_path, "w") as f:
115
  f.write("# Placeholder file\n")
116
-
117
- # 下载特殊文件:hubert_large_l18_mean_std.npz
118
- try:
119
- # 确保目录存在
120
- os.makedirs(os.path.join(os.getcwd(), "models/vc/vevo/config"), exist_ok=True)
121
-
122
- # 尝试从HuggingFace下载
123
- try:
124
- hf_hub_download(
125
- repo_id=REPO_ID,
126
- filename="tokenizer/vq8192/hubert_large_l18_mean_std.npz",
127
- cache_dir=CACHE_DIR,
128
- local_dir=os.path.join(os.getcwd(), "models/vc/vevo/config"),
129
- local_dir_use_symlinks=False
130
- )
131
- print("成功下载: hubert_large_l18_mean_std.npz")
132
- except Exception as e:
133
- print(f"无法从HuggingFace下载hubert_large_l18_mean_std.npz: {str(e)}")
134
- # 尝试从GitHub下载
135
- hubert_url = "https://huggingface.co/amphion/Vevo/resolve/main/tokenizer/vq8192/hubert_large_l18_mean_std.npz"
136
- response = requests.get(hubert_url)
137
- if response.status_code == 200:
138
- with open(os.path.join(os.getcwd(), "models/vc/vevo/config/hubert_large_l18_mean_std.npz"), "wb") as f:
139
- f.write(response.content)
140
- print("成功从HuggingFace下载: hubert_large_l18_mean_std.npz")
141
- except Exception as e:
142
- print(f"下载hubert_large_l18_mean_std.npz时出错: {str(e)}")
143
 
144
  # 先下载必要的代码文件
145
  download_amphion_code()
@@ -199,10 +159,10 @@ class VevoGradioApp:
199
  self.pipelines = {}
200
  # 配置文件路径
201
  self.config_paths = {
202
- "vq32tovq8192": fix_path("./models/vc/vevo/config/Vq32ToVq8192.json"),
203
- "vq8192tomels": fix_path("./models/vc/vevo/config/Vq8192ToMels.json"),
204
- "phonetovq8192": fix_path("./models/vc/vevo/config/PhoneToVq8192.json"),
205
- "vocoder": fix_path("./models/vc/vevo/config/Vocoder.json")
206
  }
207
 
208
  # 确保配置文件存在
@@ -210,8 +170,7 @@ class VevoGradioApp:
210
 
211
  def download_configs(self):
212
  """下载必要的配置文件"""
213
- config_dir = fix_path("./models/vc/vevo/config")
214
- os.makedirs(config_dir, exist_ok=True)
215
  config_files = {
216
  "Vq32ToVq8192.json": "https://raw.githubusercontent.com/open-mmlab/Amphion/main/models/vc/vevo/config/Vq32ToVq8192.json",
217
  "Vq8192ToMels.json": "https://raw.githubusercontent.com/open-mmlab/Amphion/main/models/vc/vevo/config/Vq8192ToMels.json",
@@ -219,8 +178,13 @@ class VevoGradioApp:
219
  "Vocoder.json": "https://raw.githubusercontent.com/open-mmlab/Amphion/main/models/vc/vevo/config/Vocoder.json"
220
  }
221
 
 
 
 
 
 
222
  for filename, url in config_files.items():
223
- target_path = os.path.join(config_dir, filename)
224
  if not os.path.exists(target_path):
225
  try:
226
  response = requests.get(url)
@@ -238,36 +202,81 @@ class VevoGradioApp:
238
  with open(target_path, 'w') as f:
239
  f.write('{}')
240
  print(f"无法下载配置文件 {filename},已创建占位符")
241
-
242
- # 修改配置文件中的相对路径
243
- self.update_config_paths()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- def update_config_paths(self):
246
- """修改配置文件中的相对路径"""
247
  try:
248
- # 修改Vq8192ToMels.json文件中的路径引用
249
- vq8192_path = self.config_paths["vq8192tomels"]
250
- if os.path.exists(vq8192_path):
251
- with open(vq8192_path, 'r') as f:
252
- config = json.load(f)
253
-
254
- # 修改hubert_large_l18_mean_std.npz路径
255
- if "model" in config and "representation_stat_mean_var_path" in config["model"]:
256
- # 替换为绝对路径
257
- hubert_stat_path = fix_path("./models/vc/vevo/config/hubert_large_l18_mean_std.npz")
258
- config["model"]["representation_stat_mean_var_path"] = hubert_stat_path
259
 
260
- # 保存修改后的配置
261
- with open(vq8192_path, 'w') as f:
262
- json.dump(config, f, indent=2)
263
- print("成功更新配置文件中的路径")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  except Exception as e:
265
- print(f"更新配置文件路径时出错: {str(e)}")
266
 
267
  def init_voice_conversion_pipeline(self):
268
  """初始化语音转换管道"""
269
  if "voice" not in self.pipelines:
270
  try:
 
 
 
 
 
 
 
 
271
  # 内容标记器
272
  local_dir = snapshot_download(
273
  repo_id=REPO_ID,
@@ -315,31 +324,43 @@ class VevoGradioApp:
315
  )
316
  vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  # 创建推理管道
319
  self.pipelines["voice"] = VevoInferencePipeline(
320
  content_tokenizer_ckpt_path=content_tokenizer_ckpt_path,
321
  content_style_tokenizer_ckpt_path=content_style_tokenizer_ckpt_path,
322
- ar_cfg_path=self.config_paths["vq32tovq8192"],
323
  ar_ckpt_path=ar_ckpt_path,
324
- fmt_cfg_path=self.config_paths["vq8192tomels"],
325
  fmt_ckpt_path=fmt_ckpt_path,
326
- vocoder_cfg_path=self.config_paths["vocoder"],
327
  vocoder_ckpt_path=vocoder_ckpt_path,
328
  device=self.device,
329
  )
330
  except Exception as e:
331
  print(f"初始化语音转换管道时出错: {str(e)}")
332
  # 创建一个占位符管道
333
- try:
334
- # 尝试提供必要的配置文件
335
- self.pipelines["voice"] = VevoInferencePipeline(
336
- device=self.device,
337
- fmt_cfg_path=self.config_paths["vq8192tomels"],
338
- vocoder_cfg_path=self.config_paths["vocoder"],
339
- )
340
- except:
341
- # 如果还是失败,创建最简单的管道
342
- self.pipelines["voice"] = VevoInferencePipeline(device=self.device)
343
 
344
  return self.pipelines["voice"]
345
 
@@ -347,6 +368,14 @@ class VevoGradioApp:
347
  """初始化音色转换管道"""
348
  if "timbre" not in self.pipelines:
349
  try:
 
 
 
 
 
 
 
 
350
  # 内容-风格标记器
351
  local_dir = snapshot_download(
352
  repo_id=REPO_ID,
@@ -377,25 +406,16 @@ class VevoGradioApp:
377
  # 创建推理管道
378
  self.pipelines["timbre"] = VevoInferencePipeline(
379
  content_style_tokenizer_ckpt_path=tokenizer_ckpt_path,
380
- fmt_cfg_path=self.config_paths["vq8192tomels"],
381
  fmt_ckpt_path=fmt_ckpt_path,
382
- vocoder_cfg_path=self.config_paths["vocoder"],
383
  vocoder_ckpt_path=vocoder_ckpt_path,
384
  device=self.device,
385
  )
386
  except Exception as e:
387
  print(f"初始化音色转换管道时出错: {str(e)}")
388
  # 创建一个占位符管道
389
- try:
390
- # 尝试提供必要的配置文件
391
- self.pipelines["timbre"] = VevoInferencePipeline(
392
- device=self.device,
393
- fmt_cfg_path=self.config_paths["vq8192tomels"],
394
- vocoder_cfg_path=self.config_paths["vocoder"],
395
- )
396
- except:
397
- # 如果还是失败,创建最简单的管道
398
- self.pipelines["timbre"] = VevoInferencePipeline(device=self.device)
399
 
400
  return self.pipelines["timbre"]
401
 
@@ -403,6 +423,14 @@ class VevoGradioApp:
403
  """初始化文本转语音管道"""
404
  if "tts" not in self.pipelines:
405
  try:
 
 
 
 
 
 
 
 
406
  # 内容-风格标记器
407
  local_dir = snapshot_download(
408
  repo_id=REPO_ID,
@@ -442,28 +470,18 @@ class VevoGradioApp:
442
  # 创建推理管道
443
  self.pipelines["tts"] = VevoInferencePipeline(
444
  content_style_tokenizer_ckpt_path=content_style_tokenizer_ckpt_path,
445
- ar_cfg_path=self.config_paths["phonetovq8192"],
446
  ar_ckpt_path=ar_ckpt_path,
447
- fmt_cfg_path=self.config_paths["vq8192tomels"],
448
  fmt_ckpt_path=fmt_ckpt_path,
449
- vocoder_cfg_path=self.config_paths["vocoder"],
450
  vocoder_ckpt_path=vocoder_ckpt_path,
451
  device=self.device,
452
  )
453
  except Exception as e:
454
  print(f"初始化TTS管道时出错: {str(e)}")
455
  # 创建一个占位符管道
456
- try:
457
- # 尝试提供必要的配置文件
458
- self.pipelines["tts"] = VevoInferencePipeline(
459
- device=self.device,
460
- fmt_cfg_path=self.config_paths["vq8192tomels"],
461
- vocoder_cfg_path=self.config_paths["vocoder"],
462
- ar_cfg_path=self.config_paths["phonetovq8192"],
463
- )
464
- except:
465
- # 如果还是失败,创建最简单的管道
466
- self.pipelines["tts"] = VevoInferencePipeline(device=self.device)
467
 
468
  return self.pipelines["tts"]
469
 
 
9
  from huggingface_hub import snapshot_download, hf_hub_download
10
  import requests
11
  import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # 检查并安装必要的依赖
14
  def install_dependencies():
 
100
  if not os.path.exists(local_path):
101
  with open(local_path, "w") as f:
102
  f.write("# Placeholder file\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  # 先下载必要的代码文件
105
  download_amphion_code()
 
159
  self.pipelines = {}
160
  # 配置文件路径
161
  self.config_paths = {
162
+ "vq32tovq8192": "./models/vc/vevo/config/Vq32ToVq8192.json",
163
+ "vq8192tomels": "./models/vc/vevo/config/Vq8192ToMels.json",
164
+ "phonetovq8192": "./models/vc/vevo/config/PhoneToVq8192.json",
165
+ "vocoder": "./models/vc/vevo/config/Vocoder.json"
166
  }
167
 
168
  # 确保配置文件存在
 
170
 
171
  def download_configs(self):
172
  """下载必要的配置文件"""
173
+ os.makedirs("./models/vc/vevo/config", exist_ok=True)
 
174
  config_files = {
175
  "Vq32ToVq8192.json": "https://raw.githubusercontent.com/open-mmlab/Amphion/main/models/vc/vevo/config/Vq32ToVq8192.json",
176
  "Vq8192ToMels.json": "https://raw.githubusercontent.com/open-mmlab/Amphion/main/models/vc/vevo/config/Vq8192ToMels.json",
 
178
  "Vocoder.json": "https://raw.githubusercontent.com/open-mmlab/Amphion/main/models/vc/vevo/config/Vocoder.json"
179
  }
180
 
181
+ # 额外下载必要的统计文件
182
+ stat_files = {
183
+ "hubert_large_l18_mean_std.npz": "https://huggingface.co/amphion/Vevo/resolve/main/tokenizer/vq32/hubert_large_l18_mean_std.npz"
184
+ }
185
+
186
  for filename, url in config_files.items():
187
+ target_path = f"./models/vc/vevo/config/{filename}"
188
  if not os.path.exists(target_path):
189
  try:
190
  response = requests.get(url)
 
202
  with open(target_path, 'w') as f:
203
  f.write('{}')
204
  print(f"无法下载配置文件 {filename},已创建占位符")
205
+
206
+ # 下载统计文件
207
+ for filename, url in stat_files.items():
208
+ # 同时支持两个位置:配置目录和标准位置
209
+ target_paths = [
210
+ f"./models/vc/vevo/config/{filename}", # 配置文件夹中
211
+ f"./tokenizer/vq32/{filename}" # HuggingFace仓库标准位置
212
+ ]
213
+
214
+ # 确保目录存在
215
+ for target_path in target_paths:
216
+ os.makedirs(os.path.dirname(target_path), exist_ok=True)
217
+
218
+ if not os.path.exists(target_path):
219
+ try:
220
+ response = requests.get(url)
221
+ if response.status_code == 200:
222
+ with open(target_path, "wb") as f:
223
+ f.write(response.content)
224
+ print(f"成功下载统计文件到: {target_path}")
225
+ else:
226
+ print(f"无法下载统计文件 {filename} 到 {target_path}, 状态码: {response.status_code}")
227
+ except Exception as e:
228
+ print(f"下载统计文件 {filename} 到 {target_path} 时出错: {str(e)}")
229
+
230
+ # 修复配置文件中的路径
231
+ self.fix_config_paths()
232
 
233
def fix_config_paths(self):
    """Rewrite relative statistics-file references in the config files.

    Every existing file listed in ``self.config_paths`` is scanned for
    ``"representation_stat_mean_var_path": "<value>"`` entries.  When
    ``<value>`` is one of the known relative locations of
    ``hubert_large_l18_mean_std.npz`` (``models/vc/vevo/config/`` or
    ``tokenizer/vq32/``, with or without a leading ``./``), it is replaced
    by the absolute path of the copy under ``models/vc/vevo/config`` in the
    current working directory.  Any other value is left untouched.

    The edit is done on the raw text so the rest of the file stays
    byte-identical.  A file is only rewritten when its content actually
    changed.  This is best-effort: read/write failures are printed and the
    remaining files are still processed; nothing is raised.
    """
    # Imported locally so this method does not depend on the module-level
    # import block.
    import json
    import re

    stat_name = "hubert_large_l18_mean_std.npz"
    known_relative = {
        "models/vc/vevo/config/" + stat_name,
        "tokenizer/vq32/" + stat_name,
    }
    stat_file_path = os.path.join(
        os.path.abspath(os.getcwd()),
        "models", "vc", "vevo", "config", stat_name,
    )
    # json.dumps escapes backslashes/quotes so the emitted value is always a
    # valid JSON string literal, whatever the working directory looks like.
    quoted_stat_path = json.dumps(stat_file_path, ensure_ascii=False)

    # Tolerates any whitespace around the colon, unlike an exact-text match.
    entry_pattern = re.compile(
        r'("representation_stat_mean_var_path"\s*:\s*)"((?:[^"\\]|\\.)*)"'
    )

    def _absolutize(match):
        value = match.group(2)
        if value.startswith("./"):
            value = value[2:]
        if value in known_relative:
            # A callable replacement is inserted literally, so backslashes
            # in the path are not reinterpreted as regex escapes.
            return match.group(1) + quoted_stat_path
        return match.group(0)

    for config_path in self.config_paths.values():
        if not config_path or not os.path.isfile(config_path):
            continue
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                config_data = f.read()

            fixed_data = entry_pattern.sub(_absolutize, config_data)
            if fixed_data == config_data:
                continue  # nothing to fix; leave the file untouched

            with open(config_path, "w", encoding="utf-8") as f:
                f.write(fixed_data)
            print(f"已修复配置文件路径: {config_path}")
        except (OSError, UnicodeError) as e:
            # Keep going: one bad file must not block the others.
            print(f"修复配置文件路径时出错: {str(e)}")
267
 
268
  def init_voice_conversion_pipeline(self):
269
  """初始化语音转换管道"""
270
  if "voice" not in self.pipelines:
271
  try:
272
+ # 确保配置文件路径是绝对路径
273
+ absolute_config_paths = {}
274
+ for key, path in self.config_paths.items():
275
+ if path and not os.path.isabs(path):
276
+ absolute_config_paths[key] = os.path.abspath(path)
277
+ else:
278
+ absolute_config_paths[key] = path
279
+
280
  # 内容标记器
281
  local_dir = snapshot_download(
282
  repo_id=REPO_ID,
 
324
  )
325
  vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
326
 
327
+ # 确保统计文件存在
328
+ possible_stat_file_paths = [
329
+ os.path.join(os.getcwd(), "models/vc/vevo/config/hubert_large_l18_mean_std.npz"),
330
+ os.path.join(os.getcwd(), "tokenizer/vq32/hubert_large_l18_mean_std.npz")
331
+ ]
332
+
333
+ # 检查是否有任一路径存在
334
+ stat_file_exists = any(os.path.exists(path) for path in possible_stat_file_paths)
335
+
336
+ if not stat_file_exists:
337
+ print(f"警告: 找不到统计文件,将尝试创建空文件")
338
+ try:
339
+ import numpy as np
340
+ # 在两个位置都创建一个简单的统计文件
341
+ for stat_path in possible_stat_file_paths:
342
+ os.makedirs(os.path.dirname(stat_path), exist_ok=True)
343
+ np.savez(stat_path, mean=np.zeros(1024), std=np.ones(1024))
344
+ print(f"已创建占位符统计文件: {stat_path}")
345
+ except Exception as e:
346
+ print(f"创建统计文件时出错: {str(e)}")
347
+
348
  # 创建推理管道
349
  self.pipelines["voice"] = VevoInferencePipeline(
350
  content_tokenizer_ckpt_path=content_tokenizer_ckpt_path,
351
  content_style_tokenizer_ckpt_path=content_style_tokenizer_ckpt_path,
352
+ ar_cfg_path=absolute_config_paths["vq32tovq8192"],
353
  ar_ckpt_path=ar_ckpt_path,
354
+ fmt_cfg_path=absolute_config_paths["vq8192tomels"],
355
  fmt_ckpt_path=fmt_ckpt_path,
356
+ vocoder_cfg_path=absolute_config_paths["vocoder"],
357
  vocoder_ckpt_path=vocoder_ckpt_path,
358
  device=self.device,
359
  )
360
  except Exception as e:
361
  print(f"初始化语音转换管道时出错: {str(e)}")
362
  # 创建一个占位符管道
363
+ self.pipelines["voice"] = VevoInferencePipeline(device=self.device)
 
 
 
 
 
 
 
 
 
364
 
365
  return self.pipelines["voice"]
366
 
 
368
  """初始化音色转换管道"""
369
  if "timbre" not in self.pipelines:
370
  try:
371
+ # 确保配置文件路径是绝对路径
372
+ absolute_config_paths = {}
373
+ for key, path in self.config_paths.items():
374
+ if path and not os.path.isabs(path):
375
+ absolute_config_paths[key] = os.path.abspath(path)
376
+ else:
377
+ absolute_config_paths[key] = path
378
+
379
  # 内容-风格标记器
380
  local_dir = snapshot_download(
381
  repo_id=REPO_ID,
 
406
  # 创建推理管道
407
  self.pipelines["timbre"] = VevoInferencePipeline(
408
  content_style_tokenizer_ckpt_path=tokenizer_ckpt_path,
409
+ fmt_cfg_path=absolute_config_paths["vq8192tomels"],
410
  fmt_ckpt_path=fmt_ckpt_path,
411
+ vocoder_cfg_path=absolute_config_paths["vocoder"],
412
  vocoder_ckpt_path=vocoder_ckpt_path,
413
  device=self.device,
414
  )
415
  except Exception as e:
416
  print(f"初始化音色转换管道时出错: {str(e)}")
417
  # 创建一个占位符管道
418
+ self.pipelines["timbre"] = VevoInferencePipeline(device=self.device)
 
 
 
 
 
 
 
 
 
419
 
420
  return self.pipelines["timbre"]
421
 
 
423
  """初始化文本转语音管道"""
424
  if "tts" not in self.pipelines:
425
  try:
426
+ # 确保配置文件路径是绝对路径
427
+ absolute_config_paths = {}
428
+ for key, path in self.config_paths.items():
429
+ if path and not os.path.isabs(path):
430
+ absolute_config_paths[key] = os.path.abspath(path)
431
+ else:
432
+ absolute_config_paths[key] = path
433
+
434
  # 内容-风格标记器
435
  local_dir = snapshot_download(
436
  repo_id=REPO_ID,
 
470
  # 创建推理管道
471
  self.pipelines["tts"] = VevoInferencePipeline(
472
  content_style_tokenizer_ckpt_path=content_style_tokenizer_ckpt_path,
473
+ ar_cfg_path=absolute_config_paths["phonetovq8192"],
474
  ar_ckpt_path=ar_ckpt_path,
475
+ fmt_cfg_path=absolute_config_paths["vq8192tomels"],
476
  fmt_ckpt_path=fmt_ckpt_path,
477
+ vocoder_cfg_path=absolute_config_paths["vocoder"],
478
  vocoder_ckpt_path=vocoder_ckpt_path,
479
  device=self.device,
480
  )
481
  except Exception as e:
482
  print(f"初始化TTS管道时出错: {str(e)}")
483
  # 创建一个占位符管道
484
+ self.pipelines["tts"] = VevoInferencePipeline(device=self.device)
 
 
 
 
 
 
 
 
 
 
485
 
486
  return self.pipelines["tts"]
487