HoneyTian committed on
Commit
54cd20f
·
1 Parent(s): fe513a7
examples/nx_mpnet/run.sh CHANGED
@@ -3,7 +3,7 @@
3
  : <<'END'
4
 
5
 
6
- sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name nx-mpnet-aishell-20250224 \
7
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
8
  --speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train" \
9
  --max_epochs 100
 
3
  : <<'END'
4
 
5
 
6
+ sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name nx-mpnet-aishell-20250224 \
7
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
8
  --speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train" \
9
  --max_epochs 100
toolbox/torchaudio/models/nx_mpnet/configuration_nx_mpnet.py CHANGED
@@ -11,8 +11,8 @@ class NXMPNetConfig(PretrainedConfig):
11
  sample_rate: int = 8000,
12
  segment_size: int = 16000,
13
  n_fft: int = 512,
14
- win_length: int = 200,
15
- hop_length: int = 80,
16
 
17
  dense_num_blocks: int = 4,
18
  dense_hidden_size: int = 64,
@@ -29,8 +29,8 @@ class NXMPNetConfig(PretrainedConfig):
29
  tsfm_dropout_rate: float = 0.0,
30
  tsfm_max_time_relative_position: int = 1024,
31
  tsfm_max_freq_relative_position: int = 128,
32
- tsfm_chunk_size: int = 4,
33
- tsfm_num_left_chunks: int = 128,
34
  tsfm_num_right_chunks: int = 2,
35
 
36
  discriminator_dim: int = 32,
@@ -51,8 +51,8 @@ class NXMPNetConfig(PretrainedConfig):
51
  self.sample_rate = sample_rate
52
  self.segment_size = segment_size
53
  self.n_fft = n_fft
54
- self.win_length = win_length
55
- self.hop_length = hop_length
56
 
57
  self.dense_num_blocks = dense_num_blocks
58
  self.dense_hidden_size = dense_hidden_size
 
11
  sample_rate: int = 8000,
12
  segment_size: int = 16000,
13
  n_fft: int = 512,
14
+ win_size: int = 200,
15
+ hop_size: int = 80,
16
 
17
  dense_num_blocks: int = 4,
18
  dense_hidden_size: int = 64,
 
29
  tsfm_dropout_rate: float = 0.0,
30
  tsfm_max_time_relative_position: int = 1024,
31
  tsfm_max_freq_relative_position: int = 128,
32
+ tsfm_chunk_size: int = 1,
33
+ tsfm_num_left_chunks: int = 64,
34
  tsfm_num_right_chunks: int = 2,
35
 
36
  discriminator_dim: int = 32,
 
51
  self.sample_rate = sample_rate
52
  self.segment_size = segment_size
53
  self.n_fft = n_fft
54
+ self.win_size = win_size
55
+ self.hop_size = hop_size
56
 
57
  self.dense_num_blocks = dense_num_blocks
58
  self.dense_hidden_size = dense_hidden_size