HoneyTian committed on
Commit
c6c50f4
·
1 Parent(s): 31b64ca
examples/nx_mpnet/yaml/config.yaml CHANGED
@@ -19,8 +19,8 @@ tsfm_hidden_size: 64
19
  tsfm_attention_heads: 4
20
  tsfm_num_blocks: 4
21
  tsfm_dropout_rate: 0.0
22
- tsfm_max_time_relative_position: 1024
23
- tsfm_max_freq_relative_position: 128
24
  tsfm_chunk_size: 1
25
  tsfm_num_left_chunks: 64
26
  tsfm_num_right_chunks: 2
 
19
  tsfm_attention_heads: 4
20
  tsfm_num_blocks: 4
21
  tsfm_dropout_rate: 0.0
22
+ tsfm_max_time_relative_position: 2048
23
+ tsfm_max_freq_relative_position: 256
24
  tsfm_chunk_size: 1
25
  tsfm_num_left_chunks: 64
26
  tsfm_num_right_chunks: 2
toolbox/torchaudio/models/nx_mpnet/configuration_nx_mpnet.py CHANGED
@@ -27,8 +27,8 @@ class NXMPNetConfig(PretrainedConfig):
27
  tsfm_attention_heads: int = 4,
28
  tsfm_num_blocks: int = 4,
29
  tsfm_dropout_rate: float = 0.0,
30
- tsfm_max_time_relative_position: int = 1024,
31
- tsfm_max_freq_relative_position: int = 128,
32
  tsfm_chunk_size: int = 1,
33
  tsfm_num_left_chunks: int = 64,
34
  tsfm_num_right_chunks: int = 2,
 
27
  tsfm_attention_heads: int = 4,
28
  tsfm_num_blocks: int = 4,
29
  tsfm_dropout_rate: float = 0.0,
30
+ tsfm_max_time_relative_position: int = 2048,
31
+ tsfm_max_freq_relative_position: int = 256,
32
  tsfm_chunk_size: int = 1,
33
  tsfm_num_left_chunks: int = 64,
34
  tsfm_num_right_chunks: int = 2,
toolbox/torchaudio/models/nx_mpnet/transformers/transformers.py CHANGED
@@ -245,8 +245,8 @@ class TSTransformerBlock(nn.Module):
245
  input_dim: int,
246
  dropout_rate: float = 0.1,
247
  n_heads: int = 4,
248
- max_time_relative_position: int = 1024,
249
- max_freq_relative_position: int = 128,
250
  ):
251
  super(TSTransformerBlock, self).__init__()
252
  self.time_transformer = TransformerBlock(input_dim, dropout_rate, n_heads, max_time_relative_position)
@@ -285,8 +285,8 @@ class TSTransformerEncoder(nn.Module):
285
  attention_heads: int = 4,
286
  num_blocks: int = 6,
287
  dropout_rate: float = 0.1,
288
- max_time_relative_position: int = 1024,
289
- max_freq_relative_position: int = 128,
290
  chunk_size: int = 1,
291
  num_left_chunks: int = 128,
292
  num_right_chunks: int = 2,
 
245
  input_dim: int,
246
  dropout_rate: float = 0.1,
247
  n_heads: int = 4,
248
+ max_time_relative_position: int = 2048,
249
+ max_freq_relative_position: int = 256,
250
  ):
251
  super(TSTransformerBlock, self).__init__()
252
  self.time_transformer = TransformerBlock(input_dim, dropout_rate, n_heads, max_time_relative_position)
 
285
  attention_heads: int = 4,
286
  num_blocks: int = 6,
287
  dropout_rate: float = 0.1,
288
+ max_time_relative_position: int = 2048,
289
+ max_freq_relative_position: int = 256,
290
  chunk_size: int = 1,
291
  num_left_chunks: int = 128,
292
  num_right_chunks: int = 2,
toolbox/torchaudio/models/nx_mpnet/yaml/config.yaml CHANGED
@@ -19,8 +19,8 @@ tsfm_hidden_size: 64
19
  tsfm_attention_heads: 4
20
  tsfm_num_blocks: 4
21
  tsfm_dropout_rate: 0.0
22
- tsfm_max_time_relative_position: 1024
23
- tsfm_max_freq_relative_position: 128
24
  tsfm_chunk_size: 1
25
  tsfm_num_left_chunks: 64
26
  tsfm_num_right_chunks: 2
 
19
  tsfm_attention_heads: 4
20
  tsfm_num_blocks: 4
21
  tsfm_dropout_rate: 0.0
22
+ tsfm_max_time_relative_position: 2048
23
+ tsfm_max_freq_relative_position: 256
24
  tsfm_chunk_size: 1
25
  tsfm_num_left_chunks: 64
26
  tsfm_num_right_chunks: 2