buchi-stdesign committed on
Commit
041ba86
·
verified ·
1 Parent(s): 61f8b10

Update src/sbv2/synthesizer_trn.py

Browse files
Files changed (1) hide show
  1. src/sbv2/synthesizer_trn.py +70 -109
src/sbv2/synthesizer_trn.py CHANGED
@@ -1,109 +1,70 @@
1
- # 完全新規構成 synthesize_trn.py
2
- # (config.jsonにstrict対応)
3
-
4
- import torch
5
- import torch.nn as nn
6
-
7
- from src.sbv2.generator import Generator
8
- from src.sbv2.posterior_encoder import PosteriorEncoder
9
- from src.sbv2.flow import ResidualCouplingBlock
10
- from src.sbv2.flow import Flip
11
- from src.sbv2.duration_predictor import DurationPredictor
12
- from src.sbv2.stochastic_duration_predictor import StochasticDurationPredictor
13
-
14
# Pre-change version of the class: every line of this definition is marked as
# removed ("-") in the diff of commit 041ba86.
class SynthesizerTrn(nn.Module):
    """Synthesizer container that records its configuration and builds sub-modules.

    Every constructor argument is stored on the instance under an attribute of
    the same name.  The constructor then instantiates the posterior encoder,
    the generator, the flow block, a ``Flip`` layer and the two duration
    predictors.  ``forward`` and ``infer`` are placeholders that always raise
    ``NotImplementedError``.
    """

    def __init__(
        self,
        n_vocab,
        spec_channels,
        inter_channels,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size,
        p_dropout,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        segment_size,
        gin_channels,
        out_channels,
        dec_kernel_size,
        enc_channels,
        enc_out_channels,
        enc_kernel_size,
        enc_dilation_rate,
        enc_n_layers,
        flow_hidden_channels,
        flow_kernel_size,
        flow_n_layers,
        flow_n_flows,
        sdp_hidden_channels,
        sdp_kernel_size,
        sdp_n_layers,
        sdp_dropout,
        sampling_rate,
        filter_length,
        hop_length,
        win_length,
    ):
        """Store all hyper-parameters and construct the sub-modules.

        All parameters are required and are kept verbatim as instance
        attributes.  Only a subset is forwarded to the sub-module
        constructors below; the rest are stored but not otherwise read here.
        """
        super().__init__()

        # Keep the full configuration on the instance.
        self.n_vocab = n_vocab
        self.spec_channels = spec_channels
        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.segment_size = segment_size
        self.gin_channels = gin_channels
        self.out_channels = out_channels
        self.dec_kernel_size = dec_kernel_size
        self.enc_channels = enc_channels
        self.enc_out_channels = enc_out_channels
        self.enc_kernel_size = enc_kernel_size
        self.enc_dilation_rate = enc_dilation_rate
        self.enc_n_layers = enc_n_layers
        self.flow_hidden_channels = flow_hidden_channels
        self.flow_kernel_size = flow_kernel_size
        self.flow_n_layers = flow_n_layers
        self.flow_n_flows = flow_n_flows
        self.sdp_hidden_channels = sdp_hidden_channels
        self.sdp_kernel_size = sdp_kernel_size
        self.sdp_n_layers = sdp_n_layers
        self.sdp_dropout = sdp_dropout
        self.sampling_rate = sampling_rate
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length

        # Network modules
        self.enc_p = PosteriorEncoder(
            spec_channels,
            inter_channels,
            hidden_channels,
            kernel_size,
            enc_dilation_rate,
            int(enc_n_layers),  # coerced to int before being passed on
        )
        self.decoder = Generator(upsample_rates, upsample_initial_channel)
        self.flow = ResidualCouplingBlock(
            inter_channels,
            flow_hidden_channels,
            flow_kernel_size,
            flow_n_layers,
        )
        self.flow_post = Flip()
        self.dp = DurationPredictor(
            inter_channels, filter_channels, kernel_size, p_dropout
        )
        self.sdp = StochasticDurationPredictor(
            inter_channels, filter_channels, kernel_size, p_dropout
        )

    def forward(self, *args, **kwargs):
        """Training entry point; not implemented, always raises."""
        raise NotImplementedError("Training用 forwardは未実装です")

    def infer(self, *args, **kwargs):
        """Inference entry point; not implemented, always raises."""
        raise NotImplementedError("推論用 inferは未実装です")
 
1
+ import torch.nn as nn
2
+ from src.sbv2.generator import Generator
3
+ from src.sbv2.posterior_encoder import PosteriorEncoder
4
+
5
# Post-change version of the class: every line of this definition is marked as
# added ("+") in the diff of commit 041ba86.
class SynthesizerTrn(nn.Module):
    """Synthesizer that wires a posterior encoder and a generator (decoder).

    Only ``enc_p`` and ``decoder`` are built here; the remaining layers are
    omitted in this version.  ``infer`` is a provisional implementation that
    simply runs the decoder on its input.
    """

    def __init__(
        self,
        n_vocab,
        p_dropout,
        segment_size,
        inter_channels,
        out_channels,
        hidden_channels,
        filter_channels,
        dec_kernel_size,
        enc_channels,
        enc_out_channels,
        enc_kernel_size,
        enc_dilation_rate,
        enc_n_layers,
        flow_hidden_channels,
        flow_kernel_size,
        flow_n_layers,
        flow_n_flows,
        sdp_hidden_channels,
        sdp_kernel_size,
        sdp_n_layers,
        sdp_dropout,
        sampling_rate,
        filter_length,
        hop_length,
        win_length,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        gin_channels,
    ):
        """Build the posterior encoder and the generator.

        All parameters are required.  The ``enc_*`` arguments and
        ``hidden_channels`` configure the posterior encoder; the
        ``upsample_*`` / ``resblock*`` arguments together with
        ``inter_channels``, ``out_channels`` and ``sampling_rate`` configure
        the generator.

        The following arguments are accepted but not read by this
        constructor: ``n_vocab``, ``p_dropout``, ``segment_size``,
        ``filter_channels``, ``dec_kernel_size``, the ``flow_*`` and
        ``sdp_*`` arguments, ``filter_length``, ``hop_length``,
        ``win_length`` and ``gin_channels``.
        """
        super().__init__()

        # PosteriorEncoder
        self.enc_p = PosteriorEncoder(
            in_channels=enc_channels,
            out_channels=enc_out_channels,
            hidden_channels=hidden_channels,
            kernel_size=enc_kernel_size,
            dilation_rate=enc_dilation_rate,
            n_layers=enc_n_layers,
        )

        # Generator (Decoder)
        self.decoder = Generator(
            upsample_rates=upsample_rates,
            upsample_initial_channel=upsample_initial_channel,
            resblock_kernel_sizes=resblock_kernel_sizes,
            resblock_dilation_sizes=resblock_dilation_sizes,
            resblock=resblock,
            upsample_kernel_sizes=upsample_kernel_sizes,
            inter_channels=inter_channels,
            out_channels=out_channels,
            sampling_rate=sampling_rate,
        )

        # Other layers (omitted here)

    def infer(self, x, noise_scale=0.667, noise_scale_w=0.8, length_scale=1.0):
        """Run the decoder on ``x`` and return ``(decoder_output, None)``.

        ``noise_scale``, ``noise_scale_w`` and ``length_scale`` are accepted
        but currently unused; the second tuple element is always ``None``.
        """
        # Provisional inference implementation (adjust as needed)
        return self.decoder(x), None