Update app.py
app.py CHANGED
@@ -17,55 +17,45 @@ class SpriteGenerator(nn.Module):
 
         # Projection from the text to the latent space
         self.text_projection = nn.Sequential(
-            nn.Linear(
+            nn.Linear(768, latent_dim),                      # 768 -> 512
             nn.LeakyReLU(0.2),
-            nn.Linear(latent_dim, latent_dim)
+            nn.Linear(latent_dim, latent_dim)                # 512 -> 512
         )
 
-        # Generator
+        # Generator
         self.generator = nn.Sequential(
-            #
-            nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0, bias=False),
+            # Initial block: latent_dim x 1 x 1 -> 512 x 4 x 4
+            nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0, bias=False),
             nn.BatchNorm2d(512),
             nn.ReLU(True),
 
-
+            # 512 x 4 x 4 -> 256 x 8 x 8
+            nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False),
             nn.BatchNorm2d(256),
             nn.ReLU(True),
 
-
+            # 256 x 8 x 8 -> 128 x 16 x 16
+            nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),
             nn.BatchNorm2d(128),
             nn.ReLU(True),
 
-
+            # 128 x 16 x 16 -> 64 x 32 x 32
+            nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),
             nn.BatchNorm2d(64),
             nn.ReLU(True),
 
-
+            # 64 x 32 x 32 -> 32 x 64 x 64
+            nn.ConvTranspose2d(64, 32, 4, 2, 1, bias=False),
             nn.BatchNorm2d(32),
             nn.ReLU(True),
 
-
+            # 32 x 64 x 64 -> 16 x 128 x 128
+            nn.ConvTranspose2d(32, 16, 4, 2, 1, bias=False),
             nn.BatchNorm2d(16),
             nn.ReLU(True),
 
-
-            nn.
-            nn.ReLU(True),
-
-            nn.ConvTranspose2d(16, 3, 4, 2, 1, bias=False),  # -> 3 x 512 x 512
-            nn.BatchNorm2d(3),
-            nn.ReLU(True),
-
-            nn.Conv2d(3, 3, 3, 1, 1, bias=True),             # First refinement convolution
-            nn.BatchNorm2d(3),
-            nn.ReLU(True),
-
-            nn.Conv2d(3, 3, 3, 1, 1, bias=True),             # Second refinement convolution
-            nn.BatchNorm2d(3),
-            nn.ReLU(True),
-
-            nn.Conv2d(3, 3, 3, 1, 1, bias=True)              # Final convolution
+            # 16 x 128 x 128 -> 3 x 256 x 256
+            nn.ConvTranspose2d(16, 3, 4, 2, 1, bias=False),
         )
 
         # Frame interpolator

@@ -105,7 +95,7 @@ class SpriteGenerator(nn.Module):
             # Generate frame
             frame_latent_reshaped = frame_latent.unsqueeze(2).unsqueeze(3)
             frame = self.generator(frame_latent_reshaped)
-            #
+            # Normalize the output
             frame = torch.tanh(frame)
             all_frames.append(frame)
 
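
For reference, a minimal shape-check sketch of the updated stack, assuming latent_dim = 512 (as the inline "768 -> 512" comments suggest) and a 768-dimensional text embedding; the standalone variables below are illustrative and not part of app.py:

import torch
import torch.nn as nn

latent_dim = 512  # assumption; matches the "768 -> 512" comments in the diff

# Same layer sequence as the updated self.generator
generator = nn.Sequential(
    nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0, bias=False),  # latent_dim x 1 x 1 -> 512 x 4 x 4
    nn.BatchNorm2d(512), nn.ReLU(True),
    nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False),         # -> 256 x 8 x 8
    nn.BatchNorm2d(256), nn.ReLU(True),
    nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),         # -> 128 x 16 x 16
    nn.BatchNorm2d(128), nn.ReLU(True),
    nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),          # -> 64 x 32 x 32
    nn.BatchNorm2d(64), nn.ReLU(True),
    nn.ConvTranspose2d(64, 32, 4, 2, 1, bias=False),           # -> 32 x 64 x 64
    nn.BatchNorm2d(32), nn.ReLU(True),
    nn.ConvTranspose2d(32, 16, 4, 2, 1, bias=False),           # -> 16 x 128 x 128
    nn.BatchNorm2d(16), nn.ReLU(True),
    nn.ConvTranspose2d(16, 3, 4, 2, 1, bias=False),            # -> 3 x 256 x 256
)

# Same layer sequence as the updated self.text_projection
text_projection = nn.Sequential(
    nn.Linear(768, latent_dim),        # 768-dim text embedding -> latent_dim
    nn.LeakyReLU(0.2),
    nn.Linear(latent_dim, latent_dim),
)

text_embedding = torch.randn(2, 768)                        # dummy batch of 2 text embeddings
frame_latent = text_projection(text_embedding)              # (2, latent_dim)
frame = generator(frame_latent.unsqueeze(2).unsqueeze(3))   # (2, latent_dim, 1, 1) -> (2, 3, 256, 256)
frame = torch.tanh(frame)                                   # output normalized to [-1, 1], as in forward()
print(frame.shape)                                          # torch.Size([2, 3, 256, 256])

Compared with the previous version, the generator now stops at 3 x 256 x 256 instead of 3 x 512 x 512 and drops the trailing BatchNorm/ReLU and 3x3 refinement convolutions, leaving torch.tanh in forward() as the only operation after the last transposed convolution.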