Spaces:

Lod34
/

Animator2D-v2

Running

App Files Community

Lod34 committed on Mar 5

Commit

91263a9

verified ·

1 Parent(s): dc498fc

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -28

app.py CHANGED Viewed

@@ -17,55 +17,45 @@ class SpriteGenerator(nn.Module):
         # Proiezione dal testo al latent space
         self.text_projection = nn.Sequential(
-            nn.Linear(self.text_encoder.config.d_model, latent_dim),
             nn.LeakyReLU(0.2),
-            nn.Linear(latent_dim, latent_dim)
         )
-        # Generator modificato per corrispondere esattamente ai pesi salvati
         self.generator = nn.Sequential(
-            # Input: latent_dim x 1 x 1
-            nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0, bias=False),  # -> 512 x 4 x 4
             nn.BatchNorm2d(512),
             nn.ReLU(True),
-            nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False),  # -> 256 x 8 x 8
             nn.BatchNorm2d(256),
             nn.ReLU(True),
-            nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),  # -> 128 x 16 x 16
             nn.BatchNorm2d(128),
             nn.ReLU(True),
-            nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),  # -> 64 x 32 x 32
             nn.BatchNorm2d(64),
             nn.ReLU(True),
-            nn.ConvTranspose2d(64, 32, 4, 2, 1, bias=False),  # -> 32 x 64 x 64
             nn.BatchNorm2d(32),
             nn.ReLU(True),
-            nn.ConvTranspose2d(32, 16, 4, 2, 1, bias=False),  # -> 16 x 128 x 128
             nn.BatchNorm2d(16),
             nn.ReLU(True),
-            nn.ConvTranspose2d(16, 16, 4, 2, 1, bias=False),  # -> 16 x 256 x 256
-            nn.BatchNorm2d(16),
-            nn.ReLU(True),
-            nn.ConvTranspose2d(16, 3, 4, 2, 1, bias=False),  # -> 3 x 512 x 512
-            nn.BatchNorm2d(3),
-            nn.ReLU(True),
-            nn.Conv2d(3, 3, 3, 1, 1, bias=True),  # Prima convoluzione di rifinitura
-            nn.BatchNorm2d(3),
-            nn.ReLU(True),
-            nn.Conv2d(3, 3, 3, 1, 1, bias=True),  # Seconda convoluzione di rifinitura
-            nn.BatchNorm2d(3),
-            nn.ReLU(True),
-            nn.Conv2d(3, 3, 3, 1, 1, bias=True)  # Convoluzione finale
         )
         # Frame interpolator
@@ -105,7 +95,7 @@ class SpriteGenerator(nn.Module):
             # Generate frame
             frame_latent_reshaped = frame_latent.unsqueeze(2).unsqueeze(3)
             frame = self.generator(frame_latent_reshaped)
-            # Aggiungiamo Tanh finale per normalizzare l'output
             frame = torch.tanh(frame)
             all_frames.append(frame)

         # Proiezione dal testo al latent space
         self.text_projection = nn.Sequential(
+            nn.Linear(768, latent_dim),  # 768 -> 512
             nn.LeakyReLU(0.2),
+            nn.Linear(latent_dim, latent_dim)  # 512 -> 512
         )
+        # Generator
         self.generator = nn.Sequential(
+            # Blocco iniziale: latent_dim x 1 x 1 -> 512 x 4 x 4
+            nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0, bias=False),
             nn.BatchNorm2d(512),
             nn.ReLU(True),
+            # 512 x 4 x 4 -> 256 x 8 x 8
+            nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False),
             nn.BatchNorm2d(256),
             nn.ReLU(True),
+            # 256 x 8 x 8 -> 128 x 16 x 16
+            nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),
             nn.BatchNorm2d(128),
             nn.ReLU(True),
+            # 128 x 16 x 16 -> 64 x 32 x 32
+            nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),
             nn.BatchNorm2d(64),
             nn.ReLU(True),
+            # 64 x 32 x 32 -> 32 x 64 x 64
+            nn.ConvTranspose2d(64, 32, 4, 2, 1, bias=False),
             nn.BatchNorm2d(32),
             nn.ReLU(True),
+            # 32 x 64 x 64 -> 16 x 128 x 128
+            nn.ConvTranspose2d(32, 16, 4, 2, 1, bias=False),
             nn.BatchNorm2d(16),
             nn.ReLU(True),
+            # 16 x 128 x 128 -> 3 x 256 x 256
+            nn.ConvTranspose2d(16, 3, 4, 2, 1, bias=False),
         )
         # Frame interpolator
             # Generate frame
             frame_latent_reshaped = frame_latent.unsqueeze(2).unsqueeze(3)
             frame = self.generator(frame_latent_reshaped)
+            # Normalizzazione dell'output
             frame = torch.tanh(frame)
             all_frames.append(frame)