Spaces:

Lod34
/

Animator2D-v2

Running

App Files Files Community

Lod34 committed on Mar 5

Commit

95b77dc

verified ·

1 Parent(s): 91263a9

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -87

app.py CHANGED Viewed

@@ -17,14 +17,14 @@ class SpriteGenerator(nn.Module):
         # Proiezione dal testo al latent space
         self.text_projection = nn.Sequential(
-            nn.Linear(768, latent_dim),  # 768 -> 512
             nn.LeakyReLU(0.2),
-            nn.Linear(latent_dim, latent_dim)  # 512 -> 512
         )
         # Generator
         self.generator = nn.Sequential(
-            # Blocco iniziale: latent_dim x 1 x 1 -> 512 x 4 x 4
             nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0, bias=False),
             nn.BatchNorm2d(512),
             nn.ReLU(True),
@@ -95,7 +95,6 @@ class SpriteGenerator(nn.Module):
             # Generate frame
             frame_latent_reshaped = frame_latent.unsqueeze(2).unsqueeze(3)
             frame = self.generator(frame_latent_reshaped)
-            # Normalizzazione dell'output
             frame = torch.tanh(frame)
             all_frames.append(frame)
@@ -104,100 +103,77 @@ class SpriteGenerator(nn.Module):
         return sprites
-# Costanti
-MODEL_ID = "Lod34/Animator2D-v2"
-CACHE_DIR = "model_cache"
-def load_model():
-    """
-    Carica il modello
-    """
     try:
-        model = SpriteGenerator()
-        # Carica i pesi del modello
-        model_path = hf_hub_download(
-            repo_id=MODEL_ID,
-            filename="pytorch_model.bin",
-            cache_dir=CACHE_DIR
-        )
-        model.load_state_dict(torch.load(model_path, map_location='cpu'))
         model.eval()
-        return model
     except Exception as e:
         print(f"Errore nel caricamento del modello: {str(e)}")
-        return None
-# Inizializzazione globale
-print("Caricamento del modello...")
-model = load_model()
-tokenizer = AutoTokenizer.from_pretrained("t5-base")
-def generate_animated_sprite(character_description, num_frames, character_action, viewing_direction):
-    """
-    Genera un'animazione sprite utilizzando il modello
-    """
-    if model is None:
-        raise Exception("Il modello non è stato caricato correttamente")
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.to(device)
-    # Prepara l'input
-    text_input = f"""
-    Description: {character_description}
-    Action: {character_action}
-    Direction: {viewing_direction}
-    Number of frames: {num_frames}
-    """
-    # Tokenizzazione
-    encoded_text = tokenizer(
-        text_input,
-        padding="max_length",
-        max_length=128,
-        truncation=True,
-        return_tensors="pt"
-    )
-    input_ids = encoded_text['input_ids'].to(device)
-    attention_mask = encoded_text['attention_mask'].to(device)
-    num_frames_tensor = torch.tensor([int(num_frames)], device=device)
     try:
-        # Generazione frames
         with torch.no_grad():
-            output_sprites = model(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                num_frames=num_frames_tensor
             )
-        # Conversione in immagini
-        frames = []
-        for i in range(int(num_frames)):
-            frame = output_sprites[0, i].cpu()
-            frame = ((frame + 1) * 127.5).clamp(0, 255).to(torch.uint8)
-            frame = frame.permute(1, 2, 0).numpy()
-            frame_img = Image.fromarray(frame)
-            frames.append(frame_img)
-        # Salvataggio GIF
-        os.makedirs("tmp", exist_ok=True)
-        output_path = os.path.join("tmp", f"sprite_{hash(character_description)}.gif")
-        frames[0].save(
-            output_path,
-            format='GIF',
-            append_images=frames[1:],
-            save_all=True,
-            duration=200,
-            loop=0
-        )
-        return output_path
     except Exception as e:
         print(f"Errore nella generazione: {str(e)}")
-        raise e
 # Interfaccia Gradio
 def create_interface():

         # Proiezione dal testo al latent space
         self.text_projection = nn.Sequential(
+            nn.Linear(768, latent_dim),
             nn.LeakyReLU(0.2),
+            nn.Linear(latent_dim, latent_dim)
         )
         # Generator
         self.generator = nn.Sequential(
+            # Input: latent_dim x 1 x 1 -> 512 x 4 x 4
             nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0, bias=False),
             nn.BatchNorm2d(512),
             nn.ReLU(True),
             # Generate frame
             frame_latent_reshaped = frame_latent.unsqueeze(2).unsqueeze(3)
             frame = self.generator(frame_latent_reshaped)
             frame = torch.tanh(frame)
             all_frames.append(frame)
         return sprites
+def initialize_model():
+    print("Inizializzazione del modello...")
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = SpriteGenerator()
     try:
+        # Carica il modello
+        state_dict = torch.load("Animator2D-v2.pth", map_location=device)
+        model.load_state_dict(state_dict)
+        model = model.to(device)
         model.eval()
+        print("Modello caricato con successo!")
+        return model, device
     except Exception as e:
         print(f"Errore nel caricamento del modello: {str(e)}")
+        raise
+def generate_sprite(prompt, num_frames=8):
     try:
+        # Usa il modello e il device globali
+        global model, device, tokenizer
+        # Tokenizza il testo
+        tokens = tokenizer(prompt, return_tensors="pt", padding=True)
+        tokens = {k: v.to(device) for k, v in tokens.items()}
+        # Genera l'immagine
         with torch.no_grad():
+            frames = model(
+                input_ids=tokens["input_ids"],
+                attention_mask=tokens["attention_mask"],
+                num_frames=torch.tensor([num_frames], device=device)
             )
+        # Converte il tensore in immagine
+        frames = (frames * 0.5 + 0.5).clamp(0, 1)
+        frames = frames.cpu().numpy()
+        # Ritorna il primo frame come esempio
+        frame = frames[0, 0]  # Prende il primo frame del batch
+        frame = (frame * 255).astype('uint8').transpose(1, 2, 0)
+        return Image.fromarray(frame)
     except Exception as e:
         print(f"Errore nella generazione: {str(e)}")
+        raise
+# Inizializzazione globale
+print("Caricamento del modello...")
+try:
+    model, device = initialize_model()
+    tokenizer = AutoTokenizer.from_pretrained("t5-base")
+    # Creazione dell'interfaccia Gradio
+    interface = gr.Interface(
+        fn=generate_sprite,
+        inputs=[
+            gr.Textbox(label="Descrivi lo sprite che vuoi generare"),
+            gr.Slider(minimum=1, maximum=16, value=8, step=1, label="Numero di frame")
+        ],
+        outputs=gr.Image(label="Sprite generato"),
+        title="Animator2D-v2 Sprite Generator",
+        description="Genera sprite animati da descrizioni testuali"
+    )
+    # Avvio dell'interfaccia
+    interface.launch()
+except Exception as e:
+    print(f"Errore nell'inizializzazione dell'applicazione: {str(e)}")
+    raise e
 # Interfaccia Gradio
 def create_interface():