Spaces:

kaupane
/

diffusion-wikiart

Sleeping

App Files Files Community

kaupane committed on Apr 7

Commit

9bb4b8c

verified ·

1 Parent(s): d7b421a

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -29

app.py CHANGED Viewed

@@ -18,6 +18,9 @@ latent_scale_factor = 0.18215  # Same as in DiTTrainer
 # For tracking progress in UI
 global_progress = 0
 def load_dit_model(dit_size):
     """Load DiT model of specified size"""
     #ckpt_path = f"./ckpts/DiT_{dit_size}_final.pth"
@@ -41,15 +44,13 @@ def load_dit_model(dit_size):
     # Load checkpoint
     checkpoint = torch.load(ckpt_path, map_location="cpu")
     model.load_state_dict(checkpoint["model_state_dict"])
-    # Use half precision to speed up sampling
-    model = model.half()
     return model
 class DiffusionSampler:
-    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
         self.device = device
         self.vae = None
         # Pre-compute diffusion parameters
@@ -68,11 +69,20 @@ class DiffusionSampler:
         self.sqrt_recip_alphas = self.sqrt_recip_alphas.to(self.device)
         self.betas = self.betas.to(self.device)
         self.posterior_variance = self.posterior_variance.to(self.device)
     def load_vae(self):
         """Load the VAE on first use; later calls are no-ops.

         The model is fetched lazily so that no memory is spent on it until
         decoding is actually requested.
         """
         # Already loaded: nothing to do.
         if self.vae is not None:
             return
         pretrained = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-ema")
         self.vae = pretrained.to(self.device)
         # Switch to evaluation mode for inference.
         self.vae.eval()
     @spaces.GPU(duration=120)
@@ -91,7 +101,10 @@ class DiffusionSampler:
                 torch.cuda.manual_seed(seed)
                 torch.cuda.manual_seed_all(seed)
         model.to(self.device)
         model.eval()
         # Convert genre and style to tensors
@@ -100,15 +113,10 @@ class DiffusionSampler:
         g_null = torch.tensor([model.num_genres] * num_samples, device=self.device, dtype=torch.long)
         s_null = torch.tensor([model.num_styles] * num_samples, device=self.device, dtype=torch.long)
-        # Start with random latents
-        latents = torch.randn((num_samples, 4, 32, 32), device=self.device, dtype=torch.float16)
-        # Convert diffusion parameters to half precision for compatibility
-        sqrt_alphas_cumprod_half = self.sqrt_alphas_cumprod.half()
-        sqrt_one_minus_alpha_cumprod_half = self.sqrt_one_minus_alpha_cumprod.half()
-        sqrt_recip_alphas_half = self.sqrt_recip_alphas.half()
-        betas_half = self.betas.half()
-        posterior_variance_half = self.posterior_variance.half()
         # Use classifier-free guidance for better quality
         cfg_scale = 2.5
@@ -125,10 +133,11 @@ class DiffusionSampler:
                 t = torch.full((num_samples,), t_val, device=self.device, dtype=torch.long)
-                sqrt_recip_alphas_t = sqrt_recip_alphas_half[t].view(-1, 1, 1, 1)
-                sqrt_one_minus_alphas_cumprod_t = sqrt_one_minus_alpha_cumprod_half[t].view(-1, 1, 1, 1)
-                beta_t = betas_half[t].view(-1, 1, 1, 1)
-                posterior_variance_t = posterior_variance_half[t].view(-1, 1, 1, 1)
                 # Get noise prediction with classifier-free guidance
                 eps_theta_cond = model(latents, t, g_cond, s_cond)
@@ -137,18 +146,23 @@ class DiffusionSampler:
                 # Update latents
                 mean = sqrt_recip_alphas_t * (latents - (beta_t / sqrt_one_minus_alphas_cumprod_t) * eps_theta)
-                noise = torch.randn_like(latents,dtype=torch.float16)
                 if t_val == 0:
                     latents = mean
                 else:
                     latents = mean + torch.sqrt(posterior_variance_t) * noise
-        # Decode latents to images
         self.load_vae()
-        # Convert latents back to float32 for vae decoding
-        latents = latents.to(dtype=torch.float16) / self.vae.config.scaling_factor
-        latents = latents.to(self.device)
         progress(0.95, desc="Decoding images...")
         with torch.no_grad():
@@ -172,16 +186,16 @@ class DiffusionSampler:
         return gallery_images
 # Initialize sampler globally
-sampler = DiffusionSampler()
 def generate_random_seed():
     """Return a random integer seed in the inclusive range [0, 2**32 - 1]."""
     upper_bound = 2**32 - 1
     seed = random.randint(0, upper_bound)
     return seed
 MODEL_SAMPLE_LIMITS = {
-    "S": {"min":1, "max": 16, "default": 4},
-    "B": {"min":1, "max": 12, "default": 4},
-    "L": {"min":1, "max": 4, "default": 1}
 }
 def update_sample_slider(dit_size):
@@ -264,6 +278,10 @@ with gr.Blocks(title="DiT Diffusion Model Generator", theme=gr.themes.Soft()) as
                 seed = gr.Number(label="Seed", value=generate_random_seed(), precision=0, info="Set for reproducible results")
                 reset_seed_btn = gr.Button("🎲 New Seed")
             with gr.Row():
                 generate_btn = gr.Button("Generate Images", variant="primary")
                 clear_btn = gr.Button("🗑️ Clear Gallery")
@@ -282,6 +300,17 @@ with gr.Blocks(title="DiT Diffusion Model Generator", theme=gr.themes.Soft()) as
     # Clear gallery button functionality
     clear_btn.click(clear_gallery, inputs=[], outputs=[output_gallery, error_message])
     # Connect components
     generate_btn.click(
         fn=generate_samples,
@@ -290,6 +319,5 @@ with gr.Blocks(title="DiT Diffusion Model Generator", theme=gr.themes.Soft()) as
     )
 # Launch the app only when this file is executed directly as a script.
 if __name__ == "__main__":
     app.launch()

 # For tracking progress in UI
 global_progress = 0
+# Set to True to enable half-precision inference
+USE_HALF_PRECISION = True
 def load_dit_model(dit_size):
     """Load DiT model of specified size"""
     #ckpt_path = f"./ckpts/DiT_{dit_size}_final.pth"
     # Load checkpoint
     checkpoint = torch.load(ckpt_path, map_location="cpu")
     model.load_state_dict(checkpoint["model_state_dict"])
     return model
 class DiffusionSampler:
+    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu", use_half=USE_HALF_PRECISION):
         self.device = device
+        self.use_half = use_half
         self.vae = None
         # Pre-compute diffusion parameters
         self.sqrt_recip_alphas = self.sqrt_recip_alphas.to(self.device)
         self.betas = self.betas.to(self.device)
         self.posterior_variance = self.posterior_variance.to(self.device)
+        # Convert diffusion parameters to half precision if needed
+        if self.use_half:
+            self.sqrt_alphas_cumprod = self.sqrt_alphas_cumprod.half()
+            self.sqrt_one_minus_alpha_cumprod = self.sqrt_one_minus_alpha_cumprod.half()
+            self.sqrt_recip_alphas = self.sqrt_recip_alphas.half()
+            self.betas = self.betas.half()
+            self.posterior_variance = self.posterior_variance.half()
     def load_vae(self):
         """Load the VAE on first use; later calls are no-ops.

         The model is fetched lazily so that no memory is spent on it until
         decoding is actually requested.
         """
         # Already loaded: nothing to do.
         if self.vae is not None:
             return
         pretrained = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-ema")
         self.vae = pretrained.to(self.device)
         # VAE should always remain in full precision (no .half() cast here).
         self.vae.eval()
     @spaces.GPU(duration=120)
                 torch.cuda.manual_seed(seed)
                 torch.cuda.manual_seed_all(seed)
+        # Move model to device and convert to half precision if enabled
         model.to(self.device)
+        if self.use_half:
+            model.half()
         model.eval()
         # Convert genre and style to tensors
         g_null = torch.tensor([model.num_genres] * num_samples, device=self.device, dtype=torch.long)
         s_null = torch.tensor([model.num_styles] * num_samples, device=self.device, dtype=torch.long)
+        # Start with random latents (in appropriate precision)
+        latents = torch.randn((num_samples, 4, 32, 32), device=self.device)
+        if self.use_half:
+            latents = latents.half()
         # Use classifier-free guidance for better quality
         cfg_scale = 2.5
                 t = torch.full((num_samples,), t_val, device=self.device, dtype=torch.long)
+                # Get diffusion parameters for current timestep in proper precision
+                sqrt_recip_alphas_t = self.sqrt_recip_alphas[t].view(-1, 1, 1, 1)
+                sqrt_one_minus_alphas_cumprod_t = self.sqrt_one_minus_alpha_cumprod[t].view(-1, 1, 1, 1)
+                beta_t = self.betas[t].view(-1, 1, 1, 1)
+                posterior_variance_t = self.posterior_variance[t].view(-1, 1, 1, 1)
                 # Get noise prediction with classifier-free guidance
                 eps_theta_cond = model(latents, t, g_cond, s_cond)
                 # Update latents
                 mean = sqrt_recip_alphas_t * (latents - (beta_t / sqrt_one_minus_alphas_cumprod_t) * eps_theta)
+                # Generate noise with same precision as latents
+                noise = torch.randn_like(latents)
                 if t_val == 0:
                     latents = mean
                 else:
                     latents = mean + torch.sqrt(posterior_variance_t) * noise
+        # Decode latents to images - VAE needs full precision
         self.load_vae()
+        # Convert latents to full precision for VAE if needed
+        if self.use_half:
+            latents = latents.float()
+        latents = latents / self.vae.config.scaling_factor
         progress(0.95, desc="Decoding images...")
         with torch.no_grad():
         return gallery_images
 # Initialize sampler globally
+sampler = DiffusionSampler(use_half=USE_HALF_PRECISION)
 def generate_random_seed():
     """Return a random integer seed in the inclusive range [0, 2**32 - 1]."""
     upper_bound = 2**32 - 1
     seed = random.randint(0, upper_bound)
     return seed
 MODEL_SAMPLE_LIMITS = {
+    "S": {"min":1, "max": 18, "default": 4},
+    "B": {"min":1, "max": 9, "default": 4},
+    "L": {"min":1, "max": 3, "default": 1}
 }
 def update_sample_slider(dit_size):
                 seed = gr.Number(label="Seed", value=generate_random_seed(), precision=0, info="Set for reproducible results")
                 reset_seed_btn = gr.Button("🎲 New Seed")
+            # Add option to toggle half-precision
+            use_half_precision = gr.Checkbox(label="Use half-precision (faster)", value=USE_HALF_PRECISION,
+                                            info="Use FP16 for model (faster, less memory, slightly lower quality)")
             with gr.Row():
                 generate_btn = gr.Button("Generate Images", variant="primary")
                 clear_btn = gr.Button("🗑️ Clear Gallery")
     # Clear gallery button functionality
     clear_btn.click(clear_gallery, inputs=[], outputs=[output_gallery, error_message])
+    # Update half-precision setting when checkbox is changed
+    def update_half_precision(value):
+        global USE_HALF_PRECISION
+        USE_HALF_PRECISION = value
+        # Recreate sampler with new setting
+        global sampler
+        sampler = DiffusionSampler(use_half=value)
+        return None
+    use_half_precision.change(update_half_precision, inputs=[use_half_precision], outputs=[None])
     # Connect components
     generate_btn.click(
         fn=generate_samples,
     )
 # Launch the app only when this file is executed directly as a script.
 if __name__ == "__main__":
     app.launch()