Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -55,12 +55,11 @@ vae.eval()
 print("Model and checkpoints loaded successfully!")
 
 
-def sample_ddpm_inference(text_prompt):
+def sample_ddpm_inference(text_prompt, mask_image_pil):
     """
     Given a text prompt and (optionally) an image condition (as a PIL image),
     sample from the diffusion model and return a generated image (PIL image).
     """
-    mask_image_pil = None
     guidance_scale = 1.0
 
     # Create noise scheduler
@@ -138,30 +137,6 @@ def sample_ddpm_inference(text_prompt):
     uncond_input["image"] = torch.zeros_like(mask_tensor)
     cond_input["image"] = mask_tensor
 
-    # Load the diffusion UNet (and assume it has been pretrained and saved)
-    # unet = UNet(
-    #     image_channels=autoencoder_params["z_channels"], model_config=ldm_params
-    # ).to(device)
-    # ldm_checkpoint_path = os.path.join(
-    #     train_params["task_name"], train_params["ldm_ckpt_name"]
-    # )
-    # if os.path.exists(ldm_checkpoint_path):
-    #     checkpoint = torch.load(ldm_checkpoint_path, map_location=device)
-    #     unet.load_state_dict(checkpoint["model_state_dict"])
-    #     unet.eval()
-
-    # Load VQVAE (assume pretrained and saved)
-    # vae = VQVAE(
-    #     image_channels=dataset_params["image_channels"], model_config=autoencoder_params
-    # ).to(device)
-    # vae_checkpoint_path = os.path.join(
-    #     train_params["task_name"], train_params["vqvae_autoencoder_ckpt_name"]
-    # )
-    # if os.path.exists(vae_checkpoint_path):
-    #     checkpoint = torch.load(vae_checkpoint_path, map_location=device)
-    #     vae.load_state_dict(checkpoint["model_state_dict"])
-    #     vae.eval()
-
     # Determine latent shape from VQVAE: (batch, z_channels, H_lat, W_lat)
     # For example, if image_size is 256 and there are 3 downsamplings, H_lat = 256 // 8 = 32.
     latent_size = dataset_params["image_size"] // (
@@ -212,37 +187,28 @@ css_str = """
 }
 """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-demo = gr.Interface(
-    sample_ddpm_inference,
-    inputs=gr.Textbox(
-        label="Text Prompt",
-        lines=2,
-        placeholder="E.g., 'He is a man with brown hair.'",
-    ),
-    outputs="image",
-)
+with gr.Blocks(css=css_str) as demo:
+    gr.Markdown("<div class='title'>Conditioned Latent Diffusion with CelebA</div>")
+    gr.Markdown(
+        "<div class='description'>Enter a text prompt and (optionally) upload a mask image for conditioning; the generated image will update as the reverse diffusion progresses.</div>"
+    )
+
+    with gr.Row():
+        text_input = gr.Textbox(
+            label="Text Prompt",
+            lines=2,
+            placeholder="E.g., 'He is a man with brown hair.'",
+        )
+        mask_input = gr.Image(type="pil", label="Optional Mask for Conditioning")
+
+    generate_button = gr.Button("Generate Image")
+    output_image = gr.Image(label="Generated Image", type="pil")
+
+    generate_button.click(
+        fn=sample_ddpm_inference,
+        inputs=[text_input, mask_input],
+        outputs=[output_image],
+    )
 
 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch(share=True)
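The updated handler now receives the uploaded mask directly as its second argument, but the hunks shown here do not include the step that turns `mask_image_pil` into the `mask_tensor` assigned to `cond_input["image"]`. Below is a minimal sketch of one way that conversion could look; `prepare_mask_tensor`, the RGB channel count, and the use of `dataset_params["image_size"]` and `device` are illustrative assumptions, not code from this commit.

```python
# Hypothetical helper (not part of this commit): convert the optional PIL mask
# into a (1, C, H, W) tensor; fall back to zeros when no mask is uploaded,
# mirroring the unconditional branch uncond_input["image"] = torch.zeros_like(...).
import torch
import torchvision.transforms as T

def prepare_mask_tensor(mask_image_pil, image_size, device):
    if mask_image_pil is None:
        # No mask uploaded: use an all-zero conditioning image.
        return torch.zeros(1, 3, image_size, image_size, device=device)
    transform = T.Compose([
        T.Resize((image_size, image_size)),
        T.ToTensor(),  # PIL image -> float tensor in [0, 1], shape (C, H, W)
    ])
    mask = transform(mask_image_pil.convert("RGB"))
    return mask.unsqueeze(0).to(device)  # add the batch dimension
```

Guarding against `None` matters because the Gradio mask input is optional, so the handler has to tolerate a missing upload.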
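The new description says the generated image "will update as the reverse diffusion progresses." In Gradio, a click handler can stream to its output component by being a generator that yields intermediate images. The self-contained toy below illustrates that wiring pattern with the same component layout as the commit; the fake sampler is a stand-in assumption, not the Space's actual diffusion loop.

```python
# Illustration only (not from this commit): Gradio refreshes the gr.Image output
# each time a generator handler yields, which is how progressive previews work.
import time
import numpy as np
import gradio as gr
from PIL import Image

def fake_progressive_sampler(text_prompt, mask_image_pil):
    # Stand-in for the reverse-diffusion loop: each yield is a progressively
    # cleaner frame, blending random noise toward a flat gray "result".
    rng = np.random.default_rng(0)
    target = np.full((256, 256, 3), 180, dtype=np.float32)
    for step in range(5):
        alpha = step / 4  # 0 = pure noise, 1 = fully "denoised"
        frame = (1 - alpha) * rng.random((256, 256, 3)) * 255 + alpha * target
        yield Image.fromarray(frame.astype(np.uint8))
        time.sleep(0.3)

with gr.Blocks() as toy_demo:
    prompt = gr.Textbox(label="Text Prompt", lines=2)
    mask = gr.Image(type="pil", label="Optional Mask for Conditioning")
    out = gr.Image(label="Generated Image", type="pil")
    gr.Button("Generate Image").click(fake_progressive_sampler, [prompt, mask], [out])

if __name__ == "__main__":
    toy_demo.launch()
```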