RishabA committed on
Commit c74d396 · verified
1 Parent(s): ddc10aa

Update app.py

Files changed (1)
app.py +15 -12
app.py CHANGED

@@ -1,18 +1,22 @@
 import torch
 import gradio as gr
-from model import UNet, VQVAE, sample_ddpm_inference
+from model import (
+    UNet,
+    VQVAE,
+    sample_ddpm_inference,
+)
 from huggingface_hub import hf_hub_download
 import json
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+# Download config and checkpoint files from HF Hub
 config_path = hf_hub_download(
     repo_id="RishabA/celeba-cond-ddpm", filename="config.json"
 )
 with open(config_path, "r") as f:
     config = json.load(f)
 
-# Download checkpoint files. Adjust file paths if needed.
 ldm_ckpt_path = hf_hub_download(
     repo_id="RishabA/celeba-cond-ddpm", filename="celebhq/ddpm_ckpt_class_cond.pth"
 )
@@ -20,13 +24,12 @@ vae_ckpt_path = hf_hub_download(
     repo_id="RishabA/celeba-cond-ddpm", filename="celebhq/vqvae_autoencoder_ckpt.pth"
 )
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Instantiate and load the models
 unet = UNet(config["autoencoder_params"]["z_channels"], config["ldm_params"]).to(device)
 vae = VQVAE(
     config["dataset_params"]["image_channels"], config["autoencoder_params"]
 ).to(device)
 
-# Load the pretrained weights
 unet_state = torch.load(ldm_ckpt_path, map_location=device)
 unet.load_state_dict(unet_state["model_state_dict"])
 
@@ -37,23 +40,21 @@ unet.eval()
 vae.eval()
 
 print("Model and checkpoints loaded successfully!")
-print(unet)
-print(vae)
 
 
 def generate_image(text_prompt, mask_upload):
     """
-    text_prompt: A text prompt provided by the user.
+    text_prompt: Text prompt provided by the user.
     mask_upload: Either a PIL image (uploaded) or None.
-    guidance_scale: Float slider setting for classifier-free guidance.
+
+    This function returns a generator that yields an intermediate
+    decoded image at every timestep from the diffusion process.
     """
+    # sample_ddpm_inference is assumed to be a generator function (using yield)
     return sample_ddpm_inference(unet, vae, text_prompt, mask_upload, device)
 
 
 css_str = """
-body {
-    background-color: #f7f7f7;
-}
 .title {
     font-size: 48px;
     text-align: center;
@@ -69,7 +70,7 @@ body {
 with gr.Blocks(css=css_str) as demo:
     gr.Markdown("<div class='title'>Conditioned Latent Diffusion with CelebA</div>")
     gr.Markdown(
-        "<div class='description'>Enter a text prompt and (optionally) upload a mask image for conditioning; the model will generate an image accordingly.</div>"
+        "<div class='description'>Enter a text prompt and (optionally) upload a mask image for conditioning; the generated image will update as the reverse diffusion progresses.</div>"
     )
     with gr.Row():
         text_input = gr.Textbox(
@@ -81,10 +82,12 @@ with gr.Blocks(css=css_str) as demo:
     generate_button = gr.Button("Generate Image")
     output_image = gr.Image(label="Generated Image", type="pil")
 
+    # Adding stream=True allows Gradio to process the generator output
    generate_button.click(
         fn=generate_image,
         inputs=[text_input, mask_input],
         outputs=output_image,
+        stream=True,
     )
 
 if __name__ == "__main__":
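
The substantive change in this commit is the new contract documented in generate_image: sample_ddpm_inference is now treated as a generator that yields a decoded image at every denoising step, so the UI can show the reverse diffusion in progress. A minimal sketch of that contract, where the latent shape, the UNet/VQVAE call signatures, and the update rule are all illustrative assumptions (the real implementation lives in model.py):

import torch
from PIL import Image


@torch.no_grad()
def sample_sketch(unet, vae, text_prompt, mask_upload, device, steps=50):
    # Conditioning on text_prompt/mask_upload is omitted for brevity.
    z = torch.randn(1, 4, 32, 32, device=device)  # hypothetical latent shape
    for t in reversed(range(steps)):
        t_batch = torch.full((1,), t, device=device, dtype=torch.long)
        noise_pred = unet(z, t_batch)   # assumed UNet call signature
        z = z - noise_pred / steps      # stand-in for the true DDPM update rule
        img = vae.decode(z)             # assumed decoder entry point
        img = ((img.clamp(-1, 1) + 1) / 2 * 255).to(torch.uint8)
        yield Image.fromarray(img[0].permute(1, 2, 0).cpu().numpy())

Because the function yields rather than returns, generate_image hands Gradio a generator object, which is what makes the incremental display possible.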
 
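
On the UI side, what drives the incremental display in recent Gradio releases is the combination of a generator event handler and queuing: each yielded value is pushed to the output component as it arrives. A self-contained toy of the pattern (fake_diffusion is a stand-in generator, and the exact set of keyword arguments click() accepts varies across Gradio versions):

import time

import gradio as gr
import numpy as np
from PIL import Image


def fake_diffusion(prompt):
    # Yield ten progressively "cleaner" random frames to mimic denoising.
    for step in range(10):
        high = 255 // (step + 1) + 1
        frame = np.random.randint(0, high, (64, 64, 3), dtype=np.uint8)
        yield Image.fromarray(frame)
        time.sleep(0.1)


with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    out = gr.Image(label="Generated Image", type="pil")
    btn = gr.Button("Generate")
    # In this toy, a generator handler plus queuing is enough to stream.
    btn.click(fn=fake_diffusion, inputs=prompt, outputs=out)

if __name__ == "__main__":
    demo.queue().launch()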