Spaces: Running on T4

Update app.py
Browse files

app.py CHANGED
@@ -1,52 +1,43 @@
 import gradio as gr
-from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler, LMSDiscreteScheduler
 from PIL import Image
 import torch
-from
-from torch import autocast
+from diffusers import DiffusionPipeline

-
-
-vae = AutoencoderKL.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
+# Device configuration - use GPU if available
+device = "cuda" if torch.cuda.is_available() else "cpu"

-
-
-
-    return vae, unet, scheduler
-
-vae, unet, scheduler = load_refiner_model()
-
-def refine_image(input_image):
-    # Transform input image for model processing
-    input_image = input_image.resize((512, 512))
-    input_image = TF.to_tensor(input_image).unsqueeze(0) * 2 - 1
-
-    # Prepare for inference
-    with torch.no_grad(), autocast("cuda"):
-        # Encode image with VAE
-        latents = vae.encode(input_image).latent_dist.sample()
-
-        # Refine latents with the UNet
-        latents = latents.half()
-        latents = scheduler.add_noise(latents, torch.zeros_like(latents), 0)
-        latents = unet(latents, t=torch.tensor([0]).to(latents.device)).sample
-
-        # Decode the refined latents
-        output_image = vae.decode(latents).sample
-
-    # Convert output tensor to PIL Image
-    output_image = TF.to_pil_image((output_image.squeeze(0) + 1) / 2)
-    return output_image
+# Initialize the pipeline and move it to the selected device
+pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16).to(device)

-
-
-
-
-    outputs=gr.outputs.Image(type="pil", label="Refined Image"),
-    title="Stable Diffusion XL Refiner",
-    description="Refine images generated by Stable Diffusion models for enhanced quality."
-)
+def resize_image(image_path, target_size):
+    with Image.open(image_path) as img:
+        resized_img = img.resize((target_size, target_size))
+    return resized_img

-
-
+def infer(source_img_path, prompt, negative_prompt, guide, steps, seed, strength):
+    # Set the seed for reproducibility
+    torch.manual_seed(seed)
+
+    # Resize the source image; the pipeline preprocesses PIL input itself
+    source_image = resize_image(source_img_path, 768)
+
+    # Perform inference
+    with torch.no_grad():
+        generated_image = pipe(prompt=prompt, negative_prompt=negative_prompt, image=source_image, strength=strength, guidance_scale=guide, num_inference_steps=steps, generator=torch.Generator(device).manual_seed(seed)).images[0]
+
+    # pipe(...) already returns a PIL image, so it can be handed straight to Gradio
+    return generated_image
+
+# Define the Gradio interface
+iface = gr.Interface(fn=infer,
+                     inputs=[gr.Image(source="upload", type="filepath", label="Raw Image. Must Be .png"),
+                             gr.Textbox(label='Prompt Input Text. 77 Token (Keyword or Symbol) Maximum'),
+                             gr.Textbox(label='What you Do Not want the AI to generate.'),
+                             gr.Slider(2, 15, value=7, label='Guidance Scale'),
+                             gr.Slider(1, 25, value=10, step=1, label='Number of Iterations'),
+                             gr.Slider(label="Seed", minimum=0, maximum=2**63 - 1, step=1, randomize=True),
+                             gr.Slider(label='Strength', minimum=0, maximum=1, step=.05, value=.5)],
+                     outputs='image',
+                     title="Stable Diffusion XL 1.0 Image to Image Pipeline",
+                     description="""For more information on Stable Diffusion XL 1.0 see <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0">here</a>.<br><br>Upload an Image (<b>MUST Be .PNG</b>), enter a Prompt, or let it just do its Thing, then click submit. For suggestions for prompts, keywords, artists or styles see <a href="https://github.com/Maks-s/sd-akashic">this guide</a>.""",
+                     article="Developed by: <a href=\"https://huggingface.co/cuonguet\">CuongTran</a>").launch()
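
The rewritten app wraps the standard diffusers image-to-image flow in a Gradio UI. For reference, a minimal standalone sketch of the same flow outside Gradio, assuming the documented diffusers API for the stable-diffusion-xl-refiner-1.0 checkpoint; the file name, prompt, and seed below are illustrative placeholders, not part of this commit:

import torch
from PIL import Image
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the refiner checkpoint; fp16 halves memory on GPU, fp32 keeps CPU runs working
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Placeholder input image and prompt, for illustration only
init_image = Image.open("input.png").convert("RGB").resize((768, 768))

refined = pipe(
    prompt="a detailed photo of a mountain lake at sunrise",
    image=init_image,                 # PIL input; the pipeline does its own preprocessing
    strength=0.5,                     # how strongly to transform the input image
    guidance_scale=7.0,
    num_inference_steps=10,
    generator=torch.Generator(device).manual_seed(42),  # seeded for reproducibility
).images[0]                           # .images holds finished PIL images

refined.save("refined.png")

Because pipe(...) accepts a PIL image via image= and returns finished PIL images in .images, the app needs no manual tensor conversion or post-processing around the pipeline call.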