Spaces: Running on T4

Update app.py
Browse files

app.py CHANGED
@@ -1,52 +1,43 @@
 import gradio as gr
-from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler, LMSDiscreteScheduler
 from PIL import Image
 import torch
-from
-from torch import autocast
+from diffusers import DiffusionPipeline

-
-
-vae = AutoencoderKL.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
+# Device configuration - use GPU if available
+device = "cuda" if torch.cuda.is_available() else "cpu"

-
-
-
-    return vae, unet, scheduler
-
-vae, unet, scheduler = load_refiner_model()
-
-def refine_image(input_image):
-    # Transform input image for model processing
-    input_image = input_image.resize((512, 512))
-    input_image = TF.to_tensor(input_image).unsqueeze(0) * 2 - 1
-
-    # Prepare for inference
-    with torch.no_grad(), autocast("cuda"):
-        # Encode image with VAE
-        latents = vae.encode(input_image).latent_dist.sample()
-
-        # Refine latents with the UNet
-        latents = latents.half()
-        latents = scheduler.add_noise(latents, torch.zeros_like(latents), 0)
-        latents = unet(latents, t=torch.tensor([0]).to(latents.device)).sample
-
-        # Decode the refined latents
-        output_image = vae.decode(latents).sample
-
-    # Convert output tensor to PIL Image
-    output_image = TF.to_pil_image((output_image.squeeze(0) + 1) / 2)
-    return output_image
+# Initialize the pipeline and move it to the selected device
+pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16).to(device)

-
-
-
-
-    outputs=gr.outputs.Image(type="pil", label="Refined Image"),
-    title="Stable Diffusion XL Refiner",
-    description="Refine images generated by Stable Diffusion models for enhanced quality."
-)
+def resize_image(image_path, target_size):
+    with Image.open(image_path) as img:
+        resized_img = img.resize((target_size, target_size))
+    return resized_img

-
-
+def infer(source_img_path, prompt, negative_prompt, guide, steps, seed, strength):
+    # Set the seed for reproducibility
+    torch.manual_seed(seed)
+
+    # Resize the source image; the pipeline preprocesses PIL input itself
+    source_image = resize_image(source_img_path, 768)
+
+    # Perform inference
+    with torch.no_grad():
+        generated_image = pipe(prompt=prompt, negative_prompt=negative_prompt, image=source_image, strength=strength, guidance_scale=guide, num_inference_steps=steps, generator=torch.Generator(device).manual_seed(seed)).images[0]
+
+    # pipe(...) already returns a PIL image, so it can be handed straight to Gradio
+    return generated_image
+
+# Define the Gradio interface
+iface = gr.Interface(fn=infer,
+                     inputs=[gr.Image(source="upload", type="filepath", label="Raw Image. Must Be .png"),
+                             gr.Textbox(label='Prompt Input Text. 77 Token (Keyword or Symbol) Maximum'),
+                             gr.Textbox(label='What you Do Not want the AI to generate.'),
+                             gr.Slider(2, 15, value=7, label='Guidance Scale'),
+                             gr.Slider(1, 25, value=10, step=1, label='Number of Iterations'),
+                             gr.Slider(label="Seed", minimum=0, maximum=2**63 - 1, step=1, randomize=True),
+                             gr.Slider(label='Strength', minimum=0, maximum=1, step=.05, value=.5)],
+                     outputs='image',
+                     title="Stable Diffusion XL 1.0 Image to Image Pipeline",
+                     description="""For more information on Stable Diffusion XL 1.0 see <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0">here</a>.<br><br>Upload an Image (<b>MUST Be .PNG</b>), enter a Prompt, or let it just do its Thing, then click submit. For suggestions for prompts, keywords, artists or styles see <a href="https://github.com/Maks-s/sd-akashic">this guide</a>.""",
+                     article="Developed by: <a href=\"https://huggingface.co/cuonguet\">CuongTran</a>").launch()
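
The rewritten app wraps the standard diffusers image-to-image flow in a Gradio UI. For reference, a minimal standalone sketch of the same flow outside Gradio, assuming the documented diffusers API for the stable-diffusion-xl-refiner-1.0 checkpoint; the file name, prompt, and seed below are illustrative placeholders, not part of this commit:

import torch
from PIL import Image
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the refiner checkpoint; fp16 halves memory on GPU, fp32 keeps CPU runs working
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Placeholder input image and prompt, for illustration only
init_image = Image.open("input.png").convert("RGB").resize((768, 768))

refined = pipe(
    prompt="a detailed photo of a mountain lake at sunrise",
    image=init_image,                 # PIL input; the pipeline does its own preprocessing
    strength=0.5,                     # how strongly to transform the input image
    guidance_scale=7.0,
    num_inference_steps=10,
    generator=torch.Generator(device).manual_seed(42),  # seeded for reproducibility
).images[0]                           # .images holds finished PIL images

refined.save("refined.png")

Because pipe(...) accepts a PIL image via image= and returns finished PIL images in .images, the app needs no manual tensor conversion or post-processing around the pipeline call.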