AlphaQuark committed
Commit bcbcf8a · verified · 1 Parent(s): e7fcd04

Update app.py

Files changed (1): app.py (+46, -45)
app.py CHANGED
@@ -1,52 +1,53 @@
  import gradio as gr
- from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler, LMSDiscreteScheduler
  from PIL import Image
  import torch
- from torchvision.transforms import functional as TF
- from torch import autocast
+ from diffusers import DiffusionPipeline
 
- def load_refiner_model():
-     # Load the autoencoder and UNet models for the refiner
-     vae = AutoencoderKL.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-     unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-
-     # Initialize scheduler
-     scheduler = PNDMScheduler.from_config("stabilityai/stable-diffusion-xl-base-1.0")
-
-     return vae, unet, scheduler
-
- vae, unet, scheduler = load_refiner_model()
-
- def refine_image(input_image):
-     # Transform input image for model processing
-     input_image = input_image.resize((512, 512))
-     input_image = TF.to_tensor(input_image).unsqueeze(0) * 2 - 1
-
-     # Prepare for inference
-     with torch.no_grad(), autocast("cuda"):
-         # Encode image with VAE
-         latents = vae.encode(input_image).latent_dist.sample()
-
-         # Refine latents with the UNet
-         latents = latents.half()
-         latents = scheduler.add_noise(latents, torch.zeros_like(latents), 0)
-         latents = unet(latents, t=torch.tensor([0]).to(latents.device)).sample
-
-         # Decode the refined latents
-         output_image = vae.decode(latents).sample
-
-         # Convert output tensor to PIL Image
-         output_image = TF.to_pil_image((output_image.squeeze(0) + 1) / 2)
-     return output_image
 
- # Gradio interface
- interface = gr.Interface(
-     fn=refine_image,
-     inputs=gr.inputs.Image(type="pil", label="Upload Generated Image"),
-     outputs=gr.outputs.Image(type="pil", label="Refined Image"),
-     title="Stable Diffusion XL Refiner",
-     description="Refine images generated by Stable Diffusion models for enhanced quality."
- )
 
- if __name__ == "__main__":
-     interface.launch(share=True)
+ # Device configuration - use GPU if available
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Initialize the pipeline in half precision and move it to the selected device
+ pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16).to(device)
+
+ def resize_image(image_path, target_size):
+     with Image.open(image_path) as img:
+         resized_img = img.resize((target_size, target_size))
+     return resized_img
+
+ def infer(source_img_path, prompt, negative_prompt, guide, steps, seed, strength):
+     # Set the seed for reproducibility
+     torch.manual_seed(seed)
+
+     # Resize the source image; the pipeline accepts a PIL image directly,
+     # so no manual tensor conversion or normalization is needed
+     source_image = resize_image(source_img_path, 768)
+
+     # Perform inference; the img2img argument is `image` (the older
+     # `init_image` name is gone), and `.images` holds ready-to-use PIL images
+     with torch.no_grad():
+         generated_image = pipe(prompt=prompt, negative_prompt=negative_prompt, image=source_image, strength=strength, guidance_scale=guide, num_inference_steps=steps, generator=torch.Generator(device).manual_seed(seed)).images[0]
+
+     return generated_image
+
+ # Define the Gradio interface
+ iface = gr.Interface(fn=infer,
+                      inputs=[gr.Image(source="upload", type="filepath", label="Raw Image (must be .png)"),
+                              gr.Textbox(label='Prompt (77-token maximum)'),
+                              gr.Textbox(label='Negative prompt: what you do not want the AI to generate'),
+                              gr.Slider(2, 15, value=7, label='Guidance Scale'),
+                              gr.Slider(1, 25, value=10, step=1, label='Number of Iterations'),
+                              gr.Slider(label="Seed", minimum=0, maximum=2**63 - 1, step=1, randomize=True),
+                              gr.Slider(label='Strength', minimum=0, maximum=1, step=0.05, value=0.5)],
+                      outputs='image',
+                      title="Stable Diffusion XL 1.0 Image to Image Pipeline",
+                      description="""For more information on Stable Diffusion XL 1.0, see <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0">here</a>.<br><br>Upload an image (<b>must be .png</b>), enter a prompt (or just let it do its thing), then click Submit. For suggested prompts, keywords, artists, and styles, see <a href="https://github.com/Maks-s/sd-akashic">this guide</a>.""",
+                      article="Developed by: <a href=\"https://huggingface.co/cuonguet\">CuongTran</a>").launch()