import spaces
import rembg
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoPipelineForImage2Image
import cv2
from transformers import pipeline
import numpy as np
from PIL import Image
import gradio as gr
# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe.to("cuda")
# Raise a Gradio error if the user submitted an empty prompt
def check_prompt(prompt):
    if prompt is None:
        raise gr.Error("Please enter a prompt!")
# Image-to-image pipeline (SDXL refiner), loaded once at import time
imagepipe = AutoPipelineForImage2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float32, use_safetensors=True
)
# ControlNet conditioning models for SD 1.5: surface-normal and depth variants
controlNet_normal = ControlNetModel.from_pretrained(
    "fusing/stable-diffusion-v1-5-controlnet-normal",
    torch_dtype=torch.float16
)
controlNet_depth = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth",
    torch_dtype=torch.float16
)
# Map the UI selection to the corresponding ControlNet model
controlNet_MAP = {"Normal": controlNet_normal, "Depth": controlNet_depth}
# vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)
# Generate an image from text, conditioned on a reference image via ControlNet
@spaces.GPU
def generate_txttoimg(prompt, control_image, controlnet):
    prompt += ", no background, side view, minimalist shot, single shoe, no legs, product photo"
    textpipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        controlnet=controlNet_MAP[controlnet],
        torch_dtype=torch.float16,
        safety_checker=None
    )
    textpipe.to("cuda")
    # Derive the conditioning image from the reference image
    if controlnet == "Normal":
        control_image = get_normal(control_image)
    elif controlnet == "Depth":
        control_image = get_depth(control_image)
    image = textpipe(prompt, image=control_image).images[0]
    # Strip the background from the generated image
    image2 = rembg.remove(image)
    return image2
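# Illustrative usage sketch (the filename and prompt below are assumptions,
# not part of the app's actual flow):
#
#   ref = Image.open("shoe.png").convert("RGB")
#   result = generate_txttoimg("a red leather sneaker", ref, "Depth")
#   result.save("generated_shoe.png")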
# Restyle an existing image with the SDXL refiner (image-to-image)
@spaces.GPU
def generate_imgtoimg(prompt, init_image, strength=0.5):
    prompt += ", no background, side view, minimalist shot, single shoe, no legs, product photo"
    imagepipe.to("cuda")
    image = imagepipe(prompt, image=init_image, strength=strength).images[0]
    image2 = rembg.remove(image)
    return image2
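# Illustrative usage sketch (hypothetical filenames; strength controls how far
# the refiner may drift from the input image, 0 = keep, 1 = ignore):
#
#   init = Image.open("sketch.png").convert("RGB")
#   out = generate_imgtoimg("a minimalist white running shoe", init, strength=0.6)
#   out.save("restyled_shoe.png")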
# Estimate a surface-normal map from an image via MiDaS depth + Sobel gradients
def get_normal(image):
    depth_estimator = pipeline("depth-estimation", model="Intel/dpt-hybrid-midas")
    image = depth_estimator(image)['predicted_depth'][0]
    image = image.numpy()
    image_depth = image.copy()
    # Normalize depth to [0, 1]
    image_depth -= np.min(image_depth)
    image_depth /= np.max(image_depth)
    bg_threshold = 0.4
    # Depth gradients give the x/y components of the normal; background is zeroed out
    x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    x[image_depth < bg_threshold] = 0
    y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
    y[image_depth < bg_threshold] = 0
    z = np.ones_like(x) * np.pi * 2.0
    # Stack, normalize to unit vectors, and map components from [-1, 1] to [0, 255]
    image = np.stack([x, y, z], axis=2)
    image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
    image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
    normalimage = Image.fromarray(image)
    return normalimage
# Estimate a depth map and replicate it to three channels for ControlNet input
def get_depth(image):
    depth_estimator = pipeline('depth-estimation')
    image = depth_estimator(image)['depth']
    image = np.array(image)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    depthimage = Image.fromarray(image)
    return depthimage
# def get_canny(image):
#     image = np.array(image)
#     low_threshold = 100
#     high_threshold = 200
#     image = cv2.Canny(image, low_threshold, high_threshold)
#     image = image[:, :, None]
#     image = np.concatenate([image, image, image], axis=2)
#     canny_image = Image.fromarray(image)
#     return canny_image
# Pass-through helper: returns the input image unchanged (e.g., to mirror an
# upload into another component)
def update_image(image):
    return image
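# A minimal sketch of how these functions could be wired into a Gradio UI.
# The layout and component names below are assumptions, not the app's actual
# interface:
#
#   with gr.Blocks() as demo:
#       prompt = gr.Textbox(label="Prompt")
#       ref = gr.Image(type="pil", label="Reference image")
#       mode = gr.Radio(["Normal", "Depth"], value="Depth", label="ControlNet")
#       out = gr.Image(label="Result")
#       btn = gr.Button("Generate")
#       # Validate the prompt first; generate only if no gr.Error was raised
#       btn.click(check_prompt, inputs=prompt).success(
#           generate_txttoimg, inputs=[prompt, ref, mode], outputs=out
#       )
#   demo.launch()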