File size: 3,129 Bytes
a1f69bb d8aa11d a1f69bb d8aa11d a1f69bb d8aa11d a1f69bb d8aa11d a1f69bb d8aa11d a1f69bb d8aa11d a1f69bb d8aa11d c0a0649 d8aa11d a1f69bb d8aa11d a1f69bb d8aa11d a1f69bb d8aa11d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import spaces
import rembg
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoencoderKL
import cv2
from transformers import pipeline
import numpy as np
from PIL import Image
import gradio as gr
# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe.to("cuda")
def check_prompt(prompt):
if prompt is None:
raise gr.Error("Please enter a prompt!")
controlNet_normal = ControlNetModel.from_pretrained(
"fusing/stable-diffusion-v1-5-controlnet-normal",
torch_dtype=torch.float16
)
controlNet_depth = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-depth",
torch_dtype=torch.float16
)
controlNet_MAP = {"Normal": controlNet_normal, "Depth": controlNet_depth}
# vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)
# Function to generate an image from text using diffusion
@spaces.GPU
def generate_image(prompt, control_image, controlnet):
prompt += "no background, side view, minimalist shot, single shoe, no legs, product photo"
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
controlnet=controlNet_MAP[controlnet],
torch_dtype=torch.float16,
safety_checker = None
)
pipe.to("cuda")
if controlnet == "Normal":
control_image = get_normal(control_image)
elif controlnet == "Depth":
control_image = get_depth(control_image)
image = pipe(prompt, image=control_image).images[0]
image2 = rembg.remove(image)
return image2
def get_normal(image):
depth_estimator = pipeline("depth-estimation", model ="Intel/dpt-hybrid-midas" )
image = depth_estimator(image)['predicted_depth'][0]
image = image.numpy()
image_depth = image.copy()
image_depth -= np.min(image_depth)
image_depth /= np.max(image_depth)
bg_threhold = 0.4
x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
x[image_depth < bg_threhold] = 0
y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
y[image_depth < bg_threhold] = 0
z = np.ones_like(x) * np.pi * 2.0
image = np.stack([x, y, z], axis=2)
image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
normalimage = Image.fromarray(image)
return normalimage
def get_depth(image):
depth_estimator = pipeline('depth-estimation')
image = depth_estimator(image)['depth']
image = np.array(image)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
depthimage = Image.fromarray(image)
return depthimage
# def get_canny(image):
# image = np.array(image)
# low_threshold = 100
# high_threshold = 200
# image = cv2.Canny(image,low_threshold,high_threshold)
# image = image[:,:,None]
# image = np.concatenate([image, image, image], axis=2)
# canny_image = Image.fromarray(image)
# return canny_image |