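# Scraped Hugging Face file-view header (not part of the program):
#   author: lixiang46
#   commit: 595a73a - "fix bug and change seed"
#   page links: raw / history / blame / contribute / delete
#   size: 13.3 kB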
import spaces
import random
import torch
import cv2
import gradio as gr
import numpy as np
from huggingface_hub import snapshot_download
from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
from diffusers.utils import load_image
from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from kolors.models.controlnet import ControlNetModel
from diffusers import AutoencoderKL
from kolors.models.unet_2d_condition import UNet2DConditionModel
from diffusers import EulerDiscreteScheduler
from PIL import Image
from annotator.midas import MidasDetector
from annotator.dwpose import DWposeDetector
from annotator.util import resize_image, HWC3
device = "cuda"


def _to_half_on_device(module):
    """Cast *module* to half precision and move it onto ``device``."""
    return module.half().to(device)


# Fetch the base Kolors checkpoint and the three ControlNet checkpoints from
# the Hugging Face Hub; the returned values are used as directory paths below.
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
ckpt_dir_pose = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Pose")

# Components shared by every pipeline built later in this file.
text_encoder = _to_half_on_device(
    ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16)
)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = _to_half_on_device(
    AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None)
)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = _to_half_on_device(
    UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None)
)

# One ControlNet per conditioning type (depth / canny / pose).
controlnet_depth = _to_half_on_device(
    ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None)
)
controlnet_canny = _to_half_on_device(
    ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None)
)
controlnet_pose = _to_half_on_device(
    ControlNetModel.from_pretrained(f"{ckpt_dir_pose}", revision=None)
)
def _make_img2img_pipeline(controlnet):
    """Build an img2img ControlNet pipeline around *controlnet*.

    Every pipeline reuses the module-level ``vae``, ``text_encoder``,
    ``tokenizer``, ``unet`` and ``scheduler`` objects; only the ControlNet
    differs between them.
    """
    return StableDiffusionXLControlNetImg2ImgPipeline(
        vae=vae,
        controlnet=controlnet,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        unet=unet,
        scheduler=scheduler,
        force_zeros_for_empty_prompt=False,
    )


pipe_depth = _make_img2img_pipeline(controlnet_depth)
pipe_canny = _make_img2img_pipeline(controlnet_canny)
pipe_pose = _make_img2img_pipeline(controlnet_pose)
@spaces.GPU
def process_canny_condition(image, canny_threods=(100, 200)):
    """Turn an image into a 3-channel Canny edge map for ControlNet.

    Args:
        image: Array handed to ``cv2.Canny`` (assumed to be an 8-bit image
            array -- TODO confirm against callers). It is copied first, so
            the caller's array is left untouched.
        canny_threods: Two-item sequence whose first and second entries are
            passed to ``cv2.Canny`` as its two threshold arguments. The
            default is an immutable tuple so it cannot be altered between
            calls.

    Returns:
        A ``PIL.Image`` built from the edge map replicated over 3 channels.
    """
    np_image = image.copy()
    np_image = cv2.Canny(np_image, canny_threods[0], canny_threods[1])
    # cv2.Canny output is indexed as 2-D here; add a channel axis and repeat
    # it three times so the result has three identical channels.
    np_image = np_image[:, :, None]
    np_image = np.concatenate([np_image, np_image, np_image], axis=2)
    np_image = HWC3(np_image)
    return Image.fromarray(np_image)
model_midas = MidasDetector()


@spaces.GPU
def process_depth_condition_midas(img, res = 1024):
    """Produce the depth condition image for *img* with ``model_midas``.

    Args:
        img: Array with a 3-item ``shape`` (unpacked as height, width, and
            a third value that is ignored).
        res: Resolution argument forwarded to ``resize_image`` before the
            detector runs.

    Returns:
        A ``PIL.Image`` of the detector output, resized back to the input's
        original width and height.
    """
    orig_h, orig_w, _ = img.shape
    resized = resize_image(HWC3(img), res)
    detected = HWC3(model_midas(resized))
    # cv2.resize takes the target size as (width, height).
    restored = cv2.resize(detected, (orig_w, orig_h))
    return Image.fromarray(restored)
model_dwpose = DWposeDetector()


@spaces.GPU
def process_dwpose_condition(image, res=1024):
    """Produce the pose condition image for *image* with ``model_dwpose``.

    Args:
        image: Array with a 3-item ``shape`` (unpacked as height, width, and
            a third value that is ignored).
        res: Resolution argument forwarded to ``resize_image`` before the
            detector runs.

    Returns:
        A ``PIL.Image`` of the detector's rendered output, resized back to
        the input's original width and height.
    """
    h, w, _ = image.shape
    img = resize_image(HWC3(image), res)
    # Run the detector on the normalised/resized copy. Previously the raw
    # `image` was passed, which discarded `img` and left `res` without any
    # effect (unlike the depth preprocessing in this file).
    _, out_img = model_dwpose(img)
    result = HWC3(out_img)
    # cv2.resize takes the target size as (width, height).
    result = cv2.resize(result, (w, h))
    return Image.fromarray(result)
# Upper bound for seeds: the largest value representable as a 32-bit signed
# integer. Used both for random seed draws and as the seed slider maximum.
MAX_SEED = np.iinfo(np.int32).max
# Value passed to resize_image() for input images and to the depth/pose
# preprocessors.
MAX_IMAGE_SIZE = 1024
@spaces.GPU
def infer_depth(prompt,
                image = None,
                negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
                seed = 397886929,
                randomize_seed = False,
                guidance_scale = 6.0,
                num_inference_steps = 50,
                controlnet_conditioning_scale = 0.7,
                control_guidance_end = 0.9,
                strength = 1.0
                ):
    """Run the depth-conditioned img2img pipeline.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        image: Input image; required despite the ``None`` default.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the torch generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        controlnet_conditioning_scale: Forwarded to the pipeline.
        control_guidance_end: Forwarded to the pipeline.
        strength: Forwarded to the pipeline.

    Returns:
        ``([condition_image, generated_image], seed)`` where ``seed`` is the
        seed actually used.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # resize_image() below needs a real image; report the problem in the
        # UI instead of failing with an opaque error.
        raise gr.Error("Please provide an input image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed() expects an integer; cast defensively in case the caller
    # delivers the seed as a float.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    init_image = resize_image(image, MAX_IMAGE_SIZE)
    pipe = pipe_depth.to("cuda")
    condi_img = process_depth_condition_midas(np.array(init_image), MAX_IMAGE_SIZE)
    image = pipe(
        prompt=prompt,
        image=init_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        control_guidance_end=control_guidance_end,
        strength=strength,
        control_image=condi_img,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    ).images[0]
    return [condi_img, image], seed
@spaces.GPU
def infer_canny(prompt,
                image = None,
                negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
                seed = 397886929,
                randomize_seed = False,
                guidance_scale = 6.0,
                num_inference_steps = 50,
                controlnet_conditioning_scale = 0.7,
                control_guidance_end = 0.9,
                strength = 1.0
                ):
    """Run the Canny-edge-conditioned img2img pipeline.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        image: Input image; required despite the ``None`` default.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the torch generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        controlnet_conditioning_scale: Forwarded to the pipeline.
        control_guidance_end: Forwarded to the pipeline.
        strength: Forwarded to the pipeline.

    Returns:
        ``([condition_image, generated_image], seed)`` where ``seed`` is the
        seed actually used.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # resize_image() below needs a real image; report the problem in the
        # UI instead of failing with an opaque error.
        raise gr.Error("Please provide an input image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed() expects an integer; cast defensively in case the caller
    # delivers the seed as a float.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    init_image = resize_image(image, MAX_IMAGE_SIZE)
    pipe = pipe_canny.to("cuda")
    condi_img = process_canny_condition(np.array(init_image))
    image = pipe(
        prompt=prompt,
        image=init_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        control_guidance_end=control_guidance_end,
        strength=strength,
        control_image=condi_img,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    ).images[0]
    return [condi_img, image], seed
@spaces.GPU
def infer_pose(prompt,
               image = None,
               negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
               seed = 66,
               randomize_seed = False,
               guidance_scale = 6.0,
               num_inference_steps = 50,
               controlnet_conditioning_scale = 0.7,
               control_guidance_end = 0.9,
               strength = 1.0
               ):
    """Run the pose-conditioned img2img pipeline.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        image: Input image; required despite the ``None`` default.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the torch generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        controlnet_conditioning_scale: Forwarded to the pipeline.
        control_guidance_end: Forwarded to the pipeline.
        strength: Forwarded to the pipeline.

    Returns:
        ``([condition_image, generated_image], seed)`` where ``seed`` is the
        seed actually used.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # resize_image() below needs a real image; report the problem in the
        # UI instead of failing with an opaque error.
        raise gr.Error("Please provide an input image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed() expects an integer; cast defensively in case the caller
    # delivers the seed as a float.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    init_image = resize_image(image, MAX_IMAGE_SIZE)
    pipe = pipe_pose.to("cuda")
    condi_img = process_dwpose_condition(np.array(init_image), MAX_IMAGE_SIZE)
    image = pipe(
        prompt=prompt,
        image=init_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        control_guidance_end=control_guidance_end,
        strength=strength,
        control_image=condi_img,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    ).images[0]
    return [condi_img, image], seed
# Example rows for the gr.Examples widgets. Each row is
# [prompt text, path to the input image], matching inputs=[prompt, image].

# Examples wired to infer_canny.
canny_examples = [
    ["一个漂亮的女孩,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
    "image/woman_1.png"],
    ["全景,一只可爱的白色小狗坐在杯子里,看向镜头,动漫风格,3d渲染,辛烷值渲染",
    "image/dog.png"]
]
# Examples wired to infer_depth.
depth_examples = [
    ["新海诚风格,丰富的色彩,穿着绿色衬衫的女人站在田野里,唯美风景,清新明亮,斑驳的光影,最好的质量,超细节,8K画质",
    "image/woman_2.png"],
    ["一只颜色鲜艳的小鸟,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
    "image/bird.png"]
]
# Examples wired to infer_pose.
pose_examples = [
    ["一位穿着紫色泡泡袖连衣裙、戴着皇冠和白色蕾丝手套的女孩双手托脸,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
    "image/woman_3.png"],
    ["一个穿着黑色运动外套、白色内搭,上面戴着项链的女子,站在街边,背景是红色建筑和绿树,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
    "image/woman_4.png"]
]
# Stylesheet passed to gr.Blocks(css=...). The selectors target the elem_id
# values used in the layout: "col-left", "col-right" and "button".
# NOTE(review): the same elem_id "button" is given to three buttons in the
# layout, so the "#button" rule is meant to apply to all of them.
css="""
#col-left {
margin: 0 auto;
max-width: 600px;
}
#col-right {
margin: 0 auto;
max-width: 750px;
}
#button {
color: blue;
}
"""
def load_description(fp):
    """Return the full text of the UTF-8 encoded file at path *fp*."""
    with open(fp, mode='r', encoding='utf-8') as handle:
        return handle.read()
# Gradio UI: a left column with the inputs and action buttons, a right column
# with the results, three example tables, and the button -> infer_* wiring.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened -- confirm the layout against the live app.
with gr.Blocks(css=css) as Kolors:
    # Page header is loaded from a file and injected as HTML.
    gr.HTML(load_description("assets/title.md"))
    with gr.Row():
        with gr.Column(elem_id="col-left"):
            with gr.Row():
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your prompt",
                    lines=2
                )
            with gr.Row():
                # type="pil": the infer_* callbacks receive a PIL image.
                image = gr.Image(label="Image", type="pil")
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    placeholder="Enter a negative prompt",
                    visible=True,
                    value="nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯"
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                # Checked by default, so the slider seed is ignored unless
                # the user unticks this box.
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.0,
                        maximum=10.0,
                        step=0.1,
                        value=6.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=10,
                        maximum=50,
                        step=1,
                        value=30,
                    )
                with gr.Row():
                    controlnet_conditioning_scale = gr.Slider(
                        label="Controlnet Conditioning Scale",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=0.7,
                    )
                    control_guidance_end = gr.Slider(
                        label="Control Guidance End",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=0.9,
                    )
                with gr.Row():
                    strength = gr.Slider(
                        label="Strength",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=1.0,
                    )
            # One button per conditioning type; each triggers its own infer_*.
            with gr.Row():
                canny_button = gr.Button("Canny", elem_id="button")
                depth_button = gr.Button("Depth", elem_id="button")
                pose_button = gr.Button("Pose", elem_id="button")
        with gr.Column(elem_id="col-right"):
            # Receives [condition image, generated image] from infer_*.
            result = gr.Gallery(label="Result", show_label=False, columns=2)
            seed_used = gr.Number(label="Seed Used")
    # The example tables supply only prompt and image, so every other
    # infer_* parameter falls back to that function's default value.
    with gr.Row():
        gr.Examples(
            fn = infer_canny,
            examples = canny_examples,
            inputs = [prompt, image],
            outputs = [result, seed_used],
            label = "Canny"
        )
    with gr.Row():
        gr.Examples(
            fn = infer_depth,
            examples = depth_examples,
            inputs = [prompt, image],
            outputs = [result, seed_used],
            label = "Depth"
        )
    with gr.Row():
        gr.Examples(
            fn = infer_pose,
            examples = pose_examples,
            inputs = [prompt, image],
            outputs = [result, seed_used],
            label = "Pose"
        )
    # Button wiring: the input list order matches the positional parameter
    # order of the infer_* functions.
    canny_button.click(
        fn = infer_canny,
        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
        outputs = [result, seed_used]
    )
    depth_button.click(
        fn = infer_depth,
        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
        outputs = [result, seed_used]
    )
    pose_button.click(
        fn = infer_pose,
        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
        outputs = [result, seed_used]
    )
# Enable the request queue and start the app with debug output on.
Kolors.queue().launch(debug=True)