File size: 4,114 Bytes
74a2a96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
import numpy as np
import random
import os
from PIL import Image
import spaces
import torch
from transformers import pipeline
from diffusers import StableDiffusionDepth2ImgPipeline


# ---- Model setup (runs once at import time) ----
# Compute the device decision once; previously the cuda check was duplicated
# for each pipeline, which risked the two drifting out of sync.
_use_cuda = torch.cuda.is_available()

# Depth Anything V2 estimator: produces the depth map fed into depth2img.
model_id_depth = "depth-anything/Depth-Anything-V2-Large-hf"
if _use_cuda:
    pipe_depth = pipeline(task="depth-estimation", model=model_id_depth, device="cuda")
else:
    pipe_depth = pipeline(task="depth-estimation", model=model_id_depth)

# Stable Diffusion 2 depth-conditioned img2img pipeline.
# fp16 only on GPU: half precision on CPU is slow and unsupported for many ops.
model_id_depth2image = "stabilityai/stable-diffusion-2-depth"
if _use_cuda:
    pipe_depth2image = StableDiffusionDepth2ImgPipeline.from_pretrained(
        model_id_depth2image, torch_dtype=torch.float16
    ).to("cuda")
else:
    pipe_depth2image = StableDiffusionDepth2ImgPipeline.from_pretrained(model_id_depth2image)

max_seed = np.iinfo(np.int32).max  # upper bound for the seed slider (2**31 - 1)
max_image_size = 1344              # max width/height offered by the UI sliders

# Example images for the gallery; sorted for a deterministic order across platforms.
example_files = [
    os.path.join('assets/examples', filename)
    for filename in sorted(os.listdir('assets/examples'))
]


@spaces.GPU
def infer(
        init_image,
        prompt,
        negative_prompt,
        seed,
        randomize_seed,
        width,
        height,
        guidance_scale,
        num_inference_steps):
    """Generate an image conditioned on the depth map of ``init_image``.

    Pipeline: estimate a depth map with Depth Anything V2, then run the
    Stable Diffusion 2 depth2img pipeline with that map and the prompts.

    Args:
        init_image: Input image as a numpy array (Gradio ``type='numpy'``).
        prompt: Text prompt guiding generation.
        negative_prompt: Text describing what to avoid.
        seed: RNG seed; ignored when ``randomize_seed`` is True.
        randomize_seed: If True, draw a fresh seed in [0, max_seed].
        width, height: Output resolution requested from the pipeline.
        guidance_scale: Classifier-free guidance strength.
        num_inference_steps: Number of denoising steps.

    Returns:
        Tuple of (generated PIL image, seed actually used) — the seed is
        returned so the UI slider reflects a randomized value.

    Raises:
        gr.Error: If no input image was provided.
    """
    if init_image is None:
        # Without this guard, np.uint8(None) raises an opaque TypeError
        # when the user clicks Run before uploading an image.
        raise gr.Error("Please provide an input image.")
    if randomize_seed:
        seed = random.randint(0, max_seed)
    init_image = Image.fromarray(np.uint8(init_image))
    # Estimate the depth map that conditions the diffusion pipeline.
    predicted_depth = pipe_depth(init_image)["predicted_depth"]
    # Generate the final image; the explicit generator makes runs
    # reproducible for a given seed.
    image = pipe_depth2image(
        prompt=prompt,
        image=init_image,
        depth_map=predicted_depth,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        height=height,
        width=width,
        generator=torch.Generator().manual_seed(seed)
    ).images[0]
    return image, seed


# ---- UI definition ----
with gr.Blocks() as demo:
    gr.Markdown("# Demo [Depth2Image](https://huggingface.co/stabilityai/stable-diffusion-2-depth) with depth map estimated by [Depth Anything V2](https://huggingface.co/depth-anything/Depth-Anything-V2-Large-hf).")
    prompt = gr.Text(
        label="Prompt",
        show_label=False,
        max_lines=1,
        placeholder="Enter your prompt",
        container=False,
    )
    with gr.Row():
        # type='numpy' so infer() receives a plain array it converts via PIL.
        init_image = gr.Image(label="Input Image", type='numpy')
        result = gr.Image(label="Result", show_label=False)
    run_button = gr.Button("Run", scale=0)
    with gr.Accordion("Advanced Settings", open=False):
        negative_prompt = gr.Text(
            label="Negative Prompt",
            max_lines=1,
            placeholder="Enter a negative prompt",
        )
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=max_seed,
            step=1,
            value=0,
        )
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        with gr.Row():
            width = gr.Slider(
                label="Width",
                minimum=256,
                maximum=max_image_size,
                step=64,
                value=1024,
            )
            height = gr.Slider(
                label="Height",
                minimum=256,
                maximum=max_image_size,
                step=64,
                value=1024,
            )
        with gr.Row():
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.5,
            )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=5,
            )
    # Run on button click or when either text box is submitted.
    gr.on(
        triggers=[run_button.click, prompt.submit, negative_prompt.submit],
        fn=infer,
        inputs=[init_image, prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result, seed]
    )
    # BUGFIX: the original passed outputs=[depth_image_slider, gray_depth_file]
    # and fn=on_submit — none of those names exist in this file (copy-paste
    # leftover from another demo), so the app crashed with NameError at import.
    # Examples here only pre-fill the input image; the user then clicks Run.
    examples = gr.Examples(
        examples=example_files, inputs=[init_image]
    )


demo.queue().launch()