File size: 3,210 Bytes
763126f
 
 
23c3d80
 
 
6a9c346
 
0459f53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23c3d80
754b60e
6a9c346
0459f53
23c3d80
d889842
23c3d80
0459f53
 
 
d889842
0459f53
 
11d79a9
0459f53
 
7f15638
0459f53
 
6a9c346
0459f53
6a9c346
0459f53
 
 
 
 
 
 
 
 
 
23c3d80
0459f53
e794576
0459f53
23c3d80
 
 
 
 
0459f53
 
 
 
d889842
 
0459f53
6a9c346
23c3d80
0459f53
 
 
763126f
9f0f9a3
23c3d80
d889842
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import torch
import gradio as gr
import numpy as np
from PIL import Image
from einops import rearrange
import requests
import spaces
from diffusers.utils import load_image
from diffusers import FluxControlNetPipeline, FluxControlNetModel
from gradio_imageslider import ImageSlider

# Identifiers of the pretrained base model and of the ControlNet attached to it
base_model = "black-forest-labs/FLUX.1-dev"
controlnet_model = "InstantX/FLUX.1-dev-Controlnet-Union"

# Build the ControlNet first, then hand it to the pipeline; both are loaded
# with bfloat16 weights and the assembled pipeline is moved to the GPU.
controlnet = FluxControlNetModel.from_pretrained(
    controlnet_model,
    torch_dtype=torch.bfloat16,
)
pipe = FluxControlNetPipeline.from_pretrained(
    base_model,
    controlnet=controlnet,
    torch_dtype=torch.bfloat16,
)
pipe.to("cuda")

# Control modes, keyed by the integer index that is passed to the pipeline as
# ``control_mode``. The position in the tuple determines each mode's index,
# so the order below must not be changed.
CONTROL_MODES = dict(enumerate((
    "Canny",
    "Tile",
    "Depth",
    "Blur",
    "Pose",
    "Gray (Low)",
    "LQ",
)))

def preprocess_image(image, target_width, target_height):
    """Return *image* resized to exactly ``target_width`` x ``target_height``.

    The target size is applied as given (no aspect-ratio handling is done
    here) and the LANCZOS resampling filter is used.

    Args:
        image: Image to resize; assumed to be a ``PIL.Image.Image`` (the UI
            supplies the control image with ``type="pil"``).
        target_width: Output width in pixels.
        target_height: Output height in pixels.

    Returns:
        The result of ``image.resize`` at the requested size.
    """
    target_size = (target_width, target_height)
    return image.resize(target_size, resample=Image.LANCZOS)

def _resolve_control_mode(control_mode):
    """Return the integer control-mode index for *control_mode*.

    Accepts either an index (int, float, or numeric string) or one of the
    display names stored in ``CONTROL_MODES`` (e.g. ``"Canny"``).

    Raises:
        gr.Error: If the value is neither a known index nor a known name.
    """
    try:
        index = int(control_mode)
    except (TypeError, ValueError):
        # Not numeric: fall back to a lookup by display name.
        for index, name in CONTROL_MODES.items():
            if name == control_mode:
                return index
        raise gr.Error(f"Unknown control mode: {control_mode!r}") from None
    if index not in CONTROL_MODES:
        raise gr.Error(f"Unknown control mode index: {index}")
    return index


@spaces.GPU(duration=120)
def generate_image(prompt, control_image, control_mode, controlnet_conditioning_scale, num_steps, guidance, width, height, seed, random_seed):
    """Generate one image from a text prompt and a control image.

    Args:
        prompt: Text prompt passed to the pipeline.
        control_image: Conditioning image; resized to the output size via
            ``preprocess_image`` before use. Must not be ``None``.
        control_mode: Control-mode index, or the matching display name from
            ``CONTROL_MODES``.
        controlnet_conditioning_scale: Forwarded to the pipeline unchanged.
        num_steps: Number of inference steps (converted to ``int``).
        guidance: Guidance scale forwarded to the pipeline unchanged.
        width: Requested output width; rounded down to a multiple of 16.
        height: Requested output height; rounded down to a multiple of 16.
        seed: Seed for the random generator; ignored when ``random_seed`` is
            truthy. ``None`` is treated like a request for a random seed.
        random_seed: When truthy, draw a fresh seed instead of using ``seed``.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no control image was supplied or the control mode is
            not recognised.
    """
    if control_image is None:
        # Without this guard the resize below fails with an opaque
        # AttributeError on None.
        raise gr.Error("Please provide a control image.")

    if random_seed or seed is None:
        # Draw from the full non-negative int32 range rather than only
        # 10,000 possible values.
        seed = np.random.randint(0, np.iinfo(np.int32).max)
    # UI number fields may deliver floats; Generator.manual_seed needs an int.
    seed = int(seed)

    # Ensure width and height are integer multiples of 16 (and never zero).
    width = max(16, 16 * (int(width) // 16))
    height = max(16, 16 * (int(height) // 16))

    control_mode_index = _resolve_control_mode(control_mode)

    # Preprocess control image so it matches the output resolution.
    control_image = preprocess_image(control_image, width, height)

    # A dedicated generator keeps runs reproducible per seed without
    # mutating the process-wide torch RNG state.
    generator = torch.Generator("cpu").manual_seed(seed)

    # Generate the image with the selected control mode and other parameters
    with torch.no_grad():
        image = pipe(
            prompt,
            control_image=control_image,
            control_mode=control_mode_index,
            width=width,
            height=height,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            num_inference_steps=int(num_steps),
            guidance_scale=guidance,
            generator=generator,
        ).images[0]

    return image

def _generate_comparison(prompt, control_image, control_mode, controlnet_conditioning_scale, num_steps, guidance, width, height, seed, random_seed):
    """Run ``generate_image`` and pair its result with the control image.

    The output component is an ``ImageSlider``, which displays two images
    side by side, so the uploaded control image is returned together with
    the generated one.

    Returns:
        A ``(control_image, generated_image)`` tuple.
    """
    generated = generate_image(
        prompt,
        control_image,
        control_mode,
        controlnet_conditioning_scale,
        num_steps,
        guidance,
        width,
        height,
        seed,
        random_seed,
    )
    return control_image, generated


# Define the Gradio interface
interface = gr.Interface(
    fn=_generate_comparison,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Image(type="pil", label="Control Image"),
        # Dropdown tuples are (displayed name, value passed to the function):
        # show the mode name, pass its integer index.
        gr.Dropdown(
            choices=[(name, index) for index, name in CONTROL_MODES.items()],
            label="Control Mode",
            value=0,
        ),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="ControlNet Conditioning Scale"),
        gr.Slider(step=1, minimum=1, maximum=64, value=24, label="Num Steps"),
        gr.Slider(minimum=0.1, maximum=10, value=3.5, label="Guidance"),
        gr.Slider(minimum=128, maximum=1024, step=128, value=512, label="Width"),
        gr.Slider(minimum=128, maximum=1024, step=128, value=512, label="Height"),
        # precision=0 makes the seed arrive as an integer instead of a float.
        gr.Number(value=42, precision=0, label="Seed"),
        gr.Checkbox(label="Random Seed"),
    ],
    outputs=ImageSlider(label="Generated Image"),
    title="FLUX.1 Controlnet with Multiple Modes",
    description="Generate images using ControlNet and a text prompt with adjustable control modes.",
)

if __name__ == "__main__":
    interface.launch()