Spaces:

dezzman
/

diffusion_models

Running

File size: 5,759 Bytes

import gradio as gr
import numpy as np
import random
import os

# import spaces #[uncomment to use ZeroGPU]
from diffusers import DiffusionPipeline, StableDiffusionPipeline
from peft import PeftModel, LoraConfig
import torch
from typing import Optional


def get_lora_sd_pipeline(
    ckpt_dir='./lora_logos',
    base_model_name_or_path=None,
    dtype=torch.float16,
    adapter_name="default"
):
    """Build a Stable Diffusion pipeline with LoRA adapters attached via PEFT.

    Args:
        ckpt_dir: Directory holding the adapter checkpoints. The ``unet``
            sub-directory is always loaded; a ``text_encoder`` sub-directory
            is loaded only when it exists.
        base_model_name_or_path: Model id or path handed to
            ``StableDiffusionPipeline.from_pretrained``. When ``None`` it is
            read from the text-encoder adapter config, if that directory
            exists.
        dtype: torch dtype used to load the base pipeline and, for half
            precision dtypes, to cast the adapter-wrapped modules.
        adapter_name: Name under which the adapters are registered in PEFT.

    Returns:
        The pipeline, whose ``unet`` (and ``text_encoder`` when an adapter
        for it exists) has been replaced by a ``PeftModel`` wrapper.

    Raises:
        ValueError: If no base model was given and none could be read from
            the text-encoder adapter config.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    has_text_encoder_adapter = os.path.exists(text_encoder_sub_dir)

    if has_text_encoder_adapter and base_model_name_or_path is None:
        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
        base_model_name_or_path = config.base_model_name_or_path

    if base_model_name_or_path is None:
        raise ValueError("Please specify the base model name or path")

    pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)

    if has_text_encoder_adapter:
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )

    if dtype in (torch.float16, torch.bfloat16):
        # Cast to the *requested* half-precision dtype. Using .half() here
        # would force float16 even when bfloat16 was asked for, leaving these
        # modules in a different dtype than the rest of the pipeline.
        pipe.unet.to(dtype)
        pipe.text_encoder.to(dtype)

    return pipe

# Base checkpoint the bundled LoRA adapters are applied to.
model_id_default = "CompVis/stable-diffusion-v1-4"

# Half precision on the GPU, full precision on the CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Default pipeline: base model plus the adapters stored under ./lora_logos,
# moved to the selected device once at import time.
# (A plain DiffusionPipeline.from_pretrained(model_id_default, ...) could be
# used here instead to run without the adapters.)
pipe_default = get_lora_sd_pipeline(
    base_model_name_or_path=model_id_default,
    ckpt_dir="./lora_logos",
    dtype=torch_dtype,
).to(device)

# Upper bounds used by the UI controls.
MAX_IMAGE_SIZE = 1024
MAX_SEED = np.iinfo(np.int32).max


# @spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    prompt: str,
    negative_prompt: str,
    width: int,
    height: int,
    num_inference_steps: Optional[int] = 20,
    model_id: Optional[str] = 'CompVis/stable-diffusion-v1-4',
    seed: Optional[int] = 42,
    guidance_scale: Optional[float] = 7.0,
    lora_scale: Optional[float] = 0.5,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for ``prompt`` and return it.

    Args:
        prompt: Text prompt.
        negative_prompt: Negative text prompt.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of denoising steps.
        model_id: Model to run. An empty value or the default model id
            selects the preloaded LoRA pipeline; any other id is loaded
            with ``DiffusionPipeline.from_pretrained`` for this call.
        seed: Seed for the random generator. ``None`` (e.g. an emptied
            number box) leaves the generation unseeded.
        guidance_scale: Classifier-free guidance scale.
        lora_scale: Strength of the LoRA adapters. Only applied to the
            preloaded LoRA pipeline; other models carry no adapters.
        progress: Gradio progress tracker; not used directly in the body
            (presumably present so Gradio can mirror tqdm progress).

    Returns:
        The first image of the pipeline output.
    """
    # The seed may arrive as a float or as None from the UI, while
    # manual_seed() only accepts an int.
    generator = None
    if seed is not None:
        generator = torch.Generator().manual_seed(int(seed))

    params = {
        'prompt': prompt,
        'negative_prompt': negative_prompt,
        'guidance_scale': guidance_scale,
        'num_inference_steps': num_inference_steps,
        'width': width,
        'height': height,
        'generator': generator,
    }

    if model_id and model_id != model_id_default:
        # A freshly loaded pipeline has no LoRA weights attached, so there is
        # nothing to fuse or scale here.
        pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
        pipe = pipe.to(device)
    else:
        pipe = pipe_default
        if lora_scale is not None:
            # Apply the requested LoRA strength for this call only. Fusing
            # with a hard-coded scale on every request ignored the UI value
            # and repeatedly modified the shared pipeline.
            params['cross_attention_kwargs'] = {'scale': float(lora_scale)}

    return pipe(**params).images[0]

# Page-level CSS: centers the main column and caps its width at 640px.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

# Declarative UI definition. Components appear on the page in the order they
# are created below.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        
        gr.Markdown(" # DEMO Text-to-Image")

        # Model selector; pre-filled with the default base model id.
        with gr.Row():
            model_id = gr.Textbox(
                label="Model ID",
                max_lines=1,
                placeholder="Enter model id like 'CompVis/stable-diffusion-v1-4'",
                value="CompVis/stable-diffusion-v1-4"
            )

        prompt = gr.Textbox(
            label="Prompt",
            max_lines=1,
            placeholder="Enter your prompt",
        )

        negative_prompt = gr.Textbox(
            label="Negative prompt",
            max_lines=1,
            placeholder="Enter a negative prompt",
        )

        # Seed is limited to the non-negative int32 range (0..MAX_SEED).
        with gr.Row():
            seed = gr.Number(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )

        with gr.Row():
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.0,
            )

        with gr.Row():
            lora_scale = gr.Slider(
                label="LoRA scale",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.5,
            )

        with gr.Row():
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=20,
            )

        # Image size controls, collapsed by default; both default to the
        # maximum allowed size.
        with gr.Accordion("Optional Settings", open=False):
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
            
            with gr.Row():
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

        run_button = gr.Button("Run", scale=1, variant="primary")
        result = gr.Image(label="Result", show_label=False)
    
    # Run inference when the button is clicked or the prompt is submitted.
    # The inputs list is positional: its order must match the parameter order
    # of infer().
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            width,
            height,
            num_inference_steps,
            model_id,
            seed,
            guidance_scale,
            lora_scale,
        ],
        outputs=[result],
    )

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()