bilegentile's picture
Upload folder using huggingface_hub
c19ca42 verified
from enum import Enum
from typing import Any, List, NamedTuple
from functools import lru_cache
class UnetBlockType(Enum):
    """Section of the unet a block belongs to: input, output or middle."""

    # Declaration order is the iteration order of the enum.
    INPUT = "input"
    OUTPUT = "output"
    MIDDLE = "middle"
class TransformerID(NamedTuple):
    """Locates one cross attention transformer inside the unet.

    Fields, in positional order:
        block_type: Which section of the unet the block is in.
        block_id: The id of the block the transformer is in. Not all blocks
            have cross attn.
        block_index: The index of the transformer within the block. A block
            can have multiple transformers in SDXL.
        transformer_index: The call index of the transformer within a single
            step of diffusion.
    """

    block_type: UnetBlockType
    block_id: int
    block_index: int
    transformer_index: int
class TransformerIDResult(NamedTuple):
    """Transformer ids of a unet, grouped by the section they belong to."""

    input_ids: List[TransformerID]
    output_ids: List[TransformerID]
    middle_ids: List[TransformerID]

    def get(self, idx: int) -> TransformerID:
        """Return the entry at position ``idx`` of the ordered id list.

        The ordering is the one produced by :meth:`to_list`, so ``idx``
        follows normal list indexing rules (negative values allowed,
        ``IndexError`` when out of range).
        """
        ordered = self.to_list()
        return ordered[idx]

    def to_list(self) -> List[TransformerID]:
        """Return all ids as one new list ordered by ``transformer_index``.

        Ids are gathered input first, then output, then middle; the sort is
        stable, so ids sharing a ``transformer_index`` keep that relative
        order.
        """
        combined = [*self.input_ids, *self.output_ids, *self.middle_ids]
        combined.sort(key=lambda tid: tid.transformer_index)
        return combined
class StableDiffusionVersion(Enum):
    """The version family of stable diffusion model."""

    UNKNOWN = 0
    SD1x = 1
    SD2x = 2
    SDXL = 3

    @staticmethod
    def detect_from_model_name(model_name: str) -> "StableDiffusionVersion":
        """Guess the stable diffusion version from a model name.

        The name is matched case-insensitively against known substrings, in
        this order: ``sd14``/``sd15``/``sd16`` (SD1x), ``sd21`` or ``2.1``
        (SD2x), ``xl`` (SDXL). The guess might not be accurate without
        actually inspecting the file content.

        Args:
            model_name: File or display name of the model.

        Returns:
            The first matching version, or ``UNKNOWN`` when nothing matches.
        """
        # Lower-case once so that every check is case-insensitive.
        name = model_name.lower()
        if any(f"sd{v}" in name for v in ("14", "15", "16")):
            return StableDiffusionVersion.SD1x
        if "sd21" in name or "2.1" in name:
            return StableDiffusionVersion.SD2x
        if "xl" in name:
            return StableDiffusionVersion.SDXL
        return StableDiffusionVersion.UNKNOWN

    def encoder_block_num(self) -> int:
        """Return the encoder block count: 9 for SDXL, 12 for everything else."""
        if self in (
            StableDiffusionVersion.SD1x,
            StableDiffusionVersion.SD2x,
            StableDiffusionVersion.UNKNOWN,
        ):
            return 12
        else:
            return 9  # SDXL

    def controlnet_layer_num(self) -> int:
        """Return the controlnet layer count: one more than the encoder blocks."""
        return self.encoder_block_num() + 1

    @property
    def transformer_block_num(self) -> int:
        """Number of blocks that have cross attn transformers in unet."""
        if self in (
            StableDiffusionVersion.SD1x,
            StableDiffusionVersion.SD2x,
            StableDiffusionVersion.UNKNOWN,
        ):
            return 16
        else:
            return 11  # SDXL

    @property
    @lru_cache(maxsize=None)
    def transformer_ids(self) -> "TransformerIDResult":
        """Ids of the transformers in blocks that have cross attention.

        ``transformer_index`` is assigned per block, counting input blocks
        first, then the middle block, then output blocks, so it ranges over
        ``range(self.transformer_block_num)``. In SDXL a block holds several
        transformers; they share the block's ``transformer_index`` and are
        told apart by ``block_index``.

        The result is cached per enum member; callers must treat the returned
        lists as read-only.

        Returns:
            The ids grouped into input, output and middle lists.
        """
        if self in (
            StableDiffusionVersion.SD1x,
            StableDiffusionVersion.SD2x,
            StableDiffusionVersion.UNKNOWN,
        ):
            transformer_index = 0
            input_ids = []
            for block_id in [1, 2, 4, 5, 7, 8]:
                input_ids.append(
                    TransformerID(UnetBlockType.INPUT, block_id, 0, transformer_index)
                )
                transformer_index += 1

            middle_id = TransformerID(UnetBlockType.MIDDLE, 0, 0, transformer_index)
            transformer_index += 1

            output_ids = []
            for block_id in [3, 4, 5, 6, 7, 8, 9, 10, 11]:
                # Output blocks belong in output_ids (they used to be
                # appended to input_ids, leaving output_ids empty).
                output_ids.append(
                    TransformerID(UnetBlockType.OUTPUT, block_id, 0, transformer_index)
                )
                transformer_index += 1
            return TransformerIDResult(input_ids, output_ids, [middle_id])
        else:
            # SDXL
            transformer_index = 0
            input_ids = []
            for block_id in [4, 5, 7, 8]:
                block_indices = (
                    range(2) if block_id in [4, 5] else range(10)
                )  # transformer_depth
                for index in block_indices:
                    input_ids.append(
                        TransformerID(
                            UnetBlockType.INPUT, block_id, index, transformer_index
                        )
                    )
                # One index per block: 4 input + 1 middle + 6 output = 11,
                # which is transformer_block_num for SDXL.
                transformer_index += 1

            middle_ids = [
                TransformerID(UnetBlockType.MIDDLE, 0, index, transformer_index)
                for index in range(10)
            ]
            transformer_index += 1

            output_ids = []
            for block_id in range(6):
                block_indices = (
                    range(2) if block_id in [3, 4, 5] else range(10)
                )  # transformer_depth
                for index in block_indices:
                    output_ids.append(
                        TransformerID(
                            UnetBlockType.OUTPUT, block_id, index, transformer_index
                        )
                    )
                transformer_index += 1
            return TransformerIDResult(input_ids, output_ids, middle_ids)

    def is_compatible_with(self, other: "StableDiffusionVersion") -> bool:
        """Incompatible only when one of the versions is SDXL and the other is not.

        ``UNKNOWN`` is treated as compatible with every version.
        """
        return (
            any(v == StableDiffusionVersion.UNKNOWN for v in [self, other])
            or sum(v == StableDiffusionVersion.SDXL for v in [self, other]) != 1
        )
class ControlModelType(Enum):
    """Every kind of control model the code knows about, supported or not.

    Each value is a string made of the model kind followed by an author name.
    """

    ControlNet = "ControlNet, Lvmin Zhang"
    T2I_Adapter = "T2I_Adapter, Chong Mou"
    T2I_StyleAdapter = "T2I_StyleAdapter, Chong Mou"
    T2I_CoAdapter = "T2I_CoAdapter, Chong Mou"
    MasaCtrl = "MasaCtrl, Mingdeng Cao"
    GLIGEN = "GLIGEN, Yuheng Li"
    # A simple attention injection written by Lvmin
    AttentionInjection = "AttentionInjection, Lvmin Zhang"
    StableSR = "StableSR, Jianyi Wang"
    PromptDiffusion = "PromptDiffusion, Zhendong Wang"
    ControlLoRA = "ControlLoRA, Wu Hecong"
    ReVision = "ReVision, Stability"
    IPAdapter = "IPAdapter, Hu Ye"
    Controlllite = "Controlllite, Kohya"
    InstantID = "InstantID, Qixun Wang"
    SparseCtrl = "SparseCtrl, Yuwei Guo"

    def is_controlnet(self) -> bool:
        """Tell whether this control model is to be handled like ControlNet.

        True for ControlNet, ControlLoRA and InstantID; False otherwise.
        """
        controlnet_like = (
            ControlModelType.ControlNet,
            ControlModelType.ControlLoRA,
            ControlModelType.InstantID,
        )
        return any(self is kind for kind in controlnet_like)

    def allow_context_sharing(self) -> bool:
        """Tell whether one PlugableControlModel object may be mapped to
        several ControlNetUnit.

        IPAdapter and Controlllite keep unit specific input (clip/image) on
        the model object during inference, so sharing the context would
        overwrite the input that was set earlier. Every other type may be
        shared.
        """
        if self is ControlModelType.IPAdapter:
            return False
        if self is ControlModelType.Controlllite:
            return False
        return True
# Written by Lvmin
class AutoMachine(Enum):
    """States of Lvmin's Attention/AdaIn AutoMachine algorithm."""

    # Each value is the same text as the member name.
    Read = "Read"
    Write = "Write"
    StyleAlign = "StyleAlign"
class HiResFixOption(Enum):
    """Hi-res fix option; each value is the option's human-readable label."""

    BOTH = "Both"
    LOW_RES_ONLY = "Low res only"
    HIGH_RES_ONLY = "High res only"

    @staticmethod
    def from_value(value: Any) -> "HiResFixOption":
        """Convert a loosely typed value into a ``HiResFixOption``.

        Accepted inputs:
            * a ``HiResFixOption`` member, returned unchanged;
            * a string of the form ``"HiResFixOption.<MEMBER_NAME>"``;
            * a string equal to a member's value, e.g. ``"Low res only"``;
            * an int used as an index into the members in declaration order.

        Raises:
            ValueError: If a string names no member or matches no value.
            IndexError: If an int index is out of range.
            TypeError: If ``value`` is of an unsupported type.
        """
        if isinstance(value, HiResFixOption):
            return value

        if isinstance(value, str):
            prefix = "HiResFixOption."
            if value.startswith(prefix):
                # Look the name up among real members only, so that strings
                # such as "HiResFixOption.from_value" cannot resolve to a
                # method or other non-member attribute.
                name = value[len(prefix):]
                member = HiResFixOption.__members__.get(name)
                if member is None:
                    raise ValueError(f"{value!r} is not a valid HiResFixOption")
                return member
            return HiResFixOption(value)

        if isinstance(value, int):
            return list(HiResFixOption)[value]

        # Explicit raise rather than assert, so the check survives `-O`.
        raise TypeError(
            f"Cannot convert {type(value).__name__} to HiResFixOption"
        )
class InputMode(Enum):
    """How input images are supplied to a ControlNet unit."""

    # A single image goes to a single ControlNet unit.
    SIMPLE = "simple"
    # The input is a directory and there are N generations; each generation
    # takes 1 input image from the directory.
    BATCH = "batch"
    # The input is a directory and there is 1 generation; that generation
    # takes N input images from the directory.
    MERGE = "merge"