Update pipline_StableDiffusionXL_ConsistentID.py
pipline_StableDiffusionXL_ConsistentID.py CHANGED
@@ -42,17 +42,37 @@ PipelineImageInput = Union[
 
 
 class ConsistentIDStableDiffusionXLPipeline(StableDiffusionXLPipeline):
+
+    def cuda(self, dtype=torch.float16, use_xformers=False):
+        self.to('cuda', dtype)
+
+        # if hasattr(self, 'image_proj_model'):
+        #     self.image_proj_model.to(self.unet.device).to(self.unet.dtype)
+
+        if use_xformers:
+            if is_xformers_available():
+                import xformers
+                from packaging import version
+
+                xformers_version = version.parse(xformers.__version__)
+                if xformers_version == version.parse("0.0.16"):
+                    logger.warn(
+                        "xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
+                    )
+                self.enable_xformers_memory_efficient_attention()
+            else:
+                raise ValueError("xformers is not available. Make sure it is installed correctly")
 
     @validate_hf_hub_args
     def load_ConsistentID_model(
         self,
         pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        bise_net,
         weight_name: str,
         subfolder: str = '',
         trigger_word_ID: str = '<|image|>',
         trigger_word_facial: str = '<|facial|>',
         image_encoder_path: str = 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K', # Import CLIP pretrained model
-        bise_net_cp: str = 'JackAILab/ConsistentID/face_parsing.pth',
         torch_dtype = torch.float16,
         num_tokens = 4,
         lora_rank= 128,
@@ -75,10 +95,11 @@ class ConsistentIDStableDiffusionXLPipeline(StableDiffusionXLPipeline):
         self.app.prepare(ctx_id=0, det_size=(512, 512)) ### (640, 640)
 
         ### BiSeNet
-        self.bise_net = BiSeNet(n_classes = 19)
-        self.bise_net.cuda()
-        self.bise_net_cp= bise_net_cp # Import BiSeNet model
-        self.bise_net.load_state_dict(torch.load(self.bise_net_cp)) # , map_location="cpu"
+        # self.bise_net = BiSeNet(n_classes = 19)
+        # self.bise_net.cuda()
+        # self.bise_net_cp= bise_net_cp # Import BiSeNet model
+        # self.bise_net.load_state_dict(torch.load(self.bise_net_cp)) # , map_location="cpu"
+        self.bise_net = bise_net # load from outside
         self.bise_net.eval()
         # Colors for all 20 parts
         self.part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
@@ -92,7 +113,7 @@ class ConsistentIDStableDiffusionXLPipeline(StableDiffusionXLPipeline):
             [0, 255, 255], [85, 255, 255], [170, 255, 255]]
 
         ### LLVA Optional
-        self.llva_model_path = "" #
+        self.llva_model_path = "liuhaotian/llava-v1.5-13b" # import llava weights
         self.llva_prompt = "Describe this person's facial features for me, including face, ears, eyes, nose, and mouth."
         self.llva_tokenizer, self.llva_model, self.llva_image_processor, self.llva_context_len = None,None,None,None #load_pretrained_model(self.llva_model_path)
 
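Taken together, the commit moves BiSeNet construction out of load_ConsistentID_model: the bise_net_cp checkpoint-path argument is removed, the caller now passes a ready BiSeNet module as the second positional argument, and a new cuda() helper handles device placement plus optional xFormers attention. A minimal sketch of the reworked calling convention follows; the SDXL base-model ID, the BiSeNet import path, the checkpoint filenames, and the weights repo below are illustrative assumptions, not taken from this diff.

import torch
from models.BiSeNet.model import BiSeNet  # assumed import path for the face-parsing net

# The caller now builds and loads BiSeNet itself, then injects it into the pipeline.
bise_net = BiSeNet(n_classes=19)  # 19-class face-parsing network
bise_net.load_state_dict(torch.load("face_parsing.pth", map_location="cpu"))  # hypothetical local checkpoint

pipe = ConsistentIDStableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",  # assumed SDXL base model
    torch_dtype=torch.float16,
)
pipe.load_ConsistentID_model(
    "JackAILab/ConsistentID",           # assumed weights repo
    bise_net,                           # pre-loaded module, new required argument
    weight_name="ConsistentID-v1.bin",  # hypothetical weight file name
)
pipe.cuda(dtype=torch.float16, use_xformers=True)  # new helper added by this commit

Injecting the module instead of a checkpoint path lets the caller decide where the BiSeNet weights come from and reuse a single loaded instance across pipeline setups, rather than re-reading the checkpoint inside every load_ConsistentID_model call.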