JackAILab committed on
Commit 23d7289 · verified · 1 Parent(s): ba409e9

Update pipline_StableDiffusionXL_ConsistentID.py

pipline_StableDiffusionXL_ConsistentID.py CHANGED
@@ -42,17 +42,37 @@ PipelineImageInput = Union[
 
 
 class ConsistentIDStableDiffusionXLPipeline(StableDiffusionXLPipeline):
+
+    def cuda(self, dtype=torch.float16, use_xformers=False):
+        self.to('cuda', dtype)
+
+        # if hasattr(self, 'image_proj_model'):
+        #     self.image_proj_model.to(self.unet.device).to(self.unet.dtype)
+
+        if use_xformers:
+            if is_xformers_available():
+                import xformers
+                from packaging import version
+
+                xformers_version = version.parse(xformers.__version__)
+                if xformers_version == version.parse("0.0.16"):
+                    logger.warn(
+                        "xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
+                    )
+                self.enable_xformers_memory_efficient_attention()
+            else:
+                raise ValueError("xformers is not available. Make sure it is installed correctly")
 
     @validate_hf_hub_args
     def load_ConsistentID_model(
         self,
         pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        bise_net,
         weight_name: str,
         subfolder: str = '',
         trigger_word_ID: str = '<|image|>',
         trigger_word_facial: str = '<|facial|>',
         image_encoder_path: str = 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K', # Import CLIP pretrained model
-        bise_net_cp: str = 'JackAILab/ConsistentID/face_parsing.pth',
         torch_dtype = torch.float16,
         num_tokens = 4,
         lora_rank= 128,
@@ -75,10 +95,11 @@ class ConsistentIDStableDiffusionXLPipeline(StableDiffusionXLPipeline):
         self.app.prepare(ctx_id=0, det_size=(512, 512)) ### (640, 640)
 
         ### BiSeNet
-        self.bise_net = BiSeNet(n_classes = 19)
-        self.bise_net.cuda()
-        self.bise_net_cp= bise_net_cp # Import BiSeNet model
-        self.bise_net.load_state_dict(torch.load(self.bise_net_cp)) # , map_location="cpu"
+        # self.bise_net = BiSeNet(n_classes = 19)
+        # self.bise_net.cuda()
+        # self.bise_net_cp= bise_net_cp # Import BiSeNet model
+        # self.bise_net.load_state_dict(torch.load(self.bise_net_cp)) # , map_location="cpu"
+        self.bise_net = bise_net # load from outside
         self.bise_net.eval()
         # Colors for all 20 parts
         self.part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
@@ -92,7 +113,7 @@ class ConsistentIDStableDiffusionXLPipeline(StableDiffusionXLPipeline):
         [0, 255, 255], [85, 255, 255], [170, 255, 255]]
 
         ### LLVA Optional
-        self.llva_model_path = "" #TODO import llava weights
+        self.llva_model_path = "liuhaotian/llava-v1.5-13b" # import llava weights
         self.llva_prompt = "Describe this person's facial features for me, including face, ears, eyes, nose, and mouth."
         self.llva_tokenizer, self.llva_model, self.llva_image_processor, self.llva_context_len = None,None,None,None #load_pretrained_model(self.llva_model_path)
 
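
Usage note: after this commit, callers must build the BiSeNet face-parsing model themselves and pass it to load_ConsistentID_model via the new bise_net argument, rather than supplying a bise_net_cp checkpoint path. A minimal sketch under stated assumptions: the BiSeNet import path, the local face_parsing.pth checkpoint, and the weight_name value below are illustrative, not taken from this commit.

import torch
from models.BiSeNet import BiSeNet  # assumed import path for the repo's face-parsing net

# Build and load BiSeNet outside the pipeline, mirroring the removed in-pipeline code.
bise_net = BiSeNet(n_classes=19)
bise_net.load_state_dict(torch.load("./face_parsing.pth", map_location="cpu"))  # hypothetical local checkpoint
bise_net.cuda()

pipe = ConsistentIDStableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.cuda(dtype=torch.float16, use_xformers=True)  # helper method added by this commit

pipe.load_ConsistentID_model(
    "JackAILab/ConsistentID",           # pretrained_model_name_or_path_or_dict
    bise_net=bise_net,                  # injected instead of the removed bise_net_cp default
    weight_name="ConsistentID-v1.bin",  # hypothetical weight file name
)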