alessandro trinca tornidor committed on
Commit
e35418a
·
1 Parent(s): e5d4552

feat: add device_map argument to try to avoid CUDA init RuntimeError, bump lisa-on-cuda to version 1.4.5

Browse files
Files changed (2) hide show
  1. lisa_on_cuda/utils/app_helpers.py +19 -14
  2. pyproject.toml +2 -2
lisa_on_cuda/utils/app_helpers.py CHANGED
@@ -118,7 +118,7 @@ def preprocess(
118
 
119
  def load_model_for_causal_llm_pretrained(
120
  version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
121
- internal_logger: logging = None
122
  ):
123
  if internal_logger is None:
124
  internal_logger = app_logger
@@ -128,8 +128,7 @@ def load_model_for_causal_llm_pretrained(
128
  kwargs.update(
129
  {
130
  "torch_dtype": torch.half,
131
- # commentare?
132
- "load_in_4bit": True,
133
  "quantization_config": BitsAndBytesConfig(
134
  load_in_4bit=True,
135
  bnb_4bit_compute_dtype=torch.float16,
@@ -155,13 +154,15 @@ def load_model_for_causal_llm_pretrained(
155
  low_cpu_mem_usage=True,
156
  vision_tower=vision_tower,
157
  seg_token_idx=seg_token_idx,
 
 
158
  **kwargs
159
  )
160
- internal_logger.debug(f"model loaded!")
161
  return _model
162
 
163
 
164
- def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None):
165
  if internal_logger is None:
166
  internal_logger = app_logger
167
  internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
@@ -183,7 +184,7 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
183
  use_fast=False,
184
  )
185
  _tokenizer.pad_token = _tokenizer.unk_token
186
- internal_logger.info(f"tokenizer ok")
187
  args_to_parse.seg_token_idx = _tokenizer("[SEG]", add_special_tokens=False).input_ids[0]
188
  torch_dtype = torch.float32
189
  if args_to_parse.precision == "bf16":
@@ -199,7 +200,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
199
  load_in_8bit=args_to_parse.load_in_8bit,
200
  load_in_4bit=args_to_parse.load_in_4bit,
201
  seg_token_idx=args_to_parse.seg_token_idx,
202
- vision_tower=args_to_parse.vision_tower
 
203
  )) if inference_decorator else load_model_for_causal_llm_pretrained(
204
  args_to_parse.version,
205
  torch_dtype=torch_dtype,
@@ -207,8 +209,9 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
207
  load_in_4bit=args_to_parse.load_in_4bit,
208
  seg_token_idx=args_to_parse.seg_token_idx,
209
  vision_tower=args_to_parse.vision_tower,
 
210
  )
211
- internal_logger.debug(f"causal llm loaded!")
212
 
213
  _model.config.eos_token_id = _tokenizer.eos_token_id
214
  _model.config.bos_token_id = _tokenizer.bos_token_id
@@ -222,11 +225,11 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
222
  _model, args_to_parse, torch_dtype
223
  )
224
  vision_tower.to(device=args_to_parse.local_rank)
225
- internal_logger.debug(f"vision tower loaded, prepare clip image processor...")
226
  _clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
227
- internal_logger.debug(f"clip image processor done.")
228
  _transform = ResizeLongestSide(args_to_parse.image_size)
229
- internal_logger.debug(f"start model evaluation...")
230
  inference_decorator(_model.eval()) if inference_decorator else _model.eval()
231
  internal_logger.info("model preparation ok!")
232
  return _model, _clip_image_processor, _tokenizer, _transform
@@ -261,15 +264,17 @@ def prepare_model_vision_tower(_model, args_to_parse, torch_dtype, internal_logg
261
  internal_logger.debug(f"vision tower precision fp32? {args_to_parse.precision}, 3.")
262
  _model = _model.float().cuda()
263
  vision_tower = _model.get_model().get_vision_tower()
264
- internal_logger.debug(f"vision tower ok!")
265
  return _model, vision_tower
266
 
267
 
268
- def get_inference_model_by_args(args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None):
 
 
269
  if internal_logger0 is None:
270
  internal_logger0 = app_logger
271
  internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
272
- model, clip_image_processor, tokenizer, transform = get_model(args_to_parse)
273
  internal_logger0.info("created model, preparing inference function")
274
  no_seg_out = placeholders["no_seg_out"]
275
 
 
118
 
119
  def load_model_for_causal_llm_pretrained(
120
  version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
121
+ internal_logger: logging = None, device_map="auto"
122
  ):
123
  if internal_logger is None:
124
  internal_logger = app_logger
 
128
  kwargs.update(
129
  {
130
  "torch_dtype": torch.half,
131
+ # "load_in_4bit": True,
 
132
  "quantization_config": BitsAndBytesConfig(
133
  load_in_4bit=True,
134
  bnb_4bit_compute_dtype=torch.float16,
 
154
  low_cpu_mem_usage=True,
155
  vision_tower=vision_tower,
156
  seg_token_idx=seg_token_idx,
157
+ # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
158
+ device_map=device_map,
159
  **kwargs
160
  )
161
+ internal_logger.debug("model loaded!")
162
  return _model
163
 
164
 
165
+ def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto"):
166
  if internal_logger is None:
167
  internal_logger = app_logger
168
  internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
 
184
  use_fast=False,
185
  )
186
  _tokenizer.pad_token = _tokenizer.unk_token
187
+ internal_logger.info("tokenizer ok")
188
  args_to_parse.seg_token_idx = _tokenizer("[SEG]", add_special_tokens=False).input_ids[0]
189
  torch_dtype = torch.float32
190
  if args_to_parse.precision == "bf16":
 
200
  load_in_8bit=args_to_parse.load_in_8bit,
201
  load_in_4bit=args_to_parse.load_in_4bit,
202
  seg_token_idx=args_to_parse.seg_token_idx,
203
+ vision_tower=args_to_parse.vision_tower,
204
+ device_map=device_map # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
205
  )) if inference_decorator else load_model_for_causal_llm_pretrained(
206
  args_to_parse.version,
207
  torch_dtype=torch_dtype,
 
209
  load_in_4bit=args_to_parse.load_in_4bit,
210
  seg_token_idx=args_to_parse.seg_token_idx,
211
  vision_tower=args_to_parse.vision_tower,
212
+ device_map=device_map
213
  )
214
+ internal_logger.debug("causal llm loaded!")
215
 
216
  _model.config.eos_token_id = _tokenizer.eos_token_id
217
  _model.config.bos_token_id = _tokenizer.bos_token_id
 
225
  _model, args_to_parse, torch_dtype
226
  )
227
  vision_tower.to(device=args_to_parse.local_rank)
228
+ internal_logger.debug("vision tower loaded, prepare clip image processor...")
229
  _clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
230
+ internal_logger.debug("clip image processor done.")
231
  _transform = ResizeLongestSide(args_to_parse.image_size)
232
+ internal_logger.debug("start model evaluation...")
233
  inference_decorator(_model.eval()) if inference_decorator else _model.eval()
234
  internal_logger.info("model preparation ok!")
235
  return _model, _clip_image_processor, _tokenizer, _transform
 
264
  internal_logger.debug(f"vision tower precision fp32? {args_to_parse.precision}, 3.")
265
  _model = _model.float().cuda()
266
  vision_tower = _model.get_model().get_vision_tower()
267
+ internal_logger.debug("vision tower ok!")
268
  return _model, vision_tower
269
 
270
 
271
+ def get_inference_model_by_args(
272
+ args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto"
273
+ ):
274
  if internal_logger0 is None:
275
  internal_logger0 = app_logger
276
  internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
277
+ model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map)
278
  internal_logger0.info("created model, preparing inference function")
279
  no_seg_out = placeholders["no_seg_out"]
280
 
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
  [tool.poetry]
2
  name = "lisa-on-cuda"
3
- version = "1.4.4"
4
  description = "LISA (Reasoning Segmentation via Large Language Model) on cuda, now with huggingface ZeroGPU support!"
5
  authors = ["alessandro trinca tornidor <[email protected]>"]
6
  license = "Apache 2.0"
@@ -8,7 +8,7 @@ readme = "README.md"
8
 
9
  [metadata]
10
  name = "lisa-on-cuda"
11
- version = "1.4.4"
12
 
13
  [tool.poetry.urls]
14
  Source = "https://huggingface.co/spaces/aletrn/lisa-on-cuda/"
 
1
  [tool.poetry]
2
  name = "lisa-on-cuda"
3
+ version = "1.4.5"
4
  description = "LISA (Reasoning Segmentation via Large Language Model) on cuda, now with huggingface ZeroGPU support!"
5
  authors = ["alessandro trinca tornidor <[email protected]>"]
6
  license = "Apache 2.0"
 
8
 
9
  [metadata]
10
  name = "lisa-on-cuda"
11
+ version = "1.4.5"
12
 
13
  [tool.poetry.urls]
14
  Source = "https://huggingface.co/spaces/aletrn/lisa-on-cuda/"