Spaces:
Paused
Paused
alessandro trinca tornidor
committed on
Commit
·
e35418a
1
Parent(s):
e5d4552
feat: add device_map argument to try avoid CUDA init RuntimeError, bump lisa-on-cuda to version 1.4.5
Browse files- lisa_on_cuda/utils/app_helpers.py +19 -14
- pyproject.toml +2 -2
lisa_on_cuda/utils/app_helpers.py
CHANGED
@@ -118,7 +118,7 @@ def preprocess(
|
|
118 |
|
119 |
def load_model_for_causal_llm_pretrained(
|
120 |
version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
|
121 |
-
internal_logger: logging = None
|
122 |
):
|
123 |
if internal_logger is None:
|
124 |
internal_logger = app_logger
|
@@ -128,8 +128,7 @@ def load_model_for_causal_llm_pretrained(
|
|
128 |
kwargs.update(
|
129 |
{
|
130 |
"torch_dtype": torch.half,
|
131 |
-
#
|
132 |
-
"load_in_4bit": True,
|
133 |
"quantization_config": BitsAndBytesConfig(
|
134 |
load_in_4bit=True,
|
135 |
bnb_4bit_compute_dtype=torch.float16,
|
@@ -155,13 +154,15 @@ def load_model_for_causal_llm_pretrained(
|
|
155 |
low_cpu_mem_usage=True,
|
156 |
vision_tower=vision_tower,
|
157 |
seg_token_idx=seg_token_idx,
|
|
|
|
|
158 |
**kwargs
|
159 |
)
|
160 |
-
internal_logger.debug(
|
161 |
return _model
|
162 |
|
163 |
|
164 |
-
def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None):
|
165 |
if internal_logger is None:
|
166 |
internal_logger = app_logger
|
167 |
internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
|
@@ -183,7 +184,7 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
|
|
183 |
use_fast=False,
|
184 |
)
|
185 |
_tokenizer.pad_token = _tokenizer.unk_token
|
186 |
-
internal_logger.info(
|
187 |
args_to_parse.seg_token_idx = _tokenizer("[SEG]", add_special_tokens=False).input_ids[0]
|
188 |
torch_dtype = torch.float32
|
189 |
if args_to_parse.precision == "bf16":
|
@@ -199,7 +200,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
|
|
199 |
load_in_8bit=args_to_parse.load_in_8bit,
|
200 |
load_in_4bit=args_to_parse.load_in_4bit,
|
201 |
seg_token_idx=args_to_parse.seg_token_idx,
|
202 |
-
vision_tower=args_to_parse.vision_tower
|
|
|
203 |
)) if inference_decorator else load_model_for_causal_llm_pretrained(
|
204 |
args_to_parse.version,
|
205 |
torch_dtype=torch_dtype,
|
@@ -207,8 +209,9 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
|
|
207 |
load_in_4bit=args_to_parse.load_in_4bit,
|
208 |
seg_token_idx=args_to_parse.seg_token_idx,
|
209 |
vision_tower=args_to_parse.vision_tower,
|
|
|
210 |
)
|
211 |
-
internal_logger.debug(
|
212 |
|
213 |
_model.config.eos_token_id = _tokenizer.eos_token_id
|
214 |
_model.config.bos_token_id = _tokenizer.bos_token_id
|
@@ -222,11 +225,11 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
|
|
222 |
_model, args_to_parse, torch_dtype
|
223 |
)
|
224 |
vision_tower.to(device=args_to_parse.local_rank)
|
225 |
-
internal_logger.debug(
|
226 |
_clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
|
227 |
-
internal_logger.debug(
|
228 |
_transform = ResizeLongestSide(args_to_parse.image_size)
|
229 |
-
internal_logger.debug(
|
230 |
inference_decorator(_model.eval()) if inference_decorator else _model.eval()
|
231 |
internal_logger.info("model preparation ok!")
|
232 |
return _model, _clip_image_processor, _tokenizer, _transform
|
@@ -261,15 +264,17 @@ def prepare_model_vision_tower(_model, args_to_parse, torch_dtype, internal_logg
|
|
261 |
internal_logger.debug(f"vision tower precision fp32? {args_to_parse.precision}, 3.")
|
262 |
_model = _model.float().cuda()
|
263 |
vision_tower = _model.get_model().get_vision_tower()
|
264 |
-
internal_logger.debug(
|
265 |
return _model, vision_tower
|
266 |
|
267 |
|
268 |
-
def get_inference_model_by_args(
|
|
|
|
|
269 |
if internal_logger0 is None:
|
270 |
internal_logger0 = app_logger
|
271 |
internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
|
272 |
-
model, clip_image_processor, tokenizer, transform = get_model(args_to_parse)
|
273 |
internal_logger0.info("created model, preparing inference function")
|
274 |
no_seg_out = placeholders["no_seg_out"]
|
275 |
|
|
|
118 |
|
119 |
def load_model_for_causal_llm_pretrained(
|
120 |
version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
|
121 |
+
internal_logger: logging = None, device_map="auto"
|
122 |
):
|
123 |
if internal_logger is None:
|
124 |
internal_logger = app_logger
|
|
|
128 |
kwargs.update(
|
129 |
{
|
130 |
"torch_dtype": torch.half,
|
131 |
+
# "load_in_4bit": True,
|
|
|
132 |
"quantization_config": BitsAndBytesConfig(
|
133 |
load_in_4bit=True,
|
134 |
bnb_4bit_compute_dtype=torch.float16,
|
|
|
154 |
low_cpu_mem_usage=True,
|
155 |
vision_tower=vision_tower,
|
156 |
seg_token_idx=seg_token_idx,
|
157 |
+
# try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
|
158 |
+
device_map=device_map,
|
159 |
**kwargs
|
160 |
)
|
161 |
+
internal_logger.debug("model loaded!")
|
162 |
return _model
|
163 |
|
164 |
|
165 |
+
def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto"):
|
166 |
if internal_logger is None:
|
167 |
internal_logger = app_logger
|
168 |
internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
|
|
|
184 |
use_fast=False,
|
185 |
)
|
186 |
_tokenizer.pad_token = _tokenizer.unk_token
|
187 |
+
internal_logger.info("tokenizer ok")
|
188 |
args_to_parse.seg_token_idx = _tokenizer("[SEG]", add_special_tokens=False).input_ids[0]
|
189 |
torch_dtype = torch.float32
|
190 |
if args_to_parse.precision == "bf16":
|
|
|
200 |
load_in_8bit=args_to_parse.load_in_8bit,
|
201 |
load_in_4bit=args_to_parse.load_in_4bit,
|
202 |
seg_token_idx=args_to_parse.seg_token_idx,
|
203 |
+
vision_tower=args_to_parse.vision_tower,
|
204 |
+
device_map=device_map # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
|
205 |
)) if inference_decorator else load_model_for_causal_llm_pretrained(
|
206 |
args_to_parse.version,
|
207 |
torch_dtype=torch_dtype,
|
|
|
209 |
load_in_4bit=args_to_parse.load_in_4bit,
|
210 |
seg_token_idx=args_to_parse.seg_token_idx,
|
211 |
vision_tower=args_to_parse.vision_tower,
|
212 |
+
device_map=device_map
|
213 |
)
|
214 |
+
internal_logger.debug("causal llm loaded!")
|
215 |
|
216 |
_model.config.eos_token_id = _tokenizer.eos_token_id
|
217 |
_model.config.bos_token_id = _tokenizer.bos_token_id
|
|
|
225 |
_model, args_to_parse, torch_dtype
|
226 |
)
|
227 |
vision_tower.to(device=args_to_parse.local_rank)
|
228 |
+
internal_logger.debug("vision tower loaded, prepare clip image processor...")
|
229 |
_clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
|
230 |
+
internal_logger.debug("clip image processor done.")
|
231 |
_transform = ResizeLongestSide(args_to_parse.image_size)
|
232 |
+
internal_logger.debug("start model evaluation...")
|
233 |
inference_decorator(_model.eval()) if inference_decorator else _model.eval()
|
234 |
internal_logger.info("model preparation ok!")
|
235 |
return _model, _clip_image_processor, _tokenizer, _transform
|
|
|
264 |
internal_logger.debug(f"vision tower precision fp32? {args_to_parse.precision}, 3.")
|
265 |
_model = _model.float().cuda()
|
266 |
vision_tower = _model.get_model().get_vision_tower()
|
267 |
+
internal_logger.debug("vision tower ok!")
|
268 |
return _model, vision_tower
|
269 |
|
270 |
|
271 |
+
def get_inference_model_by_args(
|
272 |
+
args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto"
|
273 |
+
):
|
274 |
if internal_logger0 is None:
|
275 |
internal_logger0 = app_logger
|
276 |
internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
|
277 |
+
model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map)
|
278 |
internal_logger0.info("created model, preparing inference function")
|
279 |
no_seg_out = placeholders["no_seg_out"]
|
280 |
|
pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
[tool.poetry]
|
2 |
name = "lisa-on-cuda"
|
3 |
-
version = "1.4.4"
|
4 |
description = "LISA (Reasoning Segmentation via Large Language Model) on cuda, now with huggingface ZeroGPU support!"
|
5 |
authors = ["alessandro trinca tornidor <[email protected]>"]
|
6 |
license = "Apache 2.0"
|
@@ -8,7 +8,7 @@ readme = "README.md"
|
|
8 |
|
9 |
[metadata]
|
10 |
name = "lisa-on-cuda"
|
11 |
-
version = "1.4.4"
|
12 |
|
13 |
[tool.poetry.urls]
|
14 |
Source = "https://huggingface.co/spaces/aletrn/lisa-on-cuda/"
|
|
|
1 |
[tool.poetry]
|
2 |
name = "lisa-on-cuda"
|
3 |
+
version = "1.4.5"
|
4 |
description = "LISA (Reasoning Segmentation via Large Language Model) on cuda, now with huggingface ZeroGPU support!"
|
5 |
authors = ["alessandro trinca tornidor <[email protected]>"]
|
6 |
license = "Apache 2.0"
|
|
|
8 |
|
9 |
[metadata]
|
10 |
name = "lisa-on-cuda"
|
11 |
+
version = "1.4.5"
|
12 |
|
13 |
[tool.poetry.urls]
|
14 |
Source = "https://huggingface.co/spaces/aletrn/lisa-on-cuda/"
|