ford442 committed on
Commit
78217cc
·
verified ·
1 Parent(s): 57a25d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -37,7 +37,8 @@ print(f"Loading model: {model_name}")
37
  model = AutoModelForCausalLM.from_pretrained(
38
  model_name,
39
  torch_dtype="auto",
40
- # device_map="auto", # device_map="auto" can be helpful for very large models to distribute layers if you have multiple GPUs or for offloading.
 
41
  # For single GPU, explicit .to('cuda') is fine.
42
  # trust_remote_code=True # Removed: Generally not needed for Vicuna/Llama models
43
  ).to('cuda', torch.bfloat16) # Explicitly using bfloat16 as in original code
 
37
  model = AutoModelForCausalLM.from_pretrained(
38
  model_name,
39
  torch_dtype="auto",
40
+ load_in_8bit=True,
41
+ device_map="auto", # device_map="auto" can be helpful for very large models to distribute layers if you have multiple GPUs or for offloading.
42
  # For single GPU, explicit .to('cuda') is fine.
43
  # trust_remote_code=True # Removed: Generally not needed for Vicuna/Llama models
44
  ).to('cuda', torch.bfloat16) # Explicitly using bfloat16 as in original code