Update app.py
app.py CHANGED
@@ -41,7 +41,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto", # device_map="auto" can be helpful for very large models to distribute layers if you have multiple GPUs or for offloading.
     # For single GPU, explicit .to('cuda') is fine.
     # trust_remote_code=True # Removed: Generally not needed for Vicuna/Llama models
-).to('cuda'
+).to('cuda') #, torch.bfloat16) # Explicitly using bfloat16 as in original code
 
 # ** MODIFICATION 3: Removed `trust_remote_code=True` for tokenizer **
 print(f"Loading tokenizer: {model_name}")
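
For context, here is a minimal sketch of the loading block as it stands after this fix. The commit simply closes the parenthesis that was missing from `.to('cuda'`, which was a SyntaxError. The checkpoint name below is hypothetical (app.py defines its own `model_name` earlier), and one caveat worth noting: combining `device_map="auto"` with an explicit `.to('cuda')` is redundant at best, since accelerate already manages device placement for a dispatched model and moving it manually can raise an error. Passing the dtype to `from_pretrained` and dropping the `.to()` call is the safer pattern.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical checkpoint for illustration; app.py defines its own model_name.
model_name = "lmsys/vicuna-7b-v1.5"

print(f"Loading model: {model_name}")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # pass the dtype here instead of via .to()
    device_map="auto",           # lets accelerate place (and offload) layers
)
# With device_map="auto", accelerate owns device placement, so the trailing
# .to('cuda') from the diff is unnecessary and, if any layers were offloaded,
# can raise an error about moving a model dispatched with accelerate hooks.
# When loading onto a single GPU without device_map, .to('cuda') is fine:
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, torch_dtype=torch.bfloat16).to('cuda')

print(f"Loading tokenizer: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)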