Fix model to cuda
app.py
CHANGED
@@ -19,7 +19,7 @@ def load_model_on_selection(model_name, progress=gr.Progress(track_tqdm=False)):
     current_model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16,
-        device_map="
+        device_map="cpu",
         use_auth_token=token
     )
 
@@ -31,7 +31,7 @@ def generate_text(prompt):
     global current_model, current_tokenizer
     if current_model is None or current_tokenizer is None:
         return "⚠️ No model loaded yet. Please select a model first."
-
+    current_model.to('cuda')
     inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)
     outputs = current_model.generate(**inputs, max_new_tokens=256)
     return current_tokenizer.decode(outputs[0], skip_special_tokens=True)
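For context, here is a minimal sketch of how the patched functions fit together after this commit. Everything outside the diff is an assumption: the module-level globals (`current_model`, `current_tokenizer`), the `token` variable, the tokenizer loading, and the return values are reconstructed, not taken from app.py.

```python
# Hypothetical reconstruction of the patched app.py excerpt.
# Only the lines shown in the diff above are confirmed; the rest is assumed.
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

current_model = None
current_tokenizer = None
token = None  # assumed: a Hugging Face access token set elsewhere in app.py


def load_model_on_selection(model_name, progress=gr.Progress(track_tqdm=False)):
    global current_model, current_tokenizer
    # Assumed: the tokenizer is loaded alongside the model.
    current_tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
    # Changed in this commit: load the weights onto the CPU first;
    # the move to the GPU now happens at generation time.
    current_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="cpu",
        use_auth_token=token,
    )
    return f"Loaded {model_name}"  # assumed status message


def generate_text(prompt):
    global current_model, current_tokenizer
    if current_model is None or current_tokenizer is None:
        return "⚠️ No model loaded yet. Please select a model first."
    # Added in this commit: move the CPU-loaded model to the GPU before
    # generating, so the inputs below land on the same device.
    current_model.to("cuda")
    inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)
    outputs = current_model.generate(**inputs, max_new_tokens=256)
    return current_tokenizer.decode(outputs[0], skip_special_tokens=True)
```

Loading on CPU and moving to CUDA only inside the inference function is a common pattern on Spaces where the GPU is attached per-call (e.g. ZeroGPU); whether that is the motivation here is not stated in the commit, but it explains why `device_map` becomes `"cpu"` while the commit title says "Fix model to cuda".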