Put everything on the cuda device!
app.py CHANGED
@@ -17,14 +17,19 @@ def chat_with_model(messages):
         yield messages + [{"role": "assistant", "content": "⚠️ No model loaded."}]
         return
 
-
+
 
     pad_id = current_tokenizer.pad_token_id
     if pad_id is None:
         pad_id = current_tokenizer.unk_token_id or 0
 
     prompt = format_prompt(messages)
-
+    device = torch.device("cuda")
+    current_model.to(device).half()
+
+    inputs = current_tokenizer(prompt, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
 
     streamer = TextIteratorStreamer(current_tokenizer, skip_prompt=True, skip_special_tokens=False)
 
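One caveat with the hunk above: torch.device("cuda") raises at runtime on a machine with no visible GPU, and .half() is a poor fit for CPU inference, where fp16 kernels are often unsupported. A minimal, more defensive sketch of the same placement step; the is_available() guard and the dtype branching are my additions, not part of this commit, while current_model, current_tokenizer, and prompt are the names from app.py:

import torch

# Fall back to CPU when no GPU is visible (this guard is an assumption,
# not something the commit does).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # fp16 halves GPU memory use; safe only on the GPU path.
    current_model = current_model.to(device).half()
else:
    # Keep fp32 on CPU, where half precision is typically slower or unsupported.
    current_model = current_model.to(device)

inputs = current_tokenizer(prompt, return_tensors="pt")
# Move every input tensor to the same device as the model, exactly as the diff does.
inputs = {k: v.to(device) for k, v in inputs.items()}

Note that nn.Module.to() and .half() modify the module in place, so the diff's unassigned current_model.to(device).half() also works; the reassignment above is just the more explicit idiom.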
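For context, a TextIteratorStreamer is normally drained on a background thread while generate() runs, which is what makes token-by-token streaming possible. A hedged sketch of how the rest of chat_with_model presumably consumes the streamer built at the end of the hunk; max_new_tokens is an illustrative placeholder, and the yield shape mirrors the messages-list format used earlier in the function:

from threading import Thread
from transformers import TextIteratorStreamer

# Same constructor call as the last context line of the diff.
streamer = TextIteratorStreamer(current_tokenizer, skip_prompt=True, skip_special_tokens=False)

# generate() blocks until generation finishes, so it runs on a worker thread
# while the caller iterates the streamer; inputs and pad_id come from the hunk.
thread = Thread(
    target=current_model.generate,
    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512, pad_token_id=pad_id),
)
thread.start()

# Inside chat_with_model: accumulate decoded text and re-yield the chat history
# with a growing assistant message.
partial = ""
for token_text in streamer:
    partial += token_text
    yield messages + [{"role": "assistant", "content": partial}]
thread.join()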