IST199655 committed
Commit: e7c3048
Parent(s): 66e4a39

app.py CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import InferenceClient
 Copied from inference in colab notebook
 """
 
-from transformers import AutoTokenizer , AutoModelForCausalLM ,
+from transformers import AutoTokenizer , AutoModelForCausalLM , TextStreamer
 import torch
 from threading import Thread
 
@@ -93,22 +93,12 @@ def respond(
     messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
 
-    # Create a single text prompt from the messages
-    prompt = ""
-    for msg in messages:
-        if msg["role"] == "system":
-            prompt += f"[System]: {msg['content']}\n\n"
-        elif msg["role"] == "user":
-            prompt += f"[User]: {msg['content']}\n\n"
-        elif msg["role"] == "assistant":
-            prompt += f"[Assistant]: {msg['content']}\n\n"
-
     # Tokenize the prompt
-    inputs = tokenizer(
+    inputs = tokenizer(messages, return_tensors="pt", truncation=True)
     input_ids = inputs.input_ids.to("cpu")  # Ensure input is on the CPU
 
     # Generate tokens incrementally
-    streamer =
+    streamer = TextStreamer(tokenizer, skip_prompt=True)
     generation_kwargs = {
         "input_ids": input_ids,
         "max_new_tokens": max_tokens,
@@ -124,8 +114,7 @@ def respond(
     response = ""
     for token in streamer:
         response += token
-        yield response
-        print(response)
+        yield response
 
 
 """
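For context, here is a minimal sketch of the streaming pattern this commit appears to be moving toward, assuming a Gradio-style generator callback. Two caveats about the committed code itself: transformers' TextStreamer prints to stdout and is not iterable (the iterable variant is TextIteratorStreamer), and a plain tokenizer(...) call does not accept a list of {"role", "content"} dicts (that is what tokenizer.apply_chat_template is for). The model id below is a placeholder, not shown in the commit; the rest follows the structure of respond() in the diff.

from threading import Thread

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

model_id = "MODEL_ID_PLACEHOLDER"  # hypothetical; the commit does not show the model name
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)

def respond(message, history, system_message, max_tokens):
    # Build the chat as role/content dicts, as in the committed respond().
    # history is assumed to be a list of (user, assistant) tuples, matching
    # the val[0]/val[1] accesses in the original file.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # apply_chat_template turns the role/content dicts into model-ready ids,
    # which a bare tokenizer(messages, ...) call cannot do.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to("cpu")  # ensure input is on the CPU, as in the commit

    # TextIteratorStreamer yields decoded text chunks as they are generated,
    # unlike TextStreamer, which only prints them.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "input_ids": input_ids,
        "max_new_tokens": max_tokens,
        "streamer": streamer,
    }

    # Run generate() in a background thread so this generator can consume the
    # streamer as tokens arrive; this is presumably why the file imports Thread.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for token in streamer:
        response += token
        yield response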