IST199655 committed on
Commit f63e352 · 1 Parent(s): e7c3048
Files changed (1): app.py (+8 -5)
app.py CHANGED
@@ -93,14 +93,17 @@ def respond(
     messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
 
-    # Tokenize the prompt
-    inputs = tokenizer(messages, return_tensors="pt", truncation=True)
-    input_ids = inputs.input_ids.to("cpu")  # Ensure input is on the CPU
-
+    # Tokenize the messages
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        tokenize = True,
+        add_generation_prompt = True,  # Must add for generation
+        return_tensors = "pt",
+    )
     # Generate tokens incrementally
     streamer = TextStreamer(tokenizer, skip_prompt=True)
     generation_kwargs = {
-        "input_ids": input_ids,
+        "input_ids": inputs,
         "max_new_tokens": max_tokens,
         "temperature": temperature,
         "top_p": top_p,