IST199655 committed on
Commit e7c3048 · 1 Parent(s): 66e4a39
Files changed (1): app.py (+4, -15)
app.py CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import InferenceClient
 Copied from inference in colab notebook
 """
 
-from transformers import AutoTokenizer , AutoModelForCausalLM , TextIteratorStreamer
+from transformers import AutoTokenizer , AutoModelForCausalLM , TextStreamer
 import torch
 from threading import Thread
 
@@ -93,22 +93,12 @@ def respond(
         messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
 
-    # Create a single text prompt from the messages
-    prompt = ""
-    for msg in messages:
-        if msg["role"] == "system":
-            prompt += f"[System]: {msg['content']}\n\n"
-        elif msg["role"] == "user":
-            prompt += f"[User]: {msg['content']}\n\n"
-        elif msg["role"] == "assistant":
-            prompt += f"[Assistant]: {msg['content']}\n\n"
-
     # Tokenize the prompt
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
+    inputs = tokenizer(messages, return_tensors="pt", truncation=True)
     input_ids = inputs.input_ids.to("cpu")  # Ensure input is on the CPU
 
     # Generate tokens incrementally
-    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+    streamer = TextStreamer(tokenizer, skip_prompt=True)
     generation_kwargs = {
         "input_ids": input_ids,
         "max_new_tokens": max_tokens,
@@ -124,8 +114,7 @@ def respond(
     response = ""
     for token in streamer:
         response += token
-        yield response[len(prompt):].strip()
-    print(response)
+        yield response
 
 
 """