Spaces: Running on Zero
bugfix: not using pipeline for response generation
app.py CHANGED
@@ -189,28 +189,27 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
 
     # Load the pipeline.
     pipe = load_pipeline(model_name)
-    # … (previous manual generation setup; the removed lines were not preserved in this view)
-    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
+    # Set up a streamer tied to the pipeline’s tokenizer.
+    streamer = TextIteratorStreamer(
+        pipe.tokenizer,
+        skip_prompt=True,
+        skip_special_tokens=True
+    )
+
+    # Kick off generation via the pipeline itself.
+    thread = threading.Thread(
+        target=pipe,
+        args=(prompt_text,),
+        kwargs={
+            "max_new_tokens": max_tokens,
+            "temperature": temperature,
+            "top_k": top_k,
+            "top_p": top_p,
+            "repetition_penalty": repeat_penalty,
+            "streamer": streamer,
+            "return_full_text": False,
+        }
+    )
     thread.start()
 
     # Collect tokens from the streamer as they are generated.
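For reference, below is a minimal, self-contained sketch of the streaming pattern the new code follows. The model name, prompt, and sampling values are placeholders rather than the Space's actual settings, and do_sample=True is an assumption added so that the temperature/top-k/top-p values take effect; in app.py the pipeline, prompt_text, and generation parameters come from the surrounding chat_response code not shown in this hunk.

import threading

from transformers import pipeline, TextIteratorStreamer

# Build a text-generation pipeline (stands in for load_pipeline(model_name)).
# Placeholder model; the real Space loads its own checkpoint.
pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")

# The streamer decodes tokens as they are produced, skipping the prompt text
# and any special tokens.
streamer = TextIteratorStreamer(
    pipe.tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

prompt_text = "Explain why the sky is blue in one sentence."

# Run the pipeline in a background thread so the main thread is free to read
# from the streamer while generation is still in progress.
thread = threading.Thread(
    target=pipe,
    args=(prompt_text,),
    kwargs={
        "max_new_tokens": 128,
        "do_sample": True,          # assumption: enable sampling so the settings below apply
        "temperature": 0.7,
        "top_k": 50,
        "top_p": 0.95,
        "repetition_penalty": 1.1,
        "streamer": streamer,
        "return_full_text": False,  # yield only the newly generated text
    },
)
thread.start()

# Collect tokens from the streamer as they are generated and accumulate them
# into the partial response (this mirrors the loop that follows the hunk).
partial = ""
for new_text in streamer:
    partial += new_text
    print(new_text, end="", flush=True)

thread.join()

Targeting the pipeline instead of model.generate lets the pipeline handle tokenizing the prompt and forwarding the generation kwargs, so the tokenizer used for encoding is the same one the streamer uses for decoding.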