Luigi committed
Commit 939895d · 1 Parent(s): 423dc1a

bugfix: response generation was not using the pipeline

Files changed (1)
app.py +21 -22
app.py CHANGED
@@ -189,28 +189,27 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
 
     # Load the pipeline.
     pipe = load_pipeline(model_name)
-    # Obtain the underlying tokenizer and model.
-    tokenizer = pipe.tokenizer
-    model = pipe.model
-
-    # Tokenize the formatted prompt.
-    model_inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
-
-    # Set up a streamer for token-by-token generation.
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-    # Run generate in a background thread with the streamer.
-    gen_kwargs = {
-        "input_ids": model_inputs.input_ids,
-        "attention_mask": model_inputs.attention_mask,
-        "max_new_tokens": max_tokens,
-        "temperature": temperature,
-        "top_k": top_k,
-        "top_p": top_p,
-        "repetition_penalty": repeat_penalty,
-        "streamer": streamer
-    }
-    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
+    # Set up a streamer tied to the pipeline's tokenizer.
+    streamer = TextIteratorStreamer(
+        pipe.tokenizer,
+        skip_prompt=True,
+        skip_special_tokens=True
+    )
+
+    # Kick off generation via the pipeline itself.
+    thread = threading.Thread(
+        target=pipe,
+        args=(prompt_text,),
+        kwargs={
+            "max_new_tokens": max_tokens,
+            "temperature": temperature,
+            "top_k": top_k,
+            "top_p": top_p,
+            "repetition_penalty": repeat_penalty,
+            "streamer": streamer,
+            "return_full_text": False,
+        }
+    )
     thread.start()
 
     # Collect tokens from the streamer as they are generated.
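For context, a minimal standalone sketch of the pattern this commit switches to, including the downstream collection loop that the final context line refers to. That loop is not part of this commit, and the `stream_reply` helper name, the placeholder model id, and the sampling defaults below are illustrative assumptions, not the Space's actual code.

import threading
from transformers import pipeline, TextIteratorStreamer

def stream_reply(prompt_text, max_tokens=256, temperature=0.7,
                 top_k=40, top_p=0.95, repeat_penalty=1.1):
    # Hypothetical helper mirroring the pattern introduced by this commit.
    pipe = pipeline("text-generation", model="gpt2")  # placeholder model, not the Space's
    streamer = TextIteratorStreamer(
        pipe.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run the pipeline itself in a background thread; it handles tokenization
    # and device placement, so no manual tokenizer/model plumbing is needed.
    thread = threading.Thread(
        target=pipe,
        args=(prompt_text,),
        kwargs={
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_k": top_k,
            "top_p": top_p,
            "repetition_penalty": repeat_penalty,
            "streamer": streamer,
            "return_full_text": False,  # pipeline's return value excludes the prompt
        },
    )
    thread.start()

    # The streamer is iterable: it yields decoded text chunks as they are generated,
    # which is what the "Collect tokens from the streamer" step consumes.
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # e.g. push the growing reply to a chat UI

    thread.join()

Targeting `pipe` directly instead of `model.generate` keeps tokenization and device handling inside the pipeline; `skip_prompt=True` keeps the prompt out of the streamed chunks, and `return_full_text=False` does the same for the pipeline's own return value.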