Ruurd committed on
Commit 7ee1641 · 1 Parent(s): b16f2d9

Add break after end of sequence token

Files changed (1):
  app.py (+10 -2)
app.py CHANGED
@@ -62,7 +62,7 @@ with gr.Blocks() as demo:
             yield messages + [{"role": "assistant", "content": "⚠️ No model loaded."}]
             return
 
-        current_model.to("cuda")
+        current_model = current_model.half().to("cuda")
 
         prompt = format_prompt(messages)
         inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)
@@ -79,10 +79,18 @@ with gr.Blocks() as demo:
             output_scores=False
         ).sequences[0][inputs['input_ids'].shape[-1]:]:  # skip input tokens
             output_ids.append(token_id.item())
-            decoded = current_tokenizer.decode(output_ids, skip_special_tokens=True)
+            decoded = current_tokenizer.decode(output_ids, skip_special_tokens=False)
+            if output_ids[-1] == current_tokenizer.eos_token_id:
+                current_model.to("cpu")
+                torch.cuda.empty_cache()
+                return
             messages[-1]["content"] = decoded
             yield messages
 
+        current_model.to("cpu")
+        torch.cuda.empty_cache()
+        return
+
     with gr.Row():
         model_selector = gr.Dropdown(choices=model_choices, label="Select Model")
         model_status = gr.Textbox(label="Model Status", interactive=False)
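
For context, the pattern this commit lands on, generating on the GPU in half precision, stopping once the end-of-sequence token appears, and moving the model back to the CPU to free VRAM, can be sketched as below. This is a minimal, hedged reconstruction: the tiny placeholder model, the prompt, and the generation settings are assumptions for illustration, not the Space's actual configuration.

# Minimal sketch of the streaming loop this commit introduces (illustrative only;
# the placeholder model and generation settings are assumptions, not app.py's).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "sshleifer/tiny-gpt2"  # tiny placeholder model so the sketch runs anywhere
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def stream_reply(prompt):
    """Yield the growing decoded reply, stopping after the end-of-sequence token."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)  # the commit additionally casts with .half() before moving to CUDA
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    generated = model.generate(**inputs, max_new_tokens=32)
    output_ids = []
    # Skip the prompt tokens, as `[inputs['input_ids'].shape[-1]:]` does in app.py.
    for token_id in generated[0][inputs["input_ids"].shape[-1]:]:
        output_ids.append(token_id.item())
        if output_ids[-1] == tokenizer.eos_token_id:
            break  # the "break after end of sequence token" from the commit message
        yield tokenizer.decode(output_ids, skip_special_tokens=False)
    # Release GPU memory on every exit path, as the commit does with .to("cpu").
    model.to("cpu")
    torch.cuda.empty_cache()  # safe no-op when CUDA is not initialized

for partial in stream_reply("Hello"):
    print(partial)

Note that the commit uses `return` inside the loop and therefore has to duplicate the CPU-offload and cache-clearing lines on both exit paths; a `break`, as in the sketch above, reaches the same cleanup code without the duplication.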