Spaces:
Running
on
Zero
Running
on
Zero
better management on system prompt
Browse files
app.py
CHANGED
@@ -159,7 +159,6 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
|
|
159 |
pipe = load_pipeline(model_name)
|
160 |
|
161 |
# Use the pipeline directly with conversation history.
|
162 |
-
# Note: Many chat pipelines use internal chat templating to properly format the conversation.
|
163 |
response = pipe(
|
164 |
conversation,
|
165 |
max_new_tokens=max_tokens,
|
@@ -192,6 +191,16 @@ def cancel_generation():
|
|
192 |
cancel_event.set()
|
193 |
return "Cancellation requested."
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
# ------------------------------
|
196 |
# Gradio UI Definition
|
197 |
# ------------------------------
|
@@ -208,10 +217,12 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
208 |
value=default_model,
|
209 |
info="Choose from available models."
|
210 |
)
|
211 |
-
|
212 |
-
|
|
|
|
|
213 |
system_prompt_text = gr.Textbox(label="System Prompt",
|
214 |
-
value=
|
215 |
lines=3,
|
216 |
info="Define the base context for the AI's responses.")
|
217 |
gr.Markdown("### Generation Parameters")
|
@@ -226,8 +237,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
226 |
repeat_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.1, step=0.1,
|
227 |
info="Penalizes token repetition to improve diversity.")
|
228 |
gr.Markdown("### Web Search Settings")
|
229 |
-
enable_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False,
|
230 |
-
info="Include recent search context to improve answers.")
|
231 |
max_results_number = gr.Number(label="Max Search Results", value=6, precision=0,
|
232 |
info="Maximum number of search results to retrieve.")
|
233 |
max_chars_number = gr.Number(label="Max Chars per Result", value=600, precision=0,
|
@@ -239,13 +248,20 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
239 |
msg_input = gr.Textbox(label="Your Message", placeholder="Enter your message and press Enter")
|
240 |
search_debug = gr.Markdown(label="Web Search Debug")
|
241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
def clear_chat():
    """Reset the UI: empty chat history, blank message box, blank search-debug panel."""
    cleared_history = []
    return cleared_history, "", ""
|
244 |
|
245 |
clear_button.click(fn=clear_chat, outputs=[chatbot, msg_input, search_debug])
|
246 |
cancel_button.click(fn=cancel_generation, outputs=search_debug)
|
247 |
|
248 |
-
# Submission: the chat_response function is
|
249 |
msg_input.submit(
|
250 |
fn=chat_response,
|
251 |
inputs=[msg_input, chatbot, system_prompt_text, enable_search_checkbox,
|
|
|
159 |
pipe = load_pipeline(model_name)
|
160 |
|
161 |
# Use the pipeline directly with conversation history.
|
|
|
162 |
response = pipe(
|
163 |
conversation,
|
164 |
max_new_tokens=max_tokens,
|
|
|
191 |
cancel_event.set()
|
192 |
return "Cancellation requested."
|
193 |
|
194 |
+
# ------------------------------
|
195 |
+
# Helper Function for Default Prompt Update
|
196 |
+
# ------------------------------
|
197 |
+
def update_default_prompt(enable_search):
    """Build the default system prompt for the current date.

    When web search is enabled the prompt additionally instructs the model
    to lean on fresh web data; otherwise it is the plain dated assistant
    prompt. Wired to the Enable Web Search checkbox's change event and used
    for the System Prompt textbox's initial value.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    if not enable_search:
        return f"You are a helpful assistant. Today is {today}."
    return f"You are a helpful assistant. Today is {today}. Please leverage the latest web data when responding to queries."
|
203 |
+
|
204 |
# ------------------------------
|
205 |
# Gradio UI Definition
|
206 |
# ------------------------------
|
|
|
217 |
value=default_model,
|
218 |
info="Choose from available models."
|
219 |
)
|
220 |
+
# Create the Enable Web Search checkbox.
|
221 |
+
enable_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False,
|
222 |
+
info="Include recent search context to improve answers.")
|
223 |
+
# Create the System Prompt textbox with an initial value.
|
224 |
system_prompt_text = gr.Textbox(label="System Prompt",
|
225 |
+
value=update_default_prompt(enable_search_checkbox.value),
|
226 |
lines=3,
|
227 |
info="Define the base context for the AI's responses.")
|
228 |
gr.Markdown("### Generation Parameters")
|
|
|
237 |
repeat_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.1, step=0.1,
|
238 |
info="Penalizes token repetition to improve diversity.")
|
239 |
gr.Markdown("### Web Search Settings")
|
|
|
|
|
240 |
max_results_number = gr.Number(label="Max Search Results", value=6, precision=0,
|
241 |
info="Maximum number of search results to retrieve.")
|
242 |
max_chars_number = gr.Number(label="Max Chars per Result", value=600, precision=0,
|
|
|
248 |
msg_input = gr.Textbox(label="Your Message", placeholder="Enter your message and press Enter")
|
249 |
search_debug = gr.Markdown(label="Web Search Debug")
|
250 |
|
251 |
+
# Wire the Enable Web Search checkbox change to update the System Prompt textbox.
|
252 |
+
enable_search_checkbox.change(
|
253 |
+
fn=update_default_prompt,
|
254 |
+
inputs=[enable_search_checkbox],
|
255 |
+
outputs=[system_prompt_text]
|
256 |
+
)
|
257 |
+
|
258 |
def clear_chat():
|
259 |
return [], "", ""
|
260 |
|
261 |
clear_button.click(fn=clear_chat, outputs=[chatbot, msg_input, search_debug])
|
262 |
cancel_button.click(fn=cancel_generation, outputs=search_debug)
|
263 |
|
264 |
+
# Submission: the chat_response function is used with the Transformers pipeline.
|
265 |
msg_input.submit(
|
266 |
fn=chat_response,
|
267 |
inputs=[msg_input, chatbot, system_prompt_text, enable_search_checkbox,
|