Spaces:
Running
on
Zero
Running
on
Zero
Forbid the user from producing the answer marker
Browse files
app.py
CHANGED
@@ -16,6 +16,9 @@ if gr.NO_RELOAD:
|
|
16 |
torch_dtype="auto",
|
17 |
)
|
18 |
|
|
|
|
|
|
|
19 |
# the sentences starting the reasoning step by step
|
20 |
rethink_prepends = [
|
21 |
"OK, I need to figure out ",
|
@@ -28,7 +31,7 @@ rethink_prepends = [
|
|
28 |
"I think I have a good grasp ",
|
29 |
"Now, using all the above information, I can answer the question using the original language used for the question:"
|
30 |
"\n{question}\n"
|
31 |
-
"\n
|
32 |
]
|
33 |
|
34 |
|
@@ -52,7 +55,9 @@ def reformat_math(text):
|
|
52 |
|
53 |
def user_input(message, history: list):
|
54 |
"""Append the user input in the history and clean the input textbox"""
|
55 |
-
return "", history + [
|
|
|
|
|
56 |
|
57 |
|
58 |
def rebuild_messages(history: list):
|
@@ -103,7 +108,7 @@ def bot(history: list, max_num_tokens: int, final_num_tokens: int):
|
|
103 |
messages[-1]["content"] += prepend.format(question=question)
|
104 |
|
105 |
num_tokens = int(
|
106 |
-
max_num_tokens if
|
107 |
)
|
108 |
t = threading.Thread(
|
109 |
target=pipe,
|
@@ -117,7 +122,7 @@ def bot(history: list, max_num_tokens: int, final_num_tokens: int):
|
|
117 |
|
118 |
# rebuild the history with the new content
|
119 |
history[-1].content += prepend
|
120 |
-
if
|
121 |
# stop thinking, this is the answer now (no metadata for intermediate steps)
|
122 |
history.append(gr.ChatMessage(role="assistant", content=""))
|
123 |
for token in streamer:
|
@@ -179,8 +184,8 @@ with gr.Blocks(fill_height=True, title="Making any model reasoning") as demo:
|
|
179 |
A good value is 200 to 255.
|
180 |
""")
|
181 |
gr.Markdown("""
|
182 |
-
This interface can work on personal computer with 6Go VRAM (e.g. NVidia
|
183 |
-
the application and try others instruct models.
|
184 |
""")
|
185 |
|
186 |
# when the user submits a message, the bot will answer
|
|
|
16 |
torch_dtype="auto",
|
17 |
)
|
18 |
|
19 |
+
# the answer marker to detect final answer
|
20 |
+
ANSWER_MARKER = "**ANSWER**"
|
21 |
+
|
22 |
# the sentences starting the reasoning step by step
|
23 |
rethink_prepends = [
|
24 |
"OK, I need to figure out ",
|
|
|
31 |
"I think I have a good grasp ",
|
32 |
"Now, using all the above information, I can answer the question using the original language used for the question:"
|
33 |
"\n{question}\n"
|
34 |
+
f"\n{ANSWER_MARKER}\n",
|
35 |
]
|
36 |
|
37 |
|
|
|
55 |
|
56 |
def user_input(message, history: list):
|
57 |
"""Append the user input in the history and clean the input textbox"""
|
58 |
+
return "", history + [
|
59 |
+
gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
|
60 |
+
]
|
61 |
|
62 |
|
63 |
def rebuild_messages(history: list):
|
|
|
108 |
messages[-1]["content"] += prepend.format(question=question)
|
109 |
|
110 |
num_tokens = int(
|
111 |
+
max_num_tokens if ANSWER_MARKER not in prepend else final_num_tokens
|
112 |
)
|
113 |
t = threading.Thread(
|
114 |
target=pipe,
|
|
|
122 |
|
123 |
# rebuild the history with the new content
|
124 |
history[-1].content += prepend
|
125 |
+
if ANSWER_MARKER in prepend:
|
126 |
# stop thinking, this is the answer now (no metadata for intermediate steps)
|
127 |
history.append(gr.ChatMessage(role="assistant", content=""))
|
128 |
for token in streamer:
|
|
|
184 |
A good value is 200 to 255.
|
185 |
""")
|
186 |
gr.Markdown("""
|
187 |
+
This interface can work on a personal computer with 6GB VRAM (e.g. NVidia 3050/3060 on a laptop).
|
188 |
+
Feel free to fork the application and try other instruct models.
|
189 |
""")
|
190 |
|
191 |
# when the user submits a message, the bot will answer
|