import re
import threading
import gradio as gr
import spaces
import transformers
from transformers import pipeline
# Loading model and tokenizer
model_name = "meta-llama/Llama-3.1-8B-Instruct"
if gr.NO_RELOAD:
    pipe = pipeline(
        "text-generation",
        model=model_name,
        device_map="auto",
        torch_dtype="auto",
    )
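
# Note: the gr.NO_RELOAD block keeps the pipeline from being re-created on each
# auto-reload when developing with `gradio app.py`; device_map="auto" and
# torch_dtype="auto" let transformers choose device placement and precision.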
# Marker for detecting final answer
ANSWER_MARKER = "**Answer**"
# Sentences to start step-by-step reasoning
rethink_prepends = [
"Now, I need to understand the following ",
"In my opinion ",
"Let me verify if the following is correct ",
"Also, I should remember that ",
"Another point to note is ",
"And I also remember the following fact ",
"Now I think I understand sufficiently ",
]
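
# Each prepend is run through .format(question=question) before use, so a
# "{question}" placeholder may be added to any entry to reinject the original
# question; the trailing spaces let the model continue each sentence.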
# Prompt addition for generating final answer
final_answer_prompt = """
Based on my reasoning so far, I will answer the original question in the language in which it was asked:
{question}

Here is the conclusion I reached through my reasoning:
{reasoning_conclusion}

Based on the above reasoning, my final answer:
{ANSWER_MARKER}
"""
# Settings for displaying formulas
latex_delimiters = [
{"left": "$$", "right": "$$", "display": True},
{"left": "$", "right": "$", "display": False},
]
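
# These delimiters are passed to the gr.Chatbot components below so both
# inline ($...$) and display ($$...$$) math render via KaTeX.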
def reformat_math(text):
"""Modify MathJax delimiters to use Gradio syntax (Katex).
This is a temporary fix for displaying math formulas in Gradio. Currently,
I haven't found a way to make it work as expected with other latex_delimiters...
"""
text = re.sub(r"\\\[\s*(.*?)\s*\\\]", r"$$\1$$", text, flags=re.DOTALL)
text = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", text, flags=re.DOTALL)
return text
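
# Illustrative examples of the conversion:
#   reformat_math(r"\(x^2\)")      -> "$x^2$"
#   reformat_math(r"\[ a + b \]")  -> "$$a + b$$"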
def user_input(message, history_original, history_thinking):
"""Add user input to history and clear input text box"""
return "", history_original + [
gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
], history_thinking + [
gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
]
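
# Returning "" first clears the input textbox; ANSWER_MARKER is stripped from
# the user text so it cannot collide with the marker used later to detect the
# final answer.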
def rebuild_messages(history: list):
"""Reconstruct messages from history for model use without intermediate thinking process"""
messages = []
for h in history:
if isinstance(h, dict) and not h.get("metadata", {}).get("title", False):
messages.append(h)
elif (
isinstance(h, gr.ChatMessage)
and h.metadata.get("title", None) is None
and isinstance(h.content, str)
):
messages.append({"role": h.role, "content": h.content})
return messages
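
# Illustrative example: a history such as
#   [{"role": "user", "content": "2+2?"},
#    {"role": "assistant", "content": "...", "metadata": {"title": "🧠 Thinking..."}}]
# is reduced to just the user message, since titled entries are the
# intermediate thinking steps that should not be re-fed to the model.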
@spaces.GPU
def bot_original(
    history: list,
    max_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Make the original model answer the question directly (no reasoning steps)"""

    # Streamer that yields tokens as the worker thread generates them
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )

    # Prepare the assistant message
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="",
        )
    )

    # Messages shown in the current chat
    messages = rebuild_messages(history[:-1])  # exclude the last, empty message

    # The original model answers directly, without a reasoning process;
    # pipe() blocks, so it runs in a thread while we drain the streamer here
    t = threading.Thread(
        target=pipe,
        args=(messages,),
        kwargs=dict(
            max_new_tokens=max_num_tokens,
            streamer=streamer,
            do_sample=do_sample,
            temperature=temperature,
        ),
    )
    t.start()

    for token in streamer:
        history[-1].content += token
        history[-1].content = reformat_math(history[-1].content)
        yield history
    t.join()
    yield history
@spaces.GPU
def bot_thinking(
    history: list,
    max_num_tokens: int,
    final_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Make the model answer the question with an explicit reasoning process"""

    # Streamer that yields tokens as the worker thread generates them
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )

    # Keep the question around so it can be reinserted into the reasoning if needed
    question = history[-1]["content"]

    # Prepare the assistant message
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "🧠 Thinking...", "status": "pending"},
        )
    )

    # Reasoning process displayed in the current chat
    messages = rebuild_messages(history)

    # Accumulates the entire reasoning process
    full_reasoning = ""

    # Run the reasoning steps
    for i, prepend in enumerate(rethink_prepends):
        if i > 0:
            messages[-1]["content"] += "\n\n"
        messages[-1]["content"] += prepend.format(question=question)

        t = threading.Thread(
            target=pipe,
            args=(messages,),
            kwargs=dict(
                max_new_tokens=max_num_tokens,
                streamer=streamer,
                do_sample=do_sample,
                temperature=temperature,
            ),
        )
        t.start()

        # Rebuild the displayed history with the new content
        history[-1].content += prepend.format(question=question)
        step_output = ""
        for token in streamer:
            step_output += token
            history[-1].content += token
            history[-1].content = reformat_math(history[-1].content)
            yield history
        t.join()

        # Feed this step's output back into the prompt so the next reasoning
        # step builds on everything generated so far
        messages[-1]["content"] += step_output

        # Save the reasoning accumulated so far to full_reasoning
        full_reasoning = history[-1].content

    # Reasoning complete; now generate the final answer
    history[-1].metadata = {"title": "💭 Thought Process", "status": "done"}

    # Extract the conclusion from the reasoning process (roughly the last 1-2 paragraphs)
    reasoning_parts = full_reasoning.split("\n\n")
    reasoning_conclusion = (
        "\n\n".join(reasoning_parts[-2:]) if len(reasoning_parts) > 2 else full_reasoning
    )

    # Add the final answer message
    history.append(gr.ChatMessage(role="assistant", content=""))

    # Construct the prompt for the final answer
    final_messages = rebuild_messages(history[:-1])  # exclude the last, empty message
    final_prompt = final_answer_prompt.format(
        question=question,
        reasoning_conclusion=reasoning_conclusion,
        ANSWER_MARKER=ANSWER_MARKER,
    )
    final_messages[-1]["content"] += final_prompt

    # Generate the final answer
    t = threading.Thread(
        target=pipe,
        args=(final_messages,),
        kwargs=dict(
            max_new_tokens=final_num_tokens,
            streamer=streamer,
            do_sample=do_sample,
            temperature=temperature,
        ),
    )
    t.start()

    # Stream the final answer
    for token in streamer:
        history[-1].content += token
        history[-1].content = reformat_math(history[-1].content)
        yield history
    t.join()
    yield history
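
# Note: both bot functions reuse a single TextIteratorStreamer; each
# `for token in streamer` loop ends when one generation finishes, so a new
# worker thread can be started and the streamer iterated again for the next step.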
with gr.Blocks(fill_height=True, title="ThinkFlow") as demo:
    # Title and description
    gr.Markdown("# ThinkFlow")
    gr.Markdown("### An LLM reasoning platform that automatically adds step-by-step reasoning to models, with no changes to the models themselves")
    # Features and benefits section
    with gr.Accordion("✨ Features & Benefits", open=True):
        gr.Markdown("""
        - **Enhanced Reasoning**: Transform any LLM into a step-by-step reasoning engine without model modifications
        - **Transparency**: Visualize the model's thought process alongside direct answers
        - **Improved Accuracy**: See how guided reasoning leads to more accurate solutions for complex problems
        - **Educational Tool**: Perfect for teaching critical thinking and problem-solving approaches
        - **Versatile Application**: Works with mathematical problems, logical puzzles, and complex questions
        - **Side-by-Side Comparison**: Compare standard model responses with reasoning-enhanced outputs
        """)
    with gr.Row(scale=1):
        with gr.Column(scale=2):
            gr.Markdown("## Before (Original)")
            chatbot_original = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Original Model (No Reasoning)",
            )

        with gr.Column(scale=2):
            gr.Markdown("## After (Thinking)")
            chatbot_thinking = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Model with Reasoning",
            )
    with gr.Row():
        # Define the msg textbox first
        msg = gr.Textbox(
            submit_btn=True,
            label="",
            show_label=False,
            placeholder="Enter your question here.",
            autofocus=True,
        )
    # Examples section - placed after the msg variable is defined
    with gr.Accordion("EXAMPLES", open=False):
        examples = gr.Examples(
            examples=[
                "[Source: MATH-500] How many numbers among the first 100 positive integers are divisible by 3, 4, and 5?",
                "[Source: MATH-500] In the land of Ink, the money system is unique. 1 trinket equals 4 blinkets, and 3 blinkets equal 7 drinkits. What is the value of 56 drinkits in trinkets?",
                "[Source: MATH-500] The average age of Amy, Ben, and Chris is 6 years. Four years ago, Chris was the same age as Amy is now. Four years from now, Ben's age will be $\\frac{3}{5}$ of Amy's age at that time. How old is Chris now?",
                "[Source: MATH-500] A bag contains yellow and blue marbles. Currently, the ratio of blue marbles to yellow marbles is 4:3. After adding 5 blue marbles and removing 3 yellow marbles, the ratio becomes 7:3. How many blue marbles were in the bag before any were added?",
            ],
            inputs=msg,
        )
    with gr.Row():
        with gr.Column():
            gr.Markdown("""## Parameter Adjustment""")
            num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="Maximum tokens per reasoning step",
                interactive=True,
            )
            final_num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="Maximum tokens for final answer",
                interactive=True,
            )
            do_sample = gr.Checkbox(True, label="Use sampling")
            temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature")
    # Community link at the bottom
    gr.Markdown("<p style='font-size: 12px;'>Community: <a href='https://discord.gg/openfreeai' target='_blank'>https://discord.gg/openfreeai</a></p>")
    # When the user submits a message, both bots respond in sequence
    # (bot_original first, then bot_thinking, via the chained .then calls)
    msg.submit(
        user_input,
        [msg, chatbot_original, chatbot_thinking],  # inputs
        [msg, chatbot_original, chatbot_thinking],  # outputs
    ).then(
        bot_original,
        [
            chatbot_original,
            num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_original,  # save the new history in the output
    ).then(
        bot_thinking,
        [
            chatbot_thinking,
            num_tokens,
            final_num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_thinking,  # save the new history in the output
    )
if __name__ == "__main__":
    demo.queue().launch()