ThinkFlow-llama / app.py
openfree's picture
Update app.py
cd198e1 verified
raw
history blame
11.3 kB
import re
import threading
import gradio as gr
import spaces
import transformers
from transformers import pipeline
# λͺ¨λΈκ³Ό ν† ν¬λ‚˜μ΄μ € λ‘œλ”©
model_name = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
if gr.NO_RELOAD:
pipe = pipeline(
"text-generation",
model=model_name,
device_map="auto",
torch_dtype="auto",
)
# Marker used to detect the final answer. It is stripped from user input and
# inserted at the end of the final-answer prompt.
# NOTE(review): this literal looks like mis-decoded (mojibake) UTF-8 Korean
# text -- confirm the file is stored and read as UTF-8.
ANSWER_MARKER = "**λ‹΅λ³€**"
# Sentence openers that start each step of the step-by-step reasoning.
# One generation pass is run per entry, in order.
# NOTE(review): these literals look like mis-decoded (mojibake) UTF-8 Korean
# text -- confirm the file is stored and read as UTF-8.
rethink_prepends = [
    "자, 이제 λ‹€μŒμ„ νŒŒμ•…ν•΄μ•Ό ν•©λ‹ˆλ‹€ ",
    "제 μƒκ°μ—λŠ” ",
    "μž μ‹œλ§Œμš”, 제 μƒκ°μ—λŠ” ",
    "λ‹€μŒ 사항이 λ§žλŠ”μ§€ 확인해 λ³΄κ² μŠ΅λ‹ˆλ‹€ ",
    "λ˜ν•œ κΈ°μ–΅ν•΄μ•Ό ν•  것은 ",
    "또 λ‹€λ₯Έ μ£Όλͺ©ν•  점은 ",
    "그리고 μ €λŠ” λ‹€μŒκ³Ό 같은 사싀도 κΈ°μ–΅ν•©λ‹ˆλ‹€ ",
    "이제 μΆ©λΆ„νžˆ μ΄ν•΄ν–ˆλ‹€κ³  μƒκ°ν•©λ‹ˆλ‹€ ",
]
# Prompt template appended to the conversation to generate the final answer.
# It is filled with str.format using three fields: {question},
# {reasoning_conclusion} and {ANSWER_MARKER}.
final_answer_prompt = """
μ§€κΈˆκΉŒμ§€μ˜ μΆ”λ‘  과정을 λ°”νƒ•μœΌλ‘œ, μ›λž˜ μ§ˆλ¬Έμ— μ‚¬μš©λœ μ–Έμ–΄λ‘œ λ‹΅λ³€ν•˜κ² μŠ΅λ‹ˆλ‹€:
{question}
μ•„λž˜λŠ” λ‚΄κ°€ μΆ”λ‘ ν•œ κ²°λ‘ μž…λ‹ˆλ‹€:
{reasoning_conclusion}
μœ„ 좔둠을 기반으둜 μ΅œμ’… λ‹΅λ³€:
{ANSWER_MARKER}
"""
# Settings to fix math rendering issues: the delimiter pairs handed to both
# Chatbot components. "$$...$$" is rendered as display math, "$...$" as inline.
latex_delimiters = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
]
def reformat_math(text):
    """Rewrite MathJax delimiters into the dollar syntax Gradio (KaTeX) renders.

    ``\\[ ... \\]`` becomes ``$$...$$`` and ``\\( ... \\)`` becomes ``$...$``;
    whitespace just inside the delimiters is dropped. This is a workaround for
    displaying math formulas in Gradio: so far no other ``latex_delimiters``
    configuration has been found that works as expected.
    """
    # (pattern, replacement) pairs, applied in order: display math first,
    # then inline math.
    substitutions = (
        (r"\\\[\s*(.*?)\s*\\\]", r"$$\1$$"),
        (r"\\\(\s*(.*?)\s*\\\)", r"$\1$"),
    )
    for pattern, replacement in substitutions:
        # DOTALL lets a formula span several lines.
        text = re.sub(pattern, replacement, text, flags=re.DOTALL)
    return text
def user_input(message, history_original, history_thinking):
    """Add the user's message to both chat histories and clear the input box.

    Any occurrence of ``ANSWER_MARKER`` is removed from the message first.

    Returns:
        A 3-tuple: an empty string (the new textbox value), the original-model
        history with the user message appended, and the reasoning-model history
        with the user message appended. Both histories are new lists.
    """
    sanitized = message.replace(ANSWER_MARKER, "")
    # Each chat gets its own message object so the two histories never share one.
    entry_for_original = gr.ChatMessage(role="user", content=sanitized)
    entry_for_thinking = gr.ChatMessage(role="user", content=sanitized)
    updated_original = history_original + [entry_for_original]
    updated_thinking = history_thinking + [entry_for_thinking]
    return "", updated_original, updated_thinking
def rebuild_messages(history: list):
    """Rebuild the model-facing messages from the chat history.

    Intermediate "thinking" entries (those carrying a metadata title) are
    left out so the model only sees the actual conversation.

    Args:
        history: Chat history whose entries are either message dicts or
            ``gr.ChatMessage`` objects. Entries of any other type are ignored.

    Returns:
        A new list of message dicts. Every returned dict is a fresh object, so
        callers may extend ``messages[-1]["content"]`` while building a prompt
        without altering the history that is displayed in the chat.
    """
    messages = []
    for entry in history:
        if isinstance(entry, dict):
            # "metadata" may be missing or explicitly None; treat both as empty.
            metadata = entry.get("metadata") or {}
            if not metadata.get("title", False):
                # Shallow copy: the caller appends prompt text to the content,
                # which must not leak back into the displayed history entry.
                messages.append(dict(entry))
            continue
        if (
            isinstance(entry, gr.ChatMessage)
            and (entry.metadata or {}).get("title", None) is None
            and isinstance(entry.content, str)
        ):
            messages.append({"role": entry.role, "content": entry.content})
    return messages
@spaces.GPU
def bot_original(
    history: list,
    max_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Have the original model answer the question directly (no reasoning).

    Args:
        history: Chat history; an empty assistant message is appended to it
            and filled in as tokens arrive.
        max_num_tokens: Passed to the pipeline as ``max_new_tokens``.
        do_sample: Passed to the pipeline as ``do_sample``.
        temperature: Passed to the pipeline as ``temperature``.

    Yields:
        The history list after each streamed token, and once more after the
        generation thread has finished.
    """
    # Streamer used to receive tokens from the generation thread.
    token_stream = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )

    # Prepare the (still empty) assistant message shown in the chat.
    reply = gr.ChatMessage(role="assistant", content="")
    history.append(reply)

    # The prompt is the history without the empty placeholder just appended.
    messages = rebuild_messages(history[:-1])

    # The original model answers right away, without reasoning steps.
    generation_kwargs = {
        "max_new_tokens": max_num_tokens,
        "streamer": token_stream,
        "do_sample": do_sample,
        "temperature": temperature,
    }
    worker = threading.Thread(
        target=pipe,
        args=(messages,),
        kwargs=generation_kwargs,
    )
    worker.start()

    for token in token_stream:
        # Re-run the math reformatting on the whole text: a delimiter pair may
        # only become complete once a later token arrives.
        reply.content = reformat_math(reply.content + token)
        yield history

    worker.join()
    yield history
@spaces.GPU
def bot_thinking(
    history: list,
    max_num_tokens: int,
    final_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Have the model answer the question with a visible reasoning phase.

    One generation pass is run per entry of ``rethink_prepends``; the streamed
    text is collected in a titled "thinking" message. A final pass, prompted
    with ``final_answer_prompt``, then streams the answer into a second,
    untitled assistant message.

    Args:
        history: Chat history; its last entry is read as ``history[-1]["content"]``
            to obtain the question. Two assistant messages are appended to it.
        max_num_tokens: ``max_new_tokens`` for each reasoning pass.
        final_num_tokens: ``max_new_tokens`` for the final-answer pass.
        do_sample: Passed to the pipeline as ``do_sample``.
        temperature: Passed to the pipeline as ``temperature``.

    Yields:
        The history list after each streamed token, and once more at the end.
    """
    # Streamer used to receive tokens from the generation threads; the same
    # instance is reused for every pass.
    token_stream = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )

    def launch_generation(prompt_messages, token_budget):
        """Start the pipeline on a background thread and return that thread."""
        worker = threading.Thread(
            target=pipe,
            args=(prompt_messages,),
            kwargs={
                "max_new_tokens": token_budget,
                "streamer": token_stream,
                "do_sample": do_sample,
                "temperature": temperature,
            },
        )
        worker.start()
        return worker

    # Kept so the question can be re-inserted into the prompts when needed.
    question = history[-1]["content"]

    # Prepare the assistant message that displays the reasoning in the chat.
    thoughts = gr.ChatMessage(
        role="assistant",
        content="",
        metadata={"title": "🧠 생각 쀑...", "status": "pending"},
    )
    history.append(thoughts)

    # Model-facing messages. rebuild_messages drops titled entries, so the
    # placeholder above is not included and the step prefixes below are
    # appended to the last remaining message.
    messages = rebuild_messages(history)

    # Holds the complete reasoning text.
    full_reasoning = ""

    # Run the reasoning steps.
    for step, prepend in enumerate(rethink_prepends):
        step_prefix = prepend.format(question=question)
        if step > 0:
            messages[-1]["content"] += "\n\n"
        messages[-1]["content"] += step_prefix

        worker = launch_generation(messages, max_num_tokens)

        # Show the prefix, then the streamed tokens. Generated tokens are added
        # only to the displayed message, not to the prompt messages.
        thoughts.content += step_prefix
        for token in token_stream:
            thoughts.content = reformat_math(thoughts.content + token)
            yield history
        worker.join()

        # Record the reasoning accumulated up to and including this step.
        full_reasoning = thoughts.content

    # Reasoning finished; now generate the final answer.
    thoughts.metadata = {"title": "πŸ’­ 사고 κ³Όμ •", "status": "done"}

    # Extract the conclusion of the reasoning: the last two paragraphs when
    # there are more than two, otherwise the whole text.
    paragraphs = full_reasoning.split("\n\n")
    if len(paragraphs) > 2:
        reasoning_conclusion = "\n\n".join(paragraphs[-2:])
    else:
        reasoning_conclusion = full_reasoning

    # Add the (still empty) final-answer message.
    answer = gr.ChatMessage(role="assistant", content="")
    history.append(answer)

    # Build the messages for the final answer, leaving out the empty message
    # that was just appended.
    final_messages = rebuild_messages(history[:-1])
    final_messages[-1]["content"] += final_answer_prompt.format(
        question=question,
        reasoning_conclusion=reasoning_conclusion,
        ANSWER_MARKER=ANSWER_MARKER,
    )

    # Generate and stream the final answer.
    worker = launch_generation(final_messages, final_num_tokens)
    for token in token_stream:
        answer.content = reformat_math(answer.content + token)
        yield history
    worker.join()

    yield history
with gr.Blocks(fill_height=True, title="Vidraft ThinkFlow") as demo:
    # Title and description.
    gr.Markdown("# Vidraft ThinkFlow")
    gr.Markdown("### μΆ”λ‘  κΈ°λŠ₯이 μ—†λŠ” LLM λͺ¨λΈμ˜ μˆ˜μ • 없이도 μΆ”λ‘  κΈ°λŠ₯을 μžλ™μœΌλ‘œ μ μš©ν•˜λŠ” LLM μΆ”λ‘  생성 ν”Œλž«νΌ")
    # Two chat panels: the plain model and the model with reasoning.
    with gr.Row(scale=1):
        with gr.Column(scale=2):
            gr.Markdown("## Before (Original)")
            chatbot_original = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Original Model (No Reasoning)"
            )
        with gr.Column(scale=2):
            gr.Markdown("## After (Thinking)")
            chatbot_thinking = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Model with Reasoning"
            )
    with gr.Row():
        # Define the msg textbox first (the examples below refer to it).
        msg = gr.Textbox(
            submit_btn=True,
            label="",
            show_label=False,
            placeholder="여기에 μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”.",
            autofocus=True,
        )
    # Examples section - placed after the msg variable is defined.
    with gr.Accordion("EXAMPLES", open=False):
        examples = gr.Examples(
            examples=[
                "[좜처: MATH-500)] 처음 100개의 μ–‘μ˜ μ •μˆ˜ μ€‘μ—μ„œ 3, 4, 5둜 λ‚˜λˆ„μ–΄ λ–¨μ–΄μ§€λŠ” μˆ˜λŠ” λͺ‡ κ°œμž…λ‹ˆκΉŒ?",
                "[좜처: MATH-500)] μž‰ν¬μ˜ λ•…μ—μ„œ 돈 μ‹œμŠ€ν…œμ€ λ…νŠΉν•©λ‹ˆλ‹€. νŠΈλ§ν‚· 1κ°œλŠ” 블링킷 4κ°œμ™€ κ°™κ³ , 블링킷 3κ°œλŠ” λ“œλ§ν¬ 7κ°œμ™€ κ°™μŠ΅λ‹ˆλ‹€. νŠΈλ§ν‚·μ—μ„œ λ“œλ§ν¬ 56개의 κ°€μΉ˜λŠ” μ–Όλ§ˆμž…λ‹ˆκΉŒ?",
                "[좜처: MATH-500)] 에이미, λ²€, 크리슀의 평균 λ‚˜μ΄λŠ” 6μ‚΄μž…λ‹ˆλ‹€. 4λ…„ μ „ ν¬λ¦¬μŠ€λŠ” μ§€κΈˆ 에이미와 같은 λ‚˜μ΄μ˜€μŠ΅λ‹ˆλ‹€. 4λ…„ ν›„ 벀의 λ‚˜μ΄λŠ” κ·Έλ•Œ μ—μ΄λ―Έμ˜ λ‚˜μ΄μ˜ $\\frac{3}{5}$κ°€ 될 κ²ƒμž…λ‹ˆλ‹€. ν¬λ¦¬μŠ€λŠ” μ§€κΈˆ λͺ‡ μ‚΄μž…λ‹ˆκΉŒ?",
                "[좜처: MATH-500)] λ…Έλž€μƒ‰κ³Ό νŒŒλž€μƒ‰ ꡬ슬이 λ“€μ–΄ μžˆλŠ” 가방이 μžˆμŠ΅λ‹ˆλ‹€. ν˜„μž¬ νŒŒλž€μƒ‰ ꡬ슬과 λ…Έλž€μƒ‰ ꡬ슬의 λΉ„μœ¨μ€ 4:3μž…λ‹ˆλ‹€. νŒŒλž€μƒ‰ ꡬ슬 5개λ₯Ό λ”ν•˜κ³  λ…Έλž€μƒ‰ ꡬ슬 3개λ₯Ό μ œκ±°ν•˜λ©΄ λΉ„μœ¨μ€ 7:3이 λ©λ‹ˆλ‹€. 더 λ„£κΈ° 전에 가방에 νŒŒλž€μƒ‰ ꡬ슬이 λͺ‡ 개 μžˆμ—ˆμŠ΅λ‹ˆκΉŒ?"
            ],
            inputs=msg
        )
    # Generation parameters (header text: parameter tuning).
    with gr.Row():
        with gr.Column():
            gr.Markdown("""## λ§€κ°œλ³€μˆ˜ μ‘°μ •""")
            # Token limit; passed to both bots as their max_num_tokens argument.
            num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="μΆ”λ‘  단계당 μ΅œλŒ€ 토큰 수",
                interactive=True,
            )
            # Token limit for the final answer of the reasoning bot.
            final_num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="μ΅œμ’… λ‹΅λ³€μ˜ μ΅œλŒ€ 토큰 수",
                interactive=True,
            )
            do_sample = gr.Checkbox(True, label="μƒ˜ν”Œλ§ μ‚¬μš©")
            temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="μ˜¨λ„")
    # When the user submits a message it is first added to both chats; the
    # original bot then responds, followed by the reasoning bot (the steps are
    # chained with .then, so they run one after another).
    msg.submit(
        user_input,
        [msg, chatbot_original, chatbot_thinking],  # inputs
        [msg, chatbot_original, chatbot_thinking],  # outputs
    ).then(
        bot_original,
        [
            chatbot_original,
            num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_original,  # store the new history in the output
    ).then(
        bot_thinking,
        [
            chatbot_thinking,
            num_tokens,
            final_num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_thinking,  # store the new history in the output
    )
# Script entry point: enable the request queue, then start the Gradio app.
if __name__ == "__main__":
    demo.queue().launch()