ThinkFlow-llama / app.py
openfree's picture
Update app.py
e94718a verified
raw
history blame
9.05 kB
import re
import threading
import gradio as gr
import spaces
import transformers
from transformers import pipeline
# Models offered in the UI, mapping Hugging Face repo id -> display label.
# Display labels must stay unique: the UI inverts this mapping to recover the
# repo id from the label the user ticked.
#
# "Qwen/Qwen2-1.5B-Instruct" is listed because it is the fallback used by
# load_model() and the entry the model selector looks up for its default value;
# without it that lookup raises KeyError while the UI is being built.
#
# NOTE(review): "Hermes-3-Llama-3.1-8B" and "gemma-3-27b-it-abliterated" carry
# no organisation prefix, and the bartowski entry is a GGUF repo -- confirm
# these ids can actually be loaded through transformers.pipeline().
available_models = {
    "Qwen/Qwen2-1.5B-Instruct": "Qwen 2 (1.5B)",
    "meta-llama/Llama-3.2-3B-Instruct": "Llama 3.2(3B)",
    "Hermes-3-Llama-3.1-8B": "Hermes 3 Llama 3.1 (8B)",
    "nvidia/Llama-3.1-Nemotron-Nano-8B-v1": "Nvidia Nemotron Nano (8B)",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503": "Mistral Small 3.1 (24B)",
    "bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF": "Mistral Small GGUF (24B)",
    "google/gemma-3-27b-it": "Google Gemma 3 (27B)",
    "gemma-3-27b-it-abliterated": "Gemma 3 Abliterated (27B)",
    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen 2.5 Coder (32B)",
    "open-r1/OlympicCoder-32B": "Olympic Coder (32B)",
}

# Global handle on the loaded text-generation pipeline; None until a model
# has been loaded.
pipe = None
# Marker that signals the start of the final answer.
ANSWER_MARKER = "**λ‹΅λ³€**"

# Sentence openers injected one after another to start each reasoning step.
# The last entry is the only one containing ANSWER_MARKER; it also carries a
# "{question}" placeholder that is filled in with str.format() by the caller.
rethink_prepends = [
    "자, 이제 λ‹€μŒμ„ νŒŒμ•…ν•΄μ•Ό ν•©λ‹ˆλ‹€ ",
    "제 μƒκ°μ—λŠ” ",
    "μž μ‹œλ§Œμš”, 제 μƒκ°μ—λŠ” ",
    "λ‹€μŒ 사항이 λ§žλŠ”μ§€ 확인해 λ³΄κ² μŠ΅λ‹ˆλ‹€ ",
    "λ˜ν•œ κΈ°μ–΅ν•΄μ•Ό ν•  것은 ",
    "또 λ‹€λ₯Έ μ£Όλͺ©ν•  점은 ",
    "그리고 μ €λŠ” λ‹€μŒκ³Ό 같은 사싀도 κΈ°μ–΅ν•©λ‹ˆλ‹€ ",
    "이제 μΆ©λΆ„νžˆ μ΄ν•΄ν–ˆλ‹€κ³  μƒκ°ν•©λ‹ˆλ‹€ ",
    (
        "μ§€κΈˆκΉŒμ§€μ˜ 정보λ₯Ό λ°”νƒ•μœΌλ‘œ, μ›λž˜ μ§ˆλ¬Έμ— μ‚¬μš©λœ μ–Έμ–΄λ‘œ λ‹΅λ³€ν•˜κ² μŠ΅λ‹ˆλ‹€:"
        + "\n{question}\n"
        + "\n" + ANSWER_MARKER + "\n"
    ),
]
# Delimiter configuration passed to the chatbot so that math renders:
# "$$...$$" as display math and "$...$" as inline math.
latex_delimiters = [
    dict(left="$$", right="$$", display=True),
    dict(left="$", right="$", display=False),
]
def reformat_math(text):
    """Rewrite MathJax-style delimiters into the dollar delimiters used by Gradio.

    ``\\[ ... \\]`` becomes ``$$...$$`` and ``\\( ... \\)`` becomes ``$...$``;
    whitespace directly inside the delimiters is dropped and the expression may
    span several lines.

    This is a workaround for displaying math in Gradio: so far no combination
    of ``latex_delimiters`` has been found that works as expected on its own.
    """
    # (pattern, replacement) pairs, applied in order: display math first,
    # then inline math.
    substitutions = (
        (r"\\\[\s*(.*?)\s*\\\]", r"$$\1$$"),
        (r"\\\(\s*(.*?)\s*\\\)", r"$\1$"),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text, flags=re.DOTALL)
    return text
def user_input(message, history: list):
    """Append the user's message to the history and clear the input textbox.

    Any occurrence of ANSWER_MARKER is stripped from the message before it is
    stored. Returns a pair: an empty string (the new textbox value) and a new
    history list; the list passed in is not modified.
    """
    sanitized = message.replace(ANSWER_MARKER, "")
    user_turn = gr.ChatMessage(role="user", content=sanitized)
    return "", history + [user_turn]
def rebuild_messages(history: list):
    """Build the message list sent to the model from the chat history.

    Selection rules, applied per history entry:

    * ``dict`` entries are kept as-is (same object) unless their metadata
      carries a truthy ``"title"``; titled dict entries are skipped.
    * ``gr.ChatMessage`` entries are kept only when they have a truthy
      ``"title"`` in their metadata and string content; they are converted to
      a fresh ``{"role": ..., "content": ...}`` dict.
    * Everything else is dropped.

    Metadata that is missing or explicitly ``None`` is treated as empty, so
    such entries no longer raise ``AttributeError``.
    """
    messages = []
    for h in history:
        if isinstance(h, dict):
            # `h.get("metadata", {})` would return None when the key exists
            # with a None value, so normalise with `or {}` instead.
            if not (h.get("metadata") or {}).get("title", False):
                messages.append(h)
            continue
        if (
            isinstance(h, gr.ChatMessage)
            and (h.metadata or {}).get("title")
            and isinstance(h.content, str)
        ):
            messages.append({"role": h.role, "content": h.content})
    return messages
def load_model(model_names):
    """Load a text-generation pipeline for the selected model.

    Only the first entry of ``model_names`` is used (this could later be
    extended to an ensemble of several models). When the selection is empty,
    "Qwen/Qwen2-1.5B-Instruct" is loaded instead. The pipeline is stored in the
    module-level ``pipe`` and a status message naming the model is returned.
    """
    global pipe

    # Empty selection -> built-in default; otherwise take the first choice.
    model_name = model_names[0] if model_names else "Qwen/Qwen2-1.5B-Instruct"

    loader_options = {
        "model": model_name,
        "device_map": "auto",
        "torch_dtype": "auto",
    }
    pipe = pipeline("text-generation", **loader_options)

    return f"λͺ¨λΈ '{model_name}'이(κ°€) λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€."
@spaces.GPU
def bot(
    history: list,
    max_num_tokens: int,
    final_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Make the model answer the last question, streaming its reasoning first.

    This is a generator: it yields the updated ``history`` after every streamed
    token so the chat display refreshes live.

    For each entry of ``rethink_prepends`` the assistant turn is extended with
    that sentence opener and the model is asked to continue it. The steps
    before the one containing ANSWER_MARKER are shown in a titled "thinking"
    message; the final step streams into a separate, untitled answer message.

    Args:
        history: Chat history; the last entry is read as a mapping holding the
            user's question under ``"content"``.
        max_num_tokens: ``max_new_tokens`` for each reasoning step.
        final_num_tokens: ``max_new_tokens`` for the final answer step.
        do_sample: Forwarded to the pipeline call.
        temperature: Forwarded to the pipeline call.
    """
    # Without a loaded model, report that in the chat and stop.
    if pipe is None:
        history.append(
            gr.ChatMessage(
                role="assistant",
                content="λͺ¨λΈμ΄ λ‘œλ“œλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. ν•˜λ‚˜ μ΄μƒμ˜ λͺ¨λΈμ„ 선택해 μ£Όμ„Έμš”.",
            )
        )
        yield history
        return

    # Lets this generator read tokens while generation runs in a worker thread.
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )

    # Kept so the question can be re-inserted into the final reasoning prompt.
    question = history[-1]["content"]

    # Placeholder assistant message that will display the reasoning.
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "🧠 생각 쀑...", "status": "pending"},
        )
    )

    # `messages` is what the model sees. Its last entry is a fresh dict copy of
    # the placeholder above, so it has to be kept in sync by hand.
    messages = rebuild_messages(history)
    for i, prepend in enumerate(rethink_prepends):
        step_prefix = prepend.format(question=question)
        if i > 0:
            messages[-1]["content"] += "\n\n"
        messages[-1]["content"] += step_prefix

        num_tokens = int(
            max_num_tokens if ANSWER_MARKER not in prepend else final_num_tokens
        )
        t = threading.Thread(
            target=pipe,
            args=(messages,),
            kwargs=dict(
                max_new_tokens=num_tokens,
                streamer=streamer,
                do_sample=do_sample,
                temperature=temperature,
            ),
        )
        t.start()

        # Mirror the new prefix into the displayed history.
        history[-1].content += step_prefix
        if ANSWER_MARKER in prepend:
            history[-1].metadata = {"title": "πŸ’­ 사고 κ³Όμ •", "status": "done"}
            # Thinking is over: the answer goes into its own message
            # (no metadata, so it is not rendered as an intermediate step).
            history.append(gr.ChatMessage(role="assistant", content=""))

        step_tokens = []
        for token in streamer:
            step_tokens.append(token)
            history[-1].content += token
            history[-1].content = reformat_math(history[-1].content)
            yield history
        t.join()

        # Feed this step's output back into the prompt. Previously only the
        # displayed history received the tokens, so every later step (and the
        # final answer) was generated without seeing the earlier reasoning.
        # Done after join() so the worker never sees a half-updated prompt.
        messages[-1]["content"] += "".join(step_tokens)

    yield history
with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
    # Title and description at the top of the page.
    gr.Markdown("""
# ThinkFlow
## A thought amplification service that implants step-by-step reasoning abilities into LLMs without model modification
""")

    with gr.Row(scale=1):
        with gr.Column(scale=5):
            # Chat interface.
            chatbot = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
            )
            msg = gr.Textbox(
                submit_btn=True,
                label="",
                show_label=False,
                placeholder="여기에 μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”.",
                autofocus=True,
            )

        with gr.Column(scale=1):
            # Model selection section.
            gr.Markdown("""## λͺ¨λΈ 선택""")

            # Preselect the default model only when it is actually offered.
            # Indexing available_models directly raised KeyError at import
            # time when the entry was absent. With nothing preselected,
            # load_model() still falls back to its own default model.
            default_model_label = available_models.get("Qwen/Qwen2-1.5B-Instruct")
            model_selector = gr.CheckboxGroup(
                choices=list(available_models.values()),
                value=[default_model_label] if default_model_label else [],
                label="μ‚¬μš©ν•  LLM λͺ¨λΈ 선택 (볡수 선택 κ°€λŠ₯)",
            )

            # Model load button and status display.
            load_model_btn = gr.Button("λͺ¨λΈ λ‘œλ“œ")
            model_status = gr.Textbox(label="λͺ¨λΈ μƒνƒœ", interactive=False)

            # Generation parameters.
            gr.Markdown("""## λ§€κ°œλ³€μˆ˜ μ‘°μ •""")
            num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="μΆ”λ‘  단계당 μ΅œλŒ€ 토큰 수",
                interactive=True,
            )
            final_num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="μ΅œμ’… λ‹΅λ³€μ˜ μ΅œλŒ€ 토큰 수",
                interactive=True,
            )
            do_sample = gr.Checkbox(True, label="μƒ˜ν”Œλ§ μ‚¬μš©")
            temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="μ˜¨λ„")

    def get_model_names(selected_models):
        """Translate the display labels ticked in the UI back to model ids."""
        inverse_map = {v: k for k, v in available_models.items()}
        return [inverse_map[model] for model in selected_models]

    # Load the selected model when the button is clicked and show the status.
    load_model_btn.click(
        lambda selected: load_model(get_model_names(selected)),
        inputs=[model_selector],
        outputs=[model_status],
    )

    # When the user submits a message, record it and then let the bot respond.
    msg.submit(
        user_input,
        [msg, chatbot],  # inputs
        [msg, chatbot],  # outputs
    ).then(
        bot,
        [
            chatbot,
            num_tokens,
            final_num_tokens,
            do_sample,
            temperature,
        ],  # the first input is effectively the "history"
        chatbot,  # the new history is written back to the chatbot
    )

if __name__ == "__main__":
    demo.queue().launch()