# ThinkFlow-llama — app.py (Hugging Face Space)
import re
import threading
import gradio as gr
import spaces
import transformers
from transformers import pipeline
# λͺ¨λΈκ³Ό ν† ν¬λ‚˜μ΄μ € λ‘œλ”©
model_name = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
if gr.NO_RELOAD:
pipe = pipeline(
"text-generation",
model=model_name,
device_map="auto",
torch_dtype="auto",
)
# μ΅œμ’… 닡변을 κ°μ§€ν•˜κΈ° μœ„ν•œ 마컀
ANSWER_MARKER = "**λ‹΅λ³€**"
# 단계별 좔둠을 μ‹œμž‘ν•˜λŠ” λ¬Έμž₯λ“€
rethink_prepends = [
"자, 이제 λ‹€μŒμ„ νŒŒμ•…ν•΄μ•Ό ν•©λ‹ˆλ‹€ ",
"제 μƒκ°μ—λŠ” ",
"μž μ‹œλ§Œμš”, 제 μƒκ°μ—λŠ” ",
"λ‹€μŒ 사항이 λ§žλŠ”μ§€ 확인해 λ³΄κ² μŠ΅λ‹ˆλ‹€ ",
"λ˜ν•œ κΈ°μ–΅ν•΄μ•Ό ν•  것은 ",
"또 λ‹€λ₯Έ μ£Όλͺ©ν•  점은 ",
"그리고 μ €λŠ” λ‹€μŒκ³Ό 같은 사싀도 κΈ°μ–΅ν•©λ‹ˆλ‹€ ",
"이제 μΆ©λΆ„νžˆ μ΄ν•΄ν–ˆλ‹€κ³  μƒκ°ν•©λ‹ˆλ‹€ ",
"μ§€κΈˆκΉŒμ§€μ˜ 정보λ₯Ό λ°”νƒ•μœΌλ‘œ, μ›λž˜ μ§ˆλ¬Έμ— μ‚¬μš©λœ μ–Έμ–΄λ‘œ λ‹΅λ³€ν•˜κ² μŠ΅λ‹ˆλ‹€:"
"\n{question}\n"
f"\n{ANSWER_MARKER}\n",
]
# μˆ˜μ‹ ν‘œμ‹œ 문제 해결을 μœ„ν•œ μ„€μ •
latex_delimiters = [
{"left": "$$", "right": "$$", "display": True},
{"left": "$", "right": "$", "display": False},
]
# Precompiled delimiter patterns: reformat_math runs once per streamed token
# on an ever-growing string, so compiling once here hoists the pattern lookup
# out of that hot loop.
_DISPLAY_MATH_RE = re.compile(r"\\\[\s*(.*?)\s*\\\]", flags=re.DOTALL)
_INLINE_MATH_RE = re.compile(r"\\\(\s*(.*?)\s*\\\)", flags=re.DOTALL)


def reformat_math(text):
    """Rewrite MathJax delimiters to the Gradio (KaTeX) syntax.

    Converts display math ``\\[ ... \\]`` to ``$$ ... $$`` and inline math
    ``\\( ... \\)`` to ``$ ... $``, trimming surrounding whitespace inside
    the delimiters. This is a workaround for Gradio math rendering; no
    ``latex_delimiters`` configuration was found that handles the MathJax
    forms directly.

    Parameters:
        text: Markdown/LaTeX text produced by the model.
    Returns:
        The text with delimiters rewritten; other content is unchanged.
    """
    text = _DISPLAY_MATH_RE.sub(r"$$\1$$", text)
    text = _INLINE_MATH_RE.sub(r"$\1$", text)
    return text
def user_input(message, history: list):
    """Append the submitted message to the chat history and clear the textbox.

    Strips any occurrence of ANSWER_MARKER from the user's text so it cannot
    prematurely trigger final-answer detection.

    Returns:
        A pair of ("", new_history): empty string clears the input box.
    """
    sanitized = message.replace(ANSWER_MARKER, "")
    updated = list(history)
    updated.append(gr.ChatMessage(role="user", content=sanitized))
    return "", updated
def rebuild_messages(history: list):
    """Rebuild the message list that the model will consume from the history.

    Plain dict entries are kept as-is unless they carry a metadata title;
    gr.ChatMessage entries are converted to role/content dicts only when they
    have a metadata title and string content.
    """
    messages = []
    for entry in history:
        if isinstance(entry, dict):
            has_title = entry.get("metadata", {}).get("title", False)
            if not has_title:
                messages.append(entry)
        elif isinstance(entry, gr.ChatMessage):
            if entry.metadata.get("title") and isinstance(entry.content, str):
                messages.append({"role": entry.role, "content": entry.content})
    return messages
@spaces.GPU
def bot(
    history: list,
    max_num_tokens: int,
    final_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Generate staged reasoning plus a final answer for the last question.

    Runs the pipeline once per entry in ``rethink_prepends``, streaming
    tokens into the chat history and yielding the updated history after
    every token so the Gradio UI renders progress live.

    Parameters:
        history: chat history; the last entry holds the user's question.
        max_num_tokens: token budget for each intermediate reasoning step.
        final_num_tokens: token budget for the final-answer step.
        do_sample: whether to sample (vs. greedy decoding).
        temperature: sampling temperature forwarded to the pipeline.
    """
    # Streamer lets us pull tokens from the generation thread as they arrive.
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )
    # Keep the question so it can be re-inserted into the final prompt step.
    question = history[-1]["content"]
    # Placeholder assistant message that accumulates the reasoning text.
    history.append(
        gr.ChatMessage(
            role="assistant",
            content=str(""),
            metadata={"title": "🧠 생각 쀑...", "status": "pending"},
        )
    )
    # Prompt messages actually sent to the model (titled entries included —
    # see rebuild_messages).
    messages = rebuild_messages(history)
    for i, prepend in enumerate(rethink_prepends):
        if i > 0:
            messages[-1]["content"] += "\n\n"
        messages[-1]["content"] += prepend.format(question=question)
        # The step containing ANSWER_MARKER is the final answer → use the
        # final token budget instead of the per-step one.
        num_tokens = int(
            max_num_tokens if ANSWER_MARKER not in prepend else final_num_tokens
        )
        # Run generation on a worker thread; tokens flow back via `streamer`.
        t = threading.Thread(
            target=pipe,
            args=(messages,),
            kwargs=dict(
                max_new_tokens=num_tokens,
                streamer=streamer,
                do_sample=do_sample,
                temperature=temperature,
            ),
        )
        t.start()

        # Mirror the prompt prefix into the visible chat history.
        history[-1].content += prepend.format(question=question)
        if ANSWER_MARKER in prepend:
            # Thinking is done; start a fresh plain message (no metadata)
            # so the final answer renders as a normal reply.
            history[-1].metadata = {"title": "πŸ’­ 사고 κ³Όμ •", "status": "done"}
            history.append(gr.ChatMessage(role="assistant", content=""))
        # NOTE(review): streamed output is appended to `history` only, never
        # back into `messages`, so later steps appear not to see earlier
        # generations — confirm this is intended.
        for token in streamer:
            history[-1].content += token
            history[-1].content = reformat_math(history[-1].content)
            yield history
        # Wait for this step's generation thread before reusing the streamer.
        t.join()
    yield history
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(fill_height=True, title="λͺ¨λ“  LLM λͺ¨λΈμ— μΆ”λ‘  λŠ₯λ ₯ λΆ€μ—¬ν•˜κΈ°") as demo:
    with gr.Row(scale=1):
        with gr.Column(scale=5):
            # Chat area: renders messages with KaTeX math support.
            chatbot = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
            )
            msg = gr.Textbox(
                submit_btn=True,
                label="",
                show_label=False,
                placeholder="여기에 μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”.",
                autofocus=True,
            )
        with gr.Column(scale=1):
            # Generation-parameter controls.
            gr.Markdown("""## λ§€κ°œλ³€μˆ˜ μ‘°μ •""")
            num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="μΆ”λ‘  단계당 μ΅œλŒ€ 토큰 수",
                interactive=True,
            )
            final_num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="μ΅œμ’… λ‹΅λ³€μ˜ μ΅œλŒ€ 토큰 수",
                interactive=True,
            )
            do_sample = gr.Checkbox(True, label="μƒ˜ν”Œλ§ μ‚¬μš©")
            temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="μ˜¨λ„")

    # When the user submits a message, the bot responds.
    msg.submit(
        user_input,
        [msg, chatbot],  # inputs
        [msg, chatbot],  # outputs
    ).then(
        bot,
        [
            chatbot,
            num_tokens,
            final_num_tokens,
            do_sample,
            temperature,
        ],  # effectively the "history" input
        chatbot,  # store the new history from the output
    )

if __name__ == "__main__":
    demo.queue().launch()