Spaces:

VIDraft
/

ThinkFlow-llama

Running on Zero

App Files Community

openfree committed on Mar 24

Commit

9e7af9a

verified ·

1 Parent(s): 47cd8f1

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -13

app.py CHANGED Viewed

@@ -29,11 +29,19 @@ rethink_prepends = [
     "또 다른 주목할 점은 ",
     "그리고 저는 다음과 같은 사실도 기억합니다 ",
     "이제 충분히 이해했다고 생각합니다 ",
-    "지금까지의 정보를 바탕으로, 원래 질문에 사용된 언어로 답변하겠습니다:"
-    "\n{question}\n"
-    f"\n{ANSWER_MARKER}\n",
 ]
 # 수식 표시 문제 해결을 위한 설정
 latex_delimiters = [
@@ -69,7 +77,7 @@ def rebuild_messages(history: list):
             messages.append(h)
         elif (
             isinstance(h, gr.ChatMessage)
-            and h.metadata.get("title")
             and isinstance(h.content, str)
         ):
             messages.append({"role": h.role, "content": h.content})
@@ -156,19 +164,21 @@ def bot_thinking(
     # 현재 채팅에 표시될 추론 과정
     messages = rebuild_messages(history)
     for i, prepend in enumerate(rethink_prepends):
         if i > 0:
             messages[-1]["content"] += "\n\n"
         messages[-1]["content"] += prepend.format(question=question)
-        num_tokens = int(
-            max_num_tokens if ANSWER_MARKER not in prepend else final_num_tokens
-        )
         t = threading.Thread(
             target=pipe,
             args=(messages,),
             kwargs=dict(
-                max_new_tokens=num_tokens,
                 streamer=streamer,
                 do_sample=do_sample,
                 temperature=temperature,
@@ -178,21 +188,59 @@ def bot_thinking(
         # 새 내용으로 히스토리 재구성
         history[-1].content += prepend.format(question=question)
-        if ANSWER_MARKER in prepend:
-            history[-1].metadata = {"title": "💭 사고 과정", "status": "done"}
-            # 생각 종료, 이제 답변입니다 (중간 단계에 대한 메타데이터 없음)
-            history.append(gr.ChatMessage(role="assistant", content=""))
         for token in streamer:
             history[-1].content += token
             history[-1].content = reformat_math(history[-1].content)
             yield history
         t.join()
     yield history
 with gr.Blocks(fill_height=True, title="Vidraft ThinkFlow") as demo:
-        # 제목과 설명
     gr.Markdown("# Vidraft ThinkFlow")
     gr.Markdown("### 추론 기능이 없는 LLM 모델의 수정 없이도 추론 기능을 자동으로 적용하는 LLM 추론 생성 플랫폼")

     "또 다른 주목할 점은 ",
     "그리고 저는 다음과 같은 사실도 기억합니다 ",
     "이제 충분히 이해했다고 생각합니다 ",
 ]
+# 최종 답변 생성을 위한 프롬프트 추가
+final_answer_prompt = """
+지금까지의 추론 과정을 바탕으로, 원래 질문에 사용된 언어로 답변하겠습니다:
+{question}
+아래는 내가 추론한 결론입니다:
+{reasoning_conclusion}
+위 추론을 기반으로 최종 답변:
+{ANSWER_MARKER}
+"""
 # 수식 표시 문제 해결을 위한 설정
 latex_delimiters = [
             messages.append(h)
         elif (
             isinstance(h, gr.ChatMessage)
+            and h.metadata.get("title", None) is None
             and isinstance(h.content, str)
         ):
             messages.append({"role": h.role, "content": h.content})
     # 현재 채팅에 표시될 추론 과정
     messages = rebuild_messages(history)
+    # 전체 추론 과정을 저장할 변수
+    full_reasoning = ""
+    # 추론 단계 실행
     for i, prepend in enumerate(rethink_prepends):
         if i > 0:
             messages[-1]["content"] += "\n\n"
         messages[-1]["content"] += prepend.format(question=question)
         t = threading.Thread(
             target=pipe,
             args=(messages,),
             kwargs=dict(
+                max_new_tokens=max_num_tokens,
                 streamer=streamer,
                 do_sample=do_sample,
                 temperature=temperature,
         # 새 내용으로 히스토리 재구성
         history[-1].content += prepend.format(question=question)
         for token in streamer:
             history[-1].content += token
             history[-1].content = reformat_math(history[-1].content)
             yield history
         t.join()
+        # 각 추론 단계의 결과를 full_reasoning에 저장
+        full_reasoning = history[-1].content
+    # 추론 완료, 이제 최종 답변을 생성
+    history[-1].metadata = {"title": "💭 사고 과정", "status": "done"}
+    # 추론 과정에서 결론 부분을 추출 (마지막 1-2 문단 정도)
+    reasoning_parts = full_reasoning.split("\n\n")
+    reasoning_conclusion = "\n\n".join(reasoning_parts[-2:]) if len(reasoning_parts) > 2 else full_reasoning
+    # 최종 답변 메시지 추가
+    history.append(gr.ChatMessage(role="assistant", content=""))
+    # 최종 답변을 위한 메시지 구성
+    final_messages = rebuild_messages(history[:-1])  # 마지막 빈 메시지 제외
+    final_prompt = final_answer_prompt.format(
+        question=question,
+        reasoning_conclusion=reasoning_conclusion,
+        ANSWER_MARKER=ANSWER_MARKER
+    )
+    final_messages[-1]["content"] += final_prompt
+    # 최종 답변 생성
+    t = threading.Thread(
+        target=pipe,
+        args=(final_messages,),
+        kwargs=dict(
+            max_new_tokens=final_num_tokens,
+            streamer=streamer,
+            do_sample=do_sample,
+            temperature=temperature,
+        ),
+    )
+    t.start()
+    # 최종 답변 스트리밍
+    for token in streamer:
+        history[-1].content += token
+        history[-1].content = reformat_math(history[-1].content)
+        yield history
+    t.join()
     yield history
 with gr.Blocks(fill_height=True, title="Vidraft ThinkFlow") as demo:
+    # 제목과 설명
     gr.Markdown("# Vidraft ThinkFlow")
     gr.Markdown("### 추론 기능이 없는 LLM 모델의 수정 없이도 추론 기능을 자동으로 적용하는 LLM 추론 생성 플랫폼")