Update app.py
app.py
CHANGED
@@ -29,11 +29,11 @@ try:
        MODEL_ID,
        torch_dtype=torch.float32,
        device_map="cpu",
-        force_download=True #
+        force_download=True  # kept to work around the earlier error (set to False or remove once no longer needed)
    )
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
-        force_download=True #
+        force_download=True  # kept to work around the earlier error (set to False or remove once no longer needed)
    )
    model.eval()
    print("--- Model and Tokenizer Loaded Successfully on CPU ---")
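Note: force_download=True re-fetches the weights from the Hub on every Space restart, which is slow; it is usually only needed once, to recover from a corrupted cache. A minimal sketch of this load path, where the MODEL_ID value is an assumption (the real ID is defined elsewhere in app.py and not shown in this diff):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: the Space's actual MODEL_ID is defined earlier in app.py.
MODEL_ID = "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,   # full precision on CPU
    device_map="cpu",
    force_download=False,        # flip to True only while recovering from a broken cache
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, force_download=False)
model.eval()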
@@ -41,14 +41,24 @@ try:
    stop_token_strings = ["<|endofturn|>", "<|stop|>"]
    stop_token_ids_list = [tokenizer.convert_tokens_to_ids(token) for token in stop_token_strings]

-
-
+    # Check that the tokenizer's eos_token_id is actually set
+    if tokenizer.eos_token is not None and tokenizer.eos_token_id not in stop_token_ids_list:
+        # Append only when eos_token_id is not None and not already in the list
+        if tokenizer.eos_token_id is not None:
+            stop_token_ids_list.append(tokenizer.eos_token_id)
+        else:
+            print("Warning: tokenizer.eos_token_id is None. Cannot add to stop tokens.")
+    elif tokenizer.eos_token is None:
+        print("Warning: tokenizer.eos_token is not defined.")
+

    stop_token_ids_list = [tid for tid in stop_token_ids_list if tid is not None]

    if not stop_token_ids_list:
-        print("Warning: Could not find any stop token IDs.
-
+        print("Warning: Could not find any stop token IDs. Generation might not stop correctly.")
+        # Fallback: with no eos token ID at all, generation may run into problems;
+        # a default eos token ID may need to be hard-coded or handled some other way,
+        # e.g. stop_token_ids_list = [some_default_eos_id]

    print(f"Using Stop Token IDs: {stop_token_ids_list}")
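Note: depending on the tokenizer, convert_tokens_to_ids returns None or the unk id for tokens missing from the vocabulary, which is why the None filter above matters. A standalone sketch of the same logic, assuming the tokenizer loaded earlier:

stop_token_strings = ["<|endofturn|>", "<|stop|>"]
stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in stop_token_strings]

# Fold in the tokenizer's own EOS id when present and not already listed.
if tokenizer.eos_token_id is not None and tokenizer.eos_token_id not in stop_token_ids:
    stop_token_ids.append(tokenizer.eos_token_id)

# Drop unresolved entries, including the unk id standing in for unknown tokens.
stop_token_ids = [
    tid for tid in stop_token_ids
    if tid is not None and tid != tokenizer.unk_token_id
]
print(f"Using Stop Token IDs: {stop_token_ids}")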
@@ -77,6 +87,7 @@ def predict(message, history):
        {"role": "tool_list", "content": ""},
        {"role": "system", "content": system_prompt}
    ]
+    # Assumes history is a list of (user, ai) tuples
    for user_msg, ai_msg in history:
        chat_history_formatted.append({"role": "user", "content": user_msg})
        chat_history_formatted.append({"role": "assistant", "content": ai_msg})
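Note: the added comment assumes tuple-style history, but with type='messages' on the Chatbot (added later in this commit) Gradio passes history as {"role", "content"} dicts instead of (user, ai) tuples. A hedged sketch that tolerates both shapes; format_history is a hypothetical helper, not part of app.py:

def format_history(history, system_prompt):
    # Mirror the prefix app.py builds before the conversation turns.
    formatted = [
        {"role": "tool_list", "content": ""},
        {"role": "system", "content": system_prompt},
    ]
    for turn in history:
        if isinstance(turn, dict):           # messages format (type='messages')
            formatted.append({"role": turn["role"], "content": turn["content"]})
        else:                                # classic (user_msg, ai_msg) tuple format
            user_msg, ai_msg = turn
            formatted.append({"role": "user", "content": user_msg})
            formatted.append({"role": "assistant", "content": ai_msg})
    return formatted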
@@ -87,12 +98,13 @@ def predict(message, history):
    output_ids = None

    try:
+        # The model was loaded with device_map="cpu", so the inputs are sent to the CPU as well.
        inputs = tokenizer.apply_chat_template(
            chat_history_formatted,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt"
-        ).to(
+        ).to("cpu")  # explicitly target the CPU
        input_length = inputs['input_ids'].shape[1]
        print(f"\nInput tokens: {input_length}")
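Note: with return_dict=True, apply_chat_template returns a BatchEncoding holding input_ids and attention_mask, and its .to() call moves every tensor at once. A minimal sketch, assuming the tokenizer and formatted history from above:

inputs = tokenizer.apply_chat_template(
    chat_history_formatted,
    add_generation_prompt=True,  # append the header for the assistant's next turn
    return_dict=True,
    return_tensors="pt",
).to("cpu")                      # the model lives on the CPU in this Space
input_length = inputs["input_ids"].shape[1]  # prompt length, used later to slice off new tokens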
@@ -103,11 +115,12 @@ def predict(message, history):
    try:
        print("Generating response...")
        with torch.no_grad():
+            # Passing a list of IDs to eos_token_id is the usual pattern.
            output_ids = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
-                eos_token_id=stop_token_ids_list,
-                pad_token_id=tokenizer.eos_token_id,
+                eos_token_id=stop_token_ids_list,  # use the corrected stop_token_ids_list
+                pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,  # ensure pad_token_id is set
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
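Note: transformers' generate() accepts either a single id or a list for eos_token_id, so the list built above can be passed directly; pad_token_id should also resolve to a concrete id or generation emits a warning. A minimal call sketch under the same assumptions, where the MAX_NEW_TOKENS value is a placeholder (the real one is defined elsewhere in app.py):

MAX_NEW_TOKENS = 512  # assumption: actual value set earlier in app.py

with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        eos_token_id=stop_token_ids,          # a list of ids is accepted here
        pad_token_id=tokenizer.eos_token_id,  # fall back to tokenizer.pad_token_id if this is None
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )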
@@ -121,26 +134,35 @@ def predict(message, history):
        gc.collect()
        return f"Error: a problem occurred while generating the response. ({e})"

-
-
+    # Attempt decoding only when output_ids is not None
+    if output_ids is not None:
+        new_tokens = output_ids[0, input_length:]
+        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+        print(f"Output tokens: {len(new_tokens)}")
+        del new_tokens  # free memory
+    else:
+        response = "Error: failed to generate a response (output_ids is None)."
+        print("Generation failed, output_ids is None.")

-        print(f"Output tokens: {len(new_tokens)}")

-
-    del inputs
-    del output_ids
+    # Clean up memory
+    if inputs is not None: del inputs
+    if output_ids is not None: del output_ids
    gc.collect()
    print("Memory cleaned.")

    return response

+# --- Gradio Interface ---
print("--- Setting up Gradio Interface ---")

+# Use the newer format and resolve the UserWarning
chatbot_component = gr.Chatbot(
    label="HyperCLOVA X SEED (0.5B) Chat",
    bubble_full_width=False,
-    height=600
-    )
+    height=600,
+    type='messages'  # explicitly use the messages format
+)

examples = [
    ["What is Naver CLOVA X?"],
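Note: slicing output_ids at input_length keeps only the newly generated tokens, so the prompt is never echoed back into the chat; the explicit del and gc.collect() calls matter on a small CPU-only Space. A sketch of this decode-and-clean step under the same assumptions:

import gc

if output_ids is not None:
    new_tokens = output_ids[0, input_length:]  # drop the prompt tokens
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    del new_tokens
else:
    response = "Error: generation produced no output."

del inputs, output_ids
gc.collect()  # encourage Python to release the large tensors promptly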
@@ -149,6 +171,7 @@ examples = [
    ["I am planning a trip to Jeju Island. Could you put together a recommended 3-night, 4-day itinerary?"],
]

+# Removed the arguments that caused problems (retry_btn, undo_btn, clear_btn)
demo = gr.ChatInterface(
    fn=predict,
    chatbot=chatbot_component,
@@ -162,11 +185,9 @@ demo = gr.ChatInterface(
    examples=examples,
    cache_examples=False,
    theme="soft",
-    retry_btn="Retry",
-    undo_btn="Delete previous turn",
-    clear_btn="Reset conversation",
)

+# --- Launch the App ---
if __name__ == "__main__":
    print("--- Launching Gradio App ---")
    demo.queue().launch()
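Note: the retry_btn/undo_btn/clear_btn keyword arguments existed in older Gradio releases but were dropped from gr.ChatInterface in newer ones, which is presumably the "problem" the new comment refers to; type='messages' likewise silences the tuples-format deprecation warning on gr.Chatbot. A sketch of the trimmed setup in one place, assuming a recent Gradio and the predict/examples defined above:

import gradio as gr

chatbot_component = gr.Chatbot(
    label="HyperCLOVA X SEED (0.5B) Chat",
    height=600,
    type="messages",  # history arrives as {"role", "content"} dicts
)

demo = gr.ChatInterface(
    fn=predict,
    chatbot=chatbot_component,
    examples=examples,
    cache_examples=False,
    theme="soft",
)

if __name__ == "__main__":
    demo.queue().launch()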