Update app.py
app.py
CHANGED
@@ -1,16 +1,21 @@
 import gradio as gr
 import torch
 import os
-
-from
+# Removed the __version__ import from optimum.onnxruntime
+from transformers import AutoTokenizer, __version__ as transformers_version
+from optimum.onnxruntime import ORTModelForCausalLM
+# import optimum  # try to check Optimum's own version (optional)

 # --- Configuration ---
-MODEL_ID = "onnx-community/gemma-3-1b-it-ONNX-GQA"
-ONNX_FILE_NAME = None
+MODEL_ID = "onnx-community/gemma-3-1b-it-ONNX-GQA"
+ONNX_FILE_NAME = None

 print(f"Using Transformers version: {transformers_version}")
-
-print(f"Using
+# try:
+#     print(f"Using Optimum version: {optimum.__version__}")  # try checking the version another way
+# except AttributeError:
+#     print("Could not determine Optimum version automatically.")
+print(f"Using Gradio version: {gr.__version__}")

 # --- Device Selection ---
 try:
@@ -43,25 +48,23 @@ try:
     model = ORTModelForCausalLM.from_pretrained(
         MODEL_ID,
         provider=provider,
-        use_cache=True,
-        # use_io_binding=False  # try False if problems occur when using the GPU
+        use_cache=True,
     )
     print(f"ONNX Model '{MODEL_ID}' loaded successfully with provider '{provider}'.")
     model_loaded_successfully = True

 except ValueError as ve:
-    #
+    # Handle the unsupported-model-type error
     print(f"!!!!!!!!!!!!!! CRITICAL MODEL LOADING ERROR (ValueError) !!!!!!!!!!!!!!")
     print(f"Model: {MODEL_ID}")
     print(f"Error message: {ve}")
     print("This likely means the installed 'transformers' library version does NOT support the 'gemma3_text' architecture.")
     print("Ensure 'requirements.txt' specifies a recent version (e.g., transformers>=4.41.0) and the Space has been rebuilt/restarted.")
     print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
-    # Clearly notify the user when model loading fails
     model_loaded_successfully = False

 except Exception as e:
-    # Other
+    # Handle any other exception
     print(f"!!!!!!!!!!!!!! UNEXPECTED MODEL LOADING ERROR !!!!!!!!!!!!!!")
     print(f"Model: {MODEL_ID}")
     print(f"Error type: {type(e).__name__}")
@@ -73,33 +76,23 @@ except Exception as e:
 # --- Chat Function ---
 def chat_function(message: str, history: list):
     if not model_loaded_successfully or model is None or tokenizer is None:
-        # Return an error message when the model failed to load
         return "Error: The AI model is not loaded. Please check the application logs."

     try:
-        # Chat
+        # Convert the chat history
         chat_messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
         for user_msg, model_msg in history:
-
-            if
-
-
-
-        if message:  # add the current user message
-            chat_messages.append({"role": "user", "content": message})
-
-        # Build the prompt (try apply_chat_template, fall back to manual construction on failure)
+            if user_msg: chat_messages.append({"role": "user", "content": user_msg})
+            if model_msg: chat_messages.append({"role": "model", "content": model_msg})
+        if message: chat_messages.append({"role": "user", "content": message})
+
+        # Build the prompt
         prompt = ""
         try:
-            prompt = tokenizer.apply_chat_template(
-                chat_messages,
-                tokenize=False,
-                add_generation_prompt=True
-            )
+            prompt = tokenizer.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)
         except Exception as template_error:
             print(f"Warning: Failed to apply chat template ({template_error}). Using manual prompt construction.")
             prompt_parts = ["<start_of_turn>system\nYou are a helpful AI assistant.<end_of_turn>"]
-            # Note: the model message in history may be None
             for user_msg, model_msg in history:
                 if user_msg: prompt_parts.append(f"<start_of_turn>user\n{user_msg}<end_of_turn>")
                 if model_msg: prompt_parts.append(f"<start_of_turn>model\n{model_msg}<end_of_turn>")
@@ -107,14 +100,12 @@ def chat_function(message: str, history: list):
             prompt_parts.append("<start_of_turn>model")
             prompt = "\n".join(prompt_parts)

-        #
-
-        # Tokenize the input and move it to the device
+        # Tokenize the input
         inputs = tokenizer(prompt, return_tensors="pt").to(device)

         # Generate the response
         print("Generating response...")
-        with torch.no_grad():
+        with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_new_tokens=512,
@@ -122,50 +113,34 @@
                 temperature=0.7,
                 top_k=50,
                 top_p=0.9,
-                pad_token_id=tokenizer.eos_token_id
+                pad_token_id=tokenizer.eos_token_id
             )
         print("Generation complete.")

-        # Decode
+        # Decode
         input_token_len = inputs['input_ids'].shape[1]
         generated_tokens = outputs[0][input_token_len:]
         response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
-
-        # Post-process
         response = response.replace("<end_of_turn>", "").strip()
-
-        # print(f"--- RESPONSE --- \n{response}\n--------------")
-
-        # Handle an empty response
         if not response:
             print("Warning: Generated empty response.")
             response = "Sorry, I couldn't generate a response for that."
-
         return response

     except Exception as e:
         print(f"!!!!!!!!!!!!!! Error during generation !!!!!!!!!!!!!!")
         print(f"Error type: {type(e).__name__}")
         print(f"Error message: {e}")
-        print("Input message:", message)
-        # traceback.print_exc()  # print the full traceback if needed
         print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
         return f"Sorry, an error occurred during response generation. Please check logs."

-
-# --- Gradio Interface (modified) ---
+# --- Gradio Interface ---
 print("Creating Gradio Interface...")
 iface = gr.ChatInterface(
-    fn=chat_function,
+    fn=chat_function,
     title="AI Assistant (Gemma 3 1B ONNX-GQA)",
     description=f"Chat with {MODEL_ID}. Model loaded: {model_loaded_successfully}",
-    # Add type='messages' to the chatbot widget
     chatbot=gr.Chatbot(height=600, type="messages", bubble_full_width=False),
-    # Remove unsupported button arguments
-    # retry_btn=None,  # removed
-    # undo_btn=None,  # removed
-    # clear_btn=None,  # removed
-    # Use the default button instead of submit_btn
     theme=gr.themes.Soft(),
     examples=[["Hello!"], ["Write a poem about the internet."]]
 )
@@ -173,5 +148,4 @@ iface = gr.ChatInterface(
 # --- Launch App ---
 if __name__ == "__main__":
     print("Launching Gradio App...")
-    # Launch the interface even if model loading fails, but show an error message
     iface.launch()
|