openfree committed on
Commit
1399380
·
verified ·
1 Parent(s): dd08b0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -45
app.py CHANGED
@@ -141,7 +141,7 @@ def rebuild_messages(history: list):
141
  messages.append({"role": h.role, "content": h.content})
142
  return messages
143
 
144
- def load_model(model_names, status_callback=None):
145
  """์„ ํƒ๋œ ๋ชจ๋ธ ์ด๋ฆ„์— ๋”ฐ๋ผ ๋ชจ๋ธ ๋กœ๋“œ (A100์— ์ตœ์ ํ™”๋œ ์„ค์ • ์‚ฌ์šฉ)"""
146
  global pipe, current_model_name, loading_in_progress
147
 
@@ -150,6 +150,7 @@ def load_model(model_names, status_callback=None):
150
  return "๋‹ค๋ฅธ ๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋“œ ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”."
151
 
152
  loading_in_progress = True
 
153
 
154
  try:
155
  # ๊ธฐ์กด ๋ชจ๋ธ ์ •๋ฆฌ
@@ -167,8 +168,7 @@ def load_model(model_names, status_callback=None):
167
  config = MODEL_CONFIG[size_category]
168
 
169
  # ๋กœ๋”ฉ ์ƒํƒœ ์—…๋ฐ์ดํŠธ
170
- if status_callback:
171
- status_callback(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํฌ๊ธฐ: {size_category})")
172
 
173
  # ๋ชจ๋ธ ๋กœ๋“œ (ํฌ๊ธฐ์— ๋”ฐ๋ผ ์ตœ์ ํ™”๋œ ์„ค์ • ์ ์šฉ)
174
  # HF_TOKEN ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ํ™•์ธ
@@ -185,8 +185,7 @@ def load_model(model_names, status_callback=None):
185
  has_bitsandbytes = True
186
  except ImportError:
187
  has_bitsandbytes = False
188
- if status_callback:
189
- status_callback(f"BitsAndBytes ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์–‘์žํ™” ์—†์ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.")
190
 
191
  # ์‹œ๊ฐ„ ์ œํ•œ ์„ค์ • (๋ชจ๋ธ ํฌ๊ธฐ์— ๋”ฐ๋ผ ๋‹ค๋ฅด๊ฒŒ)
192
  if size_category == "small":
@@ -208,8 +207,7 @@ def load_model(model_names, status_callback=None):
208
  bnb_4bit_compute_dtype=DTYPE
209
  )
210
 
211
- if status_callback:
212
- status_callback(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (์–‘์žํ™” ์ ์šฉ)")
213
 
214
  model = AutoModelForCausalLM.from_pretrained(
215
  model_name,
@@ -231,8 +229,7 @@ def load_model(model_names, status_callback=None):
231
  )
232
  else:
233
  # ์–‘์žํ™” ์—†์ด ๋กœ๋“œ
234
- if status_callback:
235
- status_callback(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํ‘œ์ค€ ๋ฐฉ์‹)")
236
 
237
  pipe = pipeline(
238
  "text-generation",
@@ -255,7 +252,11 @@ def load_model(model_names, status_callback=None):
255
 
256
  except Exception as e:
257
  loading_in_progress = False
258
- return f"๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}"
 
 
 
 
259
 
260
  @spaces.GPU
261
  def bot(
@@ -426,34 +427,10 @@ def get_gpu_info():
426
 
427
  return "\n".join(gpu_info)
428
 
429
- # ์ž๋™ ๋ชจ๋ธ ๋กœ๋“œ ํ•จ์ˆ˜ (์ƒํƒœ ์—…๋ฐ์ดํŠธ ํฌํ•จ)
430
- def auto_load_model():
431
- # ์ฒซ ๋ฒˆ์งธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ
432
- model_key = DEFAULT_MODEL_KEY
433
- try:
434
- # ์ง„ํ–‰ ์ƒํƒœ ํ‘œ์‹œ๋ฅผ ์œ„ํ•œ ๋นˆ ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜
435
- return "์ž‘์€ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ ์ค‘... ์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”."
436
- except Exception as e:
437
- return f"์ž๋™ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}"
438
-
439
- # ์‹ค์ œ ๋ชจ๋ธ ๋กœ๋“œ ํ•จ์ˆ˜ (๋น„๋™๊ธฐ)
440
- def load_model_async(model_status):
441
- # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋กœ ๋ชจ๋ธ ๋กœ๋“œ (์‹ค์ œ ๋กœ๋“œ๋Š” ๋ฐฑ๊ทธ๋ผ์šด๋“œ์—์„œ ์ˆ˜ํ–‰)
442
  model_key = DEFAULT_MODEL_KEY
443
-
444
- def update_status(status):
445
- model_status.update(value=status)
446
-
447
- # ๋ณ„๋„ ์Šค๋ ˆ๋“œ์—์„œ ๋กœ๋“œ
448
- def load_in_thread():
449
- try:
450
- result = load_model([model_key], update_status)
451
- model_status.update(value=result)
452
- except Exception as e:
453
- model_status.update(value=f"๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}")
454
-
455
- threading.Thread(target=load_in_thread, daemon=True).start()
456
- return "๋ชจ๋ธ ๋กœ๋“œ ์ค€๋น„ ์ค‘... ์ž๋™์œผ๋กœ ์ง„ํ–‰๋ฉ๋‹ˆ๋‹ค."
457
 
458
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
459
  with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
@@ -494,7 +471,7 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Servi
494
 
495
  # ๋ชจ๋ธ ๋กœ๋“œ ๋ฒ„ํŠผ
496
  load_model_btn = gr.Button("๋ชจ๋ธ ๋กœ๋“œ", variant="primary")
497
- model_status = gr.Textbox(label="๋ชจ๋ธ ์ƒํƒœ", interactive=False)
498
 
499
  # ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ๋ฒ„ํŠผ
500
  clear_memory_btn = gr.Button("GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ", variant="secondary")
@@ -520,13 +497,8 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Servi
520
  do_sample = gr.Checkbox(True, label="์ƒ˜ํ”Œ๋ง ์‚ฌ์šฉ")
521
  temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="์˜จ๋„")
522
 
523
- # ์‹œ์ž‘ ์‹œ ์ž๋™์œผ๋กœ ์ดˆ๊ธฐํ™”
524
- demo.load(auto_load_model, [], [model_status])
525
-
526
- # ์‹œ์ž‘ ํ›„ ๋น„๋™๊ธฐ์ ์œผ๋กœ ๋ชจ๋ธ ๋กœ๋“œ (์ดˆ๊ธฐ ํ™”๋ฉด ํ‘œ์‹œ ์ง€์—ฐ ๋ฐฉ์ง€)
527
- # ์˜ค๋ฅ˜ ์ฝ”๋“œ: demo.load(lambda x: load_model_async(x), [model_status], [], _js="() => {}")
528
- # ์ˆ˜์ •๋œ ์ฝ”๋“œ: Gradio ๋ฒ„์ „ ํ˜ธํ™˜์„ฑ์„ ์œ„ํ•ด _js ํŒŒ๋ผ๋ฏธํ„ฐ ์ œ๊ฑฐ
529
- demo.load(lambda: load_model_async(model_status), [], [])
530
 
531
  # ์„ ํƒ๋œ ๋ชจ๋ธ ๋กœ๋“œ ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
532
  def get_model_names(selected_model):
 
141
  messages.append({"role": h.role, "content": h.content})
142
  return messages
143
 
144
+ def load_model(model_names):
145
  """์„ ํƒ๋œ ๋ชจ๋ธ ์ด๋ฆ„์— ๋”ฐ๋ผ ๋ชจ๋ธ ๋กœ๋“œ (A100์— ์ตœ์ ํ™”๋œ ์„ค์ • ์‚ฌ์šฉ)"""
146
  global pipe, current_model_name, loading_in_progress
147
 
 
150
  return "๋‹ค๋ฅธ ๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋“œ ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”."
151
 
152
  loading_in_progress = True
153
+ status_messages = []
154
 
155
  try:
156
  # ๊ธฐ์กด ๋ชจ๋ธ ์ •๋ฆฌ
 
168
  config = MODEL_CONFIG[size_category]
169
 
170
  # ๋กœ๋”ฉ ์ƒํƒœ ์—…๋ฐ์ดํŠธ
171
+ status_messages.append(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํฌ๊ธฐ: {size_category})")
 
172
 
173
  # ๋ชจ๋ธ ๋กœ๋“œ (ํฌ๊ธฐ์— ๋”ฐ๋ผ ์ตœ์ ํ™”๋œ ์„ค์ • ์ ์šฉ)
174
  # HF_TOKEN ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ํ™•์ธ
 
185
  has_bitsandbytes = True
186
  except ImportError:
187
  has_bitsandbytes = False
188
+ status_messages.append("BitsAndBytes ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์–‘์žํ™” ์—†์ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.")
 
189
 
190
  # ์‹œ๊ฐ„ ์ œํ•œ ์„ค์ • (๋ชจ๋ธ ํฌ๊ธฐ์— ๋”ฐ๋ผ ๋‹ค๋ฅด๊ฒŒ)
191
  if size_category == "small":
 
207
  bnb_4bit_compute_dtype=DTYPE
208
  )
209
 
210
+ status_messages.append(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (์–‘์žํ™” ์ ์šฉ)")
 
211
 
212
  model = AutoModelForCausalLM.from_pretrained(
213
  model_name,
 
229
  )
230
  else:
231
  # ์–‘์žํ™” ์—†์ด ๋กœ๋“œ
232
+ status_messages.append(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํ‘œ์ค€ ๋ฐฉ์‹)")
 
233
 
234
  pipe = pipeline(
235
  "text-generation",
 
252
 
253
  except Exception as e:
254
  loading_in_progress = False
255
+ error_msg = f"๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}"
256
+ print(f"์˜ค๋ฅ˜: {error_msg}")
257
+ return error_msg
258
+ finally:
259
+ loading_in_progress = False
260
 
261
  @spaces.GPU
262
  def bot(
 
427
 
428
  return "\n".join(gpu_info)
429
 
430
+ # ๋น„๋™๊ธฐ ๋Œ€์‹  ๋™๊ธฐ ๋ฐฉ์‹์œผ๋กœ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ (๊ฐ„์†Œํ™”)
431
+ def load_default_model():
 
 
 
 
 
 
 
 
 
 
 
432
  model_key = DEFAULT_MODEL_KEY
433
+ return load_model([model_key])
 
 
 
 
 
 
 
 
 
 
 
 
 
434
 
435
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
436
  with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
 
471
 
472
  # ๋ชจ๋ธ ๋กœ๋“œ ๋ฒ„ํŠผ
473
  load_model_btn = gr.Button("๋ชจ๋ธ ๋กœ๋“œ", variant="primary")
474
+ model_status = gr.Textbox(label="๋ชจ๋ธ ์ƒํƒœ", interactive=False, value="์‹œ์ž‘ ์‹œ ์ž‘์€ ๋ชจ๋ธ์„ ์ž๋™์œผ๋กœ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")
475
 
476
  # ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ๋ฒ„ํŠผ
477
  clear_memory_btn = gr.Button("GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ", variant="secondary")
 
497
  do_sample = gr.Checkbox(True, label="์ƒ˜ํ”Œ๋ง ์‚ฌ์šฉ")
498
  temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="์˜จ๋„")
499
 
500
+ # ์‹œ์ž‘ ์‹œ ์ž๋™์œผ๋กœ ๋ชจ๋ธ ๋กœ๋“œ - ์ด์ œ ๋™๊ธฐ์ ์œผ๋กœ ์ฒ˜๋ฆฌ
501
+ demo.load(load_default_model, [], [model_status])
 
 
 
 
 
502
 
503
  # ์„ ํƒ๋œ ๋ชจ๋ธ ๋กœ๋“œ ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
504
  def get_model_names(selected_model):