Spaces: Running on Zero

Update app.py
app.py CHANGED
@@ -141,7 +141,7 @@ def rebuild_messages(history: list):
         messages.append({"role": h.role, "content": h.content})
     return messages
 
-def load_model(model_names, status_callback=None):
+def load_model(model_names):
     """Load the model matching the selected model name (using settings optimized for the A100)"""
     global pipe, current_model_name, loading_in_progress
 
@@ -150,6 +150,7 @@ def load_model(model_names, status_callback=None):
         return "Another model is already loading. Please wait a moment."
 
     loading_in_progress = True
+    status_messages = []
 
     try:
         # Clean up any existing model
@@ -167,8 +168,7 @@ def load_model(model_names, status_callback=None):
         config = MODEL_CONFIG[size_category]
 
         # Update the loading status
-        if status_callback:
-            status_callback(f"Loading model '{model_name}'... (size: {size_category})")
+        status_messages.append(f"Loading model '{model_name}'... (size: {size_category})")
 
         # Load the model (apply settings optimized for its size)
         # Check the HF_TOKEN environment variable
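The refactor in this hunk swaps the caller-supplied `status_callback` for a `status_messages` list that the function fills as it goes. A minimal sketch of the pattern, with illustrative names and bodies only: in a Gradio handler, only the return value reaches the UI, so accumulating messages and returning them as one string is more reliable than invoking a callback mid-load.

```python
# Minimal sketch of the callback-to-list refactor (illustrative names/bodies).
def load_model(model_names: list) -> str:
    status_messages = []                     # replaces status_callback(...)
    model_name = model_names[0]
    status_messages.append(f"Loading model '{model_name}'...")
    # ... the actual model load would happen here ...
    status_messages.append("Load complete.")
    return "\n".join(status_messages)        # one string for a gr.Textbox
```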
@@ -185,8 +185,7 @@ def load_model(model_names, status_callback=None):
             has_bitsandbytes = True
         except ImportError:
             has_bitsandbytes = False
-            if status_callback:
-                status_callback(f"BitsAndBytes library not found. Loading without quantization.")
+            status_messages.append("BitsAndBytes library not found. Loading without quantization.")
 
         # Set the time limit (differs by model size)
         if size_category == "small":
@@ -208,8 +207,7 @@ def load_model(model_names, status_callback=None):
                 bnb_4bit_compute_dtype=DTYPE
             )
 
-            if status_callback:
-                status_callback(f"Loading model '{model_name}'... (quantization applied)")
+            status_messages.append(f"Loading model '{model_name}'... (quantization applied)")
 
             model = AutoModelForCausalLM.from_pretrained(
                 model_name,
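For context on the quantized branch this hunk edits: `bnb_4bit_compute_dtype=DTYPE` appears to be the tail of a `BitsAndBytesConfig`. A hedged sketch of a typical 4-bit load with transformers and bitsandbytes; `DTYPE` and the model id are stand-ins, and the app's exact arguments may differ:

```python
# Sketch of a 4-bit quantized load; model id and DTYPE are placeholders.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

DTYPE = torch.bfloat16  # compute dtype used during de-quantized matmuls

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,             # store weights in 4-bit via bitsandbytes
    bnb_4bit_compute_dtype=DTYPE,  # matches the line kept in the hunk above
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",    # illustrative model id
    quantization_config=quantization_config,
    device_map="auto",             # place layers on the available GPU(s)
)
```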
@@ -231,8 +229,7 @@ def load_model(model_names, status_callback=None):
             )
         else:
             # Load without quantization
-            if status_callback:
-                status_callback(f"Loading model '{model_name}'... (standard mode)")
+            status_messages.append(f"Loading model '{model_name}'... (standard mode)")
 
             pipe = pipeline(
                 "text-generation",
@@ -255,7 +252,11 @@ def load_model(model_names, status_callback=None):
 
     except Exception as e:
         loading_in_progress = False
-        return f"Model load failed: {str(e)}"
+        error_msg = f"Model load failed: {str(e)}"
+        print(f"Error: {error_msg}")
+        return error_msg
+    finally:
+        loading_in_progress = False
 
 @spaces.GPU
 def bot(
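The added `finally:` is the substantive fix in this hunk: the `except` branch was the only reset visible before, so other exit paths could leave the module-level flag stuck and block every later load. A small self-contained sketch of the guard pattern (names are illustrative):

```python
# Busy-flag guard around a long-running load; finally resets it on every exit.
loading_in_progress = False

def guarded_load() -> str:
    global loading_in_progress
    if loading_in_progress:
        return "Another model is already loading. Please wait a moment."
    loading_in_progress = True
    try:
        # ... do the expensive load; any raise or return passes through finally ...
        return "Loaded."
    except Exception as e:
        return f"Model load failed: {e}"
    finally:
        loading_in_progress = False  # runs on success, failure, and early return
```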
@@ -426,34 +427,10 @@ def get_gpu_info():
 
     return "\n".join(gpu_info)
 
-# Auto-load the first model at startup
-def auto_load_model():
-    # Auto-load the first model
-    model_key = DEFAULT_MODEL_KEY
-    try:
-        # Return an empty result so progress can be shown
-        return "Auto-loading the startup model... please wait a moment."
-    except Exception as e:
-        return f"Automatic model load failed: {str(e)}"
-
-# Actual model-loading function (asynchronous)
-def load_model_async(model_status):
-    # Load the model asynchronously (the actual load runs in the background)
+# Auto-load the model synchronously instead of asynchronously (simplified)
+def load_default_model():
     model_key = DEFAULT_MODEL_KEY
-
-    def update_status(status):
-        model_status.update(value=status)
-
-    # Load in a separate thread
-    def load_in_thread():
-        try:
-            result = load_model([model_key], update_status)
-            model_status.update(value=result)
-        except Exception as e:
-            model_status.update(value=f"Model load failed: {str(e)}")
-
-    threading.Thread(target=load_in_thread, daemon=True).start()
-    return "Preparing model load... it will proceed automatically."
+    return load_model([model_key])
 
 # Gradio interface
 with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
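This hunk deletes the thread-based loader in favor of a synchronous `load_default_model`. A plausible reason, worth noting: `model_status.update(value=...)` called from a background thread never reaches the browser, because Gradio only applies updates that are returned from an event handler. A contrast sketch with hypothetical helper names:

```python
# Returned values update components; mutating them from a thread does not.
import gradio as gr

def load_model_sync() -> str:
    return "Model loaded."  # routed into the Textbox by demo.load below

with gr.Blocks() as demo:
    status = gr.Textbox(label="Model Status", interactive=False)

    # Works: Gradio writes the handler's return value into `status`.
    demo.load(load_model_sync, [], [status])

    # Broken pattern the commit removes (left commented out): an update
    # issued from a daemon thread is never rendered in the browser.
    # threading.Thread(
    #     target=lambda: status.update(value="Model loaded."),
    #     daemon=True,
    # ).start()
```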
@@ -494,7 +471,7 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
 
             # Model load button
             load_model_btn = gr.Button("Load Model", variant="primary")
-            model_status = gr.Textbox(label="Model Status", interactive=False)
+            model_status = gr.Textbox(label="Model Status", interactive=False, value="The startup model loads automatically...")
 
             # Memory-cleanup button
             clear_memory_btn = gr.Button("Clear GPU Memory", variant="secondary")
@@ -520,13 +497,8 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
             do_sample = gr.Checkbox(True, label="Use sampling")
             temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature")
 
-    # Automatically load the model at startup
-    demo.load(
-
-    # Load the model fully asynchronously (to avoid delaying the initial page render)
-    # Buggy code: demo.load(lambda x: load_model_async(x), [model_status], [], _js="() => {}")
-    # Fixed code: removed the _js parameter for Gradio version compatibility
-    demo.load(lambda: load_model_async(model_status), [], [])
+    # Automatically load the model at startup - now handled synchronously
+    demo.load(load_default_model, [], [model_status])
 
     # Wire up the load event for the selected model
    def get_model_names(selected_model):
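The removed comments record the underlying compatibility issue: `_js` was a Gradio 3.x keyword argument on event listeners, and newer releases spell it `js`, so passing `_js` is rejected (a `TypeError`). Dropping the parameter entirely, as the new line does, works on both. The final wiring as a self-contained sketch; the load body is a placeholder:

```python
# Startup auto-load wiring; the handler's return value fills the Textbox.
import gradio as gr

def load_default_model() -> str:
    # Placeholder for the real model load; returns the status message.
    return "Default model loaded."

with gr.Blocks(title="ThinkFlow - Step-by-step Reasoning Service") as demo:
    model_status = gr.Textbox(label="Model Status", interactive=False)
    # No inputs; runs once when the page loads.
    demo.load(load_default_model, [], [model_status])

if __name__ == "__main__":
    demo.launch()
```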