openfree committed on
Commit
1399380
·
verified ·
1 Parent(s): dd08b0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -45
app.py CHANGED
@@ -141,7 +141,7 @@ def rebuild_messages(history: list):
141
  messages.append({"role": h.role, "content": h.content})
142
  return messages
143
 
144
- def load_model(model_names, status_callback=None):
145
  """์„ ํƒ๋œ ๋ชจ๋ธ ์ด๋ฆ„์— ๋”ฐ๋ผ ๋ชจ๋ธ ๋กœ๋“œ (A100์— ์ตœ์ ํ™”๋œ ์„ค์ • ์‚ฌ์šฉ)"""
146
  global pipe, current_model_name, loading_in_progress
147
 
@@ -150,6 +150,7 @@ def load_model(model_names, status_callback=None):
150
  return "๋‹ค๋ฅธ ๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋“œ ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”."
151
 
152
  loading_in_progress = True
 
153
 
154
  try:
155
  # ๊ธฐ์กด ๋ชจ๋ธ ์ •๋ฆฌ
@@ -167,8 +168,7 @@ def load_model(model_names, status_callback=None):
167
  config = MODEL_CONFIG[size_category]
168
 
169
  # ๋กœ๋”ฉ ์ƒํƒœ ์—…๋ฐ์ดํŠธ
170
- if status_callback:
171
- status_callback(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํฌ๊ธฐ: {size_category})")
172
 
173
  # ๋ชจ๋ธ ๋กœ๋“œ (ํฌ๊ธฐ์— ๋”ฐ๋ผ ์ตœ์ ํ™”๋œ ์„ค์ • ์ ์šฉ)
174
  # HF_TOKEN ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ํ™•์ธ
@@ -185,8 +185,7 @@ def load_model(model_names, status_callback=None):
185
  has_bitsandbytes = True
186
  except ImportError:
187
  has_bitsandbytes = False
188
- if status_callback:
189
- status_callback(f"BitsAndBytes ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์–‘์žํ™” ์—†์ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.")
190
 
191
  # ์‹œ๊ฐ„ ์ œํ•œ ์„ค์ • (๋ชจ๋ธ ํฌ๊ธฐ์— ๋”ฐ๋ผ ๋‹ค๋ฅด๊ฒŒ)
192
  if size_category == "small":
@@ -208,8 +207,7 @@ def load_model(model_names, status_callback=None):
208
  bnb_4bit_compute_dtype=DTYPE
209
  )
210
 
211
- if status_callback:
212
- status_callback(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (์–‘์žํ™” ์ ์šฉ)")
213
 
214
  model = AutoModelForCausalLM.from_pretrained(
215
  model_name,
@@ -231,8 +229,7 @@ def load_model(model_names, status_callback=None):
231
  )
232
  else:
233
  # ์–‘์žํ™” ์—†์ด ๋กœ๋“œ
234
- if status_callback:
235
- status_callback(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํ‘œ์ค€ ๋ฐฉ์‹)")
236
 
237
  pipe = pipeline(
238
  "text-generation",
@@ -255,7 +252,11 @@ def load_model(model_names, status_callback=None):
255
 
256
  except Exception as e:
257
  loading_in_progress = False
258
- return f"๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}"
 
 
 
 
259
 
260
  @spaces.GPU
261
  def bot(
@@ -426,34 +427,10 @@ def get_gpu_info():
426
 
427
  return "\n".join(gpu_info)
428
 
429
- # ์ž๋™ ๋ชจ๋ธ ๋กœ๋“œ ํ•จ์ˆ˜ (์ƒํƒœ ์—…๋ฐ์ดํŠธ ํฌํ•จ)
430
- def auto_load_model():
431
- # ์ฒซ ๋ฒˆ์งธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ
432
- model_key = DEFAULT_MODEL_KEY
433
- try:
434
- # ์ง„ํ–‰ ์ƒํƒœ ํ‘œ์‹œ๋ฅผ ์œ„ํ•œ ๋นˆ ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜
435
- return "์ž‘์€ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ ์ค‘... ์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”."
436
- except Exception as e:
437
- return f"์ž๋™ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}"
438
-
439
- # ์‹ค์ œ ๋ชจ๋ธ ๋กœ๋“œ ํ•จ์ˆ˜ (๋น„๋™๊ธฐ)
440
- def load_model_async(model_status):
441
- # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋กœ ๋ชจ๋ธ ๋กœ๋“œ (์‹ค์ œ ๋กœ๋“œ๋Š” ๋ฐฑ๊ทธ๋ผ์šด๋“œ์—์„œ ์ˆ˜ํ–‰)
442
  model_key = DEFAULT_MODEL_KEY
443
-
444
- def update_status(status):
445
- model_status.update(value=status)
446
-
447
- # ๋ณ„๋„ ์Šค๋ ˆ๋“œ์—์„œ ๋กœ๋“œ
448
- def load_in_thread():
449
- try:
450
- result = load_model([model_key], update_status)
451
- model_status.update(value=result)
452
- except Exception as e:
453
- model_status.update(value=f"๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}")
454
-
455
- threading.Thread(target=load_in_thread, daemon=True).start()
456
- return "๋ชจ๋ธ ๋กœ๋“œ ์ค€๋น„ ์ค‘... ์ž๋™์œผ๋กœ ์ง„ํ–‰๋ฉ๋‹ˆ๋‹ค."
457
 
458
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
459
  with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
@@ -494,7 +471,7 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Servi
494
 
495
  # ๋ชจ๋ธ ๋กœ๋“œ ๋ฒ„ํŠผ
496
  load_model_btn = gr.Button("๋ชจ๋ธ ๋กœ๋“œ", variant="primary")
497
- model_status = gr.Textbox(label="๋ชจ๋ธ ์ƒํƒœ", interactive=False)
498
 
499
  # ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ๋ฒ„ํŠผ
500
  clear_memory_btn = gr.Button("GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ", variant="secondary")
@@ -520,13 +497,8 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Servi
520
  do_sample = gr.Checkbox(True, label="์ƒ˜ํ”Œ๋ง ์‚ฌ์šฉ")
521
  temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="์˜จ๋„")
522
 
523
- # ์‹œ์ž‘ ์‹œ ์ž๋™์œผ๋กœ ์ดˆ๊ธฐํ™”
524
- demo.load(auto_load_model, [], [model_status])
525
-
526
- # ์‹œ์ž‘ ํ›„ ๋น„๋™๊ธฐ์ ์œผ๋กœ ๋ชจ๋ธ ๋กœ๋“œ (์ดˆ๊ธฐ ํ™”๋ฉด ํ‘œ์‹œ ์ง€์—ฐ ๋ฐฉ์ง€)
527
- # ์˜ค๋ฅ˜ ์ฝ”๋“œ: demo.load(lambda x: load_model_async(x), [model_status], [], _js="() => {}")
528
- # ์ˆ˜์ •๋œ ์ฝ”๋“œ: Gradio ๋ฒ„์ „ ํ˜ธํ™˜์„ฑ์„ ์œ„ํ•ด _js ํŒŒ๋ผ๋ฏธํ„ฐ ์ œ๊ฑฐ
529
- demo.load(lambda: load_model_async(model_status), [], [])
530
 
531
  # ์„ ํƒ๋œ ๋ชจ๋ธ ๋กœ๋“œ ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
532
  def get_model_names(selected_model):
 
141
  messages.append({"role": h.role, "content": h.content})
142
  return messages
143
 
144
+ def load_model(model_names):
145
  """์„ ํƒ๋œ ๋ชจ๋ธ ์ด๋ฆ„์— ๋”ฐ๋ผ ๋ชจ๋ธ ๋กœ๋“œ (A100์— ์ตœ์ ํ™”๋œ ์„ค์ • ์‚ฌ์šฉ)"""
146
  global pipe, current_model_name, loading_in_progress
147
 
 
150
  return "๋‹ค๋ฅธ ๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋“œ ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”."
151
 
152
  loading_in_progress = True
153
+ status_messages = []
154
 
155
  try:
156
  # ๊ธฐ์กด ๋ชจ๋ธ ์ •๋ฆฌ
 
168
  config = MODEL_CONFIG[size_category]
169
 
170
  # ๋กœ๋”ฉ ์ƒํƒœ ์—…๋ฐ์ดํŠธ
171
+ status_messages.append(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํฌ๊ธฐ: {size_category})")
 
172
 
173
  # ๋ชจ๋ธ ๋กœ๋“œ (ํฌ๊ธฐ์— ๋”ฐ๋ผ ์ตœ์ ํ™”๋œ ์„ค์ • ์ ์šฉ)
174
  # HF_TOKEN ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ํ™•์ธ
 
185
  has_bitsandbytes = True
186
  except ImportError:
187
  has_bitsandbytes = False
188
+ status_messages.append("BitsAndBytes ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์–‘์žํ™” ์—†์ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.")
 
189
 
190
  # ์‹œ๊ฐ„ ์ œํ•œ ์„ค์ • (๋ชจ๋ธ ํฌ๊ธฐ์— ๋”ฐ๋ผ ๋‹ค๋ฅด๊ฒŒ)
191
  if size_category == "small":
 
207
  bnb_4bit_compute_dtype=DTYPE
208
  )
209
 
210
+ status_messages.append(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (์–‘์žํ™” ์ ์šฉ)")
 
211
 
212
  model = AutoModelForCausalLM.from_pretrained(
213
  model_name,
 
229
  )
230
  else:
231
  # ์–‘์žํ™” ์—†์ด ๋กœ๋“œ
232
+ status_messages.append(f"๋ชจ๋ธ '{model_name}' ๋กœ๋“œ ์ค‘... (ํ‘œ์ค€ ๋ฐฉ์‹)")
 
233
 
234
  pipe = pipeline(
235
  "text-generation",
 
252
 
253
  except Exception as e:
254
  loading_in_progress = False
255
+ error_msg = f"๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {str(e)}"
256
+ print(f"์˜ค๋ฅ˜: {error_msg}")
257
+ return error_msg
258
+ finally:
259
+ loading_in_progress = False
260
 
261
  @spaces.GPU
262
  def bot(
 
427
 
428
  return "\n".join(gpu_info)
429
 
430
+ # ๋น„๋™๊ธฐ ๋Œ€์‹  ๋™๊ธฐ ๋ฐฉ์‹์œผ๋กœ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ (๊ฐ„์†Œํ™”)
431
+ def load_default_model():
 
 
 
 
 
 
 
 
 
 
 
432
  model_key = DEFAULT_MODEL_KEY
433
+ return load_model([model_key])
 
 
 
 
 
 
 
 
 
 
 
 
 
434
 
435
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
436
  with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
 
471
 
472
  # ๋ชจ๋ธ ๋กœ๋“œ ๋ฒ„ํŠผ
473
  load_model_btn = gr.Button("๋ชจ๋ธ ๋กœ๋“œ", variant="primary")
474
+ model_status = gr.Textbox(label="๋ชจ๋ธ ์ƒํƒœ", interactive=False, value="์‹œ์ž‘ ์‹œ ์ž‘์€ ๋ชจ๋ธ์„ ์ž๋™์œผ๋กœ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")
475
 
476
  # ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ๋ฒ„ํŠผ
477
  clear_memory_btn = gr.Button("GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ", variant="secondary")
 
497
  do_sample = gr.Checkbox(True, label="์ƒ˜ํ”Œ๋ง ์‚ฌ์šฉ")
498
  temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="์˜จ๋„")
499
 
500
+ # ์‹œ์ž‘ ์‹œ ์ž๋™์œผ๋กœ ๋ชจ๋ธ ๋กœ๋“œ - ์ด์ œ ๋™๊ธฐ์ ์œผ๋กœ ์ฒ˜๋ฆฌ
501
+ demo.load(load_default_model, [], [model_status])
 
 
 
 
 
502
 
503
  # ์„ ํƒ๋œ ๋ชจ๋ธ ๋กœ๋“œ ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
504
  def get_model_names(selected_model):