openfree committed (verified)
Commit 4dd6e62 · Parent(s): f2639d3

Update app.py

Files changed (1): app.py (+25, -29)
app.py CHANGED
@@ -706,12 +706,13 @@ def stream_chat(
 
     print(f"CUDA memory after creating the input tensors: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
 
-    # Set up the streamer
+
+    try:
+        # Use a longer timeout when initializing the streamer
     streamer = TextIteratorStreamer(
-        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+        tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
     )
-
-    # Set up the generation parameters
+
     generate_kwargs = dict(
         **inputs,
         streamer=streamer,
@@ -721,9 +722,13 @@ def stream_chat(
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        eos_token_id=tokenizer.eos_token_id,  # explicitly specify the EOS token
+        pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id,
+        eos_token_id=tokenizer.eos_token_id,
+        use_cache=True
     )
 
+
+
     # Clear memory
     clear_cuda_memory()
 
@@ -731,35 +736,26 @@ def stream_chat(
     thread = Thread(target=model.generate, kwargs=generate_kwargs)
     thread.start()
 
-    # Stream the response
+    # Strengthened exception handling during streaming
     buffer = ""
-    partial_message = ""
-    last_yield_time = time.time()
-
     try:
         for new_text in streamer:
-            buffer += new_text
-            partial_message += new_text
-
-            # Update the result at fixed intervals or once enough text has accumulated
-            current_time = time.time()
-            if current_time - last_yield_time > 0.1 or len(partial_message) > 20:
+            try:
+                buffer += new_text
                 yield "", history + [[message, buffer]]
-                partial_message = ""
-                last_yield_time = current_time
-
-        # Check the final response
-        if buffer:
-            yield "", history + [[message, buffer]]
-
-        # Save to the conversation history
-        chat_history.add_conversation(message, buffer)
-
-    except Exception as e:
-        print(f"Error during streaming: {str(e)}")
-        if not buffer:  # show an error message if the buffer is empty
-            buffer = f"An error occurred while generating the response: {str(e)}"
+            except Exception as inner_e:
+                print(f"Error while processing an individual token: {str(inner_e)}")
+                continue
+    except Exception as stream_e:
+        print(f"Overall streaming error: {str(stream_e)}")
+        if not buffer:
+            buffer = "An error occurred while generating the response."
         yield "", history + [[message, buffer]]
+    except Exception as outer_e:
+        print(f"Error in the overall generation process: {str(outer_e)}")
+        yield "", history + [[message, "Sorry, the response could not be generated."]]
+
+    ]
 
     # Wait for the thread to finish if it is still running
    if thread.is_alive():
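
For reference, the code this commit touches follows the usual TextIteratorStreamer-plus-worker-thread recipe. Below is a minimal, self-contained sketch of that recipe under stated assumptions: `model` and `tokenizer` are an already-loaded Hugging Face causal LM and its tokenizer, and the function name stream_chat_sketch, the default arguments, and the (cleared textbox, updated history) yield shape are illustrative, not the repository's exact code.

# Minimal sketch only: `model` and `tokenizer` are assumed to be an already-loaded
# Hugging Face causal LM and its tokenizer; names and defaults here are illustrative.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_chat_sketch(message, history, model, tokenizer,
                       max_new_tokens=512, temperature=0.7):
    inputs = tokenizer(message, return_tensors="pt").to(model.device)

    # A generous timeout gives a slow first token more headroom before the
    # streamer's internal queue read gives up.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        # Fall back to the EOS token when the tokenizer has no pad token.
        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,
    )

    # generate() blocks until it finishes, so it runs in a worker thread while
    # this generator drains the streamer and updates the chat history.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    buffer = ""
    try:
        for new_text in streamer:
            buffer += new_text
            yield "", history + [[message, buffer]]
    except Exception as exc:
        # Keep the UI responsive instead of crashing the callback.
        yield "", history + [[message, buffer or f"Error while generating: {exc}"]]
    finally:
        thread.join()

Because model.generate blocks until generation completes, it has to run on a separate thread while the callback iterates over the streamer; the longer timeout in this commit simply gives the first token more time to arrive before the streamer's queue read times out.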