Update app.py
app.py CHANGED
@@ -30,17 +30,13 @@ SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
 ##############################################################################
 def extract_keywords(text: str, top_k: int = 5) -> str:
     """
-    1)
+    1) Keep only Korean (가-힣), English letters (a-zA-Z), digits (0-9), and whitespace
     2) Split the text into tokens on whitespace
     3) Keep at most top_k tokens
     """
-    # Preserve only Korean (가-힣), English letters, digits, and whitespace
     text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
-    # Split into tokens
     tokens = text.split()
-    # Take at most top_k tokens
     key_tokens = tokens[:top_k]
-    # Join back into a single string
     return " ".join(key_tokens)
 
 ##############################################################################
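For reference, a minimal standalone sketch of the keyword extractor above, with an illustrative input sentence showing what it produces (punctuation is stripped and only the first top_k tokens survive):

import re

def extract_keywords(text: str, top_k: int = 5) -> str:
    """Keep Korean/alphanumeric characters, split on whitespace, take top_k tokens."""
    text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
    tokens = text.split()
    return " ".join(tokens[:top_k])

print(extract_keywords("What's the weather in Seoul, Korea today?!"))
# -> "Whats the weather in Seoul"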
@@ -74,7 +70,6 @@ def do_web_search(query: str) -> str:
 
     summary_lines = []
     for idx, item in enumerate(organic[:20], start=1):
-        # Serialize the whole item as a JSON string
         item_json = json.dumps(item, ensure_ascii=False, indent=2)
         summary_lines.append(f"Result {idx}:\n{item_json}\n")
 
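The loop above serializes each organic search result verbatim into the prompt. A small sketch with a stubbed result list (the field names are illustrative, not the exact SerpHouse response schema):

import json

organic = [
    {"title": "Example page", "link": "https://example.com", "snippet": "..."},  # illustrative fields
]

summary_lines = []
for idx, item in enumerate(organic[:20], start=1):
    # Dump each result as pretty-printed JSON, preserving non-ASCII text
    item_json = json.dumps(item, ensure_ascii=False, indent=2)
    summary_lines.append(f"Result {idx}:\n{item_json}\n")

print("\n".join(summary_lines))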
@@ -89,6 +84,7 @@ def do_web_search(query: str) -> str:
 ##############################################################################
 MAX_CONTENT_CHARS = 4000
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
+
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
     model_id,
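The from_pretrained call is cut off at the end of this hunk. A typical completion would look roughly like the following sketch; the dtype and device_map arguments are assumptions, not shown in the diff:

import torch
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

model_id = "google/gemma-3-27b-it"
processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # assumption: not visible in the truncated hunk
    device_map="auto",           # assumption: requires accelerate
)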
@@ -390,47 +386,36 @@ def run(
         return
 
     try:
-        # (1) Buffer used to merge all system messages into one
         combined_system_msg = ""
 
-        # If the user supplied a system_prompt
         if system_prompt.strip():
             combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"
 
-        # (2) When web search is enabled, extract keywords first
         if use_web_search:
             user_text = message["text"]
             ws_query = extract_keywords(user_text, top_k=5)
-            # Skip the search if no keywords were extracted
             if ws_query.strip():
                 logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
                 ws_result = do_web_search(ws_query)
-                # Append the search results to the system message
                 combined_system_msg += f"[Search top-20 Full Items Based on user prompt]\n{ws_result}\n\n"
             else:
-                # No keywords extracted, so skip the search entirely
                 combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"
 
-        # (3) If the combined system message is not empty
         messages = []
         if combined_system_msg.strip():
-            # Create a single system-role message
             messages.append({
                 "role": "system",
                 "content": [{"type": "text", "text": combined_system_msg.strip()}],
             })
 
-        # (4) Previous conversation history
         messages.extend(process_history(history))
 
-        # (5) The user's new message
         user_content = process_new_user_message(message)
         for item in user_content:
             if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
                 item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
         messages.append({"role": "user", "content": user_content})
 
-        # (6) Build the LLM input
         inputs = processor.apply_chat_template(
             messages,
             add_generation_prompt=True,
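For context, the messages list assembled above follows the multimodal chat format that processor.apply_chat_template expects. A minimal sketch, reusing the processor from the model-setup block; the message contents and the kwargs after add_generation_prompt are illustrative, since the hunk is cut off at that point:

messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": "[System Prompt]\nYou are a helpful assistant."}],
    },
    {
        "role": "user",
        # Image/video turns would add entries like {"type": "image", "url": ...}
        # (illustrative; the exact keys depend on process_new_user_message).
        "content": [{"type": "text", "text": "Summarize the search results above."}],
    },
]

inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,          # assumption: remaining kwargs are not shown in the diff
    return_dict=True,
    return_tensors="pt",
)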
@@ -446,7 +431,7 @@ def run(
             max_new_tokens=max_new_tokens,
         )
 
-        t = Thread(target=
+        t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
         t.start()
 
         output = ""
@@ -459,6 +444,22 @@ def run(
         yield f"Sorry, an error occurred: {str(e)}"
 
 
+##############################################################################
+# [Added] Call model.generate(...) in a separate function so OOM can be caught
+##############################################################################
+def _model_gen_with_oom_catch(**kwargs):
+    """
+    Catches OutOfMemoryError raised inside the separate generation thread
+    """
+    try:
+        model.generate(**kwargs)
+    except torch.cuda.OutOfMemoryError:
+        raise RuntimeError(
+            "[OutOfMemoryError] Not enough GPU memory. "
+            "Please reduce Max New Tokens or shorten the prompt."
+        )
+
+
 ##############################################################################
 # Examples (in Korean)
 ##############################################################################
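Taken together, the new _model_gen_with_oom_catch wrapper and the Thread call implement the usual background-generation streaming pattern: generate in a worker thread, iterate the streamer on the main thread. A condensed sketch of how the pieces fit; the streamer setup and gen_kwargs contents are assumptions based on the surrounding code, not lines from the diff:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(inputs, max_new_tokens: int):
    # assumption: the streamer is fed the processor's underlying tokenizer
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    gen_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)

    # Generation runs in a worker thread; _model_gen_with_oom_catch re-raises
    # torch.cuda.OutOfMemoryError as RuntimeError so the loop below can report it.
    t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
    t.start()

    output = ""
    for new_text in streamer:
        output += new_text
        yield output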
@@ -658,7 +659,7 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
                     minimum=100,
                     maximum=8000,
                     step=50,
-                    value=
+                    value=512,  # Slightly lower default to conserve GPU memory
                 )
 
         gr.Markdown("<br><br>")
@@ -698,12 +699,12 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
     gr.Markdown("### Example Inputs (click to load)")
     gr.Examples(
         examples=examples,
-        inputs=[],
+        inputs=[],
         cache_examples=False
     )
 
 if __name__ == "__main__":
-    #
-    demo.launch(share=True)
-
+    # share=True triggers a warning on HF Spaces; it only works locally
+    # demo.launch(share=True)
+    demo.launch()
 
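The final change drops share=True: on Spaces the app is already served publicly, so a share link is unnecessary and only triggers a warning. A short sketch of the launch block with the reasoning as comments; the commented-out local-debugging line uses standard Gradio parameters and is illustrative, not part of the commit:

if __name__ == "__main__":
    # On Hugging Face Spaces, a plain launch is sufficient; the platform
    # handles public serving. share=True only matters for local runs.
    demo.launch()
    # For local debugging one might instead use, e.g.:
    # demo.launch(server_name="0.0.0.0", server_port=7860, share=True)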