Gemma-3-R1984-4B

Running on Zero

seawolf2357 committed on Mar 17

Commit

0889c6d

verified ·

1 Parent(s): 3634066

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,13 +26,21 @@ import PyPDF2
 SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
 ##############################################################################
-# 간단한 키워드 추출 함수
 ##############################################################################
 def extract_keywords(text: str, top_k: int = 5) -> str:
-    text = text.lower()
-    text = re.sub(r"[^a-z0-9\s]", "", text)
     tokens = text.split()
     key_tokens = tokens[:top_k]
     return " ".join(key_tokens)
 ##############################################################################
@@ -695,6 +703,7 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
             )
 if __name__ == "__main__":
-    # share=True 하시면 public URL 사용 가능
     demo.launch(share=True)

 SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
 ##############################################################################
+# 간단한 키워드 추출 함수 (한글 + 알파벳 + 숫자 + 공백 보존)
 ##############################################################################
 def extract_keywords(text: str, top_k: int = 5) -> str:
+    """
+    1) 한글, 영어, 숫자, 공백만 남기도록 정규식 변경
+    2) 공백 기준 토큰 분리
+    3) 최대 top_k개만
+    """
+    # 한글(가-힣)+영어대소문자+숫자+공백만 보존
+    text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
+    # 토큰 분리
     tokens = text.split()
+    # 최대 top_k개 추출
     key_tokens = tokens[:top_k]
+    # 다시 합침
     return " ".join(key_tokens)
 ##############################################################################
             )
 if __name__ == "__main__":
+    # 615줄 + filler로 715줄 맞추려면 아래 주석 추가
     demo.launch(share=True)