seawolf2357 committed on
Commit
0889c6d
·
verified ·
1 Parent(s): 3634066

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -26,13 +26,21 @@ import PyPDF2
26
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
27
 
28
  ##############################################################################
29
- # 간단한 키워드 추출 함수
30
  ##############################################################################
31
  def extract_keywords(text: str, top_k: int = 5) -> str:
32
- text = text.lower()
33
- text = re.sub(r"[^a-z0-9\s]", "", text)
 
 
 
 
 
 
34
  tokens = text.split()
 
35
  key_tokens = tokens[:top_k]
 
36
  return " ".join(key_tokens)
37
 
38
  ##############################################################################
@@ -695,6 +703,7 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
695
  )
696
 
697
  if __name__ == "__main__":
698
- # share=True 하시면 public URL 사용 가능
699
  demo.launch(share=True)
700
 
 
 
26
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
27
 
28
  ##############################################################################
29
+ # 간단한 키워드 추출 함수 (한글 + 알파벳 + 숫자 + 공백 보존)
30
  ##############################################################################
31
  def extract_keywords(text: str, top_k: int = 5) -> str:
32
+ """
33
+ 1) 한글, 영어, 숫자, 공백만 남기도록 정규식 변경
34
+ 2) 공백 기준 토큰 분리
35
+ 3) 최대 top_k개만
36
+ """
37
+ # 한글(가-힣)+영어대소문자+숫자+공백만 보존
38
+ text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
39
+ # 토큰 분리
40
  tokens = text.split()
41
+ # 최대 top_k개 추출
42
  key_tokens = tokens[:top_k]
43
+ # 다시 합침
44
  return " ".join(key_tokens)
45
 
46
  ##############################################################################
 
703
  )
704
 
705
  if __name__ == "__main__":
706
+ # 615줄 + filler로 715줄 맞추려면 아래 주석 추가
707
  demo.launch(share=True)
708
 
709
+