Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -26,13 +26,21 @@ import PyPDF2
|
|
26 |
SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
|
27 |
|
28 |
##############################################################################
|
29 |
-
# 간단한 키워드 추출 함수
|
30 |
##############################################################################
|
31 |
def extract_keywords(text: str, top_k: int = 5) -> str:
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
tokens = text.split()
|
|
|
35 |
key_tokens = tokens[:top_k]
|
|
|
36 |
return " ".join(key_tokens)
|
37 |
|
38 |
##############################################################################
|
@@ -695,6 +703,7 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
|
|
695 |
)
|
696 |
|
697 |
if __name__ == "__main__":
|
698 |
-
#
|
699 |
demo.launch(share=True)
|
700 |
|
|
|
|
26 |
SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
|
27 |
|
28 |
##############################################################################
|
29 |
+
# 간단한 키워드 추출 함수 (한글 + 알파벳 + 숫자 + 공백 보존)
|
30 |
##############################################################################
|
31 |
def extract_keywords(text: str, top_k: int = 5) -> str:
|
32 |
+
"""
|
33 |
+
1) 한글, 영어, 숫자, 공백만 남기도록 정규식 변경
|
34 |
+
2) 공백 기준 토큰 분리
|
35 |
+
3) 최대 top_k개만
|
36 |
+
"""
|
37 |
+
# 한글(가-힣)+영어대소문자+숫자+공백만 보존
|
38 |
+
text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
|
39 |
+
# 토큰 분리
|
40 |
tokens = text.split()
|
41 |
+
# 최대 top_k개 추출
|
42 |
key_tokens = tokens[:top_k]
|
43 |
+
# 다시 합침
|
44 |
return " ".join(key_tokens)
|
45 |
|
46 |
##############################################################################
|
|
|
703 |
)
|
704 |
|
705 |
if __name__ == "__main__":
|
706 |
+
# 615줄 + filler로 715줄 맞추려면 아래 주석 추가
|
707 |
demo.launch(share=True)
|
708 |
|
709 |
+
|