Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,50 +3,53 @@ from bs4 import BeautifulSoup
|
|
3 |
import gradio as gr
|
4 |
|
5 |
def scrape_naver_blog(url):
|
6 |
-
# 디버깅: URL 확인
|
7 |
-
print(f"Scraping URL: {url}")
|
8 |
-
|
9 |
-
# 모바일 URL 형태로 변환
|
10 |
-
if not url.startswith("https://m.blog.naver.com"):
|
11 |
-
url = url.replace("https://blog.naver.com", "https://m.blog.naver.com")
|
12 |
-
print(f"Converted to mobile URL: {url}")
|
13 |
-
|
14 |
-
# 요청 보내기
|
15 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
response = requests.get(url)
|
17 |
response.raise_for_status()
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
title =
|
|
|
|
|
27 |
print(f"Scraped Title: {title}")
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
print(f"Scraped Content: {content
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
# Gradio 인터페이스 설정
|
44 |
interface = gr.Interface(
|
45 |
-
fn=
|
46 |
-
inputs=gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL"),
|
47 |
outputs=gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ"),
|
48 |
title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ",
|
49 |
-
description="
|
50 |
)
|
51 |
|
52 |
if __name__ == "__main__":
|
|
|
3 |
import gradio as gr
|
4 |
|
5 |
def scrape_naver_blog(url, timeout=10):
    """Scrape the title and text content of a mobile Naver blog post.

    Args:
        url: Post URL. After surrounding whitespace is stripped it must
            start with ``https://m.blog.naver.com``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        On success, a two-line string: ``제목: <title>`` followed by
        ``내용: <content>``. On any failure (invalid URL, HTTP error,
        parsing error) the string ``Error: <message>``. The function never
        raises, because its result is displayed directly in the UI.
    """
    try:
        # Debugging: show the URL exactly as received.
        print(f"Scraping URL: {url}")

        # Treat a missing value as empty and ignore whitespace left over
        # from copy/paste, so both fall through to the format check below.
        url = (url or "").strip()

        if not url.startswith("https://m.blog.naver.com"):
            raise ValueError("URL must be in the mobile format (https://m.blog.naver.com).")

        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Debugging: show the HTTP response status.
        print(f"Response Status Code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'html.parser')

        # Title.
        # NOTE(review): a multi-class string passed to class_ matches only
        # elements whose class attribute is exactly this string; confirm
        # this is intended rather than a CSS selector.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        if title_element:
            title = title_element.get_text(strip=True)
        else:
            title = "제목을 찾을 수 없음"

        # Debugging: show the scraped title.
        print(f"Scraped Title: {title}")

        # Content: every matching element's text, one element per line.
        content_elements = soup.find_all("div", class_="se-module se-module-text se-quote")
        if content_elements:
            content = "\n".join(
                elem.get_text(strip=True) for elem in content_elements
            )
        else:
            content = "내용을 찾을 수 없음"

        # Debugging: show the scraped content.
        print(f"Scraped Content: {content}")

        return f"제목: {title}\n내용: {content}"
    except Exception as e:
        # UI boundary: report the failure as text instead of propagating it.
        print(f"Error: {e}")
        return f"Error: {e}"
|
42 |
+
|
43 |
+
# Gradio 인터페이스 정의
|
44 |
+
def run_scraper(url):
    """Gradio callback: pass the submitted URL to ``scrape_naver_blog``.

    Returns the string produced by ``scrape_naver_blog`` unchanged.
    """
    result = scrape_naver_blog(url)
    return result
|
46 |
|
|
|
47 |
interface = gr.Interface(
|
48 |
+
fn=run_scraper,
|
49 |
+
inputs=gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL (๋ชจ๋ฐ์ผ ํ์)"),
|
50 |
outputs=gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ"),
|
51 |
title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ",
|
52 |
+
description="๋ชจ๋ฐ์ผ URL์ ์
๋ ฅํ๋ฉด ๋ธ๋ก๊ทธ์ ์ ๋ชฉ๊ณผ ํ
์คํธ ๋ด์ฉ์ ์คํฌ๋ํํฉ๋๋ค."
|
53 |
)
|
54 |
|
55 |
if __name__ == "__main__":
|