AIRider committed on
Commit
5a7bb90
·
verified ·
1 Parent(s): 80d51fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -36
app.py CHANGED
@@ -3,50 +3,53 @@ from bs4 import BeautifulSoup
3
  import gradio as gr
4
 
5
def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text.

    Desktop URLs (https://blog.naver.com/...) are rewritten to the mobile
    host first, because the CSS selectors below target the mobile markup.

    Returns a formatted "title / content" string, or an error message if
    the HTTP request fails.
    """
    # Debugging: which URL we were asked to scrape.
    print(f"Scraping URL: {url}")

    # Rewrite desktop URLs to the mobile host.
    if not url.startswith("https://m.blog.naver.com"):
        url = url.replace("https://blog.naver.com", "https://m.blog.naver.com")
        print(f"Converted to mobile URL: {url}")

    # Fetch the page; report network/HTTP failures as a message string.
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error while fetching the page: {e}"

    soup = BeautifulSoup(response.text, 'html.parser')

    # Title scraping — select_one returns None on a miss, so .text raises
    # AttributeError, which we turn into a placeholder.
    try:
        title = soup.select_one('div.se-fs-.se-ff-').text.strip()
        print(f"Scraped Title: {title}")
    except AttributeError:
        title = "Title not found"
        print("Failed to scrape the title")

    # Content scraping: join all non-empty paragraph spans.
    try:
        content_elements = soup.select('div.se-component-content > div.se-text-paragraph > span')
        content = "\n".join([element.text.strip() for element in content_elements if element.text.strip()])
        print(f"Scraped Content: {content[:100]}...")  # preview only
    except AttributeError:
        content = "Content not found"
        print("Failed to scrape the content")

    return f"์ œ๋ชฉ: {title}\n\n๋‚ด์šฉ: {content}"
 
 
 
 
 
42
 
43
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์„ค์ •
44
# Gradio UI: a single URL textbox in, a single result textbox out.
_ui_kwargs = dict(
    fn=scrape_naver_blog,
    inputs=gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL"),
    outputs=gr.Textbox(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘",
    description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์—์„œ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค. ๋ชจ๋ฐ”์ผ URL์„ ์ž…๋ ฅํ•˜์„ธ์š”.",
)
interface = gr.Interface(**_ui_kwargs)
51
 
52
  if __name__ == "__main__":
 
3
  import gradio as gr
4
 
5
def scrape_naver_blog(url):
    """Scrape the title and body text from a Naver blog post.

    Args:
        url: Post URL. Mobile URLs (https://m.blog.naver.com/...) are used
            as-is; desktop URLs (https://blog.naver.com/...) are converted
            to the mobile host, whose markup the selectors below target.
            Anything else is rejected.

    Returns:
        A formatted "title / content" string on success, or an
        "Error: ..." message string on any failure (the Gradio UI shows
        whichever string comes back).
    """
    try:
        # Debugging: URL ํ™•์ธ
        print(f"Scraping URL: {url}")

        if not url.startswith("https://m.blog.naver.com"):
            if url.startswith("https://blog.naver.com"):
                # Convert desktop URLs instead of rejecting them (restores
                # the previous behavior; strictly widens accepted inputs).
                url = url.replace("https://blog.naver.com", "https://m.blog.naver.com", 1)
                print(f"Converted to mobile URL: {url}")
            else:
                raise ValueError("URL must be in the mobile format (https://m.blog.naver.com).")

        # timeout so a stalled connection cannot hang the worker forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Debugging: HTTP ์‘๋‹ต ์ƒํƒœ ํ™•์ธ
        print(f"Response Status Code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'html.parser')

        # Title: the SmartEditor title module on the mobile page.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Debugging: ์ œ๋ชฉ ํ™•์ธ
        print(f"Scraped Title: {title}")

        # Body text. NOTE(review): this matches only "se-quote" (quotation)
        # modules; ordinary paragraphs live in other se-module-text blocks —
        # confirm this selector is intentional.
        content_elements = soup.find_all("div", class_="se-module se-module-text se-quote")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Debugging: ๋‚ด์šฉ ํ™•์ธ
        print(f"Scraped Content: {content}")

        return f"์ œ๋ชฉ: {title}\n๋‚ด์šฉ: {content}"
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        print(f"Error: {e}")
        return f"Error: {e}"
42
+
43
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ •์˜
44
def run_scraper(url):
    """Gradio entry point: hand the URL straight to the scraper."""
    result = scrape_naver_blog(url)
    return result
46
 
 
47
# Gradio UI: one URL textbox in, one result textbox out, wired to the wrapper.
_ui_kwargs = dict(
    fn=run_scraper,
    inputs=gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL (๋ชจ๋ฐ”์ผ ํ˜•์‹)"),
    outputs=gr.Textbox(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘",
    description="๋ชจ๋ฐ”์ผ URL์„ ์ž…๋ ฅํ•˜๋ฉด ๋ธ”๋กœ๊ทธ์˜ ์ œ๋ชฉ๊ณผ ํ…์ŠคํŠธ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค.",
)
interface = gr.Interface(**_ui_kwargs)
54
 
55
  if __name__ == "__main__":