File size: 2,114 Bytes
4ecdb4b
 
80d51fc
4431f41
2b61a85
 
5e53174
2b61a85
 
 
 
 
 
 
 
 
 
34aa4af
5e53174
a800f33
5e53174
4431f41
5e53174
2b61a85
 
5e53174
2b61a85
80d51fc
5e53174
5a7bb90
5e53174
5a7bb90
 
 
5e53174
 
 
5a7bb90
 
 
5e53174
 
 
5a7bb90
5e53174
 
a800f33
5e53174
 
5a7bb90
 
 
 
 
 
 
80d51fc
 
5a7bb90
2b61a85
80d51fc
 
a800f33
34aa4af
4431f41
34aa4af
80d51fc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from urllib.parse import parse_qs, urlsplit, urlunsplit

import gradio as gr
import requests
from bs4 import BeautifulSoup

def convert_to_mobile_url(url):
    """Convert a desktop Naver blog post URL to its mobile equivalent.

    Recognised desktop forms (host must be exactly ``blog.naver.com``):

    * ``https://blog.naver.com/<user_id>/<post_id>``
    * ``https://blog.naver.com/PostView.naver?blogId=<user_id>&logNo=<post_id>``

    The scheme may be ``http``, ``https`` or missing altogether, and
    surrounding whitespace is ignored. Any query string or fragment on the
    path form is carried over to the result.

    Args:
        url: The URL entered by the user.

    Returns:
        ``https://m.blog.naver.com/<user_id>/<post_id>`` when ``url`` is a
        recognised desktop post URL; otherwise ``url`` unchanged (this
        includes URLs that already point at ``m.blog.naver.com``, URLs on
        other hosts, and empty input).
    """
    if not url:
        return url

    candidate = url.strip()
    # urlsplit only recognises the host when a scheme is present, so supply
    # one for inputs pasted as "blog.naver.com/...".
    if "://" not in candidate:
        candidate = f"https://{candidate}"

    try:
        parts = urlsplit(candidate)
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): leave it untouched.
        return url

    # Compare the parsed host rather than searching the whole string, so a
    # foreign URL that merely mentions "blog.naver.com" is not rewritten.
    if parts.hostname != "blog.naver.com":
        return url

    segments = [segment for segment in parts.path.split("/") if segment]
    if len(segments) >= 2:
        user_id, post_id = segments[0], segments[1]
        return urlunsplit(
            ("https", "m.blog.naver.com", f"/{user_id}/{post_id}",
             parts.query, parts.fragment)
        )

    # Legacy viewer form: the identifiers live in the query string.
    query = parse_qs(parts.query)
    if query.get("blogId") and query.get("logNo"):
        user_id, post_id = query["blogId"][0], query["logNo"][0]
        return f"https://m.blog.naver.com/{user_id}/{post_id}"

    return url

def scrape_naver_blog(url, *, timeout=10):
    """Scrape the title and body text (text only) of a Naver blog post.

    The URL is first passed through ``convert_to_mobile_url`` and the
    resulting page is fetched and parsed with BeautifulSoup.

    Args:
        url: Blog post URL as entered by the user.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A string of the form ``"제목: <title>\\n\\n내용: <content>"``
        ("Title: ... / Content: ..."). A Korean "not found" placeholder is
        substituted when the title or body elements are missing. On any
        failure the string ``"Error: <exception>"`` is returned instead of
        raising, because the result is shown directly in the UI textbox.
    """
    try:
        # Convert to the mobile URL before fetching.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # requests waits indefinitely by default; a timeout keeps a stalled
        # connection from hanging the caller forever.
        response = requests.get(mobile_url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Title: the text module that also carries the "se-title-text" class.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"

        # Body: a multi-class string passed to class_ is compared against the
        # exact class attribute value, so this query does not pick up the
        # title element above.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "내용을 찾을 수 없음"

        # Debug output.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        # Assemble the result shown to the user.
        result = f"제목: {title}\n\n내용: {content}"
        return result

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure as text.
        print(f"Error: {e}")
        return f"Error: {e}"

# Gradio interface definition
def run_scraper(url):
    """Gradio callback: scrape the given blog URL and return the result text."""
    scraped_text = scrape_naver_blog(url)
    return scraped_text

# Single-textbox UI: a blog URL goes in, the scraped text comes out.
# The user-facing strings are Korean: "Naver blog URL", "Scraping result",
# "Naver blog scraping", "Scrapes the title and content of a Naver blog."
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="네이버 블로그 URL"),
    outputs=gr.Textbox(label="스크래핑 결과"),
    title="네이버 블로그 스크래핑",
    description="네이버 블로그의 제목과 내용을 스크래핑합니다.",
)

if __name__ == "__main__":
    interface.launch()