Spaces:
Sleeping
Sleeping
File size: 2,114 Bytes
4ecdb4b 80d51fc 4431f41 2b61a85 5e53174 2b61a85 34aa4af 5e53174 a800f33 5e53174 4431f41 5e53174 2b61a85 5e53174 2b61a85 80d51fc 5e53174 5a7bb90 5e53174 5a7bb90 5e53174 5a7bb90 5e53174 5a7bb90 5e53174 a800f33 5e53174 5a7bb90 80d51fc 5a7bb90 2b61a85 80d51fc a800f33 34aa4af 4431f41 34aa4af 80d51fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from urllib.parse import parse_qs, urlsplit

import gradio as gr
import requests
from bs4 import BeautifulSoup
def convert_to_mobile_url(url):
    """Convert a desktop Naver blog post URL to its mobile equivalent.

    Recognised inputs (host must be exactly ``blog.naver.com``):

    * path form:  ``https://blog.naver.com/<user_id>/<post_id>``
    * query form: ``https://blog.naver.com/PostView.naver?blogId=<user_id>&logNo=<post_id>``

    A missing scheme and surrounding whitespace are tolerated. Any query
    string or fragment on a path-form URL is dropped from the result.

    Args:
        url: The URL entered by the user.

    Returns:
        ``https://m.blog.naver.com/<user_id>/<post_id>`` when the input is a
        recognised desktop post URL; the (whitespace-stripped, scheme-prefixed)
        input when it already points at ``m.blog.naver.com``; otherwise the
        input unchanged.
    """
    if not url:
        return url

    candidate = url.strip()
    # urlsplit() only populates the host when a scheme is present, so add
    # one for inputs such as "blog.naver.com/user/123".
    if not candidate.lower().startswith(("http://", "https://")):
        candidate = f"https://{candidate.lstrip('/')}"

    try:
        parts = urlsplit(candidate)
    except ValueError:
        # Malformed netloc (e.g. an unbalanced IPv6 bracket): leave as-is.
        return url

    # Compare the parsed host rather than searching the whole string, so a
    # URL that merely contains "blog.naver.com" in its path is not rewritten.
    host = parts.hostname or ""
    if host == "m.blog.naver.com":
        return candidate
    if host != "blog.naver.com":
        return url

    query = parse_qs(parts.query)
    segments = [segment for segment in parts.path.split("/") if segment]

    if "blogId" in query and "logNo" in query:
        user_id, post_id = query["blogId"][0], query["logNo"][0]
    elif len(segments) >= 2:
        user_id, post_id = segments[0], segments[1]
    else:
        # No post id to build a mobile post URL from.
        return url

    return f"https://m.blog.naver.com/{user_id}/{post_id}"
def scrape_naver_blog(url):
    """Scrape the title and text content of a Naver blog post.

    The URL is first passed through ``convert_to_mobile_url`` and the
    resulting page is fetched and parsed with BeautifulSoup.

    Args:
        url: URL of the blog post.

    Returns:
        A string of the form ``"제목: <title>\\n\\n내용: <content>"`` on
        success (with Korean "not found" placeholders when the title or
        content elements are missing), or ``"Error: <message>"`` if any
        exception is raised while fetching or parsing.
    """
    try:
        # Convert to the mobile URL before fetching.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Title.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        if title_element:
            title = title_element.get_text(strip=True)
        else:
            title = "제목을 찾을 수 없음"

        # Body text: one line per matching text module.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        if content_elements:
            content = "\n".join(
                elem.get_text(strip=True) for elem in content_elements
            )
        else:
            content = "내용을 찾을 수 없음"

        # Debug output.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        return f"제목: {title}\n\n내용: {content}"
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        print(f"Error: {e}")
        return f"Error: {e}"
# Gradio interface definition
def run_scraper(url):
    """Gradio callback: return the scrape result text for ``url``."""
    scraped_text = scrape_naver_blog(url)
    return scraped_text
# Single-textbox UI: blog URL in, scraped text out. The user-facing labels
# are Korean and must be stored as valid UTF-8.
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="네이버 블로그 URL"),
    outputs=gr.Textbox(label="스크래핑 결과"),
    title="네이버 블로그 스크래핑",
    description="네이버 블로그의 제목과 내용을 스크래핑합니다.",
)

if __name__ == "__main__":
    interface.launch()
|