import requests
from bs4 import BeautifulSoup
import gradio as gr

def convert_to_mobile_url(url):
    """
    Convert a PC blog URL to its mobile equivalent.
    """
    if "m.blog.naver.com" not in url:
        if "blog.naver.com" in url:
            url_parts = url.split("/")
            if len(url_parts) >= 5:
                user_id = url_parts[3]
                post_id = url_parts[4]
                return f"https://m.blog.naver.com/{user_id}/{post_id}"
    return url
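# Illustrative sketch of the conversion (the user id and post id below are
# hypothetical, not a real post):
#
#   convert_to_mobile_url("https://blog.naver.com/someuser/223000000000")
#   # -> "https://m.blog.naver.com/someuser/223000000000"
#
# A URL that is already on m.blog.naver.com is returned unchanged.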
def scrape_naver_blog(url):
    """
    Scrape the title and body text of a Naver blog post.
    """
    try:
        # Convert to the mobile URL before fetching
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Scrape the title
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "Title not found"

        # Scrape the body content
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "Content not found"

        # Print debug messages
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        # Return the result
        result = f"Title: {title}\n\nContent: {content}"
        return result
    except Exception as e:
        print(f"Error: {e}")
        return f"Error: {e}"
# Define the Gradio interface
def run_scraper(url):
    return scrape_naver_blog(url)

interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="Naver Blog URL"),
    outputs=gr.Textbox(label="Scraping Result"),
    title="Naver Blog Scraper",
    description="Scrapes the title and content of a Naver blog post."
)

if __name__ == "__main__":
    interface.launch()
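# Running this file directly (e.g. `python app.py`, assuming the file is named
# app.py) launches the Gradio app; Gradio serves it locally and prints the URL
# (typically http://127.0.0.1:7860) to the console.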