# blogcr111111 / app.py
# AIRider's picture
# Update app.py
# a800f33 verified
from urllib.parse import parse_qs, urlsplit

import gradio as gr
import requests
from bs4 import BeautifulSoup
def convert_to_mobile_url(url):
    """Convert a desktop (PC) Naver blog URL to its mobile equivalent.

    Two desktop forms on the host ``blog.naver.com`` are recognized:

    * ``https://blog.naver.com/<user_id>/<post_id>``
    * ``https://blog.naver.com/PostView.naver?blogId=<user_id>&logNo=<post_id>``

    Both map to ``https://m.blog.naver.com/<user_id>/<post_id>``. Surrounding
    whitespace is stripped, a missing ``scheme://`` prefix is tolerated, and
    any query string or fragment is dropped from the converted URL.

    Args:
        url: The URL string to convert.

    Returns:
        The mobile URL when ``url`` is a recognized desktop post URL;
        otherwise ``url`` (whitespace-stripped) unchanged. URLs that are
        already mobile, belong to another host, or lack a post id fall in
        the second category.
    """
    url = url.strip()

    try:
        parts = urlsplit(url)
        if not parts.netloc:
            # No "scheme://" prefix: re-parse as a network-path reference so
            # that "blog.naver.com/..." is recognized as host + path.
            parts = urlsplit(f"//{url}")
        host = parts.hostname or ""
    except ValueError:
        # Malformed input (e.g. a broken bracketed host); leave it untouched.
        return url

    # Compare the real host rather than searching for a substring, so that
    # "m.blog.naver.com" and unrelated URLs that merely mention the domain
    # are not rewritten.
    if host != "blog.naver.com":
        return url

    # Query-string form: PostView.naver?blogId=<user>&logNo=<post>
    query = parse_qs(parts.query)
    blog_id = query.get("blogId", [""])[0]
    log_no = query.get("logNo", [""])[0]
    if blog_id and log_no:
        return f"https://m.blog.naver.com/{blog_id}/{log_no}"

    # Path form: /<user>/<post>[/...]; empty segments (from doubled or
    # trailing slashes) are ignored.
    segments = [segment for segment in parts.path.split("/") if segment]
    if len(segments) >= 2:
        user_id, post_id = segments[:2]
        return f"https://m.blog.naver.com/{user_id}/{post_id}"

    return url
def scrape_naver_blog(url, timeout=10):
    """Scrape the title and body text (text only) of a Naver blog post.

    The URL is converted to its mobile form with ``convert_to_mobile_url``,
    fetched with ``requests`` and parsed with BeautifulSoup.

    Args:
        url: URL of the Naver blog post.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A single string holding the title and the content, each preceded by
        a Korean label and separated by a blank line. If anything fails
        (network error, HTTP error status, parsing error), the string
        ``"Error: <message>"`` is returned instead of raising.
    """
    try:
        # Convert to the mobile URL.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        response = requests.get(mobile_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Scrape the title.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"

        # Scrape the body content.
        # NOTE(review): a multi-class string given to class_ is matched
        # against the exact class attribute value, so this presumably skips
        # the title div (which carries an extra class) - confirm against the
        # live markup before loosening the selector.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "내용을 찾을 수 없음"

        # Debug output.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        # Build the result.
        result = f"제목: {title}\n\n내용: {content}"
        return result
    except Exception as e:
        # Broad catch on purpose: the error is reported back to the caller
        # as text rather than propagated.
        print(f"Error: {e}")
        return f"Error: {e}"
# Gradio interface definition
def run_scraper(url):
    """Callback for the Gradio interface: scrape ``url`` and return the text."""
    scraped_text = scrape_naver_blog(url)
    return scraped_text
# Gradio UI: one text box for the blog URL, one text box for the scraped
# result; run_scraper is invoked on submit.
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="네이버 블로그 URL"),
    outputs=gr.Textbox(label="스크래핑 결과"),
    title="네이버 블로그 스크래핑",
    description="네이버 블로그의 제목과 내용을 스크래핑합니다.",
)

if __name__ == "__main__":
    # Launch the app only when executed as a script, not when imported.
    interface.launch()