Spaces:

AIRider
/

blogcr111111

Sleeping

App Files Files Community

blogcr111111 / app.py

AIRider

Update app.py

a800f33 verified 4 months ago

raw

history blame contribute delete

2.11 kB

	import requests
	from bs4 import BeautifulSoup
	import gradio as gr

	def convert_to_mobile_url(url):
	    """Convert a desktop (PC) Naver blog URL into its mobile URL.

	    Two desktop forms are recognised on the host ``blog.naver.com``:
	    ``/<user_id>/<post_id>`` and
	    ``/PostView.naver?blogId=<user_id>&logNo=<post_id>``.
	    Surrounding whitespace and a missing ``https://`` prefix are tolerated.

	    Args:
	        url: URL text, typically as typed by a user.

	    Returns:
	        ``https://m.blog.naver.com/<user_id>/<post_id>`` for a recognised
	        desktop post URL (a query string on the path form is kept), the
	        cleaned URL when it already points at ``m.blog.naver.com``, and the
	        input unchanged in every other case.
	    """
	    from urllib.parse import parse_qs, urlsplit

	    if not url:
	        return url

	    cleaned = url.strip()
	    # urlsplit only fills in the host when a scheme is present, so supply
	    # one for inputs such as "blog.naver.com/user/123".
	    candidate = cleaned if "://" in cleaned else f"https://{cleaned}"
	    try:
	        parts = urlsplit(candidate)
	        host = parts.hostname or ""
	    except ValueError:
	        # Malformed input (e.g. an unbalanced "[" in the host): leave it alone.
	        return url

	    if host == "m.blog.naver.com":
	        return candidate
	    # Compare the parsed host instead of searching the whole string, so a
	    # URL that merely mentions "blog.naver.com" elsewhere is not rewritten.
	    if host != "blog.naver.com":
	        return url

	    # Query-string form: PostView.naver?blogId=<user_id>&logNo=<post_id>
	    query = parse_qs(parts.query)
	    blog_id = query.get("blogId", [""])[0]
	    log_no = query.get("logNo", [""])[0]
	    if blog_id and log_no:
	        return f"https://m.blog.naver.com/{blog_id}/{log_no}"

	    # Path form: /<user_id>/<post_id>; empty segments come from extra slashes.
	    segments = [segment for segment in parts.path.split("/") if segment]
	    if len(segments) >= 2:
	        user_id, post_id = segments[:2]
	        suffix = f"?{parts.query}" if parts.query else ""
	        return f"https://m.blog.naver.com/{user_id}/{post_id}{suffix}"

	    return url

	def scrape_naver_blog(url):
	    """Scrape the title and text-only body of a Naver blog post.

	    The URL is first passed through ``convert_to_mobile_url`` and the
	    resulting page is fetched and parsed with BeautifulSoup.

	    Args:
	        url: Naver blog post URL (desktop or mobile).

	    Returns:
	        A string containing the title and body text. If the title or body
	        element is not found, a placeholder message is used in its place.
	        On any failure the function returns ``"Error: <message>"`` instead
	        of raising.
	    """
	    try:
	        # Convert to the mobile URL before fetching.
	        mobile_url = convert_to_mobile_url(url)
	        print(f"Converted Mobile URL: {mobile_url}")

	        # requests has no default timeout; without one a stalled connection
	        # would block this call (and the UI request behind it) forever.
	        response = requests.get(mobile_url, timeout=10)
	        response.raise_for_status()

	        soup = BeautifulSoup(response.text, 'html.parser')

	        # Scrape the title.
	        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
	        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"

	        # Scrape the body: one line of text per matching module.
	        content_elements = soup.find_all("div", class_="se-module se-module-text")
	        content = "\n".join(
	            elem.get_text(strip=True) for elem in content_elements
	        ) if content_elements else "내용을 찾을 수 없음"

	        # Debug output.
	        print(f"Scraped Title: {title}")
	        print(f"Scraped Content: {content}")

	        # Assemble the result shown to the user.
	        result = f"제목: {title}\n\n내용: {content}"
	        return result

	    except Exception as e:
	        # Deliberately broad: every failure is logged and reported back to
	        # the caller as text rather than raised.
	        print(f"Error: {e}")
	        return f"Error: {e}"

	# Gradio interface definition
	def run_scraper(url):
	    """Gradio callback: return the scrape result for the URL the user entered."""
	    scraped_text = scrape_naver_blog(url)
	    return scraped_text

	# Single-textbox UI: a blog URL goes in, the scraped text comes out.
	interface = gr.Interface(
	    title="네이버 블로그 스크래핑",
	    description="네이버 블로그의 제목과 내용을 스크래핑합니다.",
	    fn=run_scraper,
	    inputs=gr.Textbox(label="네이버 블로그 URL"),
	    outputs=gr.Textbox(label="스크래핑 결과"),
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()