AIRider committed on
Commit
5a7bb90
·
verified ·
1 Parent(s): 80d51fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -36
app.py CHANGED
@@ -3,50 +3,53 @@ from bs4 import BeautifulSoup
3
  import gradio as gr
4
 
5
def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text.

    Desktop URLs (https://blog.naver.com/...) are rewritten to the mobile
    host first, because the CSS selectors below target the mobile markup.

    Returns a formatted "title / content" string, or an error message if
    the HTTP request fails.
    """
    # Debugging: which URL we were asked to scrape.
    print(f"Scraping URL: {url}")

    # Rewrite desktop URLs to the mobile host.
    if not url.startswith("https://m.blog.naver.com"):
        url = url.replace("https://blog.naver.com", "https://m.blog.naver.com")
        print(f"Converted to mobile URL: {url}")

    # Fetch the page; report network/HTTP failures as a message string.
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error while fetching the page: {e}"

    soup = BeautifulSoup(response.text, 'html.parser')

    # Title scraping — select_one returns None on a miss, so .text raises
    # AttributeError, which we turn into a placeholder.
    try:
        title = soup.select_one('div.se-fs-.se-ff-').text.strip()
        print(f"Scraped Title: {title}")
    except AttributeError:
        title = "Title not found"
        print("Failed to scrape the title")

    # Content scraping: join all non-empty paragraph spans.
    try:
        content_elements = soup.select('div.se-component-content > div.se-text-paragraph > span')
        content = "\n".join([element.text.strip() for element in content_elements if element.text.strip()])
        print(f"Scraped Content: {content[:100]}...")  # preview only
    except AttributeError:
        content = "Content not found"
        print("Failed to scrape the content")

    return f"์ œ๋ชฉ: {title}\n\n๋‚ด์šฉ: {content}"
 
 
 
 
 
42
 
43
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์„ค์ •
44
# Gradio UI: a single URL textbox in, a single result textbox out.
_ui_kwargs = dict(
    fn=scrape_naver_blog,
    inputs=gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL"),
    outputs=gr.Textbox(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘",
    description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์—์„œ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค. ๋ชจ๋ฐ”์ผ URL์„ ์ž…๋ ฅํ•˜์„ธ์š”.",
)
interface = gr.Interface(**_ui_kwargs)
51
 
52
  if __name__ == "__main__":
 
3
  import gradio as gr
4
 
5
def scrape_naver_blog(url):
    """Scrape the title and body text from a Naver blog post.

    Args:
        url: Post URL. Mobile URLs (https://m.blog.naver.com/...) are used
            as-is; desktop URLs (https://blog.naver.com/...) are converted
            to the mobile host, whose markup the selectors below target.
            Anything else is rejected.

    Returns:
        A formatted "title / content" string on success, or an
        "Error: ..." message string on any failure (the Gradio UI shows
        whichever string comes back).
    """
    try:
        # Debugging: URL ํ™•์ธ
        print(f"Scraping URL: {url}")

        if not url.startswith("https://m.blog.naver.com"):
            if url.startswith("https://blog.naver.com"):
                # Convert desktop URLs instead of rejecting them (restores
                # the previous behavior; strictly widens accepted inputs).
                url = url.replace("https://blog.naver.com", "https://m.blog.naver.com", 1)
                print(f"Converted to mobile URL: {url}")
            else:
                raise ValueError("URL must be in the mobile format (https://m.blog.naver.com).")

        # timeout so a stalled connection cannot hang the worker forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Debugging: HTTP ์‘๋‹ต ์ƒํƒœ ํ™•์ธ
        print(f"Response Status Code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'html.parser')

        # Title: the SmartEditor title module on the mobile page.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Debugging: ์ œ๋ชฉ ํ™•์ธ
        print(f"Scraped Title: {title}")

        # Body text. NOTE(review): this matches only "se-quote" (quotation)
        # modules; ordinary paragraphs live in other se-module-text blocks —
        # confirm this selector is intentional.
        content_elements = soup.find_all("div", class_="se-module se-module-text se-quote")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Debugging: ๋‚ด์šฉ ํ™•์ธ
        print(f"Scraped Content: {content}")

        return f"์ œ๋ชฉ: {title}\n๋‚ด์šฉ: {content}"
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        print(f"Error: {e}")
        return f"Error: {e}"
42
+
43
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ •์˜
44
def run_scraper(url):
    """Gradio entry point: hand the URL straight to the scraper."""
    result = scrape_naver_blog(url)
    return result
46
 
 
47
# Gradio UI: one URL textbox in, one result textbox out, wired to the wrapper.
_ui_kwargs = dict(
    fn=run_scraper,
    inputs=gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL (๋ชจ๋ฐ”์ผ ํ˜•์‹)"),
    outputs=gr.Textbox(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘",
    description="๋ชจ๋ฐ”์ผ URL์„ ์ž…๋ ฅํ•˜๋ฉด ๋ธ”๋กœ๊ทธ์˜ ์ œ๋ชฉ๊ณผ ํ…์ŠคํŠธ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค.",
)
interface = gr.Interface(**_ui_kwargs)
54
 
55
  if __name__ == "__main__":