Spaces:

Edenhuang
/

Home_work_for_0330

Sleeping

App Files Files Community

Edenhuang committed on Mar 31

Commit

26c5941

verified ·

1 Parent(s): 3506580

Create app.py

Browse files

Files changed (1) hide show

app.py +217 -0

app.py ADDED Viewed

	@@ -0,0 +1,217 @@

+import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+# Page setup: the same title is used for the browser tab and the main heading
+APP_TITLE = "台灣證券交易所公告擷取工具"
+st.set_page_config(page_title=APP_TITLE, page_icon="📊", layout="wide")
+# Heading followed by a one-line description of what the app does
+st.title(APP_TITLE)
+st.markdown("這個應用程式可以擷取台灣證券交易所的公司公告資訊")
+def extract_data_from_html(html_content):
+    """Parse an announcements table out of HTML and return it as a DataFrame.
+
+    Looks for the first ``<table class="hasBorder">``. Rows come from its
+    ``<tbody>`` when present; otherwise every ``<tr>`` after the first
+    (header) row is used. Rows with fewer than five ``<td>`` cells are
+    skipped. The subject is read from the ``title`` attribute of a
+    ``<button>`` in the fifth cell when available, falling back to the
+    cell's own text.
+
+    Returns a DataFrame with the columns 公司代號, 公司簡稱, 發言日期,
+    發言時間 and 主旨; it is empty when no matching table or rows are found.
+    """
+    field_names = ('公司代號', '公司簡稱', '發言日期', '發言時間', '主旨')
+    # One list per output column, filled row by row below
+    columns = {name: [] for name in field_names}
+    table = BeautifulSoup(html_content, 'html.parser').find('table', {'class': 'hasBorder'})
+    if table is None:
+        return pd.DataFrame(columns)
+    body = table.find('tbody')
+    # Without a <tbody>, drop the first <tr>, which is taken to be the header row
+    rows = body.find_all('tr') if body is not None else table.find_all('tr')[1:]
+    for row in rows:
+        cells = row.find_all('td')
+        if len(cells) < 5:
+            continue
+        values = [cell.text.strip() for cell in cells[:4]]
+        button = cells[4].find('button')
+        # Prefer the button's title attribute over the cell text when it exists
+        if button is not None and button.has_attr('title'):
+            subject = button['title']
+        else:
+            subject = cells[4].text
+        values.append(subject.strip())
+        for name, value in zip(field_names, values):
+            columns[name].append(value)
+    return pd.DataFrame(columns)
+# Fetch the live announcements table from the MOPS website
+@st.cache_data(ttl=3600)  # Cache data for 1 hour
+def extract_data_from_website(url="https://mopsov.twse.com.tw/mops/web/t05sr01_1", timeout=15):
+    """Fetch announcements from the MOPS website and return them as a DataFrame.
+
+    A session first requests the site index and the announcements page to
+    pick up session cookies, then POSTs the query form to ``url`` and parses
+    the response with ``extract_data_from_html``.
+
+    Args:
+        url: Announcements page; used for both the GET and the form POST.
+        timeout: Seconds to wait on each HTTP request, so an unresponsive
+            server cannot hang the app indefinitely.
+
+    Returns:
+        The parsed DataFrame, or ``None`` when a request fails or the
+        response contains no announcement rows. Failures are reported
+        through Streamlit status messages instead of being raised.
+    """
+    # NOTE(review): st.cache_data appears to cache the None returned on failure
+    # as well, so a transient outage may keep callers on fallback data until
+    # the TTL expires -- confirm, and consider raising instead of returning None.
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7'
+    }
+    # Form fields posted to the announcements page to request the listing
+    form_data = {
+        'step': '1',
+        'firstin': '1',
+        'off': '1',
+        'keyword4': '',
+        'code1': '',
+        'TYPEK2': '',
+        'checkbtn': '',
+        'queryName': 'co_id',
+        'inpuType': 'co_id',
+        'TYPEK': 'all',
+        'co_id': '',
+        'year': '',
+        'month': '',
+        'day': '',
+        'b_date': '',
+        'e_date': '',
+    }
+    try:
+        with st.spinner('正在從網站擷取資料...'):
+            # The context manager closes the session's connections on exit
+            with requests.Session() as session:
+                # Make requests to get the session cookies first
+                session.get("https://mopsov.twse.com.tw/mops/web/index", headers=headers, timeout=timeout)
+                session.get(url, headers=headers, timeout=timeout)
+                # Make POST request to get the announcements
+                post_response = session.post(url, data=form_data, headers=headers, timeout=timeout)
+                # Surface HTTP errors instead of parsing an error page as data
+                post_response.raise_for_status()
+                df = extract_data_from_html(post_response.text)
+            if not df.empty:
+                st.success(f'成功擷取 {len(df)} 筆公告資料!')
+                return df
+            st.warning('無法從網站擷取資料，切換到範例資料')
+            return None
+    except Exception as e:
+        # Top-level boundary for the UI: report the problem and let the caller fall back
+        st.error(f'訪問網站時發生錯誤: {e}')
+        return None
+# Sample announcements table (two rows) used as the demo and fallback data source
+default_html_content = """
+<table class="hasBorder"><thead><tr class="tblHead_2"><th width="10%" nowrap="">公司代號</th><th width="10%" nowrap="">公司簡稱</th><th nowrap="">發言日期</th><th width="10%" nowrap="">發言時間</th><th>主旨</th></tr></thead><tbody id="tab2"><tr class="even_2" onmouseover="this.className='mouseOn_2';" onmouseout="this.className='even_2';"><td>7724</td><td>諾亞克</td><td>114/04/01</td><td>00:06:30</td><td class="table02"><button style="width:300px;height:28px;text-align:left;background-color:transparent;border:0;cursor:pointer;" onclick="document.fm_t05sr01_1.step.value='1';document.fm_t05sr01_1.SEQ_NO.value='1';document.fm_t05sr01_1.SPOKE_TIME.value='630';document.fm_t05sr01_1.SPOKE_DATE.value='20250401';document.fm_t05sr01_1.COMPANY_NAME.value='諾亞克';document.fm_t05sr01_1.COMPANY_ID.value='7724';document.fm_t05sr01_1.skey.value='7724202504011';document.fm_t05sr01_1.hhc_co_name.value='諾亞克';openWindow(document.fm_t05sr01_1 ,'');" title="公告本公司董事會決議不分配113年度董事及員工酬勞">公告本公司董事會決議不分配113年度董事......</button></td></tr><tr class="odd_2" onmouseover="this.className='mouseOn_2';" onmouseout="this.className='odd_2';"><td>4117</td><td>普生</td><td>114/04/01</td><td>00:04:31</td><td class="table02"><button style="width:300px;height:28px;text-align:left;background-color:transparent;border:0;cursor:pointer;" onclick="document.fm_t05sr01_1.step.value='1';document.fm_t05sr01_1.SEQ_NO.value='7';document.fm_t05sr01_1.SPOKE_TIME.value='431';document.fm_t05sr01_1.SPOKE_DATE.value='20250401';document.fm_t05sr01_1.COMPANY_NAME.value='普生';document.fm_t05sr01_1.COMPANY_ID.value='4117';document.fm_t05sr01_1.skey.value='4117202503317';document.fm_t05sr01_1.hhc_co_name.value='普生';openWindow(document.fm_t05sr01_1 ,'');" title="公告本公司董事會決議不發放股利">公告本公司董事會決議不發放股利</button></td></tr></tbody></table>
+"""
+# Sidebar with data source options
+st.sidebar.header("資料來源選項")
+data_source = st.sidebar.radio(
+    "選擇資料來源",
+    ["從網站擷取資料", "使用範例資料", "貼上HTML代碼"]
+)
+# Initialize data frame
+df = None
+# Process based on data source selection
+if data_source == "從網站擷取資料":
+    df = extract_data_from_website()
+    if df is None:
+        st.sidebar.warning("從網站擷取資料失敗，切換到範例資料")
+        df = extract_data_from_html(default_html_content)
+elif data_source == "使用範例資料":
+    df = extract_data_from_html(default_html_content)
+else:  # "貼上HTML代碼"
+    html_input = st.sidebar.text_area("貼上HTML代碼", value=default_html_content, height=300)
+    if st.sidebar.button("解析HTML"):
+        # The button is True only on the rerun triggered by the click, so the
+        # parsed table is kept in session state; otherwise it would vanish as
+        # soon as any other widget (e.g. a filter box) causes a rerun.
+        st.session_state["pasted_df"] = extract_data_from_html(html_input)
+        st.sidebar.success("HTML解析完成!")
+    df = st.session_state.get("pasted_df")
+# Display and filter data
+if df is not None and not df.empty:
+    st.subheader("台灣證券交易所公告資料")
+    # Add search filters
+    col1, col2 = st.columns(2)
+    with col1:
+        search_code = st.text_input("依公司代號篩選")
+    with col2:
+        search_name = st.text_input("依公司名稱篩選")
+    # Apply filters if provided. regex=False matches the typed text literally,
+    # so characters such as "(" or "*" cannot raise a regex error.
+    filtered_df = df.copy()
+    if search_code:
+        filtered_df = filtered_df[filtered_df['公司代號'].str.contains(search_code, regex=False, na=False)]
+    if search_name:
+        filtered_df = filtered_df[filtered_df['公司簡稱'].str.contains(search_name, regex=False, na=False)]
+    # Display the data
+    st.dataframe(filtered_df, use_container_width=True)
+    # Download button; utf-8-sig adds a BOM to the CSV bytes
+    csv = filtered_df.to_csv(index=False).encode('utf-8-sig')
+    st.download_button(
+        label="下載為CSV",
+        data=csv,
+        file_name="twse_announcements.csv",
+        mime="text/csv",
+    )
+    # Display statistics
+    st.subheader("資料統計")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.metric("公告總數", len(filtered_df))
+    with col2:
+        company_count = filtered_df['公司代號'].nunique()
+        st.metric("公司數量", company_count)
+    # Show announcement details on selection
+    if not filtered_df.empty:
+        st.subheader("選擇公告以查看詳情")
+        # Options are row positions within the filtered table
+        selected_indices = st.multiselect(
+            "選擇公告",
+            options=list(range(len(filtered_df))),
+            format_func=lambda i: f"{filtered_df.iloc[i]['公司簡稱']} - {filtered_df.iloc[i]['主旨'][:20]}..."
+        )
+        for idx in selected_indices:
+            row = filtered_df.iloc[idx]
+            with st.expander(f"{row['公司簡稱']} ({row['公司代號']}) - {row['發言日期']}"):
+                st.write(f"**公司代號:** {row['公司代號']}")
+                st.write(f"**公司簡稱:** {row['公司簡稱']}")
+                st.write(f"**發言日期:** {row['發言日期']}")
+                st.write(f"**發言時間:** {row['發言時間']}")
+                st.write(f"**主旨內容:** {row['主旨']}")
+else:
+    st.warning("沒有可顯示的資料")
+# Footer
+st.markdown("---")
+st.markdown("台灣證券交易所公告擷取工具 | 資料來源: [台灣證券交易所](https://mopsov.twse.com.tw/mops/web/index)")