Edenhuang committed on
Commit
26c5941
·
verified ·
1 Parent(s): 3506580

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -0
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+
6
# Page-level settings; the browser-tab title and the on-page heading share one string
APP_TITLE = "台灣證券交易所公告擷取工具"
st.set_page_config(page_title=APP_TITLE, page_icon="📊", layout="wide")

# Heading followed by a one-line description of the app
st.title(APP_TITLE)
st.markdown("這個應用程式可以擷取台灣證券交易所的公司公告資訊")
16
+
17
def extract_data_from_html(html_content):
    """Parse the announcements table out of an HTML string.

    The first ``<table>`` whose class is ``hasBorder`` is located. Its data
    rows are the ``<tr>`` elements under ``<tbody>`` when one exists;
    otherwise every ``<tr>`` after the first (header) row is used. Rows with
    fewer than five ``<td>`` cells are ignored.

    Args:
        html_content: HTML markup to parse.

    Returns:
        A DataFrame with the columns 公司代號, 公司簡稱, 發言日期, 發言時間
        and 主旨 (all strings). It has zero rows when no matching table or
        no usable row is found.
    """
    column_names = ('公司代號', '公司簡稱', '發言日期', '發言時間', '主旨')

    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table', {'class': 'hasBorder'})

    if table:
        tbody = table.find('tbody')
        # Without a <tbody>, the first <tr> is taken to be the header row.
        body_rows = tbody.find_all('tr') if tbody else table.find_all('tr')[1:]
    else:
        body_rows = []

    records = []
    for tr in body_rows:
        tds = tr.find_all('td')
        if len(tds) < 5:
            continue

        leading = tuple(td.text.strip() for td in tds[:4])

        # The visible button text may be truncated; its title attribute, when
        # present, is used as the subject instead.
        button = tds[4].find('button')
        if button and 'title' in button.attrs:
            subject = button['title'].strip()
        else:
            subject = tds[4].text.strip()

        records.append(leading + (subject,))

    # Transpose row tuples into one sequence per column.
    per_column = zip(*records) if records else ((),) * len(column_names)
    return pd.DataFrame(
        {name: list(values) for name, values in zip(column_names, per_column)}
    )
70
+
71
class _NoAnnouncementsError(Exception):
    """The announcements page was fetched but no table rows could be parsed."""


# Only this helper is cached. It raises on every failure path, so a failed or
# empty fetch is never stored and the next rerun retries instead of replaying
# a cached failure for the whole TTL.
@st.cache_data(ttl=3600)  # Cache successful results for 1 hour
def _fetch_announcements(url, timeout):
    """Download the announcements page and return its rows as a DataFrame.

    Args:
        url: Address of the announcements endpoint to query.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A non-empty DataFrame produced by ``extract_data_from_html``.

    Raises:
        requests.RequestException: On network errors, timeouts, or an HTTP
            error status on the query response.
        _NoAnnouncementsError: If the response contained no parsable rows.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7'
    }

    # Form fields sent with the POST that returns the announcements table
    form_data = {
        'step': '1',
        'firstin': '1',
        'off': '1',
        'keyword4': '',
        'code1': '',
        'TYPEK2': '',
        'checkbtn': '',
        'queryName': 'co_id',
        'inpuType': 'co_id',
        'TYPEK': 'all',
        'co_id': '',
        'year': '',
        'month': '',
        'day': '',
        'b_date': '',
        'e_date': '',
    }

    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        # Visit the index page first so the session picks up any cookies
        session.get("https://mopsov.twse.com.tw/mops/web/index", headers=headers, timeout=timeout)

        # Then open the announcements page itself (response body is not needed)
        session.get(url, headers=headers, timeout=timeout)

        # Finally submit the query form
        post_response = session.post(url, data=form_data, headers=headers, timeout=timeout)
        # Surface HTTP errors instead of parsing an error page as "no data"
        post_response.raise_for_status()

    df = extract_data_from_html(post_response.text)
    if df.empty:
        raise _NoAnnouncementsError(url)
    return df


# Function to extract data from the actual website
def extract_data_from_website(url="https://mopsov.twse.com.tw/mops/web/t05sr01_1", timeout=15):
    """Fetch announcements from the website and report the outcome in the UI.

    Successful results are cached for one hour by ``_fetch_announcements``;
    failures are not cached, so a later rerun tries the website again.

    Args:
        url: Address of the announcements endpoint to query.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A DataFrame of announcements, or ``None`` when the fetch failed or
        produced no rows (a Streamlit warning or error is shown in that case).
    """
    try:
        with st.spinner('正在從網站擷取資料...'):
            df = _fetch_announcements(url, timeout)
    except _NoAnnouncementsError:
        st.warning('無法從網站擷取資料,切換到範例資料')
        return None
    except Exception as e:
        # Top-level UI boundary: show the problem and let the caller fall back.
        st.error(f'訪問網站時發生錯誤: {e}')
        return None

    st.success(f'成功擷取 {len(df)} 筆公告資料!')
    return df
124
+
125
# Built-in sample of the announcements table (two rows). It is used as the
# "使用範例資料" data source, as the fallback when the website fetch returns
# nothing, and as the initial text of the paste-HTML box.
default_html_content = """
<table class="hasBorder"><thead><tr class="tblHead_2"><th width="10%" nowrap="">公司代號</th><th width="10%" nowrap="">公司簡稱</th><th nowrap="">發言日期</th><th width="10%" nowrap="">發言時間</th><th>主旨</th></tr></thead><tbody id="tab2"><tr class="even_2" onmouseover="this.className='mouseOn_2';" onmouseout="this.className='even_2';"><td>7724</td><td>諾亞克</td><td>114/04/01</td><td>00:06:30</td><td class="table02"><button style="width:300px;height:28px;text-align:left;background-color:transparent;border:0;cursor:pointer;" onclick="document.fm_t05sr01_1.step.value='1';document.fm_t05sr01_1.SEQ_NO.value='1';document.fm_t05sr01_1.SPOKE_TIME.value='630';document.fm_t05sr01_1.SPOKE_DATE.value='20250401';document.fm_t05sr01_1.COMPANY_NAME.value='諾亞克';document.fm_t05sr01_1.COMPANY_ID.value='7724';document.fm_t05sr01_1.skey.value='7724202504011';document.fm_t05sr01_1.hhc_co_name.value='諾亞克';openWindow(document.fm_t05sr01_1 ,'');" title="公告本公司董事會決議不分配113年度董事及員工酬勞">公告本公司董事會決議不分配113年度董事......</button></td></tr><tr class="odd_2" onmouseover="this.className='mouseOn_2';" onmouseout="this.className='odd_2';"><td>4117</td><td>普生</td><td>114/04/01</td><td>00:04:31</td><td class="table02"><button style="width:300px;height:28px;text-align:left;background-color:transparent;border:0;cursor:pointer;" onclick="document.fm_t05sr01_1.step.value='1';document.fm_t05sr01_1.SEQ_NO.value='7';document.fm_t05sr01_1.SPOKE_TIME.value='431';document.fm_t05sr01_1.SPOKE_DATE.value='20250401';document.fm_t05sr01_1.COMPANY_NAME.value='普生';document.fm_t05sr01_1.COMPANY_ID.value='4117';document.fm_t05sr01_1.skey.value='4117202503317';document.fm_t05sr01_1.hhc_co_name.value='普生';openWindow(document.fm_t05sr01_1 ,'');" title="公告本公司董事會決議不發放股利">公告本公司董事會決議不發放股利</button></td></tr></tbody></table>
"""
129
+
130
# Sidebar with data source options
st.sidebar.header("資料來源選項")
data_source = st.sidebar.radio(
    "選擇資料來源",
    ["從網站擷取資料", "使用範例資料", "貼上HTML代碼"]
)

# Initialize data frame
df = None

# Process based on data source selection
if data_source == "從網站擷取資料":
    df = extract_data_from_website()
    if df is None:
        st.sidebar.warning("從網站擷取資料失敗,切換到範例資料")
        df = extract_data_from_html(default_html_content)

elif data_source == "使用範例資料":
    df = extract_data_from_html(default_html_content)

else:  # "貼上HTML代碼"
    html_input = st.sidebar.text_area("貼上HTML代碼", value=default_html_content, height=300)
    if st.sidebar.button("解析HTML"):
        # st.button is True only on the rerun triggered by the click, so the
        # parsed table is kept in session state; otherwise touching any filter
        # widget below would rerun the script and discard the result.
        st.session_state["pasted_html_df"] = extract_data_from_html(html_input)
        st.sidebar.success("HTML解析完成!")
    df = st.session_state.get("pasted_html_df")

# Display and filter data
if df is not None and not df.empty:
    st.subheader("台灣證券交易所公告資料")

    # Add search filters
    col1, col2 = st.columns(2)
    with col1:
        search_code = st.text_input("依公司代號篩選")
    with col2:
        search_name = st.text_input("依公司名稱篩選")

    # Apply filters if provided. regex=False makes this a plain substring
    # match, so input such as "(" or "*" cannot raise a regex error.
    filtered_df = df.copy()
    if search_code:
        filtered_df = filtered_df[filtered_df['公司代號'].str.contains(search_code, regex=False)]
    if search_name:
        filtered_df = filtered_df[filtered_df['公司簡稱'].str.contains(search_name, regex=False)]

    # Display the data
    st.dataframe(filtered_df, use_container_width=True)

    # Download button (utf-8-sig writes a BOM at the start of the CSV bytes)
    csv = filtered_df.to_csv(index=False).encode('utf-8-sig')
    st.download_button(
        label="下載為CSV",
        data=csv,
        file_name="twse_announcements.csv",
        mime="text/csv",
    )

    # Display statistics
    st.subheader("資料統計")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("公告總數", len(filtered_df))
    with col2:
        company_count = filtered_df['公司代號'].nunique()
        st.metric("公司數量", company_count)

    # Show announcement details on selection
    if not filtered_df.empty:
        st.subheader("選擇公告以查看詳情")
        # Options are row positions within the filtered table
        selected_indices = st.multiselect(
            "選擇公告",
            options=list(range(len(filtered_df))),
            format_func=lambda i: f"{filtered_df.iloc[i]['公司簡稱']} - {filtered_df.iloc[i]['主旨'][:20]}..."
        )

        for idx in selected_indices:
            row = filtered_df.iloc[idx]
            with st.expander(f"{row['公司簡稱']} ({row['公司代號']}) - {row['發言日期']}"):
                st.write(f"**公司代號:** {row['公司代號']}")
                st.write(f"**公司簡稱:** {row['公司簡稱']}")
                st.write(f"**發言日期:** {row['發言日期']}")
                st.write(f"**發言時間:** {row['發言時間']}")
                st.write(f"**主旨內容:** {row['主旨']}")
else:
    st.warning("沒有可顯示的資料")

# Footer
st.markdown("---")
st.markdown("台灣證券交易所公告擷取工具 | 資料來源: [台灣證券交易所](https://mopsov.twse.com.tw/mops/web/index)")