2catycm committed on
Commit
044c7d2
·
1 Parent(s): 0bd37cc

初步结果

Browse files
app.py CHANGED
@@ -1,22 +1,34 @@
1
  import streamlit as st
2
- from pages import page1, page2, page3
3
 
4
  # 设置页面配置
5
  st.set_page_config(layout="wide")
6
 
7
  # 主应用
8
  def main():
9
- st.sidebar.title("导航")
10
- pages = {
11
- "NIPS 论文数据集高斯混合聚类分析": page1,
12
- "第二个子应用": page2,
13
- "第三个子应用": page3
14
- }
15
 
16
- page = st.sidebar.radio("选择子应用", tuple(pages.keys()))
17
 
18
- # 根据选择的子应用加载相应的页面
19
- pages[page].main()
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  if __name__ == "__main__":
22
  main()
 
1
  import streamlit as st
2
+ # from subapps import page1, page2, page3
3
 
4
  # 设置页面配置
5
  st.set_page_config(layout="wide")
6
 
7
  # 主应用
def main():
    """Register the sub-application pages and run the one currently selected.

    Each page is a standalone script under ``my_pages/``; the first entry is
    marked as the default page. The page object returned by ``st.navigation``
    is run immediately.

    Approach reference: https://www.cnblogs.com/wang_yb/p/18502232
    """
    # During development the third entry can point at "dev/test.py" instead.
    subapp_pages = [
        st.Page("my_pages/page1.py", title="无监督聚类动态变化", icon="📊", default=True),
        st.Page("my_pages/page2.py", title="第二个子应用", icon="📈"),
        st.Page("my_pages/page3.py", title="第三个子应用", icon="📉"),
    ]
    st.navigation(subapp_pages).run()
32
 
33
  if __name__ == "__main__":
34
  main()
{pages → dev}/test.py RENAMED
File without changes
{pages → my_pages}/page1.py RENAMED
@@ -10,10 +10,10 @@ from io import BytesIO
10
  import time
11
  from utils.data_processor import load_data, process_data, build_hyperedges
12
  from utils.visualizer import visualize_gmm, visualize_ratings
13
- from utils.hypergraph_drawer import draw_hypergraph
14
 
15
  def main():
16
- st.title("NIPS 论文数据集高斯混合聚类分析")
17
 
18
  # 自动播放
19
  slider_max = 10
@@ -31,10 +31,11 @@ def main():
31
  if st.session_state.play_state:
32
  button_label = "暂停"
33
  else:
34
- button_label = "播放"
35
  st.button(button_label, on_click=toggle_play, key="play_button")
36
  # 播放速度
37
- speed = st.slider("播放速度", min_value=0.1, max_value=2.0, value=1.0, step=0.1, key="speed_slider")
 
38
  # 主页面布局
39
 
40
  # 显示迭代次数滑条
@@ -52,16 +53,18 @@ def main():
52
  # 使用 sidebar 控制参数
53
  with st.sidebar:
54
  st.header("控制面板")
 
 
 
55
  max_samples = len(df)
56
  num_samples = st.slider("选择采样论文数量", min_value=1,
57
  max_value=min(100, max_samples), value=min(10, max_samples), step=1)
58
-
59
-
60
 
61
  # 添加复选框选择显示 paper 的属性
62
  display_attribute = st.selectbox(
63
  "选择显示 paper 的属性",
64
- ["index", "id", "title", "keywords", "author"]
65
  )
66
  # 选择是 top k 还是 top p
67
  display_option = st.selectbox(
@@ -81,31 +84,89 @@ def main():
81
  sampled_df, probabilities, paper_attributes = process_data(df, iteration, num_samples)
82
  # print(display_attribute) # 字符串
83
  hyperedges = build_hyperedges(probabilities, paper_attributes, display_attribute, top_k=top_k, top_p=top_p)
 
 
84
  # print(hyperedges)
85
 
86
 
 
87
 
 
 
88
 
89
- # 并排展示超图和高斯混合分布
90
- col1, col2 = st.columns(2)
91
- with col1:
92
- st.header("超图可视化")
93
- hypergraph_image = draw_hypergraph(hyperedges)
94
- st.image(hypergraph_image, caption="超图可视化", use_container_width=True)
95
-
96
- with col2:
97
- st.header("高斯混合分布聚类结果")
98
- fig_gmm = visualize_gmm(sampled_df, iteration)
99
- st.plotly_chart(fig_gmm, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # 显示采样论文的详细信息
102
  st.header("采样论文详细信息")
103
- st.dataframe(sampled_df[["title", "keywords", "rating_avg", "confidence_avg", "site"]])
 
 
 
104
 
105
  # 增加第二种可视化方式
106
- st.header("论文评分分布")
107
- fig_bar = visualize_ratings(sampled_df)
108
- st.plotly_chart(fig_bar, use_container_width=True)
109
 
110
 
111
 
@@ -131,8 +192,8 @@ def main():
131
 
132
 
133
 
134
- if __name__ == "__main__":
135
- # 设置页面布局
136
- st.set_page_config(layout="wide")
137
- # 运行主函数
138
- main()
 
10
  import time
11
  from utils.data_processor import load_data, process_data, build_hyperedges
12
  from utils.visualizer import visualize_gmm, visualize_ratings
13
+ from utils.hypergraph_drawer import draw_hypergraph, pyplot_fig_to_buffer
14
 
15
  def main():
16
+ st.title("NeurIPS 论文数据集高斯混合聚类分析")
17
 
18
  # 自动播放
19
  slider_max = 10
 
31
  if st.session_state.play_state:
32
  button_label = "暂停"
33
  else:
34
+ button_label = "开始拟合"
35
  st.button(button_label, on_click=toggle_play, key="play_button")
36
  # 播放速度
37
+ speed = st.slider("拟合速度", min_value=0.1, max_value=2.0, value=1.0, step=0.1, key="speed_slider")
38
+ # speed = st.slider("播放速度", min_value=0.1, max_value=2.0, value=1.0, step=0.1, key="speed_slider")
39
  # 主页面布局
40
 
41
  # 显示迭代次数滑条
 
53
  # 使用 sidebar 控制参数
54
  with st.sidebar:
55
  st.header("控制面板")
56
+ draw_width = st.slider("绘图宽度", min_value=3, max_value=20, value=6, step=1, key="draw_width")
57
+ draw_height = st.slider("绘图高度",min_value=3, max_value=20, value=6, step=1, key="draw_height")
58
+
59
  max_samples = len(df)
60
  num_samples = st.slider("选择采样论文数量", min_value=1,
61
  max_value=min(100, max_samples), value=min(10, max_samples), step=1)
62
+
 
63
 
64
  # 添加复选框选择显示 paper 的属性
65
  display_attribute = st.selectbox(
66
  "选择显示 paper 的属性",
67
+ ["order", "index", "id", "title", "keywords", "author"]
68
  )
69
  # 选择是 top k 还是 top p
70
  display_option = st.selectbox(
 
84
  sampled_df, probabilities, paper_attributes = process_data(df, iteration, num_samples)
85
  # print(display_attribute) # 字符串
86
  hyperedges = build_hyperedges(probabilities, paper_attributes, display_attribute, top_k=top_k, top_p=top_p)
87
+ hypergraph = hnx.Hypergraph(hyperedges)
88
+
89
  # print(hyperedges)
90
 
91
 
92
+ st.header("超图可视化")
93
 
94
+ draw_dual = st.toggle("对偶翻转", value=False, key="draw_dual")
95
+ euler_first = st.toggle("先看Euler Diagram", value=True, key="euler_first")
96
 
97
+ def euler_visualization(hypergraph):
98
+ st.subheader("Euler Diagram 可视化")
99
+ col1, col2, col3, col4 = st.columns(4)
100
+ with col1:
101
+ fill_edges = st.toggle("填充超边", value=True, key="fill_edges")
102
+ with col2:
103
+ toplexes = st.toggle("只保留未被包含的边(toplexes)", value=False, key="toplexes")
104
+ with col3:
105
+ with_edge_labels = st.toggle("不看边名", value=False, key="with_edge_labels")
106
+
107
+
108
+ col1, col2 = st.columns(2)
109
+ captions = ["论文聚类超图", "论文聚类对偶超图"]
110
+
111
+ if 'poses' not in st.session_state:
112
+ st.session_state.poses = [None, None]
113
+ if draw_dual:
114
+ hypergraph = hypergraph.dual()
115
+ captions = ["论文聚类对偶超图", "论文聚类超图"]
116
+ # st.session_state.poses = [st.session_state.poses[1], st.session_state.poses[0]]
117
+
118
+ with col1:
119
+ hypergraph_image, st.session_state.poses[0] = draw_hypergraph(hypergraph, draw_dual=False,
120
+ fill_edges=fill_edges, toplexes=toplexes,
121
+ with_edge_labels=with_edge_labels
122
+ # pos=st.session_state.poses[0]
123
+ ,draw_width=draw_width, draw_height=draw_height
124
+ )
125
+ st.image(hypergraph_image, caption = captions[0], use_container_width=True)
126
+
127
+ with col2:
128
+ hypergraph_image, st.session_state.poses[1] = draw_hypergraph(hypergraph, draw_dual=True,
129
+ fill_edges=fill_edges, toplexes=toplexes,
130
+ with_edge_labels=with_edge_labels
131
+ # pos=st.session_state.poses[1]
132
+ ,draw_width=draw_width, draw_height=draw_height
133
+ )
134
+ st.image(hypergraph_image, caption =captions[1], use_container_width=True)
135
+
136
+ def new_visualization(hypergraph):
137
+ st.subheader("新型可视化")
138
+ col1, col2 = st.columns(2)
139
+ with col1:
140
+ fig, ax = plt.subplots(figsize=(draw_width, draw_height))
141
+ hnx.draw_bipartite_using_euler(hypergraph)
142
+ st.image(pyplot_fig_to_buffer(plt.gcf()), caption="双列二分图可视化", use_container_width=True)
143
+ with col2:
144
+ fig, ax = plt.subplots(figsize=(draw_width, draw_height))
145
+ hnx.draw_incidence_upset(hypergraph)
146
+ st.image(pyplot_fig_to_buffer(plt.gcf()), caption="Incidence/UpSet 可视化", use_container_width=True)
147
+
148
+ if euler_first:
149
+ euler_visualization(hypergraph)
150
+ new_visualization(hypergraph)
151
+ else:
152
+ new_visualization(hypergraph)
153
+ euler_visualization(hypergraph)
154
+
155
+ st.header("高斯混合分布聚类结果")
156
+ fig_gmm = visualize_gmm(sampled_df, iteration)
157
+ st.plotly_chart(fig_gmm, use_container_width=True)
158
 
159
  # 显示采样论文的详细信息
160
  st.header("采样论文详细信息")
161
+ st.dataframe(sampled_df[["index", "title", "keywords", "rating_avg", "confidence_avg", "site"]
162
+ ]
163
+ # .style.highlight_max(axis=0)
164
+ )
165
 
166
  # 增加第二种可视化方式
167
+ # st.header("论文评分分布")
168
+ # fig_bar = visualize_ratings(sampled_df)
169
+ # st.plotly_chart(fig_bar, use_container_width=True)
170
 
171
 
172
 
 
192
 
193
 
194
 
195
+ # if __name__ == "__main__":
196
+ # # 设置页面布局
197
+ # st.set_page_config(layout="wide")
198
+ # # 运行主函数
199
+ main()
{pages → my_pages}/page2.py RENAMED
@@ -2,4 +2,6 @@ import streamlit as st
2
 
3
  def main():
4
  st.title("第二个子应用")
5
- st.write("这里是第二个子应用的内容。")
 
 
 
2
 
def main():
    """Render the placeholder title and text of the second sub-application."""
    st.title("第二个子应用")
    st.write("这里是第二个子应用的内容。")


# Called unconditionally (no __main__ guard): the page renders whenever this
# file is executed as a script.
main()
{pages → my_pages}/page3.py RENAMED
@@ -2,4 +2,6 @@ import streamlit as st
2
 
3
  def main():
4
  st.title("第三个子应用")
5
- st.write("这里是第三个子应用的内容。")
 
 
 
2
 
def main():
    """Render the placeholder title and text of the third sub-application."""
    st.title("第三个子应用")
    st.write("这里是第三个子应用的内容。")


# Called unconditionally (no __main__ guard): the page renders whenever this
# file is executed as a script.
main()
pages/__pycache__/page1.cpython-311.pyc DELETED
Binary file (6.44 kB)
 
pages/__pycache__/page2.cpython-311.pyc DELETED
Binary file (602 Bytes)
 
pages/__pycache__/page3.cpython-311.pyc DELETED
Binary file (602 Bytes)
 
utils/__pycache__/data_processor.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/data_processor.cpython-311.pyc and b/utils/__pycache__/data_processor.cpython-311.pyc differ
 
utils/__pycache__/hypergraph_drawer.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/hypergraph_drawer.cpython-311.pyc and b/utils/__pycache__/hypergraph_drawer.cpython-311.pyc differ
 
utils/data_processor.py CHANGED
@@ -9,7 +9,7 @@ def load_data():
9
 
10
  def process_data(df, iteration, num_samples):
11
  # 随机采样论文
12
- sampled_df = df.sample(n=num_samples, random_state=iteration)
13
 
14
  # 计算每个论文属于各个 cluster 的概率
15
  probabilities = []
@@ -20,6 +20,8 @@ def process_data(df, iteration, num_samples):
20
  probabilities.append(prob_list)
21
  paper_attributes.append(
22
  {
 
 
23
  "id": row["id"],
24
  "title": row["title"],
25
  "keywords": row["keywords"],
@@ -40,8 +42,9 @@ def build_hyperedges(
40
  # 构建超图边
41
  hyperedges: Dict[str, List[str]] = {}
42
  for idx, (prob, paper_attr) in enumerate(zip(probabilities, paper_attributes)):
43
- if display_attribute_name == "index":
44
- display_attribute = f"Paper {idx}"
 
45
  else:
46
  display_attribute: str = paper_attr[display_attribute_name]
47
  if top_k is not None:
@@ -50,10 +53,10 @@ def build_hyperedges(
50
  # 累加起来,直到第一次大于等于 p
51
  selected_indices = []
52
  cumulative_prob = 0.0
53
- for i, p in enumerate(prob):
54
  selected_indices.append(i)
55
  cumulative_prob += p
56
- if cumulative_prob >= top_p:
57
  break
58
 
59
 
 
9
 
10
  def process_data(df, iteration, num_samples):
11
  # 随机采样论文
12
+ sampled_df = df.sample(n=num_samples, random_state=iteration).reset_index()
13
 
14
  # 计算每个论文属于各个 cluster 的概率
15
  probabilities = []
 
20
  probabilities.append(prob_list)
21
  paper_attributes.append(
22
  {
23
+ "order": idx,
24
+ "index": row['index'],
25
  "id": row["id"],
26
  "title": row["title"],
27
  "keywords": row["keywords"],
 
42
  # 构建超图边
43
  hyperedges: Dict[str, List[str]] = {}
44
  for idx, (prob, paper_attr) in enumerate(zip(probabilities, paper_attributes)):
45
+ if display_attribute_name == "index" or display_attribute_name == "order":
46
+ # display_attribute = f"Paper {idx}"
47
+ display_attribute = f"Paper {paper_attr[display_attribute_name]}"
48
  else:
49
  display_attribute: str = paper_attr[display_attribute_name]
50
  if top_k is not None:
 
53
  # 累加起来,直到第一次大于等于 p
54
  selected_indices = []
55
  cumulative_prob = 0.0
56
+ for i, p in enumerate(np.sort(prob)[::-1]):
57
  selected_indices.append(i)
58
  cumulative_prob += p
59
+ if cumulative_prob > top_p+1e-4:
60
  break
61
 
62
 
utils/hypergraph_drawer.py CHANGED
@@ -1,20 +1,53 @@
 
 
1
  import hypernetx as hnx
2
  import matplotlib.pyplot as plt
3
  from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
4
  from io import BytesIO
5
 
6
- def draw_hypergraph(hyperedges):
7
- # 构建超图
8
- H = hnx.Hypergraph(hyperedges)
9
-
10
- # 绘制超图
11
- fig, ax = plt.subplots(figsize=(12, 8))
12
- hnx.draw(H, ax=ax)
13
-
14
- # 将超图保存为图像
15
  canvas = FigureCanvas(fig)
16
  buffer = BytesIO()
17
  canvas.print_png(buffer)
18
  buffer.seek(0)
 
 
19
 
20
- return buffer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from turtle import title
2
+ from typing import Dict
3
  import hypernetx as hnx
4
  import matplotlib.pyplot as plt
5
  from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
6
  from io import BytesIO
7
 
def pyplot_fig_to_buffer(fig):
    """Render a Matplotlib figure as PNG into an in-memory buffer.

    Args:
        fig: The Matplotlib figure to render.

    Returns:
        A ``BytesIO`` containing the PNG data, rewound to position 0 so it can
        be read from the start.
    """
    png_stream = BytesIO()
    # An Agg canvas is attached to the figure explicitly to do the rendering.
    FigureCanvas(fig).print_png(png_stream)
    png_stream.seek(0)
    return png_stream
15
+
16
 
def draw_hypergraph(
    H: hnx.Hypergraph = None,
    hyperedges: Dict = None,
    draw_dual: bool = False,
    toplexes: bool = False,
    fill_edges: bool = True,
    with_edge_labels: bool = True,
    with_node_labels: bool = True,
    title: str = None,
    pos=None,
    draw_width: int = 6,
    draw_height: int = 6,
):
    """Draw a hypergraph with HyperNetX and return it as a PNG image buffer.

    Args:
        H: Hypergraph to draw. When ``None``, one is built from ``hyperedges``.
        hyperedges: Edge dictionary passed to ``hnx.Hypergraph`` when ``H`` is
            not given.
        draw_dual: Draw ``H.dual()`` instead of ``H``.
        toplexes: Reduce the hypergraph with ``H.toplexes(return_hyp=True)``
            before drawing (applied after the optional dual step).
        fill_edges: Forwarded to ``hnx.draw``.
        with_edge_labels: Forwarded to ``hnx.draw``.
        with_node_labels: Forwarded to ``hnx.draw``.
        title: Optional axes title; nothing is set when ``None``.
        pos: Layout forwarded to ``hnx.draw``.
        draw_width: Figure width passed to ``figsize``.
        draw_height: Figure height passed to ``figsize``.

    Returns:
        A tuple ``(buffer, pos)``: the PNG buffer produced by
        ``pyplot_fig_to_buffer`` and the value ``hnx.draw`` returns when called
        with ``return_pos=True``.

    Raises:
        ValueError: If neither ``H`` nor ``hyperedges`` is provided.
    """
    # Build the hypergraph if only raw edges were supplied.
    if H is None and hyperedges is None:
        raise ValueError("Either H or hyperedges must be provided.")
    if H is None:
        H = hnx.Hypergraph(hyperedges)

    if draw_dual:
        H = H.dual()
    if toplexes:
        H = H.toplexes(return_hyp=True)

    # Draw the hypergraph on a fresh figure.
    fig, ax = plt.subplots(figsize=(draw_width, draw_height))
    try:
        if title is not None:
            ax.set_title(title)
        # NOTE(review): the zero label alphas presumably make the label
        # background boxes transparent — confirm against the HyperNetX version
        # in use.
        pos = hnx.draw(
            H,
            ax=ax,
            fill_edges=fill_edges,
            with_edge_labels=with_edge_labels,
            with_node_labels=with_node_labels,
            node_label_alpha=0.0,
            edge_label_alpha=0.0,
            pos=pos,
            return_pos=True,
        )
        # Render the figure to an in-memory PNG image.
        image_buffer = pyplot_fig_to_buffer(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # each call (and every Streamlit rerun) would leak a figure.
        plt.close(fig)

    return image_buffer, pos