2catycm committed on
Commit
78e4509
·
1 Parent(s): f7825d3

feat: updates

Files changed (7)
  1. .gitignore +175 -0
  2. README.md +68 -6
  3. app.py +266 -0
  4. experiments/gmm_dataset.py +190 -0
  5. experiments/gmm_fitting.py +157 -0
  6. experiments/test.py +69 -0
  7. requirements.txt +11 -0
.gitignore ADDED
@@ -0,0 +1,175 @@
+ *.npz
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
README.md CHANGED
@@ -1,14 +1,76 @@
  ---
- title: VisualizationForGeneralizedGaussianMixture
- emoji: 📈
- colorFrom: blue
- colorTo: pink
  sdk: streamlit
- sdk_version: 1.44.0
  app_file: app.py
  pinned: false
  license: apache-2.0
- short_description: Interactive visualization of Generalized Gaussian Mixture
  ---
 
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
  ---
+ title: Generalized Gaussian Mixture Visualization
+ emoji: 🔄
+ colorFrom: indigo
+ colorTo: blue
  sdk: streamlit
+ sdk_version: 1.32.0
  app_file: app.py
  pinned: false
  license: apache-2.0
+ short_description: 'Interactive visualization of Generalized Gaussian Mixture Distribution.'
  ---
 
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ # Generalized Gaussian Mixture Visualization
+
+ ## Visualization Design
+
+ 1. Page layout:
+ ```plaintext
+ +------------------+-----------------------+
+ | Parameter        | Main display area     |
+ | sidebar          | +---------+---------+ |
+ | - shape param p  | |   3D    | Contour | |
+ | - components K   | | surface |  plot   | |
+ | - component      | +---------+---------+ |
+ |   parameters     | |  Parameter notes  | |
+ |                  | +-------------------+ |
+ +------------------+-----------------------+
+ ```
+
+ 2. Chart configuration:
+    - Left: 3D surface plot
+      - X axis: first dimension coordinate
+      - Y axis: second dimension coordinate
+      - Z axis: probability density
+      - viridis colorscale
+    - Right: contour plot
+      - X axis: first dimension coordinate
+      - Y axis: second dimension coordinate
+      - Color: probability density
+      - Component centers marked
+
+ 3. Key Plotly settings:
+ ```python
+ # Subplot layout
+ specs=[[{'type': 'surface'}, {'type': 'contour'}]]
+
+ # Axis configuration
+ scene=dict(  # axes of the 3D plot
+     xaxis_title='X',
+     yaxis_title='Y',
+     zaxis_title='Density'
+ )
+ xaxis=dict(title='X'),  # X axis of the 2D plot
+ yaxis=dict(title='Y')   # Y axis of the 2D plot
+ ```
+
+ ## Data Processing Pipeline
+
+ 1. Parameter handling
+    - Basic parameters: p (shape), K (number of components)
+    - Per component: center, scale, weight
+    - Updates in real time when a parameter changes
+
+ 2. Data generation (see the sketch after this diff)
+    - Generate grid points with meshgrid
+    - Compute the probability density at every point
+    - Reshape the data into the format Plotly expects
+
+ 3. Interactive updates
+    - Parameter changes trigger recomputation
+    - Charts and notes update dynamically
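For reference, a minimal sketch of the meshgrid-based density evaluation outlined in step 2 of the pipeline above, using the `GeneralizedGaussianMixture` class added in `experiments/gmm_dataset.py` (the parameter values shown are the app's defaults):

```python
import numpy as np
from experiments.gmm_dataset import GeneralizedGaussianMixture

# Build a mixture with the app's default parameters.
gmm = GeneralizedGaussianMixture(
    D=2, K=3, p=2.0,
    centers=np.array([[-2, -2], [0, 0], [2, 2]], dtype=np.float64),
    scales=np.array([[0.3, 0.3], [0.2, 0.2], [0.4, 0.4]], dtype=np.float64),
    weights=np.array([0.3, 0.4, 0.3]),
)

# Generate grid points with meshgrid, evaluate the density at each point,
# and reshape back to the grid shape that Plotly's surface/contour traces expect.
x = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, x)
Z = gmm.pdf(np.column_stack((X.ravel(), Y.ravel()))).reshape(X.shape)
```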
app.py ADDED
@@ -0,0 +1,266 @@
+ import streamlit as st
+ import numpy as np
+ from pathlib import Path
+ from experiments.gmm_dataset import GeneralizedGaussianMixture
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ from typing import List, Tuple
+
+ def init_session_state():
+     """Initialize the session state"""
+     if 'prev_K' not in st.session_state:
+         st.session_state.prev_K = 3
+     if 'p' not in st.session_state:
+         st.session_state.p = 2.0
+     if 'centers' not in st.session_state:
+         st.session_state.centers = np.array([[-2, -2], [0, 0], [2, 2]], dtype=np.float64)
+     if 'scales' not in st.session_state:
+         st.session_state.scales = np.array([[0.3, 0.3], [0.2, 0.2], [0.4, 0.4]], dtype=np.float64)
+     if 'weights' not in st.session_state:
+         st.session_state.weights = np.ones(3, dtype=np.float64) / 3
+     if 'sample_points' not in st.session_state:
+         st.session_state.sample_points = None
+
+ def create_default_parameters(K: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """Create default parameters"""
+     # Spread K centers evenly over [-3, 3]
+     x = np.linspace(-3, 3, K)
+     y = np.linspace(-3, 3, K)
+     centers = np.column_stack((x, y))
+
+     # Default scales and weights
+     scales = np.ones((K, 2), dtype=np.float64) * 3
+     weights = np.random.random(size=K).astype(np.float64)
+     weights /= weights.sum()  # normalize the weights
+     return centers, scales, weights
+
+ def generate_latex_formula(p: float, K: int, centers: np.ndarray,
+                            scales: np.ndarray, weights: np.ndarray) -> str:
+     """Generate the LaTeX formula"""
+     formula = r"P(x) = \sum_{k=1}^{" + str(K) + r"} \pi_k P_{\theta_k}(x) \\"
+     formula += r"P_{\theta_k}(x) = \eta_k \exp(-s_k d_k(x)) = \frac{p}{2\alpha_k \Gamma(1/p) }\exp(-\frac{|x-c_k|^p}{\alpha_k^p})= \frac{p}{2\alpha_k \Gamma(1/p) }\exp(-|\frac{x-c_k}{\alpha_k}|^p) \\"
+     formula += r"\text{where: }"
+
+     for k in range(K):
+         c = centers[k]
+         s = scales[k]
+         w = weights[k]
+         component = f"P_{k+1}(x) = \\frac{{{p:.1f}}}{{2\\alpha_{k+1} \\Gamma(1/{p:.1f})}}\\exp(-|\\frac{{x-({c[0]:.1f}, {c[1]:.1f})}}{{{s[0]:.1f}, {s[1]:.1f}}}|^{{{p:.1f}}}) \\\\"
+         formula += component
+         formula += f"\\pi_{k+1} = {w:.2f} \\\\"
+
+     return formula
+
+ st.set_page_config(page_title="GMM Distribution Visualization", layout="wide")
+ st.title("Generalized Gaussian Mixture Visualization")
+
+ # Initialize the session state
+ init_session_state()
+
+ # Sidebar parameter settings
+ with st.sidebar:
+     st.header("Distribution parameters")
+
+     # Basic distribution parameters
+     st.session_state.p = st.slider("Shape parameter (p)", 0.1, 5.0, st.session_state.p, 0.1,
+                                    help="p=1: Laplace distribution, p=2: Gaussian distribution, p→∞: uniform distribution")
+     K = st.slider("Number of components (K)", 1, 5, st.session_state.prev_K)
+
+     # Re-initialize the parameters when K changes
+     if K != st.session_state.prev_K:
+         centers, scales, weights = create_default_parameters(K)
+         st.session_state.centers = centers
+         st.session_state.scales = scales
+         st.session_state.weights = weights
+         st.session_state.prev_K = K
+
+     # Advanced settings
+     st.subheader("Advanced settings")
+     show_advanced = st.checkbox("Show component parameters", value=False)
+
+     if show_advanced:
+         # Set the parameters of each component
+         centers_list: List[List[float]] = []
+         scales_list: List[List[float]] = []
+         weights_list: List[float] = []
+
+         for k in range(K):
+             st.write(f"Component {k+1}")
+             col1, col2 = st.columns(2)
+             with col1:
+                 cx = st.number_input(f"Center X_{k+1}", -5.0, 5.0, float(st.session_state.centers[k][0]), 0.1)
+                 cy = st.number_input(f"Center Y_{k+1}", -5.0, 5.0, float(st.session_state.centers[k][1]), 0.1)
+             with col2:
+                 sx = st.number_input(f"Scale X_{k+1}", 0.1, 3.0, float(st.session_state.scales[k][0]), 0.1)
+                 sy = st.number_input(f"Scale Y_{k+1}", 0.1, 3.0, float(st.session_state.scales[k][1]), 0.1)
+             w = st.slider(f"Weight_{k+1}", 0.0, 1.0, float(st.session_state.weights[k]), 0.1)
+
+             centers_list.append([cx, cy])
+             scales_list.append([sx, sy])
+             weights_list.append(w)
+
+         centers = np.array(centers_list, dtype=np.float64)
+         scales = np.array(scales_list, dtype=np.float64)
+         weights = np.array(weights_list, dtype=np.float64)
+         weights = weights / weights.sum()
+
+         st.session_state.centers = centers
+         st.session_state.scales = scales
+         st.session_state.weights = weights
+     else:
+         centers = st.session_state.centers
+         scales = st.session_state.scales
+         weights = st.session_state.weights
+
+     # Sampling settings
+     st.subheader("Sampling settings")
+     n_samples = st.slider("Number of sample points", 5, 20, 10)
+     if st.button("Resample"):
+         # Draw random samples
+         samples = []
+         for _ in range(n_samples):
+             # Pick a component
+             k = np.random.choice(K, p=weights)
+             # Draw from the chosen component (a Gaussian approximation of the
+             # generalized Gaussian component, regardless of p)
+             sample = np.random.normal(centers[k], scales[k], size=2)
+             samples.append(sample)
+         st.session_state.sample_points = np.array(samples)
+
+ # Create the GMM dataset
+ dataset = GeneralizedGaussianMixture(
+     D=2,
+     K=K,
+     p=st.session_state.p,
+     centers=centers[:K],
+     scales=scales[:K],
+     weights=weights[:K]
+ )
+
+ # Generate the grid data
+ x = np.linspace(-5, 5, 100)
+ y = np.linspace(-5, 5, 100)
+ X, Y = np.meshgrid(x, y)
+ xy = np.column_stack((X.ravel(), Y.ravel()))
+
+ # Compute the probability density
+ Z = dataset.pdf(xy).reshape(X.shape)
+
+ # Create the 2D and 3D visualizations
+ fig = make_subplots(
+     rows=1, cols=2,
+     specs=[[{'type': 'surface'}, {'type': 'contour'}]],
+     subplot_titles=('3D density surface', 'Contours and component centers')
+ )
+
+ # 3D surface
+ surface = go.Surface(
+     x=X, y=Y, z=Z,
+     colorscale='viridis',
+     showscale=True,
+     colorbar=dict(x=0.45)
+ )
+ fig.add_trace(surface, row=1, col=1)
+
+ # Contour plot with component centers
+ contour = go.Contour(
+     x=x, y=y, z=Z,
+     colorscale='viridis',
+     showscale=True,
+     colorbar=dict(x=1.0),
+     contours=dict(
+         showlabels=True,
+         labelfont=dict(size=12)
+     )
+ )
+ fig.add_trace(contour, row=1, col=2)
+
+ # Add the component centers
+ fig.add_trace(
+     go.Scatter(
+         x=centers[:K, 0], y=centers[:K, 1],
+         mode='markers+text',
+         marker=dict(size=10, color='red'),
+         text=[f'C{i+1}' for i in range(K)],
+         textposition="top center",
+         name='Component centers'
+     ),
+     row=1, col=2
+ )
+
+ # Add the sample points (if any)
+ if st.session_state.sample_points is not None:
+     samples = st.session_state.sample_points
+     # Probability density at each sample point
+     probs = dataset.pdf(samples)
+     # Posterior probability of each component for each sample point;
+     # use the normalized component densities so that components with
+     # different scales are weighted correctly
+     posteriors = []
+     for sample in samples:
+         component_probs = [
+             weights[k] * dataset.component_pdf(sample.reshape(1, -1), k)[0]
+             for k in range(K)
+         ]
+         total = sum(component_probs)
+         posteriors.append([cp / total for cp in component_probs])
+
+     # Add the sample points to the chart
+     fig.add_trace(
+         go.Scatter(
+             x=samples[:, 0], y=samples[:, 1],
+             mode='markers+text',
+             marker=dict(
+                 size=8,
+                 color='yellow',
+                 line=dict(color='black', width=1)
+             ),
+             text=[f'S{i+1}' for i in range(len(samples))],
+             textposition="bottom center",
+             name='Sample points'
+         ),
+         row=1, col=2
+     )
+
+     # Show the probability information for each sample point
+     st.subheader("Sample point information")
+     for i, (sample, prob, post) in enumerate(zip(samples, probs, posteriors)):
+         st.write(f"Sample S{i+1} ({sample[0]:.2f}, {sample[1]:.2f}):")
+         st.write(f"- Probability density: {prob:.4f}")
+         st.write("- Posterior probabilities:")
+         for k in range(K):
+             st.write(f"  - Component {k+1}: {post[k]:.4f}")
+         st.write("---")
+
+ # Update the layout
+ fig.update_layout(
+     title='Generalized Gaussian Mixture Distribution',
+     showlegend=True,
+     width=1200,
+     height=600,
+     scene=dict(
+         xaxis_title='X',
+         yaxis_title='Y',
+         zaxis_title='Density'
+     )
+ )
+
+ # Update the axes of the 2D plot
+ fig.update_xaxes(title_text='X', row=1, col=2)
+ fig.update_yaxes(title_text='Y', row=1, col=2)
+
+ # Show the figure
+ st.plotly_chart(fig, use_container_width=True)
+
+ # Parameter explanation
+ with st.expander("Distribution parameter notes"):
+     st.markdown("""
+     - **Shape parameter (p)**: controls the shape of the distribution
+         - p = 1: Laplace distribution
+         - p = 2: Gaussian distribution
+         - p → ∞: uniform distribution
+     - **Component parameters**: each component is determined by
+         - Center (μ): location of the peak, given by its X and Y coordinates
+         - Scale (α): spread of the distribution; may differ in X and Y
+         - Weight (π): mixing coefficient; the weights sum to 1
+     """)
+
+ # Show the mathematical formula for the current parameters
+ st.latex(generate_latex_formula(st.session_state.p, K, centers[:K], scales[:K], weights[:K]))
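The per-sample posteriors reported above are the standard mixture responsibilities: with normalized component densities $P_{\theta_k}$ and weights $\pi_k$, the probability that sample $x$ came from component $k$ is

$$\gamma_k(x) = \frac{\pi_k\, P_{\theta_k}(x)}{\sum_{j=1}^{K} \pi_j\, P_{\theta_j}(x)},$$

which is what the `posteriors` loop computes via `dataset.component_pdf`.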
experiments/gmm_dataset.py ADDED
@@ -0,0 +1,190 @@
+ import numpy as np
+ from pathlib import Path
+ from scipy.special import gamma
+ from typing import Optional, Tuple, Dict, List, Union
+ import torch
+ import os
+
+ class GeneralizedGaussianMixture:
+     r"""Generalized Gaussian mixture dataset generator
+     P_{\theta_k}(x_i) = \eta_k \exp(-s_k d_k(x_i)) = \frac{p}{2\alpha_k \Gamma(1/p)}\exp(-|\frac{x_i-c_k}{\alpha_k}|^p)
+     """
+
+     def __init__(self,
+                  D: int = 2,  # dimensionality
+                  K: int = 3,  # number of components
+                  p: float = 2.0,  # exponent; p=2 is the standard Gaussian
+                  centers: Optional[np.ndarray] = None,  # component centers
+                  scales: Optional[np.ndarray] = None,  # scale parameters
+                  weights: Optional[np.ndarray] = None,  # mixture weights
+                  seed: int = 42):  # random seed
+         """Initialize the GMM dataset generator
+         Args:
+             D: data dimensionality
+             K: number of components
+             p: exponent parameter controlling the shape of the distribution
+             centers: component centers, shape (K, D)
+             scales: scale parameters, shape (K, D)
+             weights: mixture weights, shape (K,)
+             seed: random seed
+         """
+         self.D = D
+         self.K = K
+         self.p = p
+         self.seed = seed
+         np.random.seed(seed)
+
+         # Initialize the distribution parameters
+         if centers is None:
+             self.centers = np.random.randn(K, D) * 2
+         else:
+             self.centers = centers
+
+         if scales is None:
+             self.scales = np.random.uniform(0.1, 0.5, size=(K, D))
+         else:
+             self.scales = scales
+
+         if weights is None:
+             self.weights = np.random.dirichlet(np.ones(K))
+         else:
+             self.weights = weights / weights.sum()  # make sure the weights sum to 1
+
+     def component_pdf(self, x: np.ndarray, k: int) -> np.ndarray:
+         """Probability density of the k-th component
+         Args:
+             x: input data points, shape (N, D)
+             k: component index
+         Returns:
+             density values, shape (N,)
+         """
+         # Normalization constant
+         norm_const = self.p / (2 * self.scales[k] * gamma(1/self.p))
+
+         # Compute |x_i - c_k|^p / α_k^p
+         z = np.abs(x - self.centers[k]) / self.scales[k]
+         exp_term = np.exp(-np.sum(z**self.p, axis=1))
+
+         return np.prod(norm_const) * exp_term
+
+     def pdf(self, x: np.ndarray) -> np.ndarray:
+         """Probability density of the mixture
+         Args:
+             x: input data points, shape (N, D)
+         Returns:
+             density values, shape (N,)
+         """
+         density = np.zeros(len(x))
+         for k in range(self.K):
+             density += self.weights[k] * self.component_pdf(x, k)
+         return density
+
+     def generate_component_samples(self, n: int, k: int) -> np.ndarray:
+         """Draw samples from the k-th component
+         Args:
+             n: number of samples
+             k: component index
+         Returns:
+             sample points, shape (n, D)
+         """
+         # Exact inverse-transform-style sampling for the exponential-power
+         # density: if g ~ Gamma(1/p, 1) and s is a random sign, then
+         # s * g^(1/p) has density proportional to exp(-|z|^p)
+         g = np.random.gamma(shape=1/self.p, scale=1.0, size=(n, self.D))
+         signs = np.random.choice([-1.0, 1.0], size=(n, self.D))
+         samples = self.centers[k] + self.scales[k] * signs * g ** (1/self.p)
+         return samples
+
+     def generate_samples(self, N: int) -> Tuple[np.ndarray, np.ndarray]:
+         """Generate samples from the mixture
+         Args:
+             N: total number of samples
+         Returns:
+             X: generated data points, shape (N, D)
+             y: corresponding density values, shape (N,)
+         """
+         # Decide how many samples each component gets from the mixture weights
+         n_samples = np.random.multinomial(N, self.weights)
+
+         # Draw samples from each component
+         samples = []
+         for k in range(self.K):
+             x = self.generate_component_samples(n_samples[k], k)
+             samples.append(x)
+
+         # Stack and shuffle the samples
+         X = np.vstack(samples)
+         idx = np.random.permutation(N)
+         X = X[idx]
+
+         # Compute the probability density
+         y = self.pdf(X)
+
+         return X, y
+
+     def save_dataset(self, save_dir: Union[str, Path], name: str = 'gmm_dataset') -> None:
+         """Save the dataset to disk
+         Args:
+             save_dir: output directory
+             name: dataset name
+         """
+         save_path = Path(save_dir)
+         save_path.mkdir(parents=True, exist_ok=True)
+
+         # Generate and save the data
+         X, y = self.generate_samples(N=1000)
+         np.savez(str(save_path / f'{name}.npz'),
+                  X=X, y=y,
+                  centers=self.centers,
+                  scales=self.scales,
+                  weights=self.weights,
+                  D=self.D,
+                  K=self.K,
+                  p=self.p)
+
+     @classmethod
+     def load_dataset(cls, file_path: Union[str, Path]) -> "GeneralizedGaussianMixture":
+         """Load a dataset from disk
+         Args:
+             file_path: path to the data file
+         Returns:
+             the loaded GMM object
+         """
+         data = np.load(str(file_path))
+         return cls(
+             D=int(data['D']),
+             K=int(data['K']),
+             p=float(data['p']),
+             centers=data['centers'],
+             scales=data['scales'],
+             weights=data['weights']
+         )
+
+ def test_gmm_dataset():
+     """Test the GMM dataset generator"""
+     # Create a 2D GMM dataset
+     gmm = GeneralizedGaussianMixture(
+         D=2,
+         K=3,
+         p=2.0,
+         centers=np.array([[-2, -2], [0, 0], [2, 2]]),
+         scales=np.array([[0.3, 0.3], [0.2, 0.2], [0.4, 0.4]]),
+         weights=np.array([0.3, 0.4, 0.3])
+     )
+
+     # Generate samples
+     X, y = gmm.generate_samples(1000)
+
+     # Save the dataset
+     gmm.save_dataset('test_data')
+
+     # Load the dataset
+     loaded_gmm = GeneralizedGaussianMixture.load_dataset('test_data/gmm_dataset.npz')
+
+     # Check that the saved and loaded parameters match
+     assert np.allclose(gmm.centers, loaded_gmm.centers)
+     assert np.allclose(gmm.scales, loaded_gmm.scales)
+     assert np.allclose(gmm.weights, loaded_gmm.weights)
+
+     print("GMM dataset test passed!")
+
+ if __name__ == '__main__':
+     test_gmm_dataset()
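A note on the Gamma-based draw in `generate_component_samples` (the standard exact sampler for this family, replacing the earlier uniform-power heuristic, which did not match the stated density): if $g \sim \mathrm{Gamma}(1/p,\,1)$ and $s$ is an independent random sign, a change of variables gives $z = s\, g^{1/p}$ the density

$$f(z) = \frac{p}{2\,\Gamma(1/p)} \exp(-|z|^p),$$

so $x = c_k + \alpha_k z$, applied per dimension, follows the component density in the class docstring.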
experiments/gmm_fitting.py ADDED
@@ -0,0 +1,157 @@
+ import numpy as np
+ import torch
+ from sklearn.neural_network import MLPRegressor
+ from pathlib import Path
+ import sys
+ import json
+ import os
+ import shutil
+ from typing import Any, Optional
+
+ # Add pykan to the Python path
+ repo_root = Path(__file__).parent.parent.parent
+ sys.path.append(str(repo_root / 'pykan'))
+
+ from kan import *
+ # Import gmm_dataset; try both package-relative and script-style paths
+ try:
+     from .gmm_dataset import GeneralizedGaussianMixture
+ except ImportError:
+     from gmm_dataset import GeneralizedGaussianMixture
+
+ def train_and_evaluate(dataset: GeneralizedGaussianMixture,
+                        save_dir: Path,
+                        kan_config: Optional[dict[str, Any]] = None,
+                        random_state: int = 42) -> dict[str, Any]:
+     """Train and evaluate the different models"""
+     save_dir.mkdir(parents=True, exist_ok=True)
+
+     # Generate the training and test data
+     X_train, y_train = dataset.generate_samples(N=1000)
+     X_test, y_test = dataset.generate_samples(N=200)
+
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     torch.set_default_dtype(torch.float64)  # use double precision
+
+     # Convert the data to PyTorch format (from_numpy keeps the arrays in
+     # float64, matching the default dtype set above)
+     train_data = {
+         'train_input': torch.from_numpy(X_train).to(device),
+         'train_label': torch.from_numpy(y_train).reshape(-1, 1).to(device),
+         'test_input': torch.from_numpy(X_test).to(device),
+         'test_label': torch.from_numpy(y_test).reshape(-1, 1).to(device)
+     }
+
+     # Save the training data
+     np.savez(save_dir / f'data_{random_state}.npz',
+              X_train=X_train, y_train=y_train,
+              X_test=X_test, y_test=y_test)
+
+     # Train the KAN
+     if kan_config is None:
+         kan_config = {
+             'width': [dataset.D, 5, 1],
+             'grid': 5,
+             'k': 3
+         }
+
+     # Make sure the device argument is a string
+     kan_model = KAN(**kan_config, seed=random_state, device=str(device))
+     kan_model = kan_model.to(device)  # make sure the model is on the right device
+     results = kan_model.fit(train_data, opt="LBFGS", steps=50, lamb=0.001)
+
+     # Train the MLP
+     mlp = MLPRegressor(
+         hidden_layer_sizes=(10, 5),
+         max_iter=1000,
+         random_state=random_state
+     )
+     mlp.fit(X_train, y_train)
+
+     # Compute and save the predictions
+     grid_x = np.linspace(X_train.min(), X_train.max(), 100)
+     grid_y = np.linspace(X_train.min(), X_train.max(), 100)
+     XX, YY = np.meshgrid(grid_x, grid_y)
+     grid_points = np.column_stack((XX.ravel(), YY.ravel()))
+
+     with torch.no_grad():
+         kan_pred = kan_model(torch.from_numpy(grid_points).to(device)).cpu().numpy()
+         mlp_pred = mlp.predict(grid_points)
+         true_density = dataset.pdf(grid_points)
+
+     # Test-set RMSE (detach before converting to NumPy)
+     kan_test_rmse = np.sqrt(np.mean((kan_model(train_data['test_input']).detach().cpu().numpy() - y_test.reshape(-1, 1))**2))
+     mlp_test_rmse = np.sqrt(np.mean((mlp.predict(X_test).reshape(-1, 1) - y_test.reshape(-1, 1))**2))
+
+     evaluation = {
+         'random_state': random_state,
+         'kan_test_rmse': float(kan_test_rmse),
+         'mlp_test_rmse': float(mlp_test_rmse),
+         'training_history': results
+     }
+
+     # Save the predictions
+     np.savez(save_dir / f'predictions_{random_state}.npz',
+              grid_points=grid_points,
+              kan_pred=kan_pred,
+              mlp_pred=mlp_pred,
+              true_density=true_density)
+
+     # Save the evaluation results
+     with open(save_dir / f'evaluation_{random_state}.json', 'w') as f:
+         json.dump(evaluation, f)
+
+     return evaluation
+
+ def run_experiments(save_dir: Path, n_experiments: int = 5) -> dict[str, float]:
+     """Run several randomized experiments"""
+     save_dir.mkdir(parents=True, exist_ok=True)
+
+     all_results = []
+     base_seed = 42
+
+     for i in range(n_experiments):
+         print(f"Running experiment {i+1}/{n_experiments}")
+         random_state = base_seed + i
+
+         # Create the dataset
+         dataset = GeneralizedGaussianMixture(
+             D=2,
+             K=3,
+             p=2.0,
+             centers=np.array([[-2, -2], [0, 0], [2, 2]]),
+             scales=np.array([[0.3, 0.3], [0.2, 0.2], [0.4, 0.4]]),
+             weights=np.array([0.3, 0.4, 0.3]),
+             seed=random_state
+         )
+
+         # Train and evaluate
+         result = train_and_evaluate(dataset, save_dir / str(random_state), random_state=random_state)
+         all_results.append(result)
+
+     # Save all the results
+     with open(save_dir / 'all_results.json', 'w') as f:
+         json.dump(all_results, f)
+
+     # Compute summary statistics
+     kan_rmses = [r['kan_test_rmse'] for r in all_results]
+     mlp_rmses = [r['mlp_test_rmse'] for r in all_results]
+
+     statistics = {
+         'kan_mean_rmse': float(np.mean(kan_rmses)),
+         'kan_std_rmse': float(np.std(kan_rmses)),
+         'mlp_mean_rmse': float(np.mean(mlp_rmses)),
+         'mlp_std_rmse': float(np.std(mlp_rmses)),
+     }
+
+     with open(save_dir / 'statistics.json', 'w') as f:
+         json.dump(statistics, f)
+
+     return statistics
+
+ if __name__ == '__main__':
+     # Use a relative path; results go to experiments/results
+     results_dir = Path(__file__).parent / 'results'
+     stats = run_experiments(results_dir)
+     print("\nExperiment Statistics:")
+     print(f"KAN Test RMSE: {stats['kan_mean_rmse']:.4f} ± {stats['kan_std_rmse']:.4f}")
+     print(f"MLP Test RMSE: {stats['mlp_mean_rmse']:.4f} ± {stats['mlp_std_rmse']:.4f}")
experiments/test.py ADDED
@@ -0,0 +1,69 @@
+ import plotly.graph_objects as go
+
+ # Example data (replace with real data)
+ methods = ['RSRM', 'PSRN', 'NGGP', 'PySR', 'BMS', 'uDSR', 'AIF',
+            'DGSR', 'E2E', 'SymINDy', 'PhySO', 'TPSR', 'SPL',
+            'DEAP', 'SINDy', 'NSRS', 'gplearn', 'SNIP', 'KAN', 'EQL']
+ recovery_rates = [85, 78, 92, 88, 76, 83, 95, 81, 89, 77, 84, 86, 80,
+                   79, 82, 87, 75, 88, 90, 84]  # recovery rates in percent
+ errors = [3, 4, 2, 3, 5, 2, 1, 3, 2, 4, 3, 2, 3, 4, 2, 3, 5, 2, 3, 2]  # error bar extents
+
+ # Create the figure object
+ fig = go.Figure()
+
+ # Add the data points with error bars
+ fig.add_trace(go.Scatter(
+     x=recovery_rates,
+     y=methods,
+     mode='markers',
+     error_x=dict(
+         type='data',
+         array=errors,
+         visible=True,
+         color='#FF5733',
+         thickness=2,
+         width=10
+     ),
+     marker=dict(
+         size=12,
+         color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
+                '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
+                '#aec7e8', '#ffbb78', '#98df8a', '#ff9896', '#c5b0d5',
+                '#c49c94', '#f7b6d2', '#c7c7c7', '#dbdb8d', '#9edae5'],
+         opacity=0.8
+     )
+ ))
+
+ # Configure the layout
+ fig.update_layout(
+     title='Recovery rate comparison across methods',
+     xaxis=dict(
+         title='Recovery rate (%)',
+         range=[0, 100],
+         dtick=20,
+         title_standoff=25
+     ),
+     yaxis=dict(
+         title='Methods',
+         title_font=dict(size=14),
+         tickfont=dict(size=12),
+         autorange="reversed"  # show the first method at the top
+     ),
+     hovermode='closest',
+     width=1000,
+     height=600,
+     showlegend=False
+ )
+
+ # Add an annotation (optional)
+ fig.add_annotation(
+     x=0,
+     y=0.95,
+     xref='paper',
+     yref='paper',
+     text='Zhihu @x66ccff',
+     showarrow=False,
+     font=dict(size=10)
+ )
+
+ fig.show()
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ streamlit>=1.32.0
+ numpy>=1.21.0
+ pandas>=1.3.0
+ plotly>=5.18.0
+ scipy>=1.7.0
+ torch>=1.9.0
+ scikit-learn>=1.0.0
+ ipython>=8.0.0
+ ipywidgets>=7.0.0
+ nbformat>=5.0.0
+ sympy>=1.8