File size: 2,829 Bytes
3039071
 
 
 
 
 
 
 
 
 
 
 
 
 
ec2a227
 
17fec11
3039071
ec2a227
 
 
3039071
ec2a227
3039071
 
 
 
 
 
ec2a227
 
 
3039071
ec2a227
3039071
 
ec2a227
3039071
ec2a227
3039071
ec2a227
 
 
 
 
 
3039071
 
 
 
 
 
 
 
ec2a227
 
 
3039071
17fec11
3039071
 
ec2a227
 
 
3039071
 
 
 
ec2a227
 
 
3039071
 
 
ec2a227
3039071
 
 
 
 
 
ec2a227
 
 
3039071
 
 
 
 
 
ec2a227
 
 
 
 
3039071
 
 
 
 
ec2a227
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "ipython==9.0.2",
#     "ipywidgets==8.1.5",
#     "marimo",
#     "numpy==2.1.3",
#     "pandas==2.2.3",
#     "scikit-learn==1.6.1",
#     "setuptools==78.1.0",
#     "ydata-profiling==4.16.1",
# ]
# ///

import marimo

# Version string written into the file header; presumably the marimo release
# that generated this notebook.
__generated_with = "0.12.9"
# The application object: every cell below registers itself on it through the
# `@app.cell` decorator, and the `__main__` guard at the bottom runs it.
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo

    # marimo is imported first so its spinner can be shown while the
    # remaining libraries are being imported.
    _import_spinner = mo.status.spinner("Importing libraries...")
    with _import_spinner:
        import numpy as np
        import pandas as pd
        from ydata_profiling import ProfileReport
        from sklearn import datasets
    return ProfileReport, datasets, mo, np, pd


@app.cell
def _(mo):
    # Introductory text rendered as markdown at the top of the app.
    mo.md(
        r"""
        # On-the-fly YData Profiling

        It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that task. This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML file.

        **Note: No data gets stored when you use this service**
        """
    )
    return


@app.cell
def _(mo, try_default_df):
    # The upload area is only created while the default-dataset switch is
    # off; otherwise the name is bound to None and the cell shows nothing.
    if try_default_df.value:
        uploaded_file = None
    else:
        uploaded_file = mo.ui.file(
            label="Drag and drop a CSV file here, or click to open file browser",
            kind="area",
            filetypes=[".csv"],
        )
    uploaded_file
    return (uploaded_file,)


@app.cell
def _(mo):
    # Switch that lets the user pick the bundled scikit-learn dataset instead
    # of uploading a CSV. The label is split across adjacent literals only to
    # keep the line short; the resulting string is unchanged.
    try_default_df = mo.ui.switch(
        label=(
            "Try default dataset "
            "([`diabetes` from scikit-learn]"
            "(https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))"
        )
    )
    try_default_df
    return (try_default_df,)


@app.cell
def _(mo):
    # Checkbox, ticked initially, whose value is forwarded to the report's
    # `minimal` option by the cell that builds the profile.
    minimal = mo.ui.checkbox(
        value=True,
        label="Minimal profiling",
    )
    minimal
    return (minimal,)


@app.cell
def _(datasets, mo, pd, try_default_df, uploaded_file):
    import io

    # Stop here (showing a hint instead) until a data source is available:
    # either the default-dataset switch is on or at least one file has been
    # uploaded. The `and` short-circuit matters because `uploaded_file` is
    # None while the switch is on.
    should_stop = not try_default_df.value and not uploaded_file.value
    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

    # Bound on both branches because the return statement below names it.
    diabetes = None
    if try_default_df.value:
        diabetes = datasets.load_diabetes()
        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    else:
        # `.contents` is expected to be the raw bytes of the first upload
        # (see marimo's file element docs); `read_csv` needs a path or a
        # file-like object, so wrap the bytes in an in-memory buffer.
        df = pd.read_csv(io.BytesIO(uploaded_file.value[0].contents))
    return df, diabetes, should_stop


@app.cell
def _(mo, profile):
    # Offer the rendered report as a downloadable HTML file. The widget is
    # held in an underscore-prefixed name so it stays private to this cell
    # (marimo convention), then displayed as the cell's last expression.
    _download_button = mo.download(
        label="Download YData Profile",
        mimetype="text/html",
        filename="ydata.html",
        data=profile.html,
    )
    _download_button
    return


@app.cell
def _(ProfileReport, df, minimal, mo):
    # Keep a spinner on screen while the report is built and rendered.
    _report_spinner = mo.status.spinner("Generating YData Profile report...")
    with _report_spinner:
        profile = ProfileReport(
            df,
            title="YData Profiling Report",
            minimal=minimal.value,
        )
        profile.to_notebook_iframe()
    return (profile,)


# Run the app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()