Spaces:

lucharo
/

ydata-profiling-marimo

Running

File size: 2,829 Bytes

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "ipython==9.0.2",
#     "ipywidgets==8.1.5",
#     "marimo",
#     "numpy==2.1.3",
#     "pandas==2.2.3",
#     "scikit-learn==1.6.1",
#     "setuptools==78.1.0",
#     "ydata-profiling==4.16.1",
# ]
# ///

import marimo

# marimo version string recorded for this notebook file.
__generated_with = "0.12.9"
# The app object; every `@app.cell` function below is registered on it.
app = marimo.App(width="medium")


@app.cell
def _():
    # marimo is imported on its own first: its spinner wraps the other imports.
    import marimo as mo

    # Keep a status spinner visible while the data libraries are imported.
    with mo.status.spinner("Importing libraries..."):
        import numpy as np
        import pandas as pd
        from sklearn import datasets
        from ydata_profiling import ProfileReport

    return (ProfileReport, datasets, mo, np, pd)


@app.cell
def _(mo):
    # Intro text shown at the top of the app: what it does and a privacy note.
    mo.md(
        r"""
        # On-the-fly YData Profiling 

        It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that task. This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML report. 

        **Note: No data gets stored when you use this service**
        """
    )
    return


@app.cell
def _(mo, try_default_df):
    # The upload area is only built while the default-dataset switch is off;
    # with the switch on, `uploaded_file` is None and so is the cell's output.
    if try_default_df.value:
        uploaded_file = None
    else:
        uploaded_file = mo.ui.file(
            label="Drag and drop a CSV file here, or click to open file browser",
            kind="area",
            filetypes=[".csv"],
        )
    uploaded_file
    return (uploaded_file,)


@app.cell
def _(mo):
    # Switch that selects the bundled scikit-learn dataset instead of an upload.
    try_default_df = mo.ui.switch(
        label="Try default dataset ([`diabetes` from scikit-learn](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))",
    )
    try_default_df
    return (try_default_df,)


@app.cell
def _(mo):
    # Checkbox, ticked by default, whose value is passed on as the report's
    # `minimal` option.
    minimal = mo.ui.checkbox(
        value=True,
        label="Minimal profiling",
    )
    minimal
    return (minimal,)


@app.cell
def _(datasets, mo, pd, try_default_df, uploaded_file):
    # Build `df`, the dataframe to profile, from either the scikit-learn
    # diabetes dataset (switch on) or the first uploaded CSV (switch off).
    import io

    # `uploaded_file` is None while the switch is on, so the switch is tested
    # first and short-circuits before `.value` is read from the upload widget.
    should_stop = not try_default_df.value and not uploaded_file.value
    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

    # Bound on both paths so the `return` below never refers to an unset name.
    diabetes = None
    if try_default_df.value:
        diabetes = datasets.load_diabetes()
        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    else:
        # The upload's `contents` is the raw file bytes; `read_csv` expects a
        # path or a file-like object, so wrap the bytes in an in-memory buffer.
        df = pd.read_csv(io.BytesIO(uploaded_file.value[0].contents))
    return df, diabetes, should_stop


@app.cell
def _(mo, profile):
    # Download button that serves the report's HTML under the name `ydata.html`.
    mo.download(
        label="Download YData Profile",
        filename="ydata.html",
        mimetype="text/html",
        data=profile.html,
    )
    return


@app.cell
def _(ProfileReport, df, minimal, mo):
    # Build the report for `df` and render it inline; the spinner stays up
    # for the duration of both steps.
    with mo.status.spinner("Generating YData Profile report..."):
        profile = ProfileReport(
            df,
            title="YData Profiling Report",
            minimal=minimal.value,
        )
        profile.to_notebook_iframe()
    return (profile,)


# Run the app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()