# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "ipython==9.0.2",
#     "ipywidgets==8.1.5",
#     "marimo",
#     "numpy==2.1.3",
#     "pandas==2.2.3",
#     "scikit-learn==1.6.1",
#     "setuptools==78.1.0",
#     "ydata-profiling==4.16.1",
# ]
# ///

import marimo

__generated_with = "0.12.9"
app = marimo.App(width="medium")


@app.cell
def _():
    # Shared imports for every other cell. The third-party imports run inside
    # a spinner so the UI shows progress while they load.
    import io

    import marimo as mo

    with mo.status.spinner("Importing libraries..."):
        import numpy as np
        import pandas as pd
        from ydata_profiling import ProfileReport
        from sklearn import datasets
    return ProfileReport, datasets, io, mo, np, pd


@app.cell
def _(mo):
    # Static introduction shown at the top of the app.
    mo.md(
        r"""
        # On-the-fly YData Profiling

        It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that tool.

        This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML report.

        **Note: No data gets stored when you used this service**
        """
    )
    return


@app.cell
def _(mo, try_default_df):
    # The upload area is only offered while the "default dataset" switch is
    # off; otherwise `uploaded_file` is None and nothing is rendered.
    if try_default_df.value:
        uploaded_file = None
    else:
        uploaded_file = mo.ui.file(
            filetypes=[".csv"],
            kind='area',
            label="Drag and drop a CSV file here, or click to open file browser",
        )
    uploaded_file
    return (uploaded_file,)


@app.cell
def _(mo):
    # Switch that swaps the uploaded CSV for scikit-learn's diabetes dataset.
    try_default_df = mo.ui.switch(label="Try default dataset ([`diabetes` from scikit-learn](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))")
    try_default_df
    return (try_default_df,)


@app.cell
def _(mo):
    # Checkbox forwarded to ProfileReport(minimal=...); ticked by default.
    minimal = mo.ui.checkbox(True, label="Minimal profiling")
    minimal
    return (minimal,)


@app.cell
def _(datasets, io, mo, pd, try_default_df, uploaded_file):
    # Build the dataframe to profile, or halt this cell (and its dependents)
    # with a hint when no source has been chosen yet. The `and` short-circuits,
    # so `uploaded_file` (None in default-dataset mode) is never dereferenced
    # when the switch is on.
    should_stop = not try_default_df.value and not uploaded_file.value
    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

    if try_default_df.value:
        diabetes = datasets.load_diabetes()
        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    else:
        # The upload's contents are in-memory bytes, while read_csv expects a
        # path or a file-like object, so wrap them in a binary buffer first.
        df = pd.read_csv(io.BytesIO(uploaded_file.value[0].contents))
    return df, diabetes, should_stop


@app.cell
def _(mo, profile):
    # Download button serving the rendered report as a standalone HTML file.
    mo.download(
        data=profile.html,
        filename="ydata.html",
        mimetype="text/html",
        label="Download YData Profile",
    )
    return


@app.cell
def _(ProfileReport, df, minimal, mo):
    # Generate the report for the selected dataframe and embed it in the page.
    with mo.status.spinner("Generating YData Profile report..."):
        profile = ProfileReport(
            df,
            minimal=minimal.value,
            title="YData Profiling Report",
        )
        profile.to_notebook_iframe()
    return (profile,)


if __name__ == "__main__":
    app.run()