Luis Chaves Rodriguez
added dev make statement pinning python version and link to diabetes df dataset
17fec11
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "ipython==9.0.2",
# "ipywidgets==8.1.5",
# "marimo",
# "numpy==2.1.3",
# "pandas==2.2.3",
# "scikit-learn==1.6.1",
# "setuptools==78.1.0",
# "ydata-profiling==4.16.1",
# ]
# ///
import marimo
# Version string stored with the notebook (presumably the marimo release that
# generated this file — confirm).
__generated_with = "0.12.9"
# App object: its `cell` decorator wraps each function below, and `app.run()`
# is invoked from the `__main__` guard at the bottom of the file.
app = marimo.App(width="medium")
@app.cell
def _():
    # marimo is imported on its own first because the spinner used for the
    # remaining imports comes from it.
    import marimo as mo

    _loading = mo.status.spinner("Importing libraries...")
    with _loading:
        import numpy as np
        import pandas as pd
        from ydata_profiling import ProfileReport
        from sklearn import datasets

    # Hand the imported names to the cells that request them as parameters.
    return ProfileReport, datasets, mo, np, pd
@app.cell
def _(mo):
    # Introductory copy for the app: what it does, plus a privacy note.
    # Blank lines separate the heading, the paragraph and the note so the note
    # renders as its own paragraph rather than being merged into the text above.
    mo.md(
        r"""
        # On-the-fly YData Profiling

        It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that process. This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML report.

        **Note: No data gets stored when you use this service**
        """
    )
    return
@app.cell
def _(mo, try_default_df):
    # Show a CSV drop zone unless the default dataset has been switched on; in
    # that case no widget is built and `uploaded_file` is None.
    if try_default_df.value:
        uploaded_file = None
    else:
        uploaded_file = mo.ui.file(
            kind="area",
            filetypes=[".csv"],
            label="Drag and drop a CSV file here, or click to open file browser",
        )

    # Trailing bare expression: this is what the cell displays.
    uploaded_file
    return (uploaded_file,)
@app.cell
def _(mo):
    # Switch that selects scikit-learn's bundled diabetes dataset instead of an
    # uploaded CSV. The label is markdown and links to the dataset description.
    _switch_label = (
        "Try default dataset ([`diabetes` from scikit-learn]"
        "(https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))"
    )
    try_default_df = mo.ui.switch(label=_switch_label)

    # Trailing bare expression: this is what the cell displays.
    try_default_df
    return (try_default_df,)
@app.cell
def _(mo):
    # Checkbox, ticked by default, whose state is read by the profiling cell
    # through `minimal.value`.
    minimal = mo.ui.checkbox(value=True, label="Minimal profiling")

    # Trailing bare expression: this is what the cell displays.
    minimal
    return (minimal,)
@app.cell
def _(datasets, mo, pd, try_default_df, uploaded_file):
    # Resolve the dataframe to profile: scikit-learn's diabetes dataset when
    # the switch is on, otherwise the first uploaded CSV.

    # Underscore-prefixed alias so the import stays private to this cell.
    import io as _io

    # `uploaded_file` is None while the switch is on, so the switch is tested
    # first and short-circuits before the upload's value is touched.
    should_stop = not try_default_df.value and not uploaded_file.value
    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

    # Bound up front so the return statement is valid on the upload path too.
    diabetes = None
    if try_default_df.value:
        diabetes = datasets.load_diabetes()
        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    else:
        # The upload's `contents` are raw bytes. pandas.read_csv expects a path
        # or a file-like object and raises TypeError on bare bytes, so wrap
        # them in an in-memory binary buffer.
        df = pd.read_csv(_io.BytesIO(uploaded_file.value[0].contents))
    return df, diabetes, should_stop
@app.cell
def _(mo, profile):
    # Download button that serves the report's `html` attribute as a
    # standalone file named "ydata.html".
    _report_html = profile.html
    mo.download(
        label="Download YData Profile",
        data=_report_html,
        filename="ydata.html",
        mimetype="text/html",
    )
    return
@app.cell
def _(ProfileReport, df, minimal, mo):
    # Build the YData profile for `df` and display it, showing a spinner while
    # the work runs. `profile` is returned so the download cell can read it.
    with mo.status.spinner("Generating YData Profile report..."):
        # `minimal.value` is the state of the "Minimal profiling" checkbox.
        profile = ProfileReport(df,minimal = minimal.value, title="YData Profiling Report")
        # Displays the report through ydata-profiling's notebook iframe output.
        profile.to_notebook_iframe()
    return (profile,)
# Run the app when this file is executed directly rather than imported.
if __name__ == "__main__":
    app.run()