# Luis Chaves Rodriguez
# added dev make statement pinning python version and link to diabetes df dataset
# 17fec11
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "ipython==9.0.2",
#     "ipywidgets==8.1.5",
#     "marimo",
#     "numpy==2.1.3",
#     "pandas==2.2.3",
#     "scikit-learn==1.6.1",
#     "setuptools==78.1.0",
#     "ydata-profiling==4.16.1",
# ]
# ///
import marimo

# Version of marimo that generated this notebook file.
__generated_with = "0.12.9"
# The notebook application object that is run at the bottom of the file.
app = marimo.App(width="medium")
@app.cell
def _():
    # Import marimo plus the data libraries and hand them back as this
    # cell's definitions (the returned names).
    import marimo as mo

    # NOTE(review): indentation was reconstructed; the imports are assumed
    # to sit inside the spinner so it is shown while they load.
    with mo.status.spinner("Importing libraries..."):
        import numpy as np
        import pandas as pd
        from ydata_profiling import ProfileReport
        from sklearn import datasets
    return ProfileReport, datasets, mo, np, pd
@app.cell
def _(mo):
    # Title and introductory text for the app, written as markdown.
    mo.md(
        r"""
        # On-the-fly YData Profiling

        It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that task. This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML report.

        **Note: No data gets stored when you use this service**
        """
    )
    return
@app.cell
def _(mo, try_default_df):
    # Build the CSV upload area, unless the default-dataset switch is on:
    # in that case no widget is created and `uploaded_file` is None.
    uploaded_file = (
        None
        if try_default_df.value
        else mo.ui.file(
            filetypes=[".csv"],
            kind="area",
            label="Drag and drop a CSV file here, or click to open file browser",
        )
    )
    uploaded_file
    return (uploaded_file,)
@app.cell
def _(mo):
    # Switch that lets the user pick the scikit-learn diabetes dataset
    # instead of uploading a CSV; other cells read its `.value`.
    try_default_df = mo.ui.switch(
        label="Try default dataset ([`diabetes` from scikit-learn](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))"
    )
    try_default_df
    return (try_default_df,)
@app.cell
def _(mo):
    # Checkbox (initially ticked) whose value is passed as `minimal=` to
    # ProfileReport by the report cell.
    minimal = mo.ui.checkbox(True, label="Minimal profiling")
    minimal
    return (minimal,)
@app.cell
def _(datasets, mo, pd, try_default_df, uploaded_file):
    # Build the dataframe to profile, either from the scikit-learn diabetes
    # dataset or from the first uploaded CSV.

    # Imported under a `_`-prefixed alias so it stays private to this cell
    # (marimo treats underscore-prefixed names as cell-local) and the set of
    # names this cell defines is unchanged.
    import io as _io

    # No source chosen yet: default switch off and nothing uploaded. The
    # `and` short-circuits when the switch is on, which matters because
    # `uploaded_file` is None in that case.
    should_stop = not try_default_df.value and not uploaded_file.value
    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

    # Bound on both paths so the return below never hits an unbound name
    # when the data comes from an upload.
    diabetes = None
    if try_default_df.value:
        diabetes = datasets.load_diabetes()
        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    else:
        # `contents` is the raw uploaded bytes (per the marimo file-upload
        # docs); read_csv needs a path or file-like object, so wrap it.
        df = pd.read_csv(_io.BytesIO(uploaded_file.value[0].contents))
    return df, diabetes, should_stop
@app.cell
def _(mo, profile):
    # Download button that serves `profile.html` as the file "ydata.html".
    mo.download(
        data=profile.html,
        filename="ydata.html",
        mimetype="text/html",
        label="Download YData Profile",
    )
    return
@app.cell
def _(ProfileReport, df, minimal, mo):
    # Create the profiling report for `df` and show it inline; `profile` is
    # returned so the download cell can use it.
    # NOTE(review): indentation was reconstructed; both statements are
    # assumed to sit inside the spinner block.
    with mo.status.spinner("Generating YData Profile report..."):
        profile = ProfileReport(
            df,
            minimal=minimal.value,
            title="YData Profiling Report",
        )
        profile.to_notebook_iframe()
    return (profile,)
# Run the notebook app when this file is executed as a script.
if __name__ == "__main__":
    app.run()