File size: 2,829 Bytes
3039071 ec2a227 17fec11 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 17fec11 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 3039071 ec2a227 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "ipython==9.0.2",
# "ipywidgets==8.1.5",
# "marimo",
# "numpy==2.1.3",
# "pandas==2.2.3",
# "scikit-learn==1.6.1",
# "setuptools==78.1.0",
# "ydata-profiling==4.16.1",
# ]
# ///
import marimo
# Version string recorded in the notebook file (presumably the marimo release
# that generated it -- confirm before editing by hand).
__generated_with = "0.12.9"
# The notebook application object; the cells below attach to it via @app.cell.
app = marimo.App(width="medium")
@app.cell
def _():
    # Cell: import every library the notebook uses and hand them to the
    # other cells through the return tuple.
    import marimo as mo

    # marimo is imported first so a spinner can be shown while the
    # remaining libraries are loaded.
    with mo.status.spinner(title="Importing libraries..."):
        import numpy as np
        import pandas as pd
        from ydata_profiling import ProfileReport
        from sklearn import datasets

    return ProfileReport, datasets, mo, np, pd
@app.cell
def _(mo):
    # Cell: introductory Markdown shown at the top of the app.
    # Blank lines separate the heading, the description and the note so that
    # each one renders as its own Markdown block.
    mo.md(
        r"""
        # On-the-fly YData Profiling

        It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that task. This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML report.

        **Note: No data gets stored when you use this service**
        """
    )
    return
@app.cell
def _(mo, try_default_df):
    # Cell: CSV upload area. It is only created while the "default dataset"
    # switch is off; otherwise `uploaded_file` is None.
    if try_default_df.value:
        uploaded_file = None
    else:
        uploaded_file = mo.ui.file(
            filetypes=[".csv"],
            kind="area",
            label="Drag and drop a CSV file here, or click to open file browser",
        )
    # Bare expression kept as the final statement before the return.
    uploaded_file
    return (uploaded_file,)
@app.cell
def _(mo):
    # Cell: switch that selects the bundled scikit-learn diabetes dataset
    # instead of an uploaded CSV.
    try_default_df = mo.ui.switch(
        label=(
            "Try default dataset "
            "([`diabetes` from scikit-learn]"
            "(https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))"
        )
    )
    try_default_df
    return (try_default_df,)
@app.cell
def _(mo):
    # Cell: checkbox (ticked by default) whose value is passed to
    # ProfileReport as its `minimal` flag.
    minimal = mo.ui.checkbox(value=True, label="Minimal profiling")
    minimal
    return (minimal,)
@app.cell
def _(datasets, mo, pd, try_default_df, uploaded_file):
    # Cell: build the source DataFrame `df`, either from scikit-learn's
    # diabetes dataset or from the uploaded CSV file.

    # Underscore-prefixed names are cell-local in marimo, so this helper
    # import does not change the names this cell returns.
    import io as _io

    # Nothing to profile yet: the switch is off and no file was uploaded.
    # When the switch is on, `uploaded_file` is None, so the short-circuit
    # on the first operand is what keeps `.value` from being evaluated.
    should_stop = not try_default_df.value and not uploaded_file.value
    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

    # Bound on both branches so the return below is valid on the CSV path.
    diabetes = None
    if try_default_df.value:
        diabetes = datasets.load_diabetes()
        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    else:
        # The upload's `contents` is raw bytes (per the marimo file-upload
        # docs); pandas needs a path or a file-like object, so wrap it.
        df = pd.read_csv(_io.BytesIO(uploaded_file.value[0].contents))
    return df, diabetes, should_stop
@app.cell
def _(mo, profile):
    # Cell: download button offering the rendered profile as an HTML file.
    # The options are gathered first (underscore-prefixed local) and then
    # passed to mo.download in a single call.
    _download_kwargs = {
        "data": profile.html,
        "filename": "ydata.html",
        "mimetype": "text/html",
        "label": "Download YData Profile",
    }
    mo.download(**_download_kwargs)
    return
@app.cell
def _(ProfileReport, df, minimal, mo):
    # Cell: build the YData profile for `df` and display it inline while a
    # spinner is shown.
    with mo.status.spinner(title="Generating YData Profile report..."):
        profile = ProfileReport(
            df,
            title="YData Profiling Report",
            minimal=minimal.value,
        )
        profile.to_notebook_iframe()
    return (profile,)
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|