Spaces:

lucharo
/

ydata-profiling-marimo

Running

Luis Chaves Rodriguez

added dev make statement pinning python version and link to diabetes df dataset

17fec11 12 days ago

2.83 kB

	# /// script
	# requires-python = ">=3.12"
	# dependencies = [
	# "ipython==9.0.2",
	# "ipywidgets==8.1.5",
	# "marimo",
	# "numpy==2.1.3",
	# "pandas==2.2.3",
	# "scikit-learn==1.6.1",
	# "setuptools==78.1.0",
	# "ydata-profiling==4.16.1",
	# ]
	# ///

	import marimo

	# Version tag stored with the notebook file (presumably the marimo release
	# that generated it — confirm before editing by hand).
	__generated_with = "0.12.9"
	# Application object that the `@app.cell` decorators below attach cells to.
	app = marimo.App(width="medium")


	@app.cell
	def _():
	    # marimo comes first so its spinner can be shown while the remaining
	    # libraries are being imported.
	    import marimo as mo

	    with mo.status.spinner("Importing libraries..."):
	        import numpy as np
	        import pandas as pd
	        from sklearn import datasets
	        from ydata_profiling import ProfileReport

	    return ProfileReport, datasets, mo, np, pd


	@app.cell
	def _(mo):
	    # Introductory text for the app, rendered as markdown.
	    mo.md(
	        r"""
	    # On-the-fly YData Profiling

	    It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that task. This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML report.

	    Note: No data gets stored when you use this service
	    """
	    )
	    return


	@app.cell
	def _(mo, try_default_df):
	    # The upload widget is only built when the default-dataset switch is off;
	    # otherwise the name is bound to None.
	    if try_default_df.value:
	        uploaded_file = None
	    else:
	        uploaded_file = mo.ui.file(
	            filetypes=[".csv"],
	            kind="area",
	            label="Drag and drop a CSV file here, or click to open file browser",
	        )
	    uploaded_file
	    return (uploaded_file,)


	@app.cell
	def _(mo):
	    # Switch offering scikit-learn's `diabetes` dataset as the data source.
	    # The label is split across two adjacent literals purely for line length.
	    try_default_df = mo.ui.switch(
	        label=(
	            "Try default dataset ([`diabetes` from scikit-learn]"
	            "(https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))"
	        ),
	    )
	    try_default_df
	    return (try_default_df,)


	@app.cell
	def _(mo):
	    # Checkbox that starts out ticked and is labelled "Minimal profiling".
	    minimal = mo.ui.checkbox(value=True, label="Minimal profiling")
	    minimal
	    return (minimal,)


	@app.cell
	def _(datasets, mo, pd, try_default_df, uploaded_file):
	    import io

	    # Stop with a prompt until either the default dataset is switched on or
	    # a file has been uploaded. `uploaded_file` is None when the switch is on,
	    # so the switch must be tested first (short-circuit).
	    should_stop = not try_default_df.value and not uploaded_file.value
	    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

	    # Bound up front so the name exists for the return below whichever
	    # branch runs.
	    diabetes = None
	    if try_default_df.value:
	        diabetes = datasets.load_diabetes()
	        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
	    else:
	        # The upload's `contents` is raw bytes (per the marimo file-upload
	        # docs); read_csv expects a path or a file-like object, so the bytes
	        # are wrapped in an in-memory buffer.
	        df = pd.read_csv(io.BytesIO(uploaded_file.value[0].contents))
	    return df, diabetes, should_stop


	@app.cell
	def _(mo, profile):
	    # Download control that serves the report's HTML as "ydata.html".
	    mo.download(
	        label="Download YData Profile",
	        filename="ydata.html",
	        mimetype="text/html",
	        data=profile.html,
	    )
	    return


	@app.cell
	def _(ProfileReport, df, minimal, mo):
	    # Build the report from the selected dataframe and show it inline while
	    # a spinner is displayed.
	    with mo.status.spinner("Generating YData Profile report..."):
	        profile = ProfileReport(
	            df,
	            minimal=minimal.value,
	            title="YData Profiling Report",
	        )
	        profile.to_notebook_iframe()
	    return (profile,)


	# Run the app when this file is executed directly as a script.
	if __name__ == "__main__":
	    app.run()