# PAOLO BRICARELLO
# Update app.py
# 747c55e verified
# raw
# history blame
# 14.9 kB
# marimo notebook: every function decorated with @app.cell below is one cell.
import marimo
# Version string stored with the notebook (the name suggests it is the marimo
# version that generated this file).
__generated_with = "0.12.0"
# Application object the cells are registered on; created with width="medium".
app = marimo.App(width="medium")
@app.cell
def _():
    # Shared imports. The names returned below are the ones the other cells
    # list as parameters (mo, pd, Counter and the SVG element classes).
    from collections import Counter

    import marimo as mo
    import numpy as np
    import pandas as pd
    from svg import (
        SVG,
        G,
        Circle,
        Path,
        Title,
        Rect,
        Line,
        Polygon,
        Text,
    )

    return Circle, Counter, G, Line, Path, Polygon, Rect, SVG, Text, Title, mo, np, pd
@app.cell
def _(pd):
    # Data import: load the three CSV tables used by the rest of the notebook.
    #
    # The files used to be read from one absolute folder that only exists on
    # the original author's machine. The folder is now resolved once, in order:
    #   1. the TDF_DATA_DIR environment variable, when it is set and non-empty;
    #   2. the original folder, when it exists (same behaviour as before there);
    #   3. the current working directory.
    # A missing file still raises from pd.read_csv, as it did before.
    import os as _os

    _original_dir = "C:/Users/paolo/Desktop/data visualisation project"
    _data_dir = _os.environ.get("TDF_DATA_DIR")
    if not _data_dir:
        _data_dir = _original_dir if _os.path.isdir(_original_dir) else "."

    stage_data = pd.read_csv(_os.path.join(_data_dir, "stage_data.csv"))
    tdf_stages = pd.read_csv(_os.path.join(_data_dir, "tdf_stages.csv"))
    tdf_winners = pd.read_csv(_os.path.join(_data_dir, "tdf_winners.csv"))
    return stage_data, tdf_stages, tdf_winners
@app.cell
def _(mo, pd, tdf_stages):
    # Text inputs for the first and last date used to filter the stages table.
    # The "Date" column is converted to datetimes in place; values that cannot
    # be parsed become NaT (errors='coerce').
    tdf_stages['Date'] = pd.to_datetime(tdf_stages['Date'], errors='coerce')

    # Each input is pre-filled with the earliest / latest date in the data.
    _iso_format = "%Y-%m-%d"
    _earliest = tdf_stages["Date"].min()
    _latest = tdf_stages["Date"].max()

    start_date_input = mo.ui.text(
        label="Initial Date (YYYY-MM-DD)",
        value=_earliest.strftime(_iso_format),
    )
    end_date_input = mo.ui.text(
        label="Final date (YYYY-MM-DD)",
        value=_latest.strftime(_iso_format),
    )
    return end_date_input, start_date_input
@app.cell
def _(end_date_input, mo, start_date_input):
    # Show the two date inputs in one horizontal stack, start date first.
    _date_inputs = [start_date_input, end_date_input]
    mo.hstack(_date_inputs, justify="start")
    return
@app.cell
def _(mo):
    # Slider for the circle radius: integers from 1 to 10, initially 5.
    radius = mo.ui.slider(
        label="radius",
        start=1,
        stop=10,
        step=1,
        value=5,
    )
    return (radius,)
@app.cell
def _(Counter, end_date_input, start_date_input, tdf_stages):
    # Classify every stage, filter the stages by the typed date range, and find
    # the three countries with the most stage wins for each class.
    #
    # Produces:
    #   macro_class_top3 -- dict mapping 'Plain' / 'Hill' / 'Mountain' / 'Chrono'
    #       to a list of exactly three strings: country values padded with "NA",
    #       or three blanks (' ') when the class has no stage in the range.
    #   filtered_df      -- the rows of tdf_stages inside the date range.
    import pandas as _pd

    # Group the raw "Type" values into coarser classes. Types missing from the
    # mapping become NaN and therefore never match any class below.
    macro_class_mapping = {
        'Flat cobblestone stage': 'Plain',
        'Flat stage': 'Plain',
        'Flat Stage': 'Plain',
        'Half Stage': 'Other',
        'High mountain stage': 'Mountain',
        'Hilly stage': 'Hill',
        'Individual time trial': 'Chrono',
        'Intermediate stage': 'Other',
        'Medium mountain stage': 'Mountain',
        'Mountain stage': 'Mountain',
        'Mountain Stage': 'Mountain',
        'Mountain time trial': 'Chrono',
        'Plain stage': 'Plain',
        'Plain stage with cobblestones': 'Plain',
        'Stage with mountain': 'Mountain',
        'Stage with mountain(s)': 'Mountain',
        'Team time trial': 'Chrono',
        'Transition stage': 'Other',
    }
    tdf_stages['macro_class'] = tdf_stages['Type'].map(macro_class_mapping)

    # Filter the table using the dates typed in the text inputs.
    start_date = start_date_input.value
    end_date = end_date_input.value
    # The inputs are free text: a blank or malformed value used to raise when
    # compared with the datetime column. Parse leniently and fall back to the
    # bounds of the data so a bad input simply leaves that side unfiltered.
    _start_ts = _pd.to_datetime(start_date, errors="coerce")
    _end_ts = _pd.to_datetime(end_date, errors="coerce")
    if _pd.isna(_start_ts):
        _start_ts = tdf_stages['Date'].min()
    if _pd.isna(_end_ts):
        _end_ts = tdf_stages['Date'].max()
    _in_range = (tdf_stages['Date'] >= _start_ts) & (tdf_stages['Date'] <= _end_ts)
    filtered_df = tdf_stages[_in_range]

    # Find the 3 countries with the most wins for each class of stage.
    all_macro_classes = ['Plain', 'Hill', 'Mountain', 'Chrono']
    macro_class_top3 = {}
    # Bound up front so the name exists in the return tuple even when no class
    # has any stage in the selected range.
    top_countries = []
    for macro_class in all_macro_classes:
        group = filtered_df[filtered_df['macro_class'] == macro_class]
        if group.empty:
            # No stage of this class in the range: three blank placeholders.
            macro_class_top3[macro_class] = [' ', ' ', ' ']
            continue
        # Missing winners are dropped so NaN is never ranked as a country.
        top_countries = Counter(group['Winner_Country'].dropna()).most_common(3)
        _names = [country for country, _ in top_countries]
        # Pad to exactly three entries.
        macro_class_top3[macro_class] = _names + ["NA"] * (3 - len(_names))
    return (
        all_macro_classes,
        end_date,
        filtered_df,
        group,
        macro_class,
        macro_class_mapping,
        macro_class_top3,
        start_date,
        top_countries,
    )
@app.cell
def _(tdf_winners):
    # Linear scales for the flat-stage panel: height -> x pixel, weight -> y pixel.

    # Data extents over all rows of tdf_winners.
    _heights = tdf_winners["height"]
    _weights = tdf_winners["weight"]
    X_MIN = _heights.min()
    X_MAX = _heights.max()
    Y_MIN = _weights.min()
    Y_MAX = _weights.max()

    # Pixel extents of the panel's plotting area.
    SVG_X_MIN_P = 60
    SVG_X_MAX_P = 150
    SVG_Y_MIN_P = 280
    SVG_Y_MAX_P = 470

    def scale_x_plain(x):
        # X_MIN maps to SVG_X_MIN_P and X_MAX to SVG_X_MAX_P.
        share = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_P + share * (SVG_X_MAX_P - SVG_X_MIN_P)

    def scale_y_plain(y):
        # Inverted axis: Y_MIN maps to SVG_Y_MAX_P and Y_MAX to SVG_Y_MIN_P.
        share = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_P - share * (SVG_Y_MAX_P - SVG_Y_MIN_P)

    return (
        SVG_X_MAX_P,
        SVG_X_MIN_P,
        SVG_Y_MAX_P,
        SVG_Y_MIN_P,
        X_MAX,
        X_MIN,
        Y_MAX,
        Y_MIN,
        scale_x_plain,
        scale_y_plain,
    )
@app.cell
def _(macro_class_top3, scale_x_plain, scale_y_plain, tdf_winners):
    # Datapoints for plain stages: one scaled (height, weight) point for every
    # row of tdf_winners whose birth country contains the top plain-stage
    # country. Each point is a dict with 'x', 'y' and 'data-id' (the row number).
    Cplain = str(macro_class_top3['Plain'][0])
    # The country value is matched in capitalised form as a substring of
    # "birth_country".
    _needle = Cplain.capitalize()
    # A blank placeholder means the class has no stage in the selected range;
    # it must match nothing (a space is otherwise contained in any multi-word
    # country name).
    _has_country = bool(Cplain.strip())

    datapoints_plain = []
    # Iterate over the rows that are actually present instead of a fixed count.
    _rows = zip(
        tdf_winners["birth_country"],
        tdf_winners["height"],
        tdf_winners["weight"],
    )
    for i, (_birth_country, _height, _weight) in enumerate(_rows):
        # A missing birth country is read as a float NaN, which does not
        # support the `in` test; skip anything that is not text.
        if not (_has_country and isinstance(_birth_country, str)):
            continue
        if _needle in _birth_country:
            datapoints_plain.append(
                {
                    'x': scale_x_plain(_height),
                    'y': scale_y_plain(_weight),
                    'data-id': i,
                }
            )
    return Cplain, datapoints_plain, i
@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    # Linear scales for the hilly-stage panel: height -> x pixel, weight -> y pixel.

    # Pixel extents of the panel's plotting area.
    SVG_X_MIN_C = 210
    SVG_X_MAX_C = 300
    SVG_Y_MIN_C = 280
    SVG_Y_MAX_C = 470

    def scale_x_hill(x):
        # X_MIN maps to SVG_X_MIN_C and X_MAX to SVG_X_MAX_C.
        share = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_C + share * (SVG_X_MAX_C - SVG_X_MIN_C)

    def scale_y_hill(y):
        # Inverted axis: Y_MIN maps to SVG_Y_MAX_C and Y_MAX to SVG_Y_MIN_C.
        share = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_C - share * (SVG_Y_MAX_C - SVG_Y_MIN_C)

    return (
        SVG_X_MAX_C,
        SVG_X_MIN_C,
        SVG_Y_MAX_C,
        SVG_Y_MIN_C,
        scale_x_hill,
        scale_y_hill,
    )
@app.cell
def _(i, macro_class_top3, scale_x_hill, scale_y_hill, tdf_winners):
    # Datapoints for hilly stages: one scaled (height, weight) point for every
    # row of tdf_winners whose birth country contains the top hilly-stage
    # country. Each point is a dict with 'x', 'y' and 'data-id' (the row number).
    #
    # `i` is no longer read: it used to be stored as 'data-id', which gave every
    # point the same stale index left over from another cell's loop. It stays in
    # the signature only so the cell's declared inputs are unchanged.
    Chill = str(macro_class_top3['Hill'][0])
    # The country value is matched in capitalised form as a substring of
    # "birth_country".
    _needle = Chill.capitalize()
    # A blank placeholder means the class has no stage in the selected range;
    # it must match nothing (a space is otherwise contained in any multi-word
    # country name).
    _has_country = bool(Chill.strip())

    datapoints_hill = []
    # Iterate over the rows that are actually present instead of a fixed count.
    _rows = zip(
        tdf_winners["birth_country"],
        tdf_winners["height"],
        tdf_winners["weight"],
    )
    for j, (_birth_country, _height, _weight) in enumerate(_rows):
        # A missing birth country is read as a float NaN, which does not
        # support the `in` test; skip anything that is not text.
        if not (_has_country and isinstance(_birth_country, str)):
            continue
        if _needle in _birth_country:
            datapoints_hill.append(
                {
                    'x': scale_x_hill(_height),
                    'y': scale_y_hill(_weight),
                    'data-id': j,
                }
            )
    return Chill, datapoints_hill, j
@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    # Linear scales for the mountain-stage panel: height -> x pixel, weight -> y pixel.

    # Pixel extents of the panel's plotting area.
    SVG_X_MIN_M = 360
    SVG_X_MAX_M = 450
    SVG_Y_MIN_M = 280
    SVG_Y_MAX_M = 470

    def scale_x_Mountain(x):
        # X_MIN maps to SVG_X_MIN_M and X_MAX to SVG_X_MAX_M.
        share = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_M + share * (SVG_X_MAX_M - SVG_X_MIN_M)

    def scale_y_Mountain(y):
        # Inverted axis: Y_MIN maps to SVG_Y_MAX_M and Y_MAX to SVG_Y_MIN_M.
        share = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_M - share * (SVG_Y_MAX_M - SVG_Y_MIN_M)

    return (
        SVG_X_MAX_M,
        SVG_X_MIN_M,
        SVG_Y_MAX_M,
        SVG_Y_MIN_M,
        scale_x_Mountain,
        scale_y_Mountain,
    )
@app.cell
def _(macro_class_top3, scale_x_Mountain, scale_y_Mountain, tdf_winners):
    # Datapoints for mountain stages: one scaled (height, weight) point for
    # every row of tdf_winners whose birth country contains the top
    # mountain-stage country. Each point is a dict with 'x', 'y' and 'data-id'
    # (the row number).
    CMountain = str(macro_class_top3['Mountain'][0])
    # The country value is matched in capitalised form as a substring of
    # "birth_country".
    _needle = CMountain.capitalize()
    # A blank placeholder means the class has no stage in the selected range;
    # it must match nothing (a space is otherwise contained in any multi-word
    # country name).
    _has_country = bool(CMountain.strip())

    datapoints_Mountain = []
    # Iterate over the rows that are actually present instead of a fixed count.
    _rows = zip(
        tdf_winners["birth_country"],
        tdf_winners["height"],
        tdf_winners["weight"],
    )
    for k, (_birth_country, _height, _weight) in enumerate(_rows):
        # A missing birth country is read as a float NaN, which does not
        # support the `in` test; skip anything that is not text.
        if not (_has_country and isinstance(_birth_country, str)):
            continue
        if _needle in _birth_country:
            datapoints_Mountain.append(
                {
                    'x': scale_x_Mountain(_height),
                    'y': scale_y_Mountain(_weight),
                    'data-id': k,
                }
            )
    return CMountain, datapoints_Mountain, k
@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    # Linear scales for the time-trial ("Chrono") panel: height -> x pixel,
    # weight -> y pixel.

    # Pixel extents of the panel's plotting area.
    # NOTE(review): the other three panels use a 90 px wide x range
    # (60-150, 210-300, 360-450); 520 here gives 80 px -- confirm whether 510
    # was intended.
    SVG_X_MIN_CR = 520
    SVG_X_MAX_CR = 600
    SVG_Y_MIN_CR = 280
    SVG_Y_MAX_CR = 470

    def scale_x_Chrono(x):
        # X_MIN maps to SVG_X_MIN_CR and X_MAX to SVG_X_MAX_CR.
        share = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_CR + share * (SVG_X_MAX_CR - SVG_X_MIN_CR)

    def scale_y_Chrono(y):
        # Inverted axis: Y_MIN maps to SVG_Y_MAX_CR and Y_MAX to SVG_Y_MIN_CR.
        share = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_CR - share * (SVG_Y_MAX_CR - SVG_Y_MIN_CR)

    return (
        SVG_X_MAX_CR,
        SVG_X_MIN_CR,
        SVG_Y_MAX_CR,
        SVG_Y_MIN_CR,
        scale_x_Chrono,
        scale_y_Chrono,
    )
@app.cell
def _(macro_class_top3, scale_x_Chrono, scale_y_Chrono, tdf_winners):
    # Datapoints for time-trial ("Chrono") stages: one scaled (height, weight)
    # point for every row of tdf_winners whose birth country contains the top
    # time-trial country. Each point is a dict with 'x', 'y' and 'data-id'
    # (the row number).
    CChrono = str(macro_class_top3['Chrono'][0])
    # The country value is matched in capitalised form as a substring of
    # "birth_country".
    _needle = CChrono.capitalize()
    # A blank placeholder means the class has no stage in the selected range;
    # it must match nothing (a space is otherwise contained in any multi-word
    # country name).
    _has_country = bool(CChrono.strip())

    datapoints_Chrono = []
    # Iterate over the rows that are actually present instead of a fixed count.
    _rows = zip(
        tdf_winners["birth_country"],
        tdf_winners["height"],
        tdf_winners["weight"],
    )
    for z, (_birth_country, _height, _weight) in enumerate(_rows):
        # A missing birth country is read as a float NaN, which does not
        # support the `in` test; skip anything that is not text.
        if not (_has_country and isinstance(_birth_country, str)):
            continue
        if _needle in _birth_country:
            datapoints_Chrono.append(
                {
                    'x': scale_x_Chrono(_height),
                    'y': scale_y_Chrono(_weight),
                    'data-id': z,
                }
            )
    return CChrono, datapoints_Chrono, z
@app.cell
def _(
    Circle,
    datapoints_Chrono,
    datapoints_Mountain,
    datapoints_hill,
    datapoints_plain,
    pd,
    radius,
):
    # Build one list of SVG circles per stage type. All circles share the
    # slider's radius, 0.5 fill opacity and a white 1 px stroke; only the fill
    # colour differs between the four lists.
    _series = (
        ("green", datapoints_plain),
        ("orange", datapoints_hill),
        ("brown", datapoints_Mountain),
        ("darkgray", datapoints_Chrono),
    )

    _layers = []
    for _fill, _points in _series:
        _layer = []
        for datapoint in _points:
            # Points with a missing x or y coordinate are not drawn.
            if pd.isna(datapoint["x"]) or pd.isna(datapoint["y"]):
                continue
            _layer.append(
                Circle(
                    cx=datapoint["x"],
                    cy=datapoint["y"],
                    r=radius.value,
                    fill=_fill,
                    fill_opacity=0.5,
                    stroke_width=1,
                    stroke="white",
                )
            )
        _layers.append(_layer)

    # Same order as the rows of _series.
    circles_plain, circles_hill, circles_mountain, circles_chrono = _layers
    return (
        circles_chrono,
        circles_hill,
        circles_mountain,
        circles_plain,
        datapoint,
    )
@app.cell
def _(
    CChrono,
    CMountain,
    Chill,
    Cplain,
    Line,
    Polygon,
    SVG,
    Text,
    circles_chrono,
    circles_hill,
    circles_mountain,
    circles_plain,
    macro_class_top3,
    mo,
    radius,
):
    # Complete graph: the background shapes, the axis captions and ranked
    # country labels of the four panels, and the circles on top. The cell's
    # output is the rendered SVG next to the radius slider.

    # One row per panel, left to right: class key, top country, label colour.
    _panels = [
        ("Plain", Cplain, "darkgreen"),
        ("Hill", Chill, "darkorange"),
        ("Mountain", CMountain, "brown"),
        ("Chrono", CChrono, "darkgray"),
    ]
    # Horizontal offset between the labels of two neighbouring panels.
    _panel_step = 150

    _shapes = [
        # Rectangles and polygons
        Polygon(points=[(30, 250), (180, 250), (180, 650), (30, 650)], fill="green", stroke="black"),
        Polygon(points=[(30, 250), (70, 220), (220, 220), (180, 250)], fill="forestgreen", stroke="black"),
        Polygon(points=[(180, 250), (330, 250), (330, 650), (180, 650)], fill="orange", stroke="black"),
        Polygon(points=[(180, 250), (330, 250), (370, 220), (220, 220)], fill="chocolate", stroke="black"),
        Polygon(points=[(180, 250), (330, 250), (330, 200)], fill="goldenrod", stroke="black"),
        Polygon(points=[(180, 250), (220, 220), (370, 170), (330, 200)], fill="gold", stroke="black"),
        Polygon(points=[(330, 250), (480, 250), (420, 130), (330, 200)], fill="maroon", stroke="black"),
        Polygon(points=[(330, 650), (480, 650), (480, 250), (330, 250)], fill="chocolate", stroke="black"),
        Polygon(points=[(330, 200), (370, 170), (450, 100), (520, 220), (480, 250), (420, 130)], fill="maroon", stroke="black"),
        Polygon(points=[(480, 250), (630, 250), (670, 220), (520, 220)], fill="gray", stroke="black"),
        Polygon(points=[(480, 250), (630, 250), (630, 650), (480, 650)], fill="gray", stroke="black"),
        Polygon(points=[(630, 650), (670, 630), (670, 220), (630, 250)], fill="darkgray", stroke="black"),
        # Lines
        Line(x1=30, y1=500, x2=630, y2=500, stroke="black", stroke_width=1),
        Line(x1=630, y1=500, x2=670, y2=480, stroke="black", stroke_width=1),
        Line(x1=30, y1=570, x2=630, y2=570, stroke="black", stroke_width=1),
        Line(x1=630, y1=570, x2=670, y2=550, stroke="black", stroke_width=1),
    ]

    # Text labels. They are generated in the same order as the original
    # hand-written list so the SVG element order is unchanged.
    _labels = []
    # Axis captions: a "height" / "weight" pair per panel.
    for _n, (_cls, _first, _colour) in enumerate(_panels):
        _dx = _n * _panel_step
        _labels.append(Text(x=120 + _dx, y=490, text="height", font_size=20, fill=_colour))
        _labels.append(Text(x=40 + _dx, y=270, text="weight", font_size=20, fill=_colour))
    # Ranked countries, one row of labels per rank:
    # (rank, x in the first panel, y, font size).
    _rank_rows = [(0, 50, 400, 60), (1, 70, 550, 40), (2, 85, 620, 20)]
    for _rank, _x0, _y, _size in _rank_rows:
        for _n, (_cls, _first, _colour) in enumerate(_panels):
            _name = _first if _rank == 0 else str(macro_class_top3[_cls][_rank])
            _labels.append(
                Text(x=_x0 + _n * _panel_step, y=_y, text=_name, font_size=_size, fill=_colour)
            )

    plot = SVG(
        width=700,
        height=800,
        elements=_shapes + _labels + circles_plain + circles_hill + circles_mountain + circles_chrono,
    )
    # Only the last expression of the cell is displayed, so the SVG is rendered
    # once, here (a discarded standalone mo.Html(...) statement was removed).
    mo.hstack(
        [mo.Html(plot.as_str()), radius], justify="start"
    )
    return (plot,)
# When the file is executed directly as a script, run the marimo app.
if __name__ == "__main__":
    app.run()