|
import marimo |
|
|
|
# Version string recorded in the notebook file by its generator.
__generated_with = "0.12.0"

# Notebook application object; every cell below registers on it via @app.cell.
app = marimo.App(width="medium")
|
|
|
|
|
@app.cell
def _():
    # Imports shared by the notebook. Every name imported here is returned so
    # that the other cells can take it as a parameter.
    from collections import Counter

    import marimo as mo
    import numpy as np
    import pandas as pd
    from svg import Circle, G, Line, Path, Polygon, Rect, SVG, Text, Title

    return Circle, Counter, G, Line, Path, Polygon, Rect, SVG, Text, Title, mo, np, pd
|
|
|
|
|
@app.cell
def _(pd):
    # Read the three CSV files used by the notebook, in a fixed order.
    # NOTE(review): the paths are absolute and tied to one machine.
    _csv_paths = (
        "C:/Users/paolo/Desktop/data visualisation project/stage_data.csv",
        "C:/Users/paolo/Desktop/data visualisation project/tdf_stages.csv",
        "C:/Users/paolo/Desktop/data visualisation project/tdf_winners.csv",
    )
    stage_data, tdf_stages, tdf_winners = (pd.read_csv(path) for path in _csv_paths)
    return stage_data, tdf_stages, tdf_winners
|
|
|
|
|
@app.cell
def _(mo, pd, tdf_stages):
    # Parse the 'Date' column in place; unparseable values are coerced to NaT.
    tdf_stages['Date'] = pd.to_datetime(tdf_stages['Date'], errors='coerce')

    # The two text boxes start out showing the earliest and the latest date
    # found in the data, formatted as YYYY-MM-DD.
    _iso_format = "%Y-%m-%d"
    _earliest = tdf_stages["Date"].min().strftime(_iso_format)
    _latest = tdf_stages["Date"].max().strftime(_iso_format)

    start_date_input = mo.ui.text(value=_earliest, label="Initial Date (YYYY-MM-DD)")
    end_date_input = mo.ui.text(value=_latest, label="Final date (YYYY-MM-DD)")
    return end_date_input, start_date_input
|
|
|
|
|
@app.cell
def _(end_date_input, mo, start_date_input):
    # Place the start-date box and the end-date box next to each other.
    _date_boxes = [start_date_input, end_date_input]
    mo.hstack(_date_boxes, justify="start")
    return
|
|
|
|
|
@app.cell
def _(mo):
    # Slider controlling the circle radius: 1 to 10 in steps of 1, initially 5.
    radius = mo.ui.slider(
        label="radius",
        start=1,
        stop=10,
        step=1,
        value=5,
    )
    return (radius,)
|
|
|
|
|
@app.cell
def _(Counter, end_date_input, start_date_input, tdf_stages):
    # Detailed stage type -> macro class. Types missing from this table map
    # to NaN in the 'macro_class' column.
    macro_class_mapping = {
        'Flat cobblestone stage': 'Plain',
        'Flat stage': 'Plain',
        'Flat Stage': 'Plain',
        'Half Stage': 'Other',
        'High mountain stage': 'Mountain',
        'Hilly stage': 'Hill',
        'Individual time trial': 'Chrono',
        'Intermediate stage': 'Other',
        'Medium mountain stage': 'Mountain',
        'Mountain stage': 'Mountain',
        'Mountain Stage': 'Mountain',
        'Mountain time trial': 'Chrono',
        'Plain stage': 'Plain',
        'Plain stage with cobblestones': 'Plain',
        'Stage with mountain': 'Mountain',
        'Stage with mountain(s)': 'Mountain',
        'Team time trial': 'Chrono',
        'Transition stage': 'Other',
    }

    # Adds the macro class as a new column on the shared stages table.
    tdf_stages['macro_class'] = tdf_stages['Type'].map(macro_class_mapping)

    # Current contents of the two date text boxes.
    start_date, end_date = start_date_input.value, end_date_input.value

    # Keep the stages whose date lies in [start_date, end_date].
    _not_before_start = tdf_stages['Date'] >= start_date
    _not_after_end = tdf_stages['Date'] <= end_date
    filtered_df = tdf_stages[_not_before_start & _not_after_end]

    # Only these four classes are reported; 'Other' is deliberately absent.
    all_macro_classes = ['Plain', 'Hill', 'Mountain', 'Chrono']

    # For each class: the three most frequent winner countries, most frequent
    # first. Always exactly three entries per class.
    macro_class_top3 = {}
    for macro_class in all_macro_classes:
        if macro_class not in filtered_df['macro_class'].unique():
            # No stage of this class in the selected period: blank labels.
            macro_class_top3[macro_class] = [' ', ' ', ' ']
            continue

        group = filtered_df[filtered_df['macro_class'] == macro_class]
        top_countries = Counter(group['Winner_Country']).most_common(3)

        _ranked = [pair[0] for pair in top_countries]
        # Fewer than three distinct countries: fill the rest with "NA".
        _ranked.extend(["NA"] * (3 - len(_ranked)))
        macro_class_top3[macro_class] = _ranked

    print(macro_class_top3)
    return (
        all_macro_classes,
        end_date,
        filtered_df,
        group,
        macro_class,
        macro_class_mapping,
        macro_class_top3,
        start_date,
        top_countries,
    )
|
|
|
|
|
@app.cell
def _(tdf_winners):
    # Data extent: the "height" column supplies x, the "weight" column y.
    _heights = tdf_winners["height"]
    _weights = tdf_winners["weight"]
    X_MIN, X_MAX = _heights.min(), _heights.max()
    Y_MIN, Y_MAX = _weights.min(), _weights.max()

    # Pixel rectangle that the first (Plain) scatter is drawn into.
    SVG_X_MIN_P, SVG_X_MAX_P = 60, 150
    SVG_Y_MIN_P, SVG_Y_MAX_P = 280, 470

    def scale_x_plain(x):
        # Linear map [X_MIN, X_MAX] -> [SVG_X_MIN_P, SVG_X_MAX_P].
        fraction = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_P + fraction * (SVG_X_MAX_P - SVG_X_MIN_P)

    def scale_y_plain(y):
        # Linear map [Y_MIN, Y_MAX] -> [SVG_Y_MAX_P, SVG_Y_MIN_P]; the axis is
        # flipped, so larger values get smaller pixel y.
        fraction = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_P - fraction * (SVG_Y_MAX_P - SVG_Y_MIN_P)

    return (
        SVG_X_MAX_P,
        SVG_X_MIN_P,
        SVG_Y_MAX_P,
        SVG_Y_MIN_P,
        X_MAX,
        X_MIN,
        Y_MAX,
        Y_MIN,
        scale_x_plain,
        scale_y_plain,
    )
|
|
|
|
|
@app.cell
def _(macro_class_top3, scale_x_plain, scale_y_plain, tdf_winners):
    # First entry of the Plain top three, as a string.
    Cplain = str(macro_class_top3['Plain'][0])

    # The country string is matched in capitalised form ("FRA" -> "Fra") as a
    # substring of each winner's birth country.
    _needle = Cplain.capitalize()

    # One point per matching winner: x = scaled height, y = scaled weight,
    # 'data-id' = the row's index.
    datapoints_plain = []
    # Iterate over the table's actual length instead of a fixed 106 rows.
    # Rows are looked up by label, so a default 0..n-1 index is assumed.
    for i in range(len(tdf_winners)):
        # A blank needle (the " " placeholder used when the class has no
        # stages) would otherwise match every name containing a space.
        if _needle.strip() and _needle in tdf_winners["birth_country"][i]:
            datapoints_plain.append(
                {
                    'x': scale_x_plain(tdf_winners["height"][i]),
                    'y': scale_y_plain(tdf_winners["weight"][i]),
                    'data-id': i,
                }
            )

    print(Cplain)
    print(datapoints_plain)
    return Cplain, datapoints_plain, i
|
|
|
|
|
@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    # Pixel rectangle that the second (Hill) scatter is drawn into.
    SVG_X_MIN_C, SVG_X_MAX_C = 210, 300
    SVG_Y_MIN_C, SVG_Y_MAX_C = 280, 470

    def scale_x_hill(x):
        # Linear map [X_MIN, X_MAX] -> [SVG_X_MIN_C, SVG_X_MAX_C].
        fraction = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_C + fraction * (SVG_X_MAX_C - SVG_X_MIN_C)

    def scale_y_hill(y):
        # Linear map [Y_MIN, Y_MAX] -> [SVG_Y_MAX_C, SVG_Y_MIN_C]; the axis is
        # flipped, so larger values get smaller pixel y.
        fraction = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_C - fraction * (SVG_Y_MAX_C - SVG_Y_MIN_C)

    return (
        SVG_X_MAX_C,
        SVG_X_MIN_C,
        SVG_Y_MAX_C,
        SVG_Y_MIN_C,
        scale_x_hill,
        scale_y_hill,
    )
|
|
|
|
|
@app.cell
def _(i, macro_class_top3, scale_x_hill, scale_y_hill, tdf_winners):
    # NOTE: `i` stays in the signature only to keep it unchanged; this cell no
    # longer reads it (see 'data-id' below).

    # First entry of the Hill top three, as a string.
    Chill = str(macro_class_top3['Hill'][0])

    # The country string is matched in capitalised form ("FRA" -> "Fra") as a
    # substring of each winner's birth country.
    _needle = Chill.capitalize()

    # One point per matching winner: x = scaled height, y = scaled weight,
    # 'data-id' = the row's index.
    datapoints_hill = []
    # Iterate over the table's actual length instead of a fixed 106 rows.
    # Rows are looked up by label, so a default 0..n-1 index is assumed.
    for j in range(len(tdf_winners)):
        # A blank needle (the " " placeholder used when the class has no
        # stages) would otherwise match every name containing a space.
        if _needle.strip() and _needle in tdf_winners["birth_country"][j]:
            datapoints_hill.append(
                {
                    'x': scale_x_hill(tdf_winners["height"][j]),
                    'y': scale_y_hill(tdf_winners["weight"][j]),
                    # Use this loop's own index; the leftover `i` from another
                    # cell gave every point the same id.
                    'data-id': j,
                }
            )

    return Chill, datapoints_hill, j
|
|
|
|
|
@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    # Pixel rectangle that the third (Mountain) scatter is drawn into.
    SVG_X_MIN_M, SVG_X_MAX_M = 360, 450
    SVG_Y_MIN_M, SVG_Y_MAX_M = 280, 470

    def scale_x_Mountain(x):
        # Linear map [X_MIN, X_MAX] -> [SVG_X_MIN_M, SVG_X_MAX_M].
        fraction = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_M + fraction * (SVG_X_MAX_M - SVG_X_MIN_M)

    def scale_y_Mountain(y):
        # Linear map [Y_MIN, Y_MAX] -> [SVG_Y_MAX_M, SVG_Y_MIN_M]; the axis is
        # flipped, so larger values get smaller pixel y.
        fraction = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_M - fraction * (SVG_Y_MAX_M - SVG_Y_MIN_M)

    return (
        SVG_X_MAX_M,
        SVG_X_MIN_M,
        SVG_Y_MAX_M,
        SVG_Y_MIN_M,
        scale_x_Mountain,
        scale_y_Mountain,
    )
|
|
|
|
|
@app.cell
def _(macro_class_top3, scale_x_Mountain, scale_y_Mountain, tdf_winners):
    # First entry of the Mountain top three, as a string.
    CMountain = str(macro_class_top3['Mountain'][0])

    # The country string is matched in capitalised form ("FRA" -> "Fra") as a
    # substring of each winner's birth country.
    _needle = CMountain.capitalize()

    # One point per matching winner: x = scaled height, y = scaled weight,
    # 'data-id' = the row's index.
    datapoints_Mountain = []
    # Iterate over the table's actual length instead of a fixed 106 rows.
    # Rows are looked up by label, so a default 0..n-1 index is assumed.
    for k in range(len(tdf_winners)):
        # A blank needle (the " " placeholder used when the class has no
        # stages) would otherwise match every name containing a space.
        if _needle.strip() and _needle in tdf_winners["birth_country"][k]:
            datapoints_Mountain.append(
                {
                    'x': scale_x_Mountain(tdf_winners["height"][k]),
                    'y': scale_y_Mountain(tdf_winners["weight"][k]),
                    'data-id': k,
                }
            )

    return CMountain, datapoints_Mountain, k
|
|
|
|
|
@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    # Pixel rectangle that the fourth (Chrono) scatter is drawn into.
    # NOTE(review): this x range is 80 px wide (520..600) while the other
    # three panels use 90 px - confirm that 520 is intentional.
    SVG_X_MIN_CR, SVG_X_MAX_CR = 520, 600
    SVG_Y_MIN_CR, SVG_Y_MAX_CR = 280, 470

    def scale_x_Chrono(x):
        # Linear map [X_MIN, X_MAX] -> [SVG_X_MIN_CR, SVG_X_MAX_CR].
        fraction = (x - X_MIN) / (X_MAX - X_MIN)
        return SVG_X_MIN_CR + fraction * (SVG_X_MAX_CR - SVG_X_MIN_CR)

    def scale_y_Chrono(y):
        # Linear map [Y_MIN, Y_MAX] -> [SVG_Y_MAX_CR, SVG_Y_MIN_CR]; the axis
        # is flipped, so larger values get smaller pixel y.
        fraction = (y - Y_MIN) / (Y_MAX - Y_MIN)
        return SVG_Y_MAX_CR - fraction * (SVG_Y_MAX_CR - SVG_Y_MIN_CR)

    return (
        SVG_X_MAX_CR,
        SVG_X_MIN_CR,
        SVG_Y_MAX_CR,
        SVG_Y_MIN_CR,
        scale_x_Chrono,
        scale_y_Chrono,
    )
|
|
|
|
|
@app.cell
def _(macro_class_top3, scale_x_Chrono, scale_y_Chrono, tdf_winners):
    # First entry of the Chrono top three, as a string.
    CChrono = str(macro_class_top3['Chrono'][0])

    # The country string is matched in capitalised form ("FRA" -> "Fra") as a
    # substring of each winner's birth country.
    _needle = CChrono.capitalize()

    # One point per matching winner: x = scaled height, y = scaled weight,
    # 'data-id' = the row's index.
    datapoints_Chrono = []
    # Iterate over the table's actual length instead of a fixed 106 rows.
    # Rows are looked up by label, so a default 0..n-1 index is assumed.
    for z in range(len(tdf_winners)):
        # A blank needle (the " " placeholder used when the class has no
        # stages) would otherwise match every name containing a space.
        if _needle.strip() and _needle in tdf_winners["birth_country"][z]:
            datapoints_Chrono.append(
                {
                    'x': scale_x_Chrono(tdf_winners["height"][z]),
                    'y': scale_y_Chrono(tdf_winners["weight"][z]),
                    'data-id': z,
                }
            )

    return CChrono, datapoints_Chrono, z
|
|
|
|
|
@app.cell
def _(
    Circle,
    datapoints_Chrono,
    datapoints_Mountain,
    datapoints_hill,
    datapoints_plain,
    pd,
    radius,
):
    def _has_both_coordinates(point):
        # True when neither the x nor the y of the point is missing.
        return not pd.isna(point["x"]) and not pd.isna(point["y"])

    def _marker(point, colour):
        # Half-transparent circle with a thin white outline at the point's
        # position; the radius is the slider's current value.
        return Circle(
            cx=point["x"],
            cy=point["y"],
            r=radius.value,
            fill=colour,
            fill_opacity=0.5,
            stroke_width=1,
            stroke="white",
        )

    # One list of circles per panel; points with a missing coordinate are
    # left out.
    circles_plain = []
    for datapoint in datapoints_plain:
        if _has_both_coordinates(datapoint):
            circles_plain.append(_marker(datapoint, "green"))

    circles_hill = []
    for datapoint in datapoints_hill:
        if _has_both_coordinates(datapoint):
            circles_hill.append(_marker(datapoint, "orange"))

    circles_mountain = []
    for datapoint in datapoints_Mountain:
        if _has_both_coordinates(datapoint):
            circles_mountain.append(_marker(datapoint, "brown"))

    circles_chrono = []
    for datapoint in datapoints_Chrono:
        if _has_both_coordinates(datapoint):
            circles_chrono.append(_marker(datapoint, "darkgray"))

    return (
        circles_chrono,
        circles_hill,
        circles_mountain,
        circles_plain,
        datapoint,
    )
|
|
|
|
|
@app.cell
def _(
    CChrono,
    CMountain,
    Chill,
    Cplain,
    Line,
    Polygon,
    SVG,
    Text,
    circles_chrono,
    circles_hill,
    circles_mountain,
    circles_plain,
    macro_class_top3,
    mo,
    radius,
):
    # Background shapes. The four tall rectangles (x 30-180 green, 180-330
    # orange, 330-480 chocolate, 480-630 gray; y 250-650) are the panels; the
    # remaining polygons are the slanted shapes above and beside them.
    _shapes = [
        Polygon(points=[(30, 250), (180, 250), (180, 650), (30, 650)], fill="green", stroke="black"),
        Polygon(points=[(30, 250), (70, 220), (220, 220), (180, 250)], fill="forestgreen", stroke="black"),
        Polygon(points=[(180, 250), (330, 250), (330, 650), (180, 650)], fill="orange", stroke="black"),
        Polygon(points=[(180, 250), (330, 250), (370, 220), (220, 220)], fill="chocolate", stroke="black"),
        Polygon(points=[(180, 250), (330, 250), (330, 200)], fill="goldenrod", stroke="black"),
        Polygon(points=[(180, 250), (220, 220), (370, 170), (330, 200)], fill="gold", stroke="black"),
        Polygon(points=[(330, 250), (480, 250), (420, 130), (330, 200)], fill="maroon", stroke="black"),
        Polygon(points=[(330, 650), (480, 650), (480, 250), (330, 250)], fill="chocolate", stroke="black"),
        Polygon(points=[(330, 200), (370, 170), (450, 100), (520, 220), (480, 250), (420, 130)], fill="maroon", stroke="black"),
        Polygon(points=[(480, 250), (630, 250), (670, 220), (520, 220)], fill="gray", stroke="black"),
        Polygon(points=[(480, 250), (630, 250), (630, 650), (480, 650)], fill="gray", stroke="black"),
        Polygon(points=[(630, 650), (670, 630), (670, 220), (630, 250)], fill="darkgray", stroke="black"),
    ]

    # Two horizontal rules across all panels (y=500 and y=570), each continued
    # by a short slanted segment on the right-hand side.
    _rules = [
        Line(x1=30, y1=500, x2=630, y2=500, stroke="black", stroke_width=1),
        Line(x1=630, y1=500, x2=670, y2=480, stroke="black", stroke_width=1),
        Line(x1=30, y1=570, x2=630, y2=570, stroke="black", stroke_width=1),
        Line(x1=630, y1=570, x2=670, y2=550, stroke="black", stroke_width=1),
    ]

    # One row per panel: x of its left edge, text colour, the first-place
    # string and the panel's three-entry ranking. Every label sits at a fixed
    # offset from the left edge.
    _panels = [
        (30, "darkgreen", Cplain, macro_class_top3['Plain']),
        (180, "darkorange", Chill, macro_class_top3['Hill']),
        (330, "brown", CMountain, macro_class_top3['Mountain']),
        (480, "darkgray", CChrono, macro_class_top3['Chrono']),
    ]

    # "height" / "weight" captions, emitted panel by panel.
    _captions = [
        Text(x=left + dx, y=y, text=caption, font_size=20, fill=colour)
        for left, colour, first, ranking in _panels
        for dx, y, caption in ((90, 490, "height"), (10, 270, "weight"))
    ]

    # Ranking labels: first place largest, then second and third.
    _first_places = [
        Text(x=left + 20, y=400, text=first, font_size=60, fill=colour)
        for left, colour, first, ranking in _panels
    ]
    _second_places = [
        Text(x=left + 40, y=550, text=str(ranking[1]), font_size=40, fill=colour)
        for left, colour, first, ranking in _panels
    ]
    _third_places = [
        Text(x=left + 55, y=620, text=str(ranking[2]), font_size=20, fill=colour)
        for left, colour, first, ranking in _panels
    ]

    # Element order: shapes, rules, text, then the circles.
    plot = SVG(
        width=700,
        height=800,
        elements=(
            _shapes
            + _rules
            + _captions
            + _first_places
            + _second_places
            + _third_places
            + circles_plain
            + circles_hill
            + circles_mountain
            + circles_chrono
        ),
    )

    # Rendered drawing with the radius slider beside it. The SVG is serialised
    # once, here.
    mo.hstack([mo.Html(plot.as_str()), radius], justify="start")
    return (plot,)
|
|
|
|
|
# Run the notebook application when the file is executed as a script.
if __name__ == "__main__":
    app.run()
|
|
|
|