import marimo

__generated_with = "0.12.0"
app = marimo.App(width="medium")


@app.cell
def _():
    # Shared imports for the whole notebook.
    import marimo as mo
    import pandas as pd
    from svg import SVG, G, Circle, Path, Title, Rect, Line, Polygon, Text
    import numpy as np
    from collections import Counter
    return (
        Circle,
        Counter,
        G,
        Line,
        Path,
        Polygon,
        Rect,
        SVG,
        Text,
        Title,
        mo,
        np,
        pd,
    )


@app.cell
def _(pd):
    # Data import.
    # NOTE(review): these are absolute, machine-specific paths; the notebook
    # only runs where these exact files exist. Consider paths relative to the
    # notebook instead.
    stage_data = pd.read_csv("C:/Users/paolo/Desktop/data visualisation project/stage_data.csv")
    tdf_stages = pd.read_csv("C:/Users/paolo/Desktop/data visualisation project/tdf_stages.csv")
    tdf_winners = pd.read_csv("C:/Users/paolo/Desktop/data visualisation project/tdf_winners.csv")
    return stage_data, tdf_stages, tdf_winners


@app.cell
def _(mo, pd, tdf_stages):
    # Text inputs for the initial and final dates used to filter the stages.
    # Unparseable dates become NaT (errors='coerce'); min()/max() skip them.
    tdf_stages['Date'] = pd.to_datetime(tdf_stages['Date'], errors='coerce')

    start_date_input = mo.ui.text(
        label="Initial Date (YYYY-MM-DD)",
        value=tdf_stages["Date"].min().strftime("%Y-%m-%d"),
    )
    end_date_input = mo.ui.text(
        label="Final date (YYYY-MM-DD)",
        value=tdf_stages["Date"].max().strftime("%Y-%m-%d"),
    )
    return end_date_input, start_date_input


@app.cell
def _(end_date_input, mo, start_date_input):
    # Show the two date inputs side by side in the cell output.
    mo.hstack(
        [start_date_input, end_date_input],
        justify="start",
    )
    return


@app.cell
def _(mo):
    # Slider selecting the radius of the scatter-plot circles.
    radius = mo.ui.slider(start=1, stop=10, step=1, value=5, label="radius")
    return (radius,)


@app.cell
def _(Counter, end_date_input, start_date_input, tdf_stages):
    # Collapse the raw stage "Type" labels into macro classes. Labels missing
    # from this mapping become NaN in 'macro_class'.
    macro_class_mapping = {
        'Flat cobblestone stage': 'Plain',
        'Flat stage': 'Plain',
        'Flat Stage': 'Plain',
        'Half Stage': 'Other',
        'High mountain stage': 'Mountain',
        'Hilly stage': 'Hill',
        'Individual time trial': 'Chrono',
        'Intermediate stage': 'Other',
        'Medium mountain stage': 'Mountain',
        'Mountain stage': 'Mountain',
        'Mountain Stage': 'Mountain',
        'Mountain time trial': 'Chrono',
        'Plain stage': 'Plain',
        'Plain stage with cobblestones': 'Plain',
        'Stage with mountain': 'Mountain',
        'Stage with mountain(s)': 'Mountain',
        'Team time trial': 'Chrono',
        'Transition stage': 'Other',
    }

    tdf_stages['macro_class'] = tdf_stages['Type'].map(macro_class_mapping)

    # Filter the stages using the dates typed in the text inputs. The values
    # are strings compared directly against the datetime 'Date' column.
    start_date = start_date_input.value
    end_date = end_date_input.value
    filtered_df = tdf_stages[
        (tdf_stages['Date'] >= start_date) & (tdf_stages['Date'] <= end_date)
    ]

    # For each plotted macro class, find the 3 countries with the most wins.
    # ('Other' is mapped above but intentionally not listed here.)
    all_macro_classes = ['Plain', 'Hill', 'Mountain', 'Chrono']

    macro_class_top3 = {}
    for macro_class in all_macro_classes:
        if macro_class in filtered_df['macro_class'].unique():
            group = filtered_df[filtered_df['macro_class'] == macro_class]
            top_countries = Counter(group['Winner_Country']).most_common(3)
            macro_class_top3[macro_class] = [country for country, _ in top_countries]
            # Pad to exactly three entries so downstream indexing [0..2] is safe.
            while len(macro_class_top3[macro_class]) < 3:
                macro_class_top3[macro_class].append("NA")
        else:
            # No stage of this class in the selected range: blank placeholders.
            macro_class_top3[macro_class] = [' ', ' ', ' ']

    print(macro_class_top3)
    return (
        all_macro_classes,
        end_date,
        filtered_df,
        group,
        macro_class,
        macro_class_mapping,
        macro_class_top3,
        start_date,
        top_countries,
    )


@app.cell
def _(tdf_winners):
    ## Scale functions for flat stages
    # Data-space bounds, shared by the scale functions of every panel.
    X_MIN, X_MAX = tdf_winners["height"].min(), tdf_winners["height"].max()
    Y_MIN, Y_MAX = tdf_winners["weight"].min(), tdf_winners["weight"].max()

    # Pixel bounds of the plain-stage panel.
    SVG_X_MIN_P, SVG_X_MAX_P = 60, 150
    SVG_Y_MIN_P, SVG_Y_MAX_P = 280, 470

    def scale_x_plain(x):
        """Linearly map a height value onto the plain panel's x pixel range."""
        return SVG_X_MIN_P + (x - X_MIN) / (X_MAX - X_MIN) * (SVG_X_MAX_P - SVG_X_MIN_P)

    def scale_y_plain(y):
        """Linearly map a weight value onto the plain panel's y range (inverted: larger is higher)."""
        return SVG_Y_MAX_P - (y - Y_MIN) / (Y_MAX - Y_MIN) * (SVG_Y_MAX_P - SVG_Y_MIN_P)
    return (
        SVG_X_MAX_P,
        SVG_X_MIN_P,
        SVG_Y_MAX_P,
        SVG_Y_MIN_P,
        X_MAX,
        X_MIN,
        Y_MAX,
        Y_MIN,
        scale_x_plain,
        scale_y_plain,
    )


@app.cell
def _(macro_class_top3, scale_x_plain, scale_y_plain, tdf_winners):
    ## Datapoints for plain stages
    Cplain = str(macro_class_top3['Plain'][0])
    datapoints_plain = []

    # A blank label is the "no stages in range" placeholder; without this
    # check ' ' would match every birth country containing a space.
    if Cplain.strip():
        _needle = Cplain.capitalize()
        # Iterate the real index instead of a hard-coded row count.
        for _i in tdf_winners.index:
            _country = tdf_winners["birth_country"][_i]
            # Skip missing (non-string) birth countries instead of raising.
            if isinstance(_country, str) and _needle in _country:
                datapoints_plain.append(
                    {
                        'x': scale_x_plain(tdf_winners["height"][_i]),
                        'y': scale_y_plain(tdf_winners["weight"][_i]),
                        'data-id': _i,
                    }
                )

    print(Cplain)
    print(datapoints_plain)
    return Cplain, datapoints_plain


@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    ## Scale functions for hilly stages
    # Pixel bounds of the hill-stage panel.
    SVG_X_MIN_C, SVG_X_MAX_C = 210, 300
    SVG_Y_MIN_C, SVG_Y_MAX_C = 280, 470

    def scale_x_hill(x):
        """Linearly map a height value onto the hill panel's x pixel range."""
        return SVG_X_MIN_C + (x - X_MIN) / (X_MAX - X_MIN) * (SVG_X_MAX_C - SVG_X_MIN_C)

    def scale_y_hill(y):
        """Linearly map a weight value onto the hill panel's y range (inverted: larger is higher)."""
        return SVG_Y_MAX_C - (y - Y_MIN) / (Y_MAX - Y_MIN) * (SVG_Y_MAX_C - SVG_Y_MIN_C)
    return (
        SVG_X_MAX_C,
        SVG_X_MIN_C,
        SVG_Y_MAX_C,
        SVG_Y_MIN_C,
        scale_x_hill,
        scale_y_hill,
    )


@app.cell
def _(macro_class_top3, scale_x_hill, scale_y_hill, tdf_winners):
    ## Datapoints for hilly stages
    Chill = str(macro_class_top3['Hill'][0])
    datapoints_hill = []

    # A blank label is the "no stages in range" placeholder; without this
    # check ' ' would match every birth country containing a space.
    if Chill.strip():
        _needle = Chill.capitalize()
        for _i in tdf_winners.index:
            _country = tdf_winners["birth_country"][_i]
            if isinstance(_country, str) and _needle in _country:
                datapoints_hill.append(
                    {
                        'x': scale_x_hill(tdf_winners["height"][_i]),
                        'y': scale_y_hill(tdf_winners["weight"][_i]),
                        # Use this loop's own row index (previously the index
                        # leaked from the plain-stage cell was stored here).
                        'data-id': _i,
                    }
                )
    return Chill, datapoints_hill


@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    ## Scale functions for Mountain stages
    # Pixel bounds of the mountain-stage panel.
    SVG_X_MIN_M, SVG_X_MAX_M = 360, 450
    SVG_Y_MIN_M, SVG_Y_MAX_M = 280, 470

    def scale_x_Mountain(x):
        """Linearly map a height value onto the mountain panel's x pixel range."""
        return SVG_X_MIN_M + (x - X_MIN) / (X_MAX - X_MIN) * (SVG_X_MAX_M - SVG_X_MIN_M)

    def scale_y_Mountain(y):
        """Linearly map a weight value onto the mountain panel's y range (inverted: larger is higher)."""
        return SVG_Y_MAX_M - (y - Y_MIN) / (Y_MAX - Y_MIN) * (SVG_Y_MAX_M - SVG_Y_MIN_M)
    return (
        SVG_X_MAX_M,
        SVG_X_MIN_M,
        SVG_Y_MAX_M,
        SVG_Y_MIN_M,
        scale_x_Mountain,
        scale_y_Mountain,
    )


@app.cell
def _(macro_class_top3, scale_x_Mountain, scale_y_Mountain, tdf_winners):
    ## Datapoints for Mountain stages
    CMountain = str(macro_class_top3['Mountain'][0])
    datapoints_Mountain = []

    # A blank label is the "no stages in range" placeholder; without this
    # check ' ' would match every birth country containing a space.
    if CMountain.strip():
        _needle = CMountain.capitalize()
        for _i in tdf_winners.index:
            _country = tdf_winners["birth_country"][_i]
            if isinstance(_country, str) and _needle in _country:
                datapoints_Mountain.append(
                    {
                        'x': scale_x_Mountain(tdf_winners["height"][_i]),
                        'y': scale_y_Mountain(tdf_winners["weight"][_i]),
                        'data-id': _i,
                    }
                )
    return CMountain, datapoints_Mountain


@app.cell
def _(X_MAX, X_MIN, Y_MAX, Y_MIN):
    ## Scale functions for Chrono stages
    # Pixel bounds of the chrono-stage panel.
    # NOTE(review): this x range is 80 px wide (520-600) while the other three
    # panels use 90 px; confirm whether 510 was intended.
    SVG_X_MIN_CR, SVG_X_MAX_CR = 520, 600
    SVG_Y_MIN_CR, SVG_Y_MAX_CR = 280, 470

    def scale_x_Chrono(x):
        """Linearly map a height value onto the chrono panel's x pixel range."""
        return SVG_X_MIN_CR + (x - X_MIN) / (X_MAX - X_MIN) * (SVG_X_MAX_CR - SVG_X_MIN_CR)

    def scale_y_Chrono(y):
        """Linearly map a weight value onto the chrono panel's y range (inverted: larger is higher)."""
        return SVG_Y_MAX_CR - (y - Y_MIN) / (Y_MAX - Y_MIN) * (SVG_Y_MAX_CR - SVG_Y_MIN_CR)
    return (
        SVG_X_MAX_CR,
        SVG_X_MIN_CR,
        SVG_Y_MAX_CR,
        SVG_Y_MIN_CR,
        scale_x_Chrono,
        scale_y_Chrono,
    )


@app.cell
def _(macro_class_top3, scale_x_Chrono, scale_y_Chrono, tdf_winners):
    ## Datapoints for Chrono stages
    CChrono = str(macro_class_top3['Chrono'][0])
    datapoints_Chrono = []

    # A blank label is the "no stages in range" placeholder; without this
    # check ' ' would match every birth country containing a space.
    if CChrono.strip():
        _needle = CChrono.capitalize()
        for _i in tdf_winners.index:
            _country = tdf_winners["birth_country"][_i]
            if isinstance(_country, str) and _needle in _country:
                datapoints_Chrono.append(
                    {
                        'x': scale_x_Chrono(tdf_winners["height"][_i]),
                        'y': scale_y_Chrono(tdf_winners["weight"][_i]),
                        'data-id': _i,
                    }
                )
    return CChrono, datapoints_Chrono


@app.cell
def _(
    Circle,
    datapoints_Chrono,
    datapoints_Mountain,
    datapoints_hill,
    datapoints_plain,
    pd,
    radius,
):
    # Create the circles for each stage type.
    def _make_circles(datapoints, fill):
        """Build one Circle per datapoint, skipping points whose x or y is missing."""
        return [
            Circle(
                cx=point["x"],
                cy=point["y"],
                r=radius.value,
                fill=fill,
                fill_opacity=0.5,
                stroke_width=1,
                stroke="white",
            )
            for point in datapoints
            if not (pd.isna(point["x"]) or pd.isna(point["y"]))
        ]

    circles_plain = _make_circles(datapoints_plain, "green")
    circles_hill = _make_circles(datapoints_hill, "orange")
    circles_mountain = _make_circles(datapoints_Mountain, "brown")
    circles_chrono = _make_circles(datapoints_Chrono, "darkgray")
    return circles_chrono, circles_hill, circles_mountain, circles_plain


@app.cell
def _(
    CChrono,
    CMountain,
    Chill,
    Cplain,
    Line,
    Polygon,
    SVG,
    Text,
    circles_chrono,
    circles_hill,
    circles_mountain,
    circles_plain,
    macro_class_top3,
    mo,
    radius,
):
    # Complete graph: background shapes, separators, labels, then the circles
    # appended last so they are listed after the shapes and text.
    plot = SVG(
        width=700,
        height=800,
        elements=[
            # Rectangles and polygons
            Polygon(points=[(30, 250), (180, 250), (180, 650), (30, 650)], fill="green", stroke="black"),
            Polygon(points=[(30, 250), (70, 220), (220, 220), (180, 250)], fill="forestgreen", stroke="black"),
            Polygon(points=[(180, 250), (330, 250), (330, 650), (180, 650)], fill="orange", stroke="black"),
            Polygon(points=[(180, 250), (330, 250), (370, 220), (220, 220)], fill="chocolate", stroke="black"),
            Polygon(points=[(180, 250), (330, 250), (330, 200)], fill="goldenrod", stroke="black"),
            Polygon(points=[(180, 250), (220, 220), (370, 170), (330, 200)], fill="gold", stroke="black"),
            Polygon(points=[(330, 250), (480, 250), (420, 130), (330, 200)], fill="maroon", stroke="black"),
            Polygon(points=[(330, 650), (480, 650), (480, 250), (330, 250)], fill="chocolate", stroke="black"),
            Polygon(points=[(330, 200), (370, 170), (450, 100), (520, 220), (480, 250), (420, 130)], fill="maroon", stroke="black"),
            Polygon(points=[(480, 250), (630, 250), (670, 220), (520, 220)], fill="gray", stroke="black"),
            Polygon(points=[(480, 250), (630, 250), (630, 650), (480, 650)], fill="gray", stroke="black"),
            Polygon(points=[(630, 650), (670, 630), (670, 220), (630, 250)], fill="darkgray", stroke="black"),

            # Lines
            Line(x1=30, y1=500, x2=630, y2=500, stroke="black", stroke_width=1),
            Line(x1=630, y1=500, x2=670, y2=480, stroke="black", stroke_width=1),
            Line(x1=30, y1=570, x2=630, y2=570, stroke="black", stroke_width=1),
            Line(x1=630, y1=570, x2=670, y2=550, stroke="black", stroke_width=1),

            # Text labels: axis names
            Text(x=120, y=490, text="height", font_size=20, fill="darkgreen"),
            Text(x=40, y=270, text="weight", font_size=20, fill="darkgreen"),
            Text(x=270, y=490, text="height", font_size=20, fill="darkorange"),
            Text(x=190, y=270, text="weight", font_size=20, fill="darkorange"),
            Text(x=420, y=490, text="height", font_size=20, fill="brown"),
            Text(x=340, y=270, text="weight", font_size=20, fill="brown"),
            Text(x=570, y=490, text="height", font_size=20, fill="darkgray"),
            Text(x=490, y=270, text="weight", font_size=20, fill="darkgray"),

            # Text labels: first-ranked country per class
            Text(x=50, y=400, text=Cplain, font_size=60, fill="darkgreen"),
            Text(x=200, y=400, text=Chill, font_size=60, fill="darkorange"),
            Text(x=350, y=400, text=CMountain, font_size=60, fill="brown"),
            Text(x=500, y=400, text=CChrono, font_size=60, fill="darkgray"),

            # Text labels: second-ranked country per class
            Text(x=70, y=550, text=str(macro_class_top3['Plain'][1]), font_size=40, fill="darkgreen"),
            Text(x=220, y=550, text=str(macro_class_top3['Hill'][1]), font_size=40, fill="darkorange"),
            Text(x=370, y=550, text=str(macro_class_top3['Mountain'][1]), font_size=40, fill="brown"),
            Text(x=520, y=550, text=str(macro_class_top3['Chrono'][1]), font_size=40, fill="darkgray"),

            # Text labels: third-ranked country per class
            Text(x=85, y=620, text=str(macro_class_top3['Plain'][2]), font_size=20, fill="darkgreen"),
            Text(x=235, y=620, text=str(macro_class_top3['Hill'][2]), font_size=20, fill="darkorange"),
            Text(x=385, y=620, text=str(macro_class_top3['Mountain'][2]), font_size=20, fill="brown"),
            Text(x=535, y=620, text=str(macro_class_top3['Chrono'][2]), font_size=20, fill="darkgray"),
        ] + circles_plain + circles_hill + circles_mountain + circles_chrono,
    )

    # The final expression is the cell output: the plot next to the radius
    # slider. (A discarded standalone mo.Html(...) expression was removed.)
    mo.hstack(
        [mo.Html(plot.as_str()), radius],
        justify="start",
    )
    return (plot,)


if __name__ == "__main__":
    app.run()