Spaces:
Running
Running
Fix some typos
Browse files
polars/05_reactive_plots.py
CHANGED
@@ -28,7 +28,7 @@ def _(mo):
|
|
28 |
|
29 |
We will be using a [Spotify Tracks dataset](https://huggingface.co/datasets/maharshipandya/spotify-tracks-dataset). Before you write any code yourself, I recommend taking some time to understand the data you're working with, from which columns are available to what are their possible values, as well as more abstract details such as the scope, coverage and intended uses of the dataset.
|
30 |
|
31 |
-
Note that this dataset does not contains data about ***all*** tracks, you can try using a larger dataset such as [bigdata-pw/Spotify](https://huggingface.co/datasets/bigdata-pw/Spotify), but I'm sticking with the smaller one to keep the notebook size
|
32 |
"""
|
33 |
)
|
34 |
return
|
@@ -73,7 +73,7 @@ def _(lz, pl):
|
|
73 |
.drop("Unnamed: 0", "track_id", "explicit")
|
74 |
.with_columns(
|
75 |
# Perform whichever transformations you want (again somewhat arbitrary in this example)
|
76 |
-
# Convert the duration from
|
77 |
pl.col("duration_ms").floordiv(1_000).alias("duration_seconds"),
|
78 |
# Convert the popularity from an integer 0 ~ 100 to a percentage 0 ~ 1.0
|
79 |
pl.col("popularity").truediv(100),
|
@@ -158,7 +158,7 @@ def _(df, get_extremes, pl, plot):
|
|
158 |
# Now, we want to filter to only include tracks whose duration falls inside of our selection - we will need to first identify the extremes, then filter based on them
|
159 |
min_dur, max_dur = get_extremes(
|
160 |
plot.value, col="duration_seconds", defaults_if_missing=(120, 360)
|
161 |
-
) #
|
162 |
# Calculate how many we are keeping vs throwing away with the filter
|
163 |
duration_in_range = pl.col("duration_seconds").is_between(min_dur, max_dur)
|
164 |
print(
|
|
|
28 |
|
29 |
We will be using a [Spotify Tracks dataset](https://huggingface.co/datasets/maharshipandya/spotify-tracks-dataset). Before you write any code yourself, I recommend taking some time to understand the data you're working with, from which columns are available to what are their possible values, as well as more abstract details such as the scope, coverage and intended uses of the dataset.
|
30 |
|
31 |
+
Note that this dataset does not contain data about ***all*** tracks; you can try using a larger dataset such as [bigdata-pw/Spotify](https://huggingface.co/datasets/bigdata-pw/Spotify), but I'm sticking with the smaller one to keep the notebook size manageable for most users.
|
32 |
"""
|
33 |
)
|
34 |
return
|
|
|
73 |
.drop("Unnamed: 0", "track_id", "explicit")
|
74 |
.with_columns(
|
75 |
# Perform whichever transformations you want (again somewhat arbitrary in this example)
|
76 |
+
# Convert the duration from milliseconds to seconds (int)
|
77 |
pl.col("duration_ms").floordiv(1_000).alias("duration_seconds"),
|
78 |
# Convert the popularity from an integer 0 ~ 100 to a percentage 0 ~ 1.0
|
79 |
pl.col("popularity").truediv(100),
|
|
|
158 |
# Now, we want to filter to only include tracks whose duration falls inside of our selection - we will need to first identify the extremes, then filter based on them
|
159 |
min_dur, max_dur = get_extremes(
|
160 |
plot.value, col="duration_seconds", defaults_if_missing=(120, 360)
|
161 |
+
) # Utility function defined at the bottom of the notebook
|
162 |
# Calculate how many we are keeping vs throwing away with the filter
|
163 |
duration_in_range = pl.col("duration_seconds").is_between(min_dur, max_dur)
|
164 |
print(
|