cyberosa committed on
Commit
8834fdb
·
1 Parent(s): f26bf5c

updated live data including Friday

Browse files
data/closed_markets_div.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99b7ff8fafd742e4c7b4601adb95bd42cf560d6a81ac97819ea5748a6ba4b900
3
- size 50378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d715e676a0779f18b69e9f549175793e6581cb9e87a456f3e8b0bc7db26190d6
3
+ size 48884
data/daily_info.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23cc8b305b1f2f5b6cc7b4a6017c1aeb2a003ec5cc921be1ad66f380d29a6102
3
- size 658465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0bf740abcffe1facea0fed126c61a19406ef32474c8dd63d85ea9448f96f701
3
+ size 423117
data/unknown_daily_traders.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c74aad2a1150163d78233e9a54ffa262d195e43b68b9aba1e33536671075ef57
3
- size 163247
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db631b6cc5b6ff1aadd6ce3285dc032fe79c83cd14bb2c1cb1fa7b7917e61b0
3
+ size 25139
data/weekly_mech_calls.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a91eccf4392fc1cbdbb911fa2163a5aff3f4483692ab059083492047c3a4f55a
3
- size 50574
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e321e63d58f312fe2769880d9ec5ec9fba24229e427a514a3a9567936edbab5
3
+ size 50009
scripts/wow_retentions.py CHANGED
@@ -4,83 +4,64 @@ from utils import DATA_DIR
4
 
5
 
6
  # Basic Week over Week Retention
7
def calculate_wow_retention(
    traders_df: pd.DataFrame, trader_filter: str
) -> pd.DataFrame:
    """Compute week-over-week trader retention for one staking segment.

    Args:
        traders_df: Trades with at least the columns ``staking``,
            ``month_year_week`` and ``trader_address``.
        trader_filter: ``"Olas"`` keeps every row whose ``staking`` is not
            ``"non_Olas"``; ``"non_Olas"`` keeps only the ``"non_Olas"`` rows.

    Returns:
        One row per week (except the first one) with the columns
        ``month_year_week``, ``retained_traders``, ``previous_traders`` and
        ``retention_rate`` (percentage of the previous week's traders that
        traded again in the current week). "Previous week" means the
        preceding week that has data, not necessarily the calendar-adjacent
        one.

    Raises:
        NotImplementedError: If ``trader_filter`` is any other value.
    """
    if trader_filter == "Olas":
        df = traders_df.loc[traders_df["staking"] != "non_Olas"]
    elif trader_filter == "non_Olas":
        df = traders_df.loc[traders_df["staking"] == "non_Olas"]
    else:
        # Previously this branch only printed a message and then failed on the
        # unbound ``df``; fail explicitly with a meaningful error instead.
        raise NotImplementedError(
            f"Not implemented yet: trader_filter={trader_filter!r}"
        )

    # Unique traders per week, gathered in a single pass over the data.
    traders_by_week = {
        week: set(addresses)
        for week, addresses in df.groupby("month_year_week")["trader_address"]
    }

    # Week labels are "%b-%d-%Y" strings, whose lexical order is not
    # chronological ("Feb-..." sorts before "Jan-..."). Order them by date;
    # if the labels use another format, keep the grouped (sorted) order.
    weeks = pd.Series(list(traders_by_week), dtype=object)
    parsed = pd.to_datetime(weeks, format="%b-%d-%Y", errors="coerce")
    if parsed.notna().all():
        weeks = weeks.loc[parsed.sort_values(kind="stable").index]
    ordered_weeks = weeks.tolist()

    # Calculate retention
    retention = []
    for previous_week, current_week in zip(ordered_weeks, ordered_weeks[1:]):
        previous_traders = traders_by_week[previous_week]
        retained = len(traders_by_week[current_week] & previous_traders)
        retention.append(
            {
                "month_year_week": current_week,
                "retained_traders": retained,
                "previous_traders": len(previous_traders),
                "retention_rate": (retained / len(previous_traders)) * 100,
            }
        )

    return pd.DataFrame(
        retention,
        columns=[
            "month_year_week",
            "retained_traders",
            "previous_traders",
            "retention_rate",
        ],
    )
50
 
51
 
52
- # N-Week Rolling Retention
53
def calculate_nweek_retention(df: pd.DataFrame, n_weeks=4):
    """Compute the share of traders whose activity spans at least ``n_weeks``.

    A trader's activity span is the time between their first and last
    ``creation_timestamp``, expressed in weeks (whole days divided by 7).

    Args:
        df: Trades with at least the columns ``trader_address`` and
            ``creation_timestamp``.
        n_weeks: Minimum activity span, in weeks, for a trader to count as
            retained.

    Returns:
        A dict with ``total_traders``, ``"<n_weeks>_week_retained"`` and
        ``retention_rate`` (a percentage; ``0.0`` when there are no traders).
    """
    retained_key = f"{n_weeks}_week_retained"
    if df.empty:
        # No traders at all: avoid dividing by zero below.
        return {"total_traders": 0, retained_key: 0, "retention_rate": 0.0}

    # Convert before aggregating so min/max compare real datetimes rather
    # than whatever raw representation the column holds.
    timestamps = pd.to_datetime(df["creation_timestamp"])
    first_trade = timestamps.groupby(df["trader_address"]).min()
    last_trade = timestamps.groupby(df["trader_address"]).max()
    weeks_active = (last_trade - first_trade).dt.days / 7

    total_traders = len(weeks_active)
    retained = int((weeks_active >= n_weeks).sum())
    retention_rate = (retained / total_traders) * 100 if total_traders else 0.0

    return {
        "total_traders": total_traders,
        retained_key: retained,
        "retention_rate": retention_rate,
    }
78
-
79
-
80
  # Cohort Retention
81
- def calculate_cohort_retention(df, max_weeks=12):
82
  # Get first week for each trader
83
- # TODO check if first will retrieve the first week of the data or not
84
  first_trades = (
85
  df.groupby("trader_address")
86
  .agg({"creation_timestamp": "min", "month_year_week": "first"})
@@ -119,6 +100,10 @@ def calculate_cohort_retention(df, max_weeks=12):
119
  # Convert to percentages
120
  retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100
121
 
 
 
 
 
122
  # Limit to max_weeks if specified
123
  if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
124
  retention_matrix = retention_matrix.iloc[:, :max_weeks]
@@ -126,16 +111,28 @@ def calculate_cohort_retention(df, max_weeks=12):
126
  return retention_matrix.round(2)
127
 
128
 
129
if __name__ == "__main__":
    # Load the trades dataset and make sure timestamps are real datetimes,
    # ordered from oldest to newest.
    trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
    trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)

    # Label every trade with its weekly period, formatted as "%b-%d-%Y".
    week_periods = trades_df["creation_timestamp"].dt.to_period("W")
    trades_df["month_year_week"] = week_periods.dt.strftime("%b-%d-%Y")

    # Usage example:
    wow_retention = calculate_wow_retention(
        traders_df=trades_df, trader_filter="Olas"
    )
    rolling_retention = calculate_nweek_retention(df=trades_df, n_weeks=4)
    cohort_retention = calculate_cohort_retention(df=trades_df)
 
4
 
5
 
6
  # Basic Week over Week Retention
7
def calculate_wow_retention_by_type(df: pd.DataFrame) -> pd.DataFrame:
    """Compute week-over-week trader retention separately per trader type.

    For every trader type, each week is compared with the preceding week in
    which that type has any rows (not necessarily the calendar-adjacent
    week). The retention rate is the percentage of the previous week's
    traders that also traded in the current week.

    Args:
        df: Trades with at least the columns ``month_year_week``,
            ``trader_type`` and ``trader_address``.

    Returns:
        A frame with the columns ``trader_type``, ``week``,
        ``retained_traders``, ``previous_traders`` and ``retention_rate``
        (rounded to 2 decimals). The columns are present even when there is
        nothing to report.
    """
    columns = [
        "trader_type",
        "week",
        "retained_traders",
        "previous_traders",
        "retention_rate",
    ]

    # Single pass over the data: trader_type -> {week label -> addresses}.
    # This replaces two full-frame boolean filters per (type, week) pair.
    traders_by_type = {}
    grouped = df.groupby(["trader_type", "month_year_week"])["trader_address"]
    for (trader_type, week), addresses in grouped:
        traders_by_type.setdefault(trader_type, {})[week] = set(addresses)

    # Week labels are "%b-%d-%Y" strings, whose lexical order is not
    # chronological ("Feb-..." sorts before "Jan-..."). Rank them by date;
    # if the labels use another format, fall back to their sorted order.
    all_weeks = pd.Series(
        sorted({week for weekly in traders_by_type.values() for week in weekly}),
        dtype=object,
    )
    parsed = pd.to_datetime(all_weeks, format="%b-%d-%Y", errors="coerce")
    if parsed.notna().all():
        all_weeks = all_weeks.loc[parsed.sort_values(kind="stable").index]
    week_rank = {week: position for position, week in enumerate(all_weeks)}

    # Calculate retention
    retention = []
    for trader_type, weekly_traders in traders_by_type.items():
        weeks = sorted(weekly_traders, key=week_rank.__getitem__)
        for previous_week, current_week in zip(weeks, weeks[1:]):
            previous_traders = weekly_traders[previous_week]
            retained = len(weekly_traders[current_week] & previous_traders)
            retention_rate = (
                (retained / len(previous_traders)) * 100 if previous_traders else 0
            )
            retention.append(
                {
                    "trader_type": trader_type,
                    "week": current_week,
                    "retained_traders": retained,
                    "previous_traders": len(previous_traders),
                    "retention_rate": round(retention_rate, 2),
                }
            )

    return pd.DataFrame(retention, columns=columns)
60
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # Cohort Retention
63
+ def calculate_cohort_retention(df, max_weeks=12) -> pd.DataFrame:
64
  # Get first week for each trader
 
65
  first_trades = (
66
  df.groupby("trader_address")
67
  .agg({"creation_timestamp": "min", "month_year_week": "first"})
 
100
  # Convert to percentages
101
  retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100
102
 
103
+ # Sort index (cohort_week) chronologically
104
+ retention_matrix.index = pd.to_datetime(retention_matrix.index)
105
+ retention_matrix = retention_matrix.sort_index()
106
+
107
  # Limit to max_weeks if specified
108
  if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
109
  retention_matrix = retention_matrix.iloc[:, :max_weeks]
 
111
  return retention_matrix.round(2)
112
 
113
 
114
def prepare_retention_dataset() -> pd.DataFrame:
    """Build the combined trades dataset used by the retention metrics.

    Reads ``all_trades_profitability.parquet`` and ``unknown_traders.parquet``
    from ``DATA_DIR``, tags every row with a ``trader_type`` ("Olas",
    "non_Olas" or "unclassified"), concatenates both sources, sorts the rows
    by ``creation_timestamp`` and adds a ``month_year_week`` label derived
    from the weekly period of each timestamp.

    Returns:
        The combined, chronologically sorted data frame.
    """

    def _trader_type(staking_value):
        # Anything that is not exactly "non_Olas" is treated as "Olas".
        if staking_value == "non_Olas":
            return "non_Olas"
        return "Olas"

    # read all datasets
    known_traders = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    unclassified_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")

    known_traders["trader_type"] = known_traders["staking"].apply(_trader_type)
    unclassified_traders["trader_type"] = "unclassified"

    combined = pd.concat([known_traders, unclassified_traders], ignore_index=True)
    combined["creation_timestamp"] = pd.to_datetime(combined["creation_timestamp"])
    combined = combined.sort_values(by="creation_timestamp", ascending=True)

    # Weekly period of each trade, rendered as a "%b-%d-%Y" label.
    week_periods = combined["creation_timestamp"].dt.to_period("W")
    combined["month_year_week"] = week_periods.dt.strftime("%b-%d-%Y")
    return combined
132
+
133
+
134
if __name__ == "__main__":
    # Build the combined dataset of classified and unclassified traders.
    all_traders = prepare_retention_dataset()

    # Usage example:
    wow_retention = calculate_wow_retention_by_type(df=all_traders)
    cohort_retention = calculate_cohort_retention(df=all_traders)