patrickramos committed on
Commit
e5250f2
·
1 Parent(s): 42d898c

Translate team names

Browse files
Files changed (3) hide show
  1. daily_weekly_leaderboard.py +1 -0
  2. data.py +34 -2
  3. translate.py +54 -0
daily_weekly_leaderboard.py CHANGED
@@ -18,6 +18,7 @@ df = (
18
  .rename({
19
  'name': 'Name',
20
  'release_speed': 'Velocity',
 
21
  })
22
  )
23
 
 
18
  .rename({
19
  'name': 'Name',
20
  'release_speed': 'Velocity',
21
+ 'team': 'Team'
22
  })
23
  )
24
 
data.py CHANGED
@@ -12,6 +12,7 @@ from seasons import SEASONS
12
  from translate import (
13
  translate_pa_outcome, translate_pitch_outcome,
14
  jp_pitch_to_en_pitch, jp_pitch_to_pitch_code,
 
15
  max_pitch_types
16
  )
17
 
@@ -66,6 +67,21 @@ for season in SEASONS:
66
  # load player data
67
  _player_df = pl.read_csv(os.path.join(season_dir, 'player.csv'))
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # translate pa data
70
  _pa_df = (
71
  _pa_df
@@ -146,7 +162,6 @@ for season in SEASONS:
146
  pl.read_csv(os.path.join(season_dir, 'register.csv'))
147
  .with_columns(
148
  pl.col('en_name').str.replace(',', '').alias('en_name'),
149
-
150
  )
151
  .select(
152
  pl.col('en_name'),
@@ -154,7 +169,19 @@ for season in SEASONS:
154
  pl.col('jp_name').alias('name')
155
  )
156
  )
157
- _player_df = _player_df.join(register, on=['name', 'team'], how='inner').with_columns(pl.col('en_name').alias('name')).drop(pl.col('en_name'))
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  # merge pitch and pa data
160
  _df = (
@@ -261,3 +288,8 @@ def compute_league_pitch_stats(df):
261
  return df.group_by('pitch_name').agg(pl.col('release_speed').mean().round(1).alias('Velocity'))
262
 
263
  league_pitch_stats = compute_league_pitch_stats(df)
 
 
 
 
 
 
12
  from translate import (
13
  translate_pa_outcome, translate_pitch_outcome,
14
  jp_pitch_to_en_pitch, jp_pitch_to_pitch_code,
15
+ jp_team_to_en_team, jp_team_to_en_full_team,
16
  max_pitch_types
17
  )
18
 
 
67
  # load player data
68
  _player_df = pl.read_csv(os.path.join(season_dir, 'player.csv'))
69
 
70
+ # translate game data
71
+ _game_df = (
72
+ _game_df
73
+ .with_columns(
74
+ pl.col('home_team').alias('jp_home_team'),
75
+ pl.col('away_team').alias('jp_away_team')
76
+ )
77
+ .with_columns(
78
+ pl.col('home_team').replace_strict(jp_team_to_en_team),
79
+ pl.col('home_team').replace_strict(jp_team_to_en_full_team).alias('full_home_team'),
80
+ pl.col('away_team').replace_strict(jp_team_to_en_team),
81
+ pl.col('away_team').replace_strict(jp_team_to_en_full_team).alias('full_away_team')
82
+ )
83
+ )
84
+
85
  # translate pa data
86
  _pa_df = (
87
  _pa_df
 
162
  pl.read_csv(os.path.join(season_dir, 'register.csv'))
163
  .with_columns(
164
  pl.col('en_name').str.replace(',', '').alias('en_name'),
 
165
  )
166
  .select(
167
  pl.col('en_name'),
 
169
  pl.col('jp_name').alias('name')
170
  )
171
  )
172
+ _player_df = (
173
+ _player_df
174
+ .join(register, on=['name', 'team'], how='inner')
175
+ .with_columns(
176
+ pl.col('en_name').alias('name'),
177
+ pl.col('team').alias('jp_team')
178
+ )
179
+ .with_columns(
180
+ pl.col('jp_team').replace_strict(jp_team_to_en_team).alias('team'),
181
+ pl.col('jp_team').replace_strict(jp_team_to_en_full_team).alias('full_team'),
182
+ )
183
+ .drop(pl.col('en_name'))
184
+ )
185
 
186
  # merge pitch and pa data
187
  _df = (
 
288
  return df.group_by('pitch_name').agg(pl.col('release_speed').mean().round(1).alias('Velocity'))
289
 
290
  league_pitch_stats = compute_league_pitch_stats(df)
291
+
292
+ if __name__ == '__main__':
293
+ print(df.shape)
294
+ print(df.columns)
295
+ breakpoint()
translate.py CHANGED
@@ -204,3 +204,57 @@ def translate_pitch_outcome(outcome):
204
  return outcome
205
 
206
  max_pitch_types = len(jp_pitch_to_en_pitch)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  return outcome
205
 
206
  max_pitch_types = len(jp_pitch_to_en_pitch)
207
+
208
+ jp_team_to_en_team = {
209
+ '阪神': 'Hanshin',
210
+ '楽天': 'Rakuten',
211
+ 'オリックス': 'Orix',
212
+ 'ソフトバンク': 'SoftBank',
213
+ 'ヤクルト': 'Yakult',
214
+ 'ロッテ': 'Lotte',
215
+ '巨人': 'Giants',
216
+ '日本ハム': 'Nippon-Ham',
217
+ '西武': 'Seibu',
218
+ '中日': 'Chunichi',
219
+ 'DeNA': 'DeNA',
220
+ '広島': 'Hiroshima',
221
+ 'オリックス・バファローズ': 'Orix',
222
+ '北海道日本ハムファイターズ': 'Nippon-Ham',
223
+ '埼玉西武ライオンズ': 'Seibu',
224
+ '東京ヤクルトスワローズ': 'Yakult',
225
+ '横浜DeNAベイスターズ': 'DeNA',
226
+ '読売ジャイアンツ': 'Giants',
227
+ '千葉ロッテマリーンズ': 'Lotte',
228
+ '広島東洋カープ': 'Hiroshima',
229
+ '東北楽天ゴールデンイーグルス': 'Rakuten',
230
+ '中日ドラゴンズ': 'Chunichi',
231
+ '福岡ソフトバンクホークス': 'SoftBank',
232
+ '阪神タイガース': 'Hanshin'
233
+ }
234
+
235
+ jp_team_to_en_full_team = {
236
+ '阪神': 'Hanshin Tigers',
237
+ '楽天': 'Rakuten Eagles',
238
+ 'オリックス': 'Orix Buffaloes',
239
+ 'ソフトバンク': 'SoftBank Hawks',
240
+ 'ヤクルト': 'Yakult Swallows',
241
+ 'ロッテ': 'Lotte Marines',
242
+ '巨人': 'Yomiuri Giants',
243
+ '日本ハム': 'Nippon-Ham Fighters',
244
+ '西武': 'Seibu Lions',
245
+ '中日': 'Chunichi Dragons',
246
+ 'DeNA': 'DeNA BayStars',
247
+ '広島': 'Hiroshima Carp',
248
+ 'オリックス・バファローズ': 'Orix Buffaloes',
249
+ '北海道日本ハムファイターズ': 'Nippon-Ham Fighters',
250
+ '埼玉西武ライオンズ': 'Seibu Lions',
251
+ '東京ヤクルトスワローズ': 'Yakult Swallows',
252
+ '横浜DeNAベイスターズ': 'DeNA BayStars',
253
+ '読売ジャイアンツ': 'Yomiuri Giants',
254
+ '千葉ロッテマリーンズ': 'Lotte Marines',
255
+ '広島東洋カープ': 'Hiroshima Carp',
256
+ '東北楽天ゴールデンイーグルス': 'Rakuten Eagles',
257
+ '中日ドラゴンズ': 'Chunichi Dragons',
258
+ '福岡ソフトバンクホークス': 'SoftBank Hawks',
259
+ '阪神タイガース': 'Hanshin Tigers'
260
+ }