|
import pandas as pd |
|
import lightgbm as lgb |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.metrics import log_loss |
|
|
|
|
|
dtypes_df = pd.read_csv("./input/train_dtypes.csv") |
|
dtypes = { |
|
k: (v if v != "float16" else "float32") |
|
for (k, v) in zip(dtypes_df.column, dtypes_df.dtype) |
|
} |
|
|
|
|
|
train_dfs = [pd.read_csv(f"./input/train_{i}.csv", dtype=dtypes) for i in range(10)] |
|
train_df = pd.concat(train_dfs, ignore_index=True) |
|
|
|
|
|
X = train_df.drop( |
|
[ |
|
"game_num", |
|
"event_id", |
|
"event_time", |
|
"player_scoring_next", |
|
"team_scoring_next", |
|
"team_A_scoring_within_10sec", |
|
"team_B_scoring_within_10sec", |
|
], |
|
axis=1, |
|
) |
|
y = train_df[["team_A_scoring_within_10sec", "team_B_scoring_within_10sec"]] |
|
|
|
|
|
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42) |
|
|
|
|
|
model_A = lgb.LGBMClassifier() |
|
model_A.fit(X_train, y_train.iloc[:, 0]) |
|
|
|
|
|
model_B = lgb.LGBMClassifier() |
|
model_B.fit(X_train, y_train.iloc[:, 1]) |
|
|
|
|
|
val_preds_A = model_A.predict_proba(X_val)[:, 1] |
|
|
|
|
|
val_preds_B = model_B.predict_proba(X_val)[:, 1] |
|
|
|
|
|
val_preds = pd.DataFrame( |
|
{ |
|
"team_A_scoring_within_10sec": val_preds_A, |
|
"team_B_scoring_within_10sec": val_preds_B, |
|
} |
|
) |
|
|
|
|
|
val_log_loss = log_loss(y_val, val_preds) |
|
print(f"Validation Log Loss: {val_log_loss}") |
|
|
|
|
|
test_dtypes_df = pd.read_csv("./input/test_dtypes.csv") |
|
test_dtypes = { |
|
k: (v if v != "float16" else "float32") |
|
for (k, v) in zip(test_dtypes_df.column, test_dtypes_df.dtype) |
|
} |
|
test_df = pd.read_csv("./input/test.csv", dtype=test_dtypes) |
|
X_test = test_df.drop(["id"], axis=1) |
|
|
|
|
|
test_preds_A = model_A.predict_proba(X_test)[:, 1] |
|
|
|
|
|
test_preds_B = model_B.predict_proba(X_test)[:, 1] |
|
|
|
|
|
test_preds = pd.DataFrame( |
|
{ |
|
"team_A_scoring_within_10sec": test_preds_A, |
|
"team_B_scoring_within_10sec": test_preds_B, |
|
} |
|
) |
|
|
|
|
|
submission = pd.concat([test_df["id"], test_preds], axis=1) |
|
submission.to_csv("./working/submission.csv", index=False) |
|
|