# File size: 1,928 Bytes
# 5cbc1e9
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.metrics import log_loss, make_scorer
from sklearn.preprocessing import LabelEncoder
import numpy as np
# Read the training and test tables from the local input directory.
train_data = pd.read_csv("./input/train.csv")
test_data = pd.read_csv("./input/test.csv")

# Replace the "EJ" column with integer codes. The encoder learns its classes
# from the training column only and is then applied unchanged to both tables,
# so a given category maps to the same code in train and test.
le = LabelEncoder().fit(train_data["EJ"])
train_data["EJ"] = le.transform(train_data["EJ"])
test_data["EJ"] = le.transform(test_data["EJ"])

# Separate the target from the features. "Id" is removed from both feature
# tables; "Class" only exists in (and is only removed from) the training table.
y = train_data["Class"]
X = train_data.drop(columns=["Id", "Class"])
X_test = test_data.drop(columns="Id")
# Base estimator for the hyper-parameter search.
# subsample_freq=1 enables row subsampling at every boosting iteration.
# LightGBM ignores `subsample` while the frequency stays at its default of 0,
# which would turn the "subsample" entry of the search space into a no-op.
model = lgb.LGBMClassifier(
    objective="binary",
    boosting_type="gbdt",
    is_unbalance=True,
    subsample_freq=1,
)

# Discrete candidate values the randomized search samples from.
param_grid = {
    "learning_rate": [0.01, 0.05, 0.1],
    "num_leaves": [15, 31, 63],
    "max_depth": [-1, 5, 10],
    "min_child_samples": [10, 20, 30],
    "max_bin": [255, 300],
    "subsample": [0.6, 0.8, 1.0],
    "colsample_bytree": [0.3, 0.5, 0.7],
}

# Scorer for the search: negated log loss computed from predict_proba.
# scikit-learn's built-in "neg_log_loss" scorer is used instead of
# make_scorer(..., needs_proba=True), because the `needs_proba` keyword was
# deprecated and later removed from make_scorer.
log_loss_scorer = "neg_log_loss"

# Randomized search: 10 sampled configurations, each scored with shuffled
# 10-fold cross-validation. Both random_state values are fixed so the sampled
# configurations and the fold assignment are repeatable.
random_search = RandomizedSearchCV(
    model,
    param_distributions=param_grid,
    n_iter=10,
    scoring=log_loss_scorer,
    cv=KFold(n_splits=10, shuffle=True, random_state=42),
    random_state=42,
    verbose=1,
)
random_search.fit(X, y)
# Take the winning estimator from the search. The search score is a negated
# log loss, so the sign is flipped back before it is reported.
best_model = random_search.best_estimator_
best_score = -random_search.best_score_
print(f"Best Log Loss: {best_score}")

# Second predict_proba column for every test row.
test_predictions = best_model.predict_proba(X_test)[:, 1]

# Assemble the submission column by column: the test ids, then the two
# complementary probabilities (class_0 is derived as 1 - class_1).
submission = pd.DataFrame({"Id": test_data["Id"]})
submission["class_0"] = 1 - test_predictions
submission["class_1"] = test_predictions

# Write the result without the DataFrame index.
submission.to_csv("./working/submission.csv", index=False)
# (end of file)