import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
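
# Load the competition tables; the ./input and ./working paths suggest a
# Kaggle-style directory layout.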
train_data = pd.read_csv("./input/train.csv")
test_data = pd.read_csv("./input/test.csv")
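
# Encode the categorical "EJ" column as integers: fit on train and reuse the
# fitted mapping on test so both splits share the same codes.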
le = LabelEncoder()
train_data["EJ"] = le.fit_transform(train_data["EJ"])
test_data["EJ"] = le.transform(test_data["EJ"])
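
# Split features from the target; "Id" is a row identifier, not a predictor.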
X = train_data.drop(["Id", "Class"], axis=1)
y = train_data["Class"]
X_test = test_data.drop("Id", axis=1)
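
# Gradient-boosted decision trees for binary classification; is_unbalance=True
# makes LightGBM reweight the classes to compensate for the skewed target
# (note that reweighting shifts predicted probabilities, which can affect a
# raw log-loss score).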
model = lgb.LGBMClassifier(objective="binary", boosting_type="gbdt", is_unbalance=True)
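
# Search space: learning rate, tree complexity (num_leaves, max_depth,
# min_child_samples), histogram resolution (max_bin), and row/column
# subsampling.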
param_grid = {
    "learning_rate": [0.01, 0.05, 0.1],
    "num_leaves": [15, 31, 63],
    "max_depth": [-1, 5, 10],
    "min_child_samples": [10, 20, 30],
    "max_bin": [255, 300],
    "subsample": [0.6, 0.8, 1.0],
    # In LightGBM, "subsample" is inert unless bagging is enabled by setting
    # subsample_freq above 0.
    "subsample_freq": [1],
    "colsample_bytree": [0.3, 0.5, 0.7],
}
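
# Randomized search over the grid, scored by log loss (the competition metric).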
# The built-in "neg_log_loss" scorer stands in for
# make_scorer(log_loss, greater_is_better=False, needs_proba=True): sklearn
# scorers follow a greater-is-better convention, and the needs_proba argument
# has been removed from recent scikit-learn releases.
random_search = RandomizedSearchCV(
    model,
    param_distributions=param_grid,
    n_iter=10,
    scoring="neg_log_loss",
    # StratifiedKFold keeps the class ratio consistent across folds; a plain
    # KFold does not guarantee that on an imbalanced target.
    cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=42),
    random_state=42,
    verbose=1,
)

random_search.fit(X, y)
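
# best_score_ holds the negated log loss, so flip the sign for reporting.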
best_model = random_search.best_estimator_
best_score = -random_search.best_score_
print(f"Best Log Loss: {best_score}")
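
# Predicted probability of class 1 for each test row.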
test_predictions = best_model.predict_proba(X_test)[:, 1]
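
# Build the submission: one probability column per class, summing to 1 per row.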
submission = pd.DataFrame(
    {
        "Id": test_data["Id"],
        "class_0": 1 - test_predictions,
        "class_1": test_predictions,
    }
)
submission.to_csv("./working/submission.csv", index=False)