Spaces:

dexhunter
/

aideml

Sleeping

aideml / sample_results /tabular-playground-series-jul-2022.py

add open-source AIDE

39c930a about 1 year ago

1.51 kB

	import pandas as pd
	from sklearn.cluster import KMeans
	from sklearn.metrics import silhouette_score
	from sklearn.preprocessing import StandardScaler

	# Load the dataset and drop the "id" column so it is not passed to the
	# scaler or to KMeans as a feature
	data = pd.read_csv("./input/data.csv")
	data.drop("id", axis=1, inplace=True)

	# Standardize the features
	scaler = StandardScaler()
	data_scaled = scaler.fit_transform(data)

	# Choose the number of clusters (2..10) by the highest silhouette score.
	# The fitted model, labels and score of the best candidate are kept while
	# searching, so the winner is not fitted and scored a second time afterwards
	# (same data and random_state, so a refit would only repeat this work).
	range_n_clusters = list(range(2, 11))
	silhouette_scores = []
	optimal_n_clusters = None
	kmeans = None
	best_labels = None
	final_silhouette_score = float("-inf")
	for n_clusters in range_n_clusters:
	    clusterer = KMeans(n_clusters=n_clusters, random_state=42)
	    cluster_labels = clusterer.fit_predict(data_scaled)
	    silhouette_avg = silhouette_score(data_scaled, cluster_labels)
	    silhouette_scores.append(silhouette_avg)

	    # Strict ">" keeps the first candidate on ties, which matches
	    # range_n_clusters[silhouette_scores.index(max(silhouette_scores))]
	    if silhouette_avg > final_silhouette_score:
	        final_silhouette_score = silhouette_avg
	        optimal_n_clusters = n_clusters
	        kmeans = clusterer
	        best_labels = cluster_labels

	# Attach the cluster labels of the selected model to the data
	data["Predicted"] = best_labels

	# Report the silhouette score of the selected clustering
	print(
	    f"Silhouette Score for optimal clusters {optimal_n_clusters}: {final_silhouette_score}"
	)

	# Save the predictions to a CSV file, using the sample submission as the
	# template for the output columns
	submission = pd.read_csv("./input/sample_submission.csv")
	submission["Predicted"] = data["Predicted"]
	submission.to_csv("./working/submission.csv", index=False)