Upload KNNModel.py
Browse files- KNN/KNNModel.py +95 -0
KNN/KNNModel.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
5 |
+
from sklearn.model_selection import train_test_split, cross_val_score
|
6 |
+
from sklearn.neighbors import KNeighborsRegressor
|
7 |
+
from sklearn.metrics import r2_score, mean_squared_error
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
|
10 |
+
class KNNModel:
    """K-nearest-neighbours regression pipeline for the ``Fat_Percentage`` target.

    Intended call order:
    ``load_and_preprocess_data()`` -> ``find_optimal_k()`` -> ``train_model()``
    -> ``evaluate_model()`` / ``predict()``.
    """

    def __init__(self):
        """Initialise configuration and empty state; nothing is loaded here."""
        self.target_column = "Fat_Percentage"
        self.data_path = os.path.join("app", "data", "gym_members_exercise_tracking.csv")  # Updated path
        self.data = None  # label-encoded DataFrame, set by load_and_preprocess_data()
        self.selected_features = None  # feature column names, set by load_and_preprocess_data()
        self.label_encoders = {}  # column name -> fitted LabelEncoder
        self.scaler = StandardScaler()
        self.knn = None  # fitted KNeighborsRegressor, set by train_model()
        self.optimal_k = None  # set by find_optimal_k()

    def load_and_preprocess_data(self):
        """Load the CSV, encode categoricals, select features and scale them.

        Features are the columns whose absolute correlation with the target
        is at least 0.5. The split is done *before* feature selection and
        scaling so that neither step sees the held-out rows.

        Returns:
            ``[X_train, X_test, y_train, y_test]`` where the ``X`` entries are
            the scaler's output and the ``y`` entries are pandas Series.

        Raises:
            ValueError: if a required categorical column is missing, or if no
                feature reaches the correlation threshold.
        """
        # Load data
        self.data = pd.read_csv(self.data_path)

        # Check if 'Experience_Level' column exists
        if 'Experience_Level' not in self.data.columns:
            raise ValueError("'Experience_Level' column not found in the dataset.")

        # Encode categorical features
        categorical_features = ['Gender', 'Experience_Level', 'Workout_Type']
        for feature in categorical_features:
            if feature not in self.data.columns:  # Ensure the column exists
                raise ValueError(f"'{feature}' column not found in the dataset.")
            le = LabelEncoder()
            self.data[feature] = le.fit_transform(self.data[feature])
            self.label_encoders[feature] = le  # Save the encoder

        # Split first: same test_size / random_state as before, so the same
        # rows end up in each partition.
        train_df, test_df = train_test_split(self.data, test_size=0.2, random_state=42)

        # Compute correlations on the training rows only. Restricting to
        # numeric columns keeps corr() from failing on leftover text columns.
        numeric_train = train_df.select_dtypes(include="number")
        target_corr = numeric_train.corr()[self.target_column].sort_values(ascending=False)
        strong = target_corr[target_corr.abs() >= 0.5].index.tolist()
        self.selected_features = [name for name in strong if name != self.target_column]
        if not self.selected_features:
            raise ValueError(
                f"No feature has an absolute correlation >= 0.5 with '{self.target_column}'."
            )

        # Fit the scaler on the training rows only, then apply it to both parts.
        X_train = self.scaler.fit_transform(train_df[self.selected_features])
        X_test = self.scaler.transform(test_df[self.selected_features])
        y_train = train_df[self.target_column]
        y_test = test_df[self.target_column]

        return [X_train, X_test, y_train, y_test]

    def find_optimal_k(self, X_train, y_train, k_range=20):
        """Pick the ``k`` in ``1..k_range`` with the best 5-fold cross-validated R^2.

        Stores the result in ``self.optimal_k`` and saves a plot of R^2
        against k to ``app/KNN/optimal_k_plot.png``.

        Args:
            X_train: training feature matrix.
            y_train: training target values.
            k_range: largest number of neighbours to try (inclusive).

        Returns:
            The selected number of neighbours.

        Raises:
            ValueError: if ``k_range`` is smaller than 1, or if every
                cross-validation score is NaN.
        """
        if k_range < 1:
            raise ValueError("k_range must be at least 1.")

        k_values = range(1, k_range + 1)
        cv_scores = []

        for k in k_values:
            knn = KNeighborsRegressor(n_neighbors=k)
            scores = cross_val_score(knn, X_train, y_train, cv=5, scoring='r2')
            cv_scores.append(scores.mean())

        # Find optimal k. nanargmax skips NaN scores (e.g. from a failed fold);
        # plain argmax would return the position of the first NaN instead.
        self.optimal_k = k_values[int(np.nanargmax(cv_scores))]

        # Plot k vs. R^2
        plot_path = os.path.join("app", "KNN", "optimal_k_plot.png")
        os.makedirs(os.path.dirname(plot_path), exist_ok=True)
        plt.figure(figsize=(10, 6))
        try:
            plt.plot(k_values, cv_scores, marker='o', linestyle='-')
            plt.xlabel("Number of Neighbors (k)")
            plt.ylabel("Cross-Validated R^2")
            plt.title("R^2 vs. k")
            plt.grid()
            plt.savefig(plot_path)
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close()

        return self.optimal_k

    def train_model(self, X_train, y_train):
        """Fit a KNN regressor using ``self.optimal_k`` neighbours.

        Raises:
            ValueError: if ``find_optimal_k()`` has not set ``optimal_k`` yet.
        """
        if not self.optimal_k:
            raise ValueError("Optimal k is not set. Run find_optimal_k() first.")
        self.knn = KNeighborsRegressor(n_neighbors=self.optimal_k)
        self.knn.fit(X_train, y_train)

    def evaluate_model(self, X_test, y_test):
        """Score the trained model on held-out data.

        Returns:
            A ``(r2, mse)`` tuple.

        Raises:
            ValueError: if the model has not been trained.
        """
        if self.knn is None:
            raise ValueError("Model is not trained. Run train_model() first.")
        y_pred = self.knn.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        return r2, mse

    def predict(self, input_data: pd.DataFrame) -> float:
        """Predict the target for the first row of ``input_data``.

        When ``input_data`` is a DataFrame containing every selected feature,
        its columns are reduced to and ordered like the training features
        before scaling. Any other input is handed to the scaler unchanged.

        Raises:
            ValueError: if the model has not been trained.
        """
        if self.knn is None:
            raise ValueError("Model is not trained. Run train_model() first.")

        features = self.selected_features
        if (
            features
            and isinstance(input_data, pd.DataFrame)
            and set(features).issubset(input_data.columns)
        ):
            # Drop extra columns and enforce the column order used at fit time.
            input_data = input_data[features]

        input_scaled = self.scaler.transform(input_data)
        return self.knn.predict(input_scaled)[0]
|