mohli committed on
Commit
83131d7
·
verified ·
1 Parent(s): 5ea8735

Upload KNNModel.py

Browse files
Files changed (1) hide show
  1. KNN/KNNModel.py +95 -0
KNN/KNNModel.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
5
+ from sklearn.model_selection import train_test_split, cross_val_score
6
+ from sklearn.neighbors import KNeighborsRegressor
7
+ from sklearn.metrics import r2_score, mean_squared_error
8
+ import matplotlib.pyplot as plt
9
+
10
class KNNModel:
    """K-nearest-neighbours regressor for the ``Fat_Percentage`` column.

    Intended call order: ``load_and_preprocess_data()`` ->
    ``find_optimal_k()`` -> ``train_model()`` -> ``evaluate_model()`` /
    ``predict()``.

    Attributes are plain instance fields:
      * ``target_column``     - name of the column being predicted.
      * ``data_path``         - relative path of the CSV read by
                                ``load_and_preprocess_data``.
      * ``data``              - the loaded (and label-encoded) DataFrame.
      * ``selected_features`` - feature column names chosen by correlation.
      * ``label_encoders``    - fitted ``LabelEncoder`` per categorical column.
      * ``scaler``            - ``StandardScaler`` fitted on the training split.
      * ``knn``               - fitted ``KNeighborsRegressor`` (``None`` until
                                ``train_model`` runs).
      * ``optimal_k``         - neighbour count chosen by ``find_optimal_k``.
    """

    def __init__(self):
        """Initialise configuration and empty model state (no I/O happens here)."""
        self.target_column = "Fat_Percentage"
        self.data_path = os.path.join("app", "data", "gym_members_exercise_tracking.csv")  # Updated path
        self.data = None
        self.selected_features = None
        self.label_encoders = {}
        self.scaler = StandardScaler()
        self.knn = None
        self.optimal_k = None

    def load_and_preprocess_data(self):
        """Load the CSV, encode categoricals, select features and split.

        Returns:
            ``[X_train, X_test, y_train, y_test]`` where the ``X`` parts are
            standardised arrays and the ``y`` parts are slices of the target
            column (80/20 split, ``random_state=42``).

        Raises:
            ValueError: if a required categorical column is missing, or if no
                feature reaches the 0.5 absolute-correlation threshold.
        """
        self.data = pd.read_csv(self.data_path)

        categorical_features = ['Gender', 'Experience_Level', 'Workout_Type']

        # Validate every required column *before* mutating self.data, so a
        # failure never leaves the frame half-encoded. 'Experience_Level' is
        # checked first to keep the column named in the error unchanged.
        for feature in ('Experience_Level', 'Gender', 'Workout_Type'):
            if feature not in self.data.columns:
                raise ValueError(f"'{feature}' column not found in the dataset.")

        # Encode categorical features and keep the fitted encoders for reuse.
        for feature in categorical_features:
            encoder = LabelEncoder()
            self.data[feature] = encoder.fit_transform(self.data[feature])
            self.label_encoders[feature] = encoder

        # Correlate numeric columns only: a stray text column would otherwise
        # make DataFrame.corr() raise on recent pandas versions.
        numeric_data = self.data.select_dtypes(include="number")
        target_corr = numeric_data.corr()[self.target_column].sort_values(ascending=False)
        strongly_correlated = target_corr[target_corr.abs() >= 0.5].index
        self.selected_features = [
            column for column in strongly_correlated if column != self.target_column
        ]
        if not self.selected_features:
            raise ValueError(
                "No feature has an absolute correlation of at least 0.5 "
                f"with '{self.target_column}'."
            )
        # NOTE(review): the correlation filter above still looks at every row,
        # including those that end up in the test split.

        X = self.data[self.selected_features]
        y = self.data[self.target_column]

        # Split first, then fit the scaler on the training rows only, so the
        # test rows do not influence the scaling statistics (data leakage).
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        return [X_train_scaled, X_test_scaled, y_train, y_test]

    def find_optimal_k(self, X_train, y_train, k_range=20):
        """Pick the neighbour count with the best 5-fold cross-validated R^2.

        Every ``k`` in ``1..k_range`` is scored; the scores are also plotted
        to ``app/KNN/optimal_k_plot.png``.

        Args:
            X_train: training feature matrix.
            y_train: training targets.
            k_range: largest ``k`` to try (inclusive); must be >= 1.

        Returns:
            The selected ``k`` (also stored in ``self.optimal_k``).

        Raises:
            ValueError: if ``k_range`` is below 1, or if every candidate's
                score is NaN.
        """
        if k_range < 1:
            raise ValueError("k_range must be at least 1.")

        k_values = range(1, k_range + 1)
        cv_scores = [
            cross_val_score(
                KNeighborsRegressor(n_neighbors=k), X_train, y_train, cv=5, scoring='r2'
            ).mean()
            for k in k_values
        ]

        # np.argmax treats NaN as the maximum, so a failed fit would be
        # reported as the "best" k. nanargmax skips NaN scores instead.
        self.optimal_k = k_values[int(np.nanargmax(cv_scores))]

        # Plot k vs. R^2
        plot_dir = os.path.join("app", "KNN")
        os.makedirs(plot_dir, exist_ok=True)  # savefig does not create directories
        plt.figure(figsize=(10, 6))
        plt.plot(k_values, cv_scores, marker='o', linestyle='-')
        plt.xlabel("Number of Neighbors (k)")
        plt.ylabel("Cross-Validated R^2")
        plt.title("R^2 vs. k")
        plt.grid()
        plt.savefig(os.path.join(plot_dir, "optimal_k_plot.png"))
        plt.close()

        return self.optimal_k

    def train_model(self, X_train, y_train):
        """Fit a ``KNeighborsRegressor`` using ``self.optimal_k`` neighbours.

        Raises:
            ValueError: if ``find_optimal_k`` has not set ``optimal_k`` yet.
        """
        if not self.optimal_k:
            raise ValueError("Optimal k is not set. Run find_optimal_k() first.")
        self.knn = KNeighborsRegressor(n_neighbors=self.optimal_k)
        self.knn.fit(X_train, y_train)

    def evaluate_model(self, X_test, y_test):
        """Return ``(r2, mse)`` of the trained model on the given test data.

        Raises:
            ValueError: if the model has not been trained.
        """
        if self.knn is None:
            raise ValueError("Model is not trained. Run train_model() first.")
        y_pred = self.knn.predict(X_test)
        return r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

    def predict(self, input_data: pd.DataFrame) -> float:
        """Predict the target for the first row of ``input_data``.

        When ``input_data`` is a DataFrame and features were selected during
        preprocessing, only those columns are used, in the training order;
        extra columns are ignored.

        Raises:
            ValueError: if the model has not been trained, or if a selected
                feature column is missing from ``input_data``.
        """
        if self.knn is None:
            raise ValueError("Model is not trained. Run train_model() first.")

        features = input_data
        if self.selected_features and isinstance(input_data, pd.DataFrame):
            missing = [c for c in self.selected_features if c not in input_data.columns]
            if missing:
                raise ValueError(f"Missing feature column(s) in input data: {missing}")
            # Align to the column order the scaler was fitted with.
            features = input_data[self.selected_features]

        input_scaled = self.scaler.transform(features)
        return self.knn.predict(input_scaled)[0]