File size: 3,785 Bytes
538da5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import os
import warnings
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import gradio as gr
class My_RandomForest:
    """Train and query one random-forest classifier per gender group.

    The dataset is read from ``data/gym_members_exercise_tracking.csv`` when
    the object is constructed. A separate model can then be trained for
    "Male", "Female" or "Unspecified" (all rows), each with its own fitted
    ``MinMaxScaler`` so that prediction always applies the same scaling the
    corresponding model was trained with.
    """

    def __init__(self):
        """Set default hyper-parameters and feature lists, then load the CSV."""
        self.target_column = "Experience_Level"  # Change to suit your classification target
        self.models = {
            "Male": None,
            "Female": None,
            "Unspecified": None,
        }
        # Default parameters
        self.n_estimators = 10000  # Number of trees
        self.max_depth = 4  # Maximum tree depth
        self.max_features = 'sqrt'
        self.criterion = 'gini'
        # Test-split accuracy of each trained model (None until trained)
        self.accuracies = {"Male": None, "Female": None, "Unspecified": None}
        self.selected_features = {
            "Male": ["Workout_Frequency (days/week)", "Session_Duration (hours)", "Water_Intake (liters)"],
            "Female": ["Workout_Frequency (days/week)", "Session_Duration (hours)", "Water_Intake (liters)"],
            "Unspecified": ["Workout_Frequency (days/week)", "Session_Duration (hours)", "Water_Intake (liters)"],
        }
        # Kept for backward compatibility: always refers to the most recently
        # fitted scaler (unfitted until the first call to train_model).
        self.scaler = MinMaxScaler()
        # One fitted scaler per gender. Sharing a single scaler would let a
        # later train_model() call overwrite the scaling that an earlier
        # model was trained with, corrupting that model's predictions.
        self.scalers = {"Male": None, "Female": None, "Unspecified": None}
        self.init_dataset()

    def init_dataset(self):
        """Load the dataset from disk into ``self.df_original``."""
        csv_file = os.path.join("data", "gym_members_exercise_tracking.csv")
        self.df_original = pd.read_csv(csv_file)

    def train_model(self, gender="Unspecified"):
        """Train the model for ``gender`` and record its test accuracy.

        Rows are filtered on the ``Gender`` column for "Male" / "Female";
        "Unspecified" uses every row. The data is split 80/20, a scaler is
        fitted on the training part only, and the fitted model, scaler and
        accuracy are stored under ``gender``.

        Raises:
            ValueError: if ``gender`` is not one of the known keys.
        """
        if gender not in self.models:
            raise ValueError("Invalid gender specified. Choose from 'Male', 'Female', or 'Unspecified'.")

        # Filter data by gender for training specific models
        if gender in ("Male", "Female"):
            df_filtered = self.df_original[self.df_original["Gender"] == gender]
        else:
            df_filtered = self.df_original  # Use all data for Unspecified

        features = self.selected_features[gender]
        X = df_filtered[features]
        y = df_filtered[self.target_column]

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Fit a scaler dedicated to this gender on the training data only,
        # then apply it to both splits.
        scaler = MinMaxScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # Initialize and train the Random Forest model
        model = RandomForestClassifier(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            max_features=self.max_features,
            criterion=self.criterion,
            random_state=42,
        )
        model.fit(X_train, y_train)

        # Evaluate the model on the held-out split
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        self.models[gender] = model
        self.scalers[gender] = scaler
        self.scaler = scaler  # most recently fitted scaler
        self.accuracies[gender] = accuracy  # Store the accuracy

    def predict(self, input_data: pd.DataFrame, gender="Unspecified"):
        """Return the predictions of the ``gender`` model for ``input_data``.

        Only the columns listed in ``self.selected_features[gender]`` are
        used; they are scaled with the scaler that was fitted when that
        gender's model was trained.

        Raises:
            ValueError: if no model has been trained for ``gender``.
        """
        if gender not in self.models or self.models[gender] is None:
            raise ValueError(f"Model for {gender} is not trained yet.")

        features = self.selected_features[gender]
        # Prefer the scaler fitted alongside this model; fall back to the
        # shared attribute for models that were installed without one.
        scaler = self.scalers.get(gender)
        if scaler is None:
            scaler = self.scaler
        scaled_input = scaler.transform(input_data[features])
        return self.models[gender].predict(scaled_input)
|