import os
import warnings

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import gradio as gr


class My_RandomForest:
    """Train and query one random-forest classifier per gender group.

    Three independent models are kept, keyed by "Male", "Female" and
    "Unspecified".  The "Unspecified" model is trained on every row of the
    dataset; the other two are trained only on rows whose ``Gender`` column
    matches.  Each model has its own fitted ``MinMaxScaler`` so that
    prediction always applies the same scaling the model was trained with.
    """

    def __init__(self):
        """Set default hyperparameters and load the dataset from disk."""
        self.target_column = "Experience_Level"  # Change to suit your classification target
        self.models = {"Male": None, "Female": None, "Unspecified": None}

        # Default parameters passed to RandomForestClassifier
        self.n_estimators = 10000  # Number of trees
        self.max_depth = 4  # Maximum tree depth
        self.max_features = 'sqrt'
        self.criterion = 'gini'

        # Test-set accuracy of each trained model (None until trained)
        self.accuracies = {"Male": None, "Female": None, "Unspecified": None}

        # Every gender starts with the same feature columns; each gets its
        # own list object so one entry can be edited without touching others.
        default_features = (
            "Workout_Frequency (days/week)",
            "Session_Duration (hours)",
            "Water_Intake (liters)",
        )
        self.selected_features = {
            gender: list(default_features) for gender in self.models
        }

        # One fitted scaler per gender.  A single shared scaler would be
        # refit by every train_model() call, so predictions for an earlier
        # model would be scaled with another model's min/max values.
        self.scalers = {gender: None for gender in self.models}
        # Kept for backward compatibility: after training, this refers to
        # the most recently fitted scaler.
        self.scaler = MinMaxScaler()

        self.init_dataset()

    def init_dataset(self):
        """Read the CSV dataset into ``self.df_original``."""
        csv_file = os.path.join("data", "gym_members_exercise_tracking.csv")
        self.df_original = pd.read_csv(csv_file)

    def train_model(self, gender="Unspecified"):
        """Train the model for *gender* and record its test accuracy.

        The data is split 80/20 with a fixed random state, the features are
        min-max scaled using statistics from the training split only, and the
        fitted model, its scaler and its accuracy are stored under *gender*.

        Args:
            gender: One of "Male", "Female" or "Unspecified".

        Raises:
            ValueError: If *gender* is not one of the supported keys.
        """
        if gender not in self.models:
            raise ValueError(
                "Invalid gender specified. "
                "Choose from 'Male', 'Female', or 'Unspecified'."
            )

        # Filter data by gender for training specific models;
        # "Unspecified" uses all rows.
        if gender in ("Male", "Female"):
            df_filtered = self.df_original[self.df_original["Gender"] == gender]
        else:
            df_filtered = self.df_original

        features = self.selected_features[gender]
        X = df_filtered[features]
        y = df_filtered[self.target_column]

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Fit a fresh scaler on the training split only, then apply it to
        # both splits so no test-set statistics leak into training.
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        model = RandomForestClassifier(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            max_features=self.max_features,
            criterion=self.criterion,
            random_state=42,
        )
        model.fit(X_train, y_train)

        # Evaluate on the held-out split
        accuracy = accuracy_score(y_test, model.predict(X_test))

        # Store results only after training succeeded, so a failure above
        # never leaves a model paired with the wrong scaler.
        self.models[gender] = model
        self.scalers[gender] = scaler
        self.scaler = scaler
        self.accuracies[gender] = accuracy

    def predict(self, input_data: pd.DataFrame, gender="Unspecified"):
        """Predict the target for *input_data* with the model for *gender*.

        Args:
            input_data: Frame containing at least the feature columns
                selected for *gender*.
            gender: Key of the model to use.

        Returns:
            The array returned by the underlying model's ``predict``.

        Raises:
            ValueError: If *gender* is unknown or its model is not trained.
        """
        if self.models.get(gender) is None:
            raise ValueError(f"Model for {gender} is not trained yet.")

        features = self.selected_features[gender]
        # Use the scaler fitted together with this specific model.
        scaled_input = self.scalers[gender].transform(input_data[features])
        return self.models[gender].predict(scaled_input)