import os

import gradio as gr
import joblib
import numpy as np
import pandas as pd

script_dir = os.path.dirname(os.path.abspath(__file__))
pipeline_path = os.path.join(script_dir, 'toolkit', 'pipeline.joblib')
model_path = os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib')

# Load transformation pipeline and model.
# NOTE(review): joblib.load unpickles arbitrary objects; only ship trusted
# artifacts in the 'toolkit' directory.
pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)


def calculate_total_charges(tenure, monthly_charges):
    """Return the total charges as ``tenure * monthly_charges``."""
    return tenure * monthly_charges


def predict(SeniorCitizen, Partner, Dependents, tenure, InternetService,
            OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling,
            PaymentMethod, MonthlyCharges):
    """Apply the ML pipeline to one set of inputs and return class probabilities.

    The arguments are assembled into a single-row DataFrame together with a
    derived ``TotalCharges`` column, transformed with the loaded ``pipeline``,
    and scored with the loaded ``model``.

    Returns:
        A dict with two entries: ``"Prediction: CHURN 🔴"`` mapped to
        ``predict_proba`` column 1 and ``"Prediction: STAY ✅"`` mapped to
        column 0, both as plain Python floats.
    """
    # Calculate TotalCharges
    TotalCharges = calculate_total_charges(tenure, MonthlyCharges)

    # Create a single-row dataframe with the input data
    input_df = pd.DataFrame({
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges],
    })

    # Select categorical and numerical columns separately. A numeric-dtype
    # check is used instead of comparing against 'object' so that text columns
    # are still treated as categorical when pandas stores them with its
    # dedicated string dtype rather than object.
    num_cols = []
    cat_cols = []
    for col in input_df.columns:
        if pd.api.types.is_numeric_dtype(input_df[col]):
            num_cols.append(col)
        else:
            cat_cols.append(col)

    X_processed = pipeline.transform(input_df)

    # Extract feature names for categorical columns after one-hot encoding
    preprocessor = pipeline.named_steps['preprocessor']
    cat_encoder = preprocessor.named_transformers_['cat'].named_steps['onehot']
    cat_feature_names = cat_encoder.get_feature_names_out(cat_cols)

    # Concatenate numerical and categorical feature names.
    # NOTE(review): assumes the preprocessor emits the numeric columns first,
    # followed by the one-hot columns -- confirm against the fitted pipeline.
    feature_names = num_cols + list(cat_feature_names)

    # Convert X_processed to DataFrame
    final_df = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end.
    # NOTE(review): presumably this matches the column order the model was
    # fitted with -- confirm before changing the count.
    first_three_columns = final_df.iloc[:, :3]
    remaining_columns = final_df.iloc[:, 3:]
    final_df = pd.concat([remaining_columns, first_three_columns], axis=1)

    # Make predictions using the model; only one row was submitted.
    prediction_probs = model.predict_proba(final_df)[0]

    # Cast NumPy scalars to plain floats for the label -> confidence mapping.
    prediction_label = {
        "Prediction: CHURN 🔴": float(prediction_probs[1]),
        "Prediction: STAY ✅": float(prediction_probs[0]),
    }
    return prediction_label