# Spaces: Running
import os

import gradio as gr
import joblib
import numpy as np
import pandas as pd
# Resolve the artifact locations relative to this file rather than the
# current working directory, so the app can be launched from anywhere.
script_dir = os.path.dirname(os.path.abspath(__file__))
pipeline_path = os.path.join(script_dir, 'toolkit', 'pipeline.joblib')
model_path = os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib')

# Load transformation pipeline and model
# NOTE: joblib.load unpickles the file contents, which can execute arbitrary
# code; only load artifacts that come from a trusted source.
pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)
# Create a function to calculate TotalCharges
def calculate_total_charges(tenure: float, monthly_charges: float) -> float:
    """Return the total charges, computed as ``tenure * monthly_charges``.

    Args:
        tenure: Tenure value to multiply by the monthly charge.
        monthly_charges: Amount charged per unit of tenure.

    Returns:
        The product of the two arguments.
    """
    return tenure * monthly_charges
# Create a function that applies the ML pipeline and makes predictions
def predict(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Run one customer record through the pipeline and model.

    ``TotalCharges`` is not an input; it is derived from ``tenure`` and
    ``MonthlyCharges`` via ``calculate_total_charges``. The remaining
    arguments are placed unchanged into a one-row DataFrame whose column
    names equal the parameter names.

    Returns:
        A dict with two entries: ``"Prediction: CHURN 🔴"`` mapped to
        ``predict_proba`` column 1 and ``"Prediction: STAY ✅"`` mapped to
        column 0, both for the single input row.
    """
    # Calculate TotalCharges
    TotalCharges = calculate_total_charges(tenure, MonthlyCharges)

    # Create a dataframe with the input data
    input_df = pd.DataFrame({
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges],
    })

    # Selecting categorical and numerical columns separately.
    # Test for "numeric" instead of comparing against the 'object' dtype:
    # string columns are not guaranteed to be stored as object (pandas has a
    # dedicated string dtype), and they must still count as categorical.
    is_numeric = pd.api.types.is_numeric_dtype
    num_cols = [col for col in input_df.columns if is_numeric(input_df[col])]
    cat_cols = [col for col in input_df.columns if not is_numeric(input_df[col])]

    X_processed = pipeline.transform(input_df)
    # The pipeline may return a SciPy sparse matrix; densify it so the
    # DataFrame constructor below receives a plain 2-D array.
    if hasattr(X_processed, 'toarray'):
        X_processed = X_processed.toarray()

    # Extracting feature names for categorical columns after one-hot encoding
    cat_encoder = pipeline.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']
    cat_feature_names = cat_encoder.get_feature_names_out(cat_cols)

    # Concatenating numerical and categorical feature names
    feature_names = num_cols + list(cat_feature_names)

    # Convert X_processed to DataFrame
    final_df = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end.
    # NOTE(review): presumably these are the numeric features, which the
    # model expects after the one-hot columns -- confirm against the
    # training code before changing this count.
    leading_count = 3
    final_df = pd.concat(
        [final_df.iloc[:, leading_count:], final_df.iloc[:, :leading_count]],
        axis=1,
    )

    # Make predictions using the model
    prediction_probs = model.predict_proba(final_df)[0]

    # Plain floats keep the result independent of NumPy scalar types.
    return {
        "Prediction: CHURN 🔴": float(prediction_probs[1]),
        "Prediction: STAY ✅": float(prediction_probs[0]),
    }