Spaces:
Running
Running
File size: 2,952 Bytes
4b23871 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import gradio as gr
import pandas as pd
import numpy as np
import joblib, os
# Resolve the saved artifacts relative to this file's own location rather
# than the process's current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
_toolkit_dir = os.path.join(script_dir, 'toolkit')
pipeline_path = os.path.join(_toolkit_dir, 'pipeline.joblib')
model_path = os.path.join(_toolkit_dir, 'Random Forest Classifier.joblib')

# Load the transformation pipeline and the model once, at import time.
# NOTE: joblib.load unpickles the files, so both artifacts must be trusted.
pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)
# Helper that derives TotalCharges from the two values it depends on
def calculate_total_charges(tenure, monthly_charges):
    """Return ``tenure`` multiplied by ``monthly_charges``."""
    total_charges = tenure * monthly_charges
    return total_charges
# Create a function that applies the ML pipeline and makes predictions
def predict(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Transform one customer record and return the model's class probabilities.

    Each argument becomes the single value of the input column with the same
    name. ``TotalCharges`` is not passed in; it is derived from ``tenure`` and
    ``MonthlyCharges`` via ``calculate_total_charges``.

    Returns:
        A dict with two entries: the "CHURN" label mapped to the probability
        at index 1 of the model's ``predict_proba`` output for this row, and
        the "STAY" label mapped to the probability at index 0.
    """
    # Calculate TotalCharges
    TotalCharges = calculate_total_charges(tenure, MonthlyCharges)

    # Create a single-row dataframe with the input data
    input_df = pd.DataFrame({
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges],
    })

    # Select categorical and numerical columns separately. Text columns are
    # ``object`` dtype on older pandas but a dedicated string dtype when
    # string inference is enabled (the default in pandas 3), so both are
    # treated as categorical; comparing against 'object' alone would leave
    # cat_cols empty and break the feature-name reconstruction below.
    cat_cols = [
        col for col in input_df.columns
        if pd.api.types.is_object_dtype(input_df[col].dtype)
        or isinstance(input_df[col].dtype, pd.StringDtype)
    ]
    num_cols = [col for col in input_df.columns if col not in cat_cols]

    X_processed = pipeline.transform(input_df)

    # Extract feature names for categorical columns after one-hot encoding
    cat_encoder = (
        pipeline.named_steps['preprocessor']
        .named_transformers_['cat']
        .named_steps['onehot']
    )
    cat_feature_names = cat_encoder.get_feature_names_out(cat_cols)

    # Label the transformed columns: numerical names first, then the one-hot
    # names. NOTE(review): this assumes the preprocessor emits its numerical
    # block before its categorical block - confirm against the fitted pipeline.
    feature_names = num_cols + list(cat_feature_names)
    final_df = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end.
    # NOTE(review): presumably this reproduces the column order the model was
    # trained with - confirm against the training code.
    final_df = pd.concat([final_df.iloc[:, 3:], final_df.iloc[:, :3]], axis=1)

    # Make predictions using the model; [0] selects the only row
    prediction_probs = model.predict_proba(final_df)[0]
    prediction_label = {
        "Prediction: CHURN 🔴": prediction_probs[1],
        "Prediction: STAY ✅": prediction_probs[0],
    }
    return prediction_label
|