File size: 2,952 Bytes
4b23871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
import pandas as pd
import numpy as np
import joblib, os

# Both serialized artifacts live in the 'toolkit' folder next to this script;
# resolving them from __file__ keeps the paths independent of the working
# directory the app is launched from.
script_dir = os.path.dirname(os.path.abspath(__file__))
pipeline_path, model_path = (
    os.path.join(script_dir, 'toolkit', artifact_name)
    for artifact_name in ('pipeline.joblib', 'Random Forest Classifier.joblib')
)

# Deserialize the transformation pipeline first, then the classifier.
# NOTE: joblib.load unpickles its input, so these files must come from a
# trusted source.
pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)

def calculate_total_charges(tenure, monthly_charges):
    """Return ``tenure`` multiplied by ``monthly_charges``.

    ``predict`` uses this value to fill the TotalCharges column, which is
    derived from the other two inputs rather than supplied by the caller.
    """
    total_charges = tenure * monthly_charges
    return total_charges

def predict(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Score a single customer record with the loaded pipeline and model.

    The arguments are assembled into a one-row DataFrame (TotalCharges is
    derived through ``calculate_total_charges``), transformed by the
    module-level ``pipeline``, relabelled with feature names, reordered, and
    passed to the module-level ``model``.

    Returns:
        dict: two entries, in this order -- "Prediction: CHURN 🔴" mapped to
        column 1 of ``model.predict_proba`` and "Prediction: STAY ✅" mapped
        to column 0, both taken from the single input row.
    """
    # Key order is significant: it fixes the DataFrame column order, which in
    # turn fixes the order of the numerical/categorical name lists below.
    raw_values = {
        'SeniorCitizen': SeniorCitizen,
        'Partner': Partner,
        'Dependents': Dependents,
        'tenure': tenure,
        'InternetService': InternetService,
        'OnlineSecurity': OnlineSecurity,
        'OnlineBackup': OnlineBackup,
        'DeviceProtection': DeviceProtection,
        'TechSupport': TechSupport,
        'StreamingTV': StreamingTV,
        'StreamingMovies': StreamingMovies,
        'Contract': Contract,
        'PaperlessBilling': PaperlessBilling,
        'PaymentMethod': PaymentMethod,
        'MonthlyCharges': MonthlyCharges,
        'TotalCharges': calculate_total_charges(tenure, MonthlyCharges),
    }
    input_df = pd.DataFrame({name: [value] for name, value in raw_values.items()})

    # Partition the column names in one pass: object-dtype columns are treated
    # as categorical, every other dtype as numerical.
    cat_cols, num_cols = [], []
    for column in input_df.columns:
        bucket = cat_cols if input_df[column].dtype == 'object' else num_cols
        bucket.append(column)

    X_processed = pipeline.transform(input_df)

    # Ask the fitted one-hot encoder for the names of the columns it produced.
    preprocessor = pipeline.named_steps['preprocessor']
    onehot = preprocessor.named_transformers_['cat'].named_steps['onehot']
    encoded_names = onehot.get_feature_names_out(cat_cols)

    # NOTE(review): this labelling assumes the transformed matrix lists the
    # numerical columns first, followed by the one-hot columns -- confirm
    # against the fitted preprocessor.
    final_df = pd.DataFrame(X_processed, columns=num_cols + list(encoded_names))

    # Move the leading three columns to the end of the frame.
    # NOTE(review): presumably this reproduces the column order the model was
    # trained with (and relies on exactly three leading numerical columns) --
    # verify against the training code.
    final_df = pd.concat([final_df.iloc[:, 3:], final_df.iloc[:, :3]], axis=1)

    # Only one row was submitted, so take the first row of probabilities.
    # NOTE(review): index 1 is labelled as churn, index 0 as stay -- confirm
    # this matches the model's class ordering.
    probabilities = model.predict_proba(final_df)[0]
    return {
        "Prediction: CHURN 🔴": probabilities[1],
        "Prediction: STAY ✅": probabilities[0],
    }