Rawiwan1912 committed on
Commit
4b23871
·
verified ·
1 Parent(s): 66cca75

Update churn_analysis.py

Browse files
Files changed (1) hide show
  1. churn_analysis.py +76 -0
churn_analysis.py CHANGED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib, os
5
+
6
# Resolve artifact paths relative to this file so that loading does not depend
# on the process's current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
pipeline_path = os.path.join(script_dir, 'toolkit', 'pipeline.joblib')
model_path = os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib')

# Load transformation pipeline and model.
# NOTE: joblib.load unpickles the file and can therefore execute arbitrary
# code; only ship and load artifacts from a trusted source.
pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)
13
+
14
+ # Create a function to calculate TotalCharges
15
def calculate_total_charges(tenure, monthly_charges):
    """Return the TotalCharges value as ``tenure * monthly_charges``.

    Args:
        tenure: Length of the customer relationship (presumably in months --
            confirm against the training data).
        monthly_charges: Amount charged per period.

    Returns:
        The product of the two arguments.
    """
    return tenure * monthly_charges
17
+
18
+ # Create a function that applies the ML pipeline and makes predictions
19
def predict(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Apply the preprocessing pipeline and model to one customer record.

    Each argument becomes a single-row column of the same name; TotalCharges
    is derived from ``tenure`` and ``MonthlyCharges``. The row is transformed
    by the module-level ``pipeline`` and scored with the module-level
    ``model``.

    Returns:
        dict: Maps the two display labels ("Prediction: CHURN 🔴" and
        "Prediction: STAY ✅") to the probabilities at index 1 and index 0 of
        ``model.predict_proba`` respectively, as plain Python floats.
    """
    # Calculate TotalCharges
    TotalCharges = calculate_total_charges(tenure, MonthlyCharges)

    # Create a dataframe with the input data
    input_df = pd.DataFrame({
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges],
    })

    # Split columns into numerical and categorical. Testing for a numeric
    # dtype (rather than comparing against 'object') keeps string columns in
    # the categorical group even when pandas stores them with a dedicated
    # string dtype instead of object.
    num_cols = [
        col for col in input_df.columns
        if pd.api.types.is_numeric_dtype(input_df[col])
    ]
    cat_cols = [col for col in input_df.columns if col not in num_cols]

    X_processed = pipeline.transform(input_df)
    # The transformer may hand back a sparse matrix; densify it so the
    # DataFrame below gets one column per feature.
    if hasattr(X_processed, "toarray"):
        X_processed = X_processed.toarray()

    # Extracting feature names for categorical columns after one-hot encoding
    preprocessor = pipeline.named_steps['preprocessor']
    cat_encoder = preprocessor.named_transformers_['cat'].named_steps['onehot']
    cat_feature_names = cat_encoder.get_feature_names_out(cat_cols)

    # Concatenating numerical and categorical feature names.
    # NOTE(review): assumes the preprocessor emits numerical features first,
    # then the one-hot encoded ones -- confirm against the fitted pipeline.
    feature_names = num_cols + list(cat_feature_names)

    # Convert X_processed to DataFrame
    final_df = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end.
    # NOTE(review): presumably this reproduces the column order the model was
    # fitted with -- confirm against the training code.
    n_leading = 3
    final_df = pd.concat(
        [final_df.iloc[:, n_leading:], final_df.iloc[:, :n_leading]],
        axis=1,
    )

    # Make predictions using the model; cast to built-in floats so the result
    # is a plain, JSON-serializable mapping.
    prediction_probs = model.predict_proba(final_df)[0]
    prediction_label = {
        "Prediction: CHURN 🔴": float(prediction_probs[1]),
        "Prediction: STAY ✅": float(prediction_probs[0]),
    }

    return prediction_label
76
+