WickedFaith committed on
Commit
474ddf8
·
verified ·
1 Parent(s): f5af99d

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +227 -0
  2. career_prediction_model.pkl +3 -0
  3. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ import pickle
8
+ import gradio as gr
9
+ import os
10
+
11
# Load the trained model bundle.
# The pickle is resolved relative to this script (falling back to the current
# working directory when __file__ is unavailable) so the app starts correctly
# no matter which directory it is launched from.
_base_dir = (os.path.dirname(os.path.abspath(__file__))
             if '__file__' in globals() else os.getcwd())
model_path = os.path.join(_base_dir, 'career_prediction_model.pkl')

# NOTE: pickle.load can execute arbitrary code contained in the file. Only the
# model artifact shipped with this app must ever be loaded here, never a
# user-supplied pickle.
with open(model_path, 'rb') as f:
    saved_data = pickle.load(f)

# Unpack the bundle into the module-level objects used by the functions below.
model = saved_data['model']                    # estimator exposing .predict()
label_encoders = saved_data['label_encoders']  # per-feature encoders, keyed by column name
target_encoder = saved_data['target_encoder']  # encoder for the career label
features = saved_data['features']              # feature column names fed to the model
# Name of the ground-truth column expected in evaluation CSV files.
target = 'What would you like to become when you grow up'
21
+
22
+ # Function for individual prediction
23
def predict_career(work_env, academic_perf, motivation, leadership, tech_savvy):
    """Predict a career for a single person and format the result as text.

    Args:
        work_env: Selected "Preferred Work Environment" category.
        academic_perf: Academic performance as a number (or numeric string).
        motivation: Selected "Motivation for Career Choice" category.
        leadership: Selected "Leadership Experience" category.
        tech_savvy: Selected "Tech-Savviness" category.

    Returns:
        A string containing the predicted career and, when the model exposes
        ``predict_proba``, every class probability sorted from most to least
        likely. If the academic performance is missing or not numeric, a
        readable error message is returned instead of raising.
    """
    # An empty numeric field arrives as None; float(None) would raise TypeError.
    try:
        academic_value = float(academic_perf)
    except (TypeError, ValueError):
        return "Error: Please enter a numeric value for Academic Performance."

    # Prepare input data
    input_data = pd.DataFrame({
        'Preferred Work Environment': [work_env],
        'Academic Performance (CGPA/Percentage)': [academic_value],
        'Motivation for Career Choice ': [motivation],  # Note the space at the end
        'Leadership Experience': [leadership],
        'Tech-Savviness': [tech_savvy]
    })

    # Encode categorical features. Columns are selected by "not numeric"
    # instead of "dtype == object" so text columns are still encoded when
    # pandas stores them with a dedicated string dtype.
    for feature in features:
        if feature not in label_encoders:
            continue
        if pd.api.types.is_numeric_dtype(input_data[feature]):
            continue
        try:
            input_data[feature] = label_encoders[feature].transform(input_data[feature])
        except (ValueError, TypeError):
            # Unknown or missing category: fall back to encoded value 0,
            # i.e. the first class known to the encoder.
            print(f"Warning: Unknown category in {feature}. Using first category (code 0).")
            input_data[feature] = 0

    # Present the columns in the order stored with the model bundle rather
    # than relying on the literal order of the dict above.
    model_input = input_data[list(features)]

    # Make prediction (ravel tolerates estimators returning an (n, 1) array)
    prediction = np.ravel(model.predict(model_input))[0]
    predicted_career = target_encoder.inverse_transform([int(prediction)])[0]

    if not hasattr(model, 'predict_proba'):
        return f"Predicted career: {predicted_career}"

    # Get probabilities for all classes. The probability columns follow
    # model.classes_ when the estimator provides it; positional indices are
    # only used as a fallback.
    probabilities = model.predict_proba(model_input)[0]
    class_ids = getattr(model, 'classes_', range(len(probabilities)))
    class_names = target_encoder.inverse_transform([int(c) for c in class_ids])
    ranked = sorted(zip(class_names, probabilities),
                    key=lambda pair: pair[1], reverse=True)

    header = f"Predicted career: {predicted_career}\n\nProbabilities:\n"
    return header + "".join(f"{career}: {prob:.2f}\n" for career, prob in ranked)
60
+
61
+ # Function for batch evaluation
62
def _encode_with_fallback(values, encoder):
    """Map each label to its position in ``encoder.classes_``; unseen labels become 0."""
    code_of = {label: code for code, label in enumerate(encoder.classes_)}
    return values.map(lambda value: code_of.get(value, 0)).astype(int)


def evaluate_model_with_csv(csv_file):
    """Evaluate the loaded model against a labelled CSV file.

    Args:
        csv_file: The uploaded file, either an object exposing the path as
            ``.name`` or the path itself as a string.

    Returns:
        A two-element list ``[text, image_path]``. On success ``text`` holds
        the accuracy and per-class precision/recall/F1 and ``image_path``
        points to the saved confusion-matrix PNG. For problems detected while
        reading or validating the CSV, ``image_path`` is ``None``. For any
        other failure, ``text`` is the error message and ``image_path`` points
        to a PNG showing that message.
    """
    try:
        if csv_file is None:
            return ["Error: Please upload a CSV file.", None]

        # Accept both a file wrapper (path in .name) and a plain path string.
        csv_path = getattr(csv_file, 'name', csv_file)

        # 'utf-8-sig' is tried first: it reads plain UTF-8 as well and strips
        # a leading BOM that would otherwise corrupt the first column name.
        # 'latin1' goes last because it can decode any byte sequence.
        encodings = ['utf-8-sig', 'cp1252', 'latin1']
        test_df = None
        read_error = None
        for encoding in encodings:
            try:
                test_df = pd.read_csv(csv_path, encoding=encoding)
                break
            except UnicodeDecodeError:
                read_error = "Error: Could not decode the CSV file with any common encodings."
            except Exception as e:
                read_error = f"Error reading CSV: {str(e)}"
        if test_df is None:
            # Every encoding failed; report the most recent failure.
            return [read_error, None]

        # Check if required columns exist
        missing_cols = [col for col in list(features) + [target]
                        if col not in test_df.columns]
        if missing_cols:
            return [f"Error: The following required columns are missing in the CSV: {missing_cols}", None]

        # Preprocess the test data
        X_eval = test_df[list(features)].copy()

        # Convert Academic Performance to numeric; unparsable or missing
        # entries become the column mean. The result is assigned back because
        # a chained ``fillna(..., inplace=True)`` does not update the frame
        # when pandas Copy-on-Write is active.
        perf_col = 'Academic Performance (CGPA/Percentage)'
        X_eval[perf_col] = pd.to_numeric(X_eval[perf_col], errors='coerce')
        X_eval[perf_col] = X_eval[perf_col].fillna(X_eval[perf_col].mean())

        # Encode categorical features
        for feature in features:
            if feature == perf_col or feature not in label_encoders:
                continue
            column = X_eval[feature]
            if column.isna().any():
                # Missing categories are labelled 'Unknown' before encoding.
                column = column.astype(object).where(column.notna(), 'Unknown')
            if pd.api.types.is_numeric_dtype(column):
                continue
            # Categories the encoder has never seen are mapped to code 0.
            X_eval[feature] = _encode_with_fallback(column, label_encoders[feature])

        # Get the true labels.
        # NOTE(review): missing labels are treated as 'Corporate Employee' and
        # labels unknown to the encoder as class 0; both choices can skew the
        # reported metrics - confirm this is intended.
        y_true = test_df[target].fillna('Corporate Employee')
        y_true_encoded = _encode_with_fallback(y_true, target_encoder).to_numpy()

        # Make predictions (ravel tolerates estimators returning an (n, 1) array)
        y_pred = np.ravel(model.predict(X_eval)).astype(int)

        # Calculate accuracy
        accuracy = accuracy_score(y_true_encoded, y_pred)

        # Create a DataFrame with actual vs predicted values
        class_names = target_encoder.classes_
        results_df = pd.DataFrame({
            'Actual Career': [class_names[i] for i in y_true_encoded],
            'Predicted Career': [class_names[i] for i in y_pred]
        })

        # Count correct predictions
        results_df['Correct'] = results_df['Actual Career'] == results_df['Predicted Career']
        correct_count = results_df['Correct'].sum()
        total_count = len(results_df)

        # Create and save the confusion matrix; the figure is always closed
        # so repeated evaluations do not accumulate open figures.
        cm_path = 'confusion_matrix.png'
        fig = plt.figure(figsize=(12, 10))
        try:
            cm = pd.crosstab(results_df['Actual Career'], results_df['Predicted Career'])
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title('Confusion Matrix')
            plt.ylabel('Actual Career')
            plt.xlabel('Predicted Career')
            plt.tight_layout()
            fig.savefig(cm_path)
        finally:
            plt.close(fig)

        # Prepare the results
        result_text = f"Model Evaluation Results:\n\n"
        result_text += f"Total samples: {total_count}\n"
        result_text += f"Correct predictions: {correct_count}\n"
        result_text += f"Accuracy: {accuracy:.4f}\n\n"

        # Generate classification report. ``labels`` lists every known class
        # explicitly; without it the call raises when the CSV does not
        # contain all classes, because target_names would not line up.
        report = classification_report(y_true_encoded, y_pred,
                                       labels=np.arange(len(class_names)),
                                       target_names=class_names,
                                       output_dict=True,
                                       zero_division=0)

        # Add class-wise metrics
        result_text += "Class-wise Performance:\n"
        for class_name in class_names:
            if class_name in report:
                result_text += f"\n{class_name}:\n"
                result_text += f" Precision: {report[class_name]['precision']:.4f}\n"
                result_text += f" Recall: {report[class_name]['recall']:.4f}\n"
                result_text += f" F1-score: {report[class_name]['f1-score']:.4f}\n"

        return [result_text, cm_path]

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in evaluation: {str(e)}\n{error_details}")

        # Create a simple error image
        error_path = 'error_image.png'
        fig = plt.figure(figsize=(6, 4))
        try:
            plt.text(0.5, 0.5, f"Error: {str(e)}",
                     horizontalalignment='center', verticalalignment='center',
                     fontsize=12, color='red', wrap=True)
            plt.axis('off')
            fig.savefig(error_path)
        finally:
            plt.close(fig)

        return [f"Error: {str(e)}", error_path]
184
+
185
# Build the dropdown choices from the classes each label encoder knows about.
(
    work_env_options,
    motivation_options,
    leadership_options,
    tech_savvy_options,
) = (
    list(label_encoders[column_name].classes_)
    for column_name in (
        'Preferred Work Environment',
        'Motivation for Career Choice ',  # the trailing space is part of the key
        'Leadership Experience',
        'Tech-Savviness',
    )
)
190
+
191
# Create the Gradio interface for single predictions.
# The input components are listed in the same order as predict_career's
# parameters, because Gradio passes their values positionally.
# The former theme="huggingface" argument is dropped: it is not a built-in
# theme name, so it only produced a startup warning before falling back to the
# default theme, and this interface is rendered inside a TabbedInterface.
iface = gr.Interface(
    fn=predict_career,
    inputs=[
        gr.Dropdown(work_env_options, label="Preferred Work Environment"),
        # NOTE(review): the label mentions a percentage but the allowed range
        # is 0-10 - confirm which scale the model expects.
        gr.Number(label="Academic Performance (CGPA/Percentage)", minimum=0, maximum=10),
        gr.Dropdown(motivation_options, label="Motivation for Career Choice"),
        gr.Dropdown(leadership_options, label="Leadership Experience"),
        gr.Dropdown(tech_savvy_options, label="Tech-Savviness")
    ],
    outputs="text",
    title="Career Prediction Model",
    description="Enter your details to predict your future career path"
)
206
+
207
# Create a separate interface for model evaluation.
# The two outputs receive, in order, the text and the image path returned by
# evaluate_model_with_csv.
# The former theme="huggingface" argument is dropped: it is not a built-in
# theme name, so it only produced a startup warning before falling back to the
# default theme, and this interface is rendered inside a TabbedInterface.
eval_iface = gr.Interface(
    fn=evaluate_model_with_csv,
    inputs=gr.File(label="Upload Test CSV File"),
    outputs=[
        gr.Textbox(label="Evaluation Results"),
        gr.Image(label="Confusion Matrix")
    ],
    title="Career Prediction Model Evaluation",
    description="Upload a CSV file with test data to evaluate the model's performance"
)
219
+
220
# Create a tabbed interface: one tab per interface, titled in the same order.
demo = gr.TabbedInterface(
    [iface, eval_iface],
    ["Individual Prediction", "Batch Evaluation"]
)

# Launch the interface only when this file is executed as a script, so that
# importing the module (for example from a test or another script) does not
# start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()
career_prediction_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2d2d50abdebdc3b64d365a7861ee745236482e9f9a3af3878fcedbf59b58be
3
+ size 888869
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ xgboost
6
+ lightgbm
7
+ catboost
8
+ matplotlib
9
+ seaborn
10
+ gradio