"""Streamlit app that predicts employee attrition with a pre-trained model.

The script loads a pickled bundle containing a trained model and a mapping of
unique feature values, renders input widgets, and displays the predicted
class together with the predicted probability of attrition.
"""
import pickle

import pandas as pd
import streamlit as st
# Not referenced directly in this file; kept from the original import list
# (presumably so catboost is importable before the model is unpickled).
from catboost import CatBoostClassifier  # noqa: F401

# Location of the pickled bundle, resolved relative to the working directory.
MODEL_PATH = 'model_and_key_components.pkl'

# Options shared by every 1-4 rating select-slider.
RATING_OPTIONS = [1, 2, 3, 4]

# Retention advice listed when the model predicts that the employee leaves.
RETENTION_SUGGESTIONS = (
    "- Invest in orientation programs and career development for "
    "entry-level staff, which could contribute to higher retention.",
    "- Implement mentorship programs and career development initiatives "
    "aimed at engaging and retaining younger employees.",
    "- Offer robust training and development programs and regular "
    "promotions to foster career growth. This investment in skills and "
    "career advancement can contribute to higher job satisfaction and "
    "retention.",
    "- Recognize the diverse needs of employees based on marital status "
    "and consider tailoring benefits or support programs accordingly.",
    "- Consider offering benefits that cater to the unique needs of "
    "married, single, and divorced employees.",
    "- Introduce or enhance policies that support work-life balance for "
    "employees with families.",
    "- Recognize the unique challenges and opportunities within each "
    "department and tailor retention strategies accordingly.",
)


def _load_components():
    """Read and unpickle the saved bundle stored at ``MODEL_PATH``.

    Returns:
        The unpickled object; the script reads its ``'model'`` and
        ``'unique_values'`` keys.

    Raises:
        OSError: If the file cannot be opened.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only point MODEL_PATH at a file from a trusted source.
    with open(MODEL_PATH, 'rb') as file:
        return pickle.load(file)


# Streamlit re-executes this script on every widget interaction. Caching the
# loader avoids re-reading and re-unpickling the bundle on each rerun. The
# guard keeps the script working on Streamlit releases that predate
# ``st.cache_resource``; the spinner is disabled so the UI is unchanged.
if hasattr(st, "cache_resource"):
    _load_components = st.cache_resource(show_spinner=False)(_load_components)

# Load the trained model and unique values from the pickle file
saved_components = _load_components()
model = saved_components['model']
unique_values = saved_components['unique_values']


def _render_sidebar() -> None:
    """Render the sidebar title and the expandable list of unique values."""
    st.sidebar.title("Model Settings")

    with st.sidebar.expander("View Unique Values"):
        st.write("Unique values for each feature:")
        for column, values in unique_values.items():
            st.write(f"- {column}: {values}")


def _collect_inputs() -> pd.DataFrame:
    """Render the input widgets and return their values as a one-row frame.

    Returns:
        A single-row DataFrame whose column names are the feature names
        passed to the model.
    """
    # Define layout with two columns
    col1, col2 = st.columns(2)

    # Column 1: numeric sliders (each starts at its minimum value).
    with col1:
        age = st.slider("Age", min_value=18, max_value=70)
        monthly_income = st.slider(
            "Monthly Income", min_value=1000, max_value=20000
        )
        num_companies_worked = st.slider(
            "Number of Companies Worked", min_value=0, max_value=10
        )
        percent_salary_hike = st.slider(
            "Percent Salary Hike", min_value=10, max_value=25
        )
        training_times_last_year = st.slider(
            "Training Times Last Year", min_value=0, max_value=6
        )
        years_since_last_promotion = st.slider(
            "Years Since Last Promotion", min_value=0, max_value=15
        )
        years_with_curr_manager = st.slider(
            "Years With Current Manager", min_value=0, max_value=15
        )

    # Column 2: overtime flag and the 1-4 rating sliders.
    with col2:
        # NOTE(review): the checkbox yields a bool; confirm the model was
        # trained with a boolean 'OverTime' feature rather than "Yes"/"No".
        over_time = st.checkbox("Over Time")
        work_life_balance = st.select_slider(
            "Work Life Balance", options=RATING_OPTIONS
        )
        environment_satisfaction = st.select_slider(
            "Environment Satisfaction", options=RATING_OPTIONS
        )
        job_satisfaction = st.select_slider(
            "Job Satisfaction", options=RATING_OPTIONS
        )
        relationship_satisfaction = st.select_slider(
            "Relationship Satisfaction", options=RATING_OPTIONS
        )

    # Create a DataFrame to hold the user input data
    return pd.DataFrame({
        'Age': [age],
        'MonthlyIncome': [monthly_income],
        'NumCompaniesWorked': [num_companies_worked],
        'PercentSalaryHike': [percent_salary_hike],
        'TrainingTimesLastYear': [training_times_last_year],
        'YearsSinceLastPromotion': [years_since_last_promotion],
        'YearsWithCurrManager': [years_with_curr_manager],
        'OverTime': [over_time],
        'WorkLifeBalance': [work_life_balance],
        'EnvironmentSatisfaction': [environment_satisfaction],
        'JobSatisfaction': [job_satisfaction],
        'RelationshipSatisfaction': [relationship_satisfaction],
    })


def _render_prediction(input_data: pd.DataFrame) -> None:
    """Run the model on ``input_data`` and display the outcome.

    Args:
        input_data: Single-row frame produced by ``_collect_inputs``.
    """
    # Make predictions
    prediction = model.predict(input_data)
    # Column 1 of predict_proba is used as the attrition probability.
    probability = model.predict_proba(input_data)[:, 1]

    # Display prediction
    if prediction[0] == 0:
        st.success("Employee is predicted to stay (Attrition = No)")
    else:
        st.error("Employee is predicted to leave (Attrition = Yes)")

        # Offer recommendations for retaining the employee
        st.subheader("Suggestions for retaining the employee:")
        for suggestion in RETENTION_SUGGESTIONS:
            st.markdown(suggestion)

    # Display probability
    st.write(f"Probability of Attrition: {probability[0]*100:.2f}%")


# Define the Streamlit app
def main() -> None:
    """Render the full page: title, sidebar, inputs, and prediction."""
    st.title("Employee Attrition Prediction App")
    _render_sidebar()

    # Main content
    st.write("Welcome to the Employee Attrition Prediction App!")
    st.write(
        "This app helps HR practitioners predict employee attrition using "
        "a trained CatBoost model."
    )
    st.write("Please provide the following information to make a prediction:")

    _render_prediction(_collect_inputs())


if __name__ == "__main__":
    main()