import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib
import os
import sys
import pickle

# Add the project root to Python path so sibling packages resolve.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(project_root)


class LiverDiseaseModel:
    """Random-forest classifier for liver-disease prediction.

    Persists the fitted model/scaler in two formats:
      * a combined ``.pkl`` dict ``{'model': ..., 'scaler': ...}``
      * separate ``.joblib`` files for model and scaler

    On construction it tries to load, in order:
      1. the combined ``.pkl`` file,
      2. the separate ``.joblib`` files (fallback — previously documented
         but never implemented; fixed here).
    """

    # Feature order expected by predict(); must match the training columns.
    FEATURE_NAMES = [
        'Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
        'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
        'Aspartate_Aminotransferase', 'Total_Protiens', 'Albumin',
        'Albumin_and_Globulin_Ratio',
    ]

    def __init__(self):
        self.model = None
        self.scaler = None
        self.model_path = os.path.join(project_root, "models", "liver_model.joblib")
        self.scaler_path = os.path.join(project_root, "models", "liver_scaler.joblib")
        self.pkl_model_path = os.path.join(project_root, "models", "liver_disease_model.pkl")

        # Create models directory if it doesn't exist
        os.makedirs(os.path.dirname(self.model_path), exist_ok=True)
        print(f"Looking for model at: {self.pkl_model_path}")

        # Load order: 1) combined .pkl, 2) separate .joblib files.
        if os.path.exists(self.pkl_model_path):
            self._load_from_pickle()
        elif os.path.exists(self.model_path):
            # BUGFIX: the .joblib fallback promised by the original comments
            # was never implemented — the constructor gave up if the .pkl
            # was missing, even when train() had written the .joblib files.
            self._load_from_joblib()
        else:
            print(f"Model file not found at: {self.pkl_model_path}")

    def _load_from_pickle(self):
        """Load model (and, if bundled, scaler) from the combined .pkl file."""
        try:
            print(f"Loading model from {self.pkl_model_path}")
            # SECURITY NOTE: pickle.load executes arbitrary code from the
            # file; only load model files from trusted sources.
            with open(self.pkl_model_path, 'rb') as f:
                model_data = pickle.load(f)

            # The .pkl may hold either a {'model', 'scaler'} dict or a bare model.
            if isinstance(model_data, dict):
                self.model = model_data.get('model')
                self.scaler = model_data.get('scaler')
                print("Successfully loaded model and scaler from .pkl file")
            else:
                self.model = model_data
                print("Loaded model from .pkl file, but no scaler found")
                if os.path.exists(self.scaler_path):
                    self.scaler = joblib.load(self.scaler_path)
                    print("Loaded scaler from .joblib file")
                else:
                    # Unfitted fallback so predict() can degrade gracefully.
                    print("No scaler found, creating a default StandardScaler")
                    self.scaler = StandardScaler()
        except Exception as e:
            print(f"Error loading model from .pkl file: {str(e)}")
            import traceback
            print(traceback.format_exc())

    def _load_from_joblib(self):
        """Fallback: load model and scaler from the separate .joblib files."""
        try:
            print(f"Loading model from {self.model_path}")
            self.model = joblib.load(self.model_path)
            if os.path.exists(self.scaler_path):
                self.scaler = joblib.load(self.scaler_path)
                print("Loaded model and scaler from .joblib files")
            else:
                print("No scaler found, creating a default StandardScaler")
                self.scaler = StandardScaler()
        except Exception as e:
            print(f"Error loading model from .joblib files: {str(e)}")
            import traceback
            print(traceback.format_exc())

    def train(self, X, y):
        """Train the model on the provided data.

        Fits a StandardScaler and a RandomForestClassifier on an 80/20
        train/test split, saves both artifacts in .joblib and combined
        .pkl formats, and returns the accuracy on the held-out test set.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Fit the scaler on training data only (no test-set leakage).
        self.scaler = StandardScaler()
        X_train_scaled = self.scaler.fit_transform(X_train)

        self.model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42
        )
        self.model.fit(X_train_scaled, y_train)

        # Save the model and scaler in both formats
        joblib.dump(self.model, self.model_path)
        joblib.dump(self.scaler, self.scaler_path)
        with open(self.pkl_model_path, 'wb') as f:
            pickle.dump({'model': self.model, 'scaler': self.scaler}, f)

        # Evaluate on the held-out test set.
        X_test_scaled = self.scaler.transform(X_test)
        return self.model.score(X_test_scaled, y_test)

    def predict(self, features):
        """Make a prediction for the given features.

        Args:
            features: sequence of 10 values in FEATURE_NAMES order.

        Returns:
            dict with ``prediction`` (bool) and ``probability`` (float,
            probability of the positive class).

        Raises:
            ValueError: if no model is loaded or prediction fails.
        """
        if self.model is None:
            raise ValueError(
                f"Model not loaded. Please ensure model file exists at "
                f"{self.pkl_model_path} and is valid."
            )
        if self.scaler is None:
            print("Warning: No scaler found. Using raw features without scaling.")

        # Build a single-row DataFrame so column names/order match training.
        df = pd.DataFrame([features], columns=self.FEATURE_NAMES)

        # Scale when possible; fall back to raw values (e.g. unfitted scaler).
        if self.scaler is not None:
            try:
                X_scaled = self.scaler.transform(df)
            except Exception as e:
                print(f"Error scaling features: {str(e)}. Using raw features.")
                X_scaled = df.values
        else:
            X_scaled = df.values

        try:
            prediction = bool(self.model.predict(X_scaled)[0])
            # NOTE(review): assumes the positive class sits at column 1 of
            # predict_proba — true for 0/1 labels; verify against classes_.
            probability = float(self.model.predict_proba(X_scaled)[0][1])
        except Exception as e:
            print(f"Error making prediction: {str(e)}")
            import traceback
            print(traceback.format_exc())
            raise ValueError(f"Error making prediction: {str(e)}")

        return {
            "prediction": prediction,
            "probability": probability
        }

    def get_feature_importance(self):
        """Return feature importances as a list, or None if unavailable."""
        if self.model is None:
            return None
        try:
            if hasattr(self.model, 'feature_importances_'):
                return self.model.feature_importances_.tolist()
            return None
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            return None