import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib
import os
import sys
import pickle
# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(project_root)

class LiverDiseaseModel:
    def __init__(self):
        """Locate the saved model and scaler on disk and load them with detailed error handling."""
        self.model = None
        self.scaler = None
        self.model_path = os.path.join(project_root, "models", "liver_model.joblib")
        self.scaler_path = os.path.join(project_root, "models", "liver_scaler.joblib")
        self.pkl_model_path = os.path.join(project_root, "models", "liver_disease_model.pkl")

        # Create the models directory if it doesn't exist
        os.makedirs(os.path.dirname(self.model_path), exist_ok=True)

        print(f"Looking for model at: {self.pkl_model_path}")

        # Try to load the model and scaler in this order:
        #   1. The combined .pkl file
        #   2. The separate .joblib files
        if os.path.exists(self.pkl_model_path):
            try:
                print(f"Loading model from {self.pkl_model_path}")
                with open(self.pkl_model_path, 'rb') as f:
                    model_data = pickle.load(f)
                # The .pkl may be a dictionary containing both model and scaler
                if isinstance(model_data, dict):
                    self.model = model_data.get('model')
                    self.scaler = model_data.get('scaler')
                    print("Successfully loaded model and scaler from .pkl file")
                else:
                    # Otherwise it is just the model
                    self.model = model_data
                    print("Loaded model from .pkl file, but no scaler found")
                    # Try to load the scaler separately if it exists
                    if os.path.exists(self.scaler_path):
                        self.scaler = joblib.load(self.scaler_path)
                        print("Loaded scaler from .joblib file")
                    else:
                        # Fall back to a default (unfitted) scaler; predict()
                        # catches the resulting transform error and uses raw features
                        print("No scaler found, creating a default StandardScaler")
                        self.scaler = StandardScaler()
            except Exception as e:
                print(f"Error loading model from .pkl file: {str(e)}")
                import traceback
                print(traceback.format_exc())
        else:
            print(f"Model file not found at: {self.pkl_model_path}")
    def train(self, X, y):
        """Train the model on the provided data and return the test accuracy."""
        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Initialize and fit the scaler on the training split
        self.scaler = StandardScaler()
        X_train_scaled = self.scaler.fit_transform(X_train)

        # Initialize and train the model
        self.model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42
        )
        self.model.fit(X_train_scaled, y_train)

        # Save the model and scaler in both formats
        joblib.dump(self.model, self.model_path)
        joblib.dump(self.scaler, self.scaler_path)
        # Also save as .pkl for compatibility
        with open(self.pkl_model_path, 'wb') as f:
            pickle.dump({'model': self.model, 'scaler': self.scaler}, f)

        # Evaluate on the test set
        X_test_scaled = self.scaler.transform(X_test)
        test_score = self.model.score(X_test_scaled, y_test)
        return test_score
    def predict(self, features):
        """Make a prediction for the given features."""
        if self.model is None:
            raise ValueError(
                f"Model not loaded. Please ensure the model file exists at "
                f"{self.pkl_model_path} and is valid."
            )
        if self.scaler is None:
            print("Warning: No scaler found. Using raw features without scaling.")

        # Create a DataFrame with the features in the order the model expects.
        # The spellings below (e.g. 'Alkaline_Phosphotase', 'Total_Protiens')
        # match the training data's column names and must be kept as-is.
        feature_names = ['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
                         'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
                         'Aspartate_Aminotransferase', 'Total_Protiens',
                         'Albumin', 'Albumin_and_Globulin_Ratio']
        df = pd.DataFrame([features], columns=feature_names)

        # Scale the features if a scaler is available
        if self.scaler is not None:
            try:
                X_scaled = self.scaler.transform(df)
            except Exception as e:
                print(f"Error scaling features: {str(e)}. Using raw features.")
                X_scaled = df.values
        else:
            X_scaled = df.values

        # Compute the prediction and the probability of the positive class
        try:
            prediction = bool(self.model.predict(X_scaled)[0])
            probability = float(self.model.predict_proba(X_scaled)[0][1])
        except Exception as e:
            print(f"Error making prediction: {str(e)}")
            import traceback
            print(traceback.format_exc())
            raise ValueError(f"Error making prediction: {str(e)}")

        return {
            "prediction": prediction,
            "probability": probability
        }
    def get_feature_importance(self):
        """Return feature importance if available, otherwise None."""
        if self.model is None:
            return None
        try:
            # RandomForestClassifier exposes feature_importances_ once fitted
            if hasattr(self.model, 'feature_importances_'):
                return self.model.feature_importances_.tolist()
            return None
        except Exception:
            return None
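
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal end-to-end example of the class API. It trains on random synthetic
# data purely to exercise train/predict/get_feature_importance; the synthetic
# features and labels are assumptions, not the real liver-disease dataset.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    columns = ['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
               'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
               'Aspartate_Aminotransferase', 'Total_Protiens',
               'Albumin', 'Albumin_and_Globulin_Ratio']
    X = pd.DataFrame(rng.normal(size=(200, len(columns))), columns=columns)
    y = pd.Series(rng.integers(0, 2, size=200))

    model = LiverDiseaseModel()
    score = model.train(X, y)
    print(f"Held-out accuracy on synthetic data: {score:.3f}")

    # Predict for a single row; values must follow the column order above
    print(model.predict(X.iloc[0].tolist()))
    print(model.get_feature_importance())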