File size: 6,148 Bytes
3efedb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib
import os
import sys
import pickle

# Add the project root to Python path so intra-project absolute imports
# resolve when this module is run directly (two levels up from this file).
# NOTE: appended (not inserted at 0), so installed packages shadow project modules.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(project_root)

class LiverDiseaseModel:
    """Random-forest classifier for liver-disease prediction.

    On construction, attempts to load a previously persisted model (and,
    when available, its fitted StandardScaler) from disk. A new model can
    be trained and persisted via :meth:`train`.
    """

    def __init__(self):
        """Locate and load the persisted model/scaler.

        Load order:
          1. ``liver_disease_model.pkl`` (may contain a ``{'model', 'scaler'}``
             dict or a bare model).
          2. ``liver_scaler.joblib`` as a fallback for the scaler only.

        Loading failures are reported to stdout and leave ``self.model`` /
        ``self.scaler`` as ``None``; callers see the failure when they call
        :meth:`predict`.
        """
        self.model = None   # fitted classifier, or None if loading failed
        self.scaler = None  # fitted StandardScaler, or None / unfitted default
        self.model_path = os.path.join(project_root, "models", "liver_model.joblib")
        self.scaler_path = os.path.join(project_root, "models", "liver_scaler.joblib")
        self.pkl_model_path = os.path.join(project_root, "models", "liver_disease_model.pkl")

        # Create models directory if it doesn't exist
        os.makedirs(os.path.dirname(self.model_path), exist_ok=True)

        print(f"Looking for model at: {self.pkl_model_path}")

        # Try to load model and scaler in this order:
        # 1. First try the .pkl file
        # 2. Then try the .joblib files
        if os.path.exists(self.pkl_model_path):
            try:
                print(f"Loading model from {self.pkl_model_path}")
                # SECURITY: pickle.load can execute arbitrary code; only load
                # model files produced by this project / a trusted source.
                with open(self.pkl_model_path, 'rb') as f:
                    model_data = pickle.load(f)

                # Check if the loaded data is a dictionary containing both model and scaler
                if isinstance(model_data, dict):
                    self.model = model_data.get('model')
                    self.scaler = model_data.get('scaler')
                    print("Successfully loaded model and scaler from .pkl file")
                else:
                    # If it's just the model
                    self.model = model_data
                    print("Loaded model from .pkl file, but no scaler found")

                    # Try to load scaler separately if it exists
                    if os.path.exists(self.scaler_path):
                        self.scaler = joblib.load(self.scaler_path)
                        print("Loaded scaler from .joblib file")
                    else:
                        # Unfitted default scaler: transform() will fail in
                        # predict(), which then falls back to raw features.
                        print("No scaler found, creating a default StandardScaler")
                        self.scaler = StandardScaler()
            except Exception as e:
                # Best-effort load: report and continue with model=None so the
                # app can surface a clear error at prediction time.
                print(f"Error loading model from .pkl file: {str(e)}")
                import traceback
                print(traceback.format_exc())
        else:
            print(f"Model file not found at: {self.pkl_model_path}")

    def train(self, X, y):
        """Train the model on the provided data.

        Args:
            X: feature matrix (array-like or DataFrame).
            y: binary target labels.

        Returns:
            float: accuracy on a held-out 20% test split.

        Side effects:
            Persists model and scaler as .joblib files and as a combined
            ``{'model', 'scaler'}`` .pkl file.
        """
        # Split the data (fixed seed for reproducibility)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Fit the scaler on training data only to avoid test-set leakage
        self.scaler = StandardScaler()
        X_train_scaled = self.scaler.fit_transform(X_train)

        # Initialize and train the model
        self.model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42
        )
        self.model.fit(X_train_scaled, y_train)

        # Save the model and scaler in both formats
        joblib.dump(self.model, self.model_path)
        joblib.dump(self.scaler, self.scaler_path)

        # Also save as .pkl for compatibility
        with open(self.pkl_model_path, 'wb') as f:
            pickle.dump({'model': self.model, 'scaler': self.scaler}, f)

        # Evaluate on test set
        X_test_scaled = self.scaler.transform(X_test)
        test_score = self.model.score(X_test_scaled, y_test)
        return test_score

    def predict(self, features):
        """Make a prediction for the given features.

        Args:
            features: sequence of 10 numeric values, in the order of
                ``feature_names`` below.

        Returns:
            dict: ``{"prediction": bool, "probability": float}`` where
            probability is the model's probability for the class at
            column index 1 of ``predict_proba``.

        Raises:
            ValueError: if no model is loaded, or the underlying model
                fails during prediction.
        """
        if self.model is None:
            raise ValueError(f"Model not loaded. Please ensure model file exists at {self.pkl_model_path} and is valid.")

        if self.scaler is None:
            print("Warning: No scaler found. Using raw features without scaling.")

        # Column order must match the order used at training time.
        feature_names = ['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin', 
                        'Alkaline_Phosphotase', 'Alamine_Aminotransferase', 
                        'Aspartate_Aminotransferase', 'Total_Protiens', 
                        'Albumin', 'Albumin_and_Globulin_Ratio']

        # Create a DataFrame with the features in the correct order
        df = pd.DataFrame([features], columns=feature_names)

        # Scale the features if scaler is available; fall back to raw values
        # when the scaler is unfitted or mismatched (best-effort behavior).
        if self.scaler is not None:
            try:
                X_scaled = self.scaler.transform(df)
            except Exception as e:
                print(f"Error scaling features: {str(e)}. Using raw features.")
                X_scaled = df.values
        else:
            X_scaled = df.values

        # Make prediction
        try:
            prediction = bool(self.model.predict(X_scaled)[0])
            probability = float(self.model.predict_proba(X_scaled)[0][1])
        except Exception as e:
            print(f"Error making prediction: {str(e)}")
            import traceback
            print(traceback.format_exc())
            # Chain the original exception so the real cause is preserved.
            raise ValueError(f"Error making prediction: {str(e)}") from e

        return {
            "prediction": prediction,
            "probability": probability
        }

    def get_feature_importance(self):
        """Return feature importances as a list, or None if unavailable.

        Returns None when no model is loaded or the model type does not
        expose ``feature_importances_``.
        """
        if self.model is None:
            return None

        try:
            # Only tree-based models expose feature_importances_
            if hasattr(self.model, 'feature_importances_'):
                return self.model.feature_importances_.tolist()
            return None
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed; any model-level error still yields None.
            return None