File size: 2,890 Bytes
3efedb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

class LoanApprovalModel:
    """Bundle a scikit-learn classifier with the preprocessor that feeds it.

    The wrapper keeps three pieces of state that are trained, used and
    persisted together: the estimator (``model``), the feature
    ``preprocessor`` and an optional ``feature_names`` value supplied by the
    caller at training time.
    """

    def __init__(self, model_type='random_forest'):
        """Create an untrained wrapper.

        Args:
            model_type: ``'random_forest'`` or ``'gradient_boosting'``.

        Raises:
            ValueError: If ``model_type`` is not one of the supported names.
        """
        self.model_type = model_type
        self.model = self._get_model()
        # Both stay None until train() or load_model() fills them in.
        self.preprocessor = None
        self.feature_names = None

    def _get_model(self):
        """Build the unfitted estimator selected by ``self.model_type``.

        Raises:
            ValueError: If ``self.model_type`` is not supported.
        """
        if self.model_type == 'random_forest':
            return RandomForestClassifier(n_estimators=100, random_state=42)
        elif self.model_type == 'gradient_boosting':
            return GradientBoostingClassifier(random_state=42)
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")

    def _transform(self, X):
        """Run ``X`` through the stored preprocessor, failing clearly if unset.

        Raises:
            AttributeError: If no preprocessor has been stored yet.
        """
        if self.preprocessor is None:
            # AttributeError (not a new exception type) is deliberate: it is
            # what the unguarded ``None.transform`` call raised, so callers
            # that already handle that failure keep working.
            raise AttributeError(
                "LoanApprovalModel has no fitted preprocessor; "
                "call train() or load_model() before predicting."
            )
        return self.preprocessor.transform(X)

    def train(self, preprocessor, X_train, y_train, feature_names=None):
        """Fit ``preprocessor`` and the model on the training data.

        Args:
            preprocessor: Object exposing ``fit_transform`` and ``transform``.
            X_train: Raw training features, passed to
                ``preprocessor.fit_transform``.
            y_train: Training labels, passed to the model's ``fit``.
            feature_names: Optional value stored as ``self.feature_names``;
                it is not used by training itself.
        """
        X_train_processed = preprocessor.fit_transform(X_train)
        self.model.fit(X_train_processed, y_train)

        # Store state only after both fits succeeded, so a failed first
        # training leaves the wrapper reporting "not trained" rather than
        # holding a preprocessor paired with an unfitted model.
        self.preprocessor = preprocessor
        self.feature_names = feature_names

    def predict(self, X):
        """Return the model's predictions for raw features ``X``.

        Raises:
            AttributeError: If called before ``train()`` / ``load_model()``.
        """
        return self.model.predict(self._transform(X))

    def predict_proba(self, X):
        """Return the model's ``predict_proba`` output for raw features ``X``.

        Raises:
            AttributeError: If called before ``train()`` / ``load_model()``.
        """
        return self.model.predict_proba(self._transform(X))

    def evaluate(self, X_test, y_test):
        """Score the trained model on held-out data.

        Args:
            X_test: Raw test features.
            y_test: True labels for ``X_test``.

        Returns:
            Dict with keys ``'accuracy'``, ``'precision'``, ``'recall'``,
            ``'f1'`` and ``'roc_auc'``.

        Raises:
            AttributeError: If called before ``train()`` / ``load_model()``.
        """
        # Transform once and reuse the result for both the label and the
        # probability predictions instead of preprocessing the data twice.
        X_processed = self._transform(X_test)
        y_pred = self.model.predict(X_processed)
        # Column 1 of predict_proba is used as the score for ROC AUC.
        # NOTE(review): this assumes a binary target whose positive class is
        # the second entry of the model's classes -- confirm against the data.
        y_proba = self.model.predict_proba(X_processed)[:, 1]

        return {
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred),
            'recall': recall_score(y_test, y_pred),
            'f1': f1_score(y_test, y_pred),
            'roc_auc': roc_auc_score(y_test, y_proba),
        }

    def save_model(self, filepath):
        """Persist model, preprocessor, feature names and model type.

        Everything is written as a single dict via ``joblib.dump``.

        Args:
            filepath: Destination passed straight to ``joblib.dump``.
        """
        model_data = {
            'model': self.model,
            'preprocessor': self.preprocessor,
            'feature_names': self.feature_names,
            'model_type': self.model_type
        }
        joblib.dump(model_data, filepath)

    @classmethod
    def load_model(cls, filepath):
        """Rebuild a wrapper from a file written by ``save_model``.

        Args:
            filepath: Source passed straight to ``joblib.load``.

        Returns:
            A new instance carrying the stored model, preprocessor, feature
            names and model type.

        Raises:
            KeyError: If the loaded dict lacks one of the expected keys.
            ValueError: If the stored ``model_type`` is not supported.

        Warning:
            ``joblib.load`` unpickles the file, which can execute arbitrary
            code. Only load files from a trusted source.
        """
        # SECURITY: pickle-based deserialization -- never point this at
        # user-supplied or otherwise untrusted files.
        model_data = joblib.load(filepath)

        # Going through __init__ validates the stored model_type; the
        # freshly built estimator is then replaced by the persisted one.
        model_instance = cls(model_type=model_data['model_type'])
        model_instance.model = model_data['model']
        model_instance.preprocessor = model_data['preprocessor']
        model_instance.feature_names = model_data['feature_names']

        return model_instance