import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib
import os
import sys
import pickle
# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(project_root)

class LiverDiseaseModel:
    def __init__(self):
        """Locate the saved model and scaler on disk and load them with detailed error handling."""
        self.model = None
        self.scaler = None
        self.model_path = os.path.join(project_root, "models", "liver_model.joblib")
        self.scaler_path = os.path.join(project_root, "models", "liver_scaler.joblib")
        self.pkl_model_path = os.path.join(project_root, "models", "liver_disease_model.pkl")

        # Create the models directory if it doesn't exist
        os.makedirs(os.path.dirname(self.model_path), exist_ok=True)

        print(f"Looking for model at: {self.pkl_model_path}")

        # Try to load the model and scaler in this order:
        #   1. The combined .pkl file
        #   2. The separate .joblib files
        if os.path.exists(self.pkl_model_path):
            try:
                print(f"Loading model from {self.pkl_model_path}")
                with open(self.pkl_model_path, 'rb') as f:
                    model_data = pickle.load(f)
                # The .pkl may be a dictionary containing both model and scaler
                if isinstance(model_data, dict):
                    self.model = model_data.get('model')
                    self.scaler = model_data.get('scaler')
                    print("Successfully loaded model and scaler from .pkl file")
                else:
                    # Otherwise it is just the model
                    self.model = model_data
                    print("Loaded model from .pkl file, but no scaler found")
                    # Try to load the scaler separately if it exists
                    if os.path.exists(self.scaler_path):
                        self.scaler = joblib.load(self.scaler_path)
                        print("Loaded scaler from .joblib file")
                    else:
                        # Fall back to a default (unfitted) scaler; predict()
                        # catches the resulting transform error and uses raw features
                        print("No scaler found, creating a default StandardScaler")
                        self.scaler = StandardScaler()
            except Exception as e:
                print(f"Error loading model from .pkl file: {str(e)}")
                import traceback
                print(traceback.format_exc())
        else:
            print(f"Model file not found at: {self.pkl_model_path}")
    def train(self, X, y):
        """Train the model on the provided data and return the test accuracy."""
        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Initialize and fit the scaler on the training split
        self.scaler = StandardScaler()
        X_train_scaled = self.scaler.fit_transform(X_train)

        # Initialize and train the model
        self.model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42
        )
        self.model.fit(X_train_scaled, y_train)

        # Save the model and scaler in both formats
        joblib.dump(self.model, self.model_path)
        joblib.dump(self.scaler, self.scaler_path)
        # Also save as .pkl for compatibility
        with open(self.pkl_model_path, 'wb') as f:
            pickle.dump({'model': self.model, 'scaler': self.scaler}, f)

        # Evaluate on the test set
        X_test_scaled = self.scaler.transform(X_test)
        test_score = self.model.score(X_test_scaled, y_test)
        return test_score
    def predict(self, features):
        """Make a prediction for the given features."""
        if self.model is None:
            raise ValueError(
                f"Model not loaded. Please ensure the model file exists at "
                f"{self.pkl_model_path} and is valid."
            )
        if self.scaler is None:
            print("Warning: No scaler found. Using raw features without scaling.")

        # Create a DataFrame with the features in the order the model expects.
        # The spellings below (e.g. 'Alkaline_Phosphotase', 'Total_Protiens')
        # match the training data's column names and must be kept as-is.
        feature_names = ['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
                         'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
                         'Aspartate_Aminotransferase', 'Total_Protiens',
                         'Albumin', 'Albumin_and_Globulin_Ratio']
        df = pd.DataFrame([features], columns=feature_names)

        # Scale the features if a scaler is available
        if self.scaler is not None:
            try:
                X_scaled = self.scaler.transform(df)
            except Exception as e:
                print(f"Error scaling features: {str(e)}. Using raw features.")
                X_scaled = df.values
        else:
            X_scaled = df.values

        # Compute the prediction and the probability of the positive class
        try:
            prediction = bool(self.model.predict(X_scaled)[0])
            probability = float(self.model.predict_proba(X_scaled)[0][1])
        except Exception as e:
            print(f"Error making prediction: {str(e)}")
            import traceback
            print(traceback.format_exc())
            raise ValueError(f"Error making prediction: {str(e)}")

        return {
            "prediction": prediction,
            "probability": probability
        }
    def get_feature_importance(self):
        """Return feature importance if available, otherwise None."""
        if self.model is None:
            return None
        try:
            # RandomForestClassifier exposes feature_importances_ once fitted
            if hasattr(self.model, 'feature_importances_'):
                return self.model.feature_importances_.tolist()
            return None
        except Exception:
            return None
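
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal end-to-end example of the class API. It trains on random synthetic
# data purely to exercise train/predict/get_feature_importance; the synthetic
# features and labels are assumptions, not the real liver-disease dataset.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    columns = ['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
               'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
               'Aspartate_Aminotransferase', 'Total_Protiens',
               'Albumin', 'Albumin_and_Globulin_Ratio']
    X = pd.DataFrame(rng.normal(size=(200, len(columns))), columns=columns)
    y = pd.Series(rng.integers(0, 2, size=200))

    model = LiverDiseaseModel()
    score = model.train(X, y)
    print(f"Held-out accuracy on synthetic data: {score:.3f}")

    # Predict for a single row; values must follow the column order above
    print(model.predict(X.iloc[0].tolist()))
    print(model.get_feature_importance())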