Spaces:
Sleeping
Sleeping
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import accuracy_score | |
from .base_model import BaseModel | |
from ..config import DIABETES_MODEL_PATH, RANDOM_STATE, TEST_SIZE | |
import numpy as np | |
class DiabetesModel(BaseModel): | |
def __init__(self): | |
super().__init__(DIABETES_MODEL_PATH) | |
self.model = KNeighborsClassifier( | |
n_neighbors=7, # Increased neighbors for more robust prediction | |
weights='distance' # Weight points by distance | |
) | |
self.feature_names = [ | |
'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', | |
'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', | |
'GlucoseBMI', 'GlucoseAge' # Added derived features | |
] | |
self.X_train = None | |
self.y_train = None | |
# Define risk thresholds | |
self.high_risk_threshold = 0.6 | |
def train(self, X, y): | |
X_train, X_test, y_train, y_test = train_test_split( | |
X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, | |
stratify=y # Ensure balanced split | |
) | |
self.X_train = X_train | |
self.y_train = y_train | |
self.model.fit(X_train, y_train) | |
return self.evaluate(X_train, X_test, y_train, y_test) | |
def predict(self, X): | |
if self.scaler: | |
X = self.scaler.transform(X) | |
# Get distances and indices of nearest neighbors | |
distances, indices = self.model.kneighbors(X) | |
# Get similar cases | |
similar_cases = self.X_train.iloc[indices[0]] | |
similar_outcomes = self.y_train.iloc[indices[0]] | |
# Calculate weighted probability | |
weights = 1 / (distances[0] + 1e-6) # Add small constant to avoid division by zero | |
weighted_prob = np.sum(similar_outcomes * weights) / np.sum(weights) | |
# Make prediction based on probability threshold | |
prediction = np.array([1 if weighted_prob >= self.high_risk_threshold else 0]) | |
return prediction, similar_cases, similar_outcomes, distances[0] | |
def evaluate(self, X_train, X_test, y_train, y_test): | |
train_accuracy = accuracy_score(y_train, self.model.predict(X_train)) | |
test_accuracy = accuracy_score(y_test, self.model.predict(X_test)) | |
return train_accuracy, test_accuracy |