# app/src/models/diabetes.py
# Uploaded by schandel08 ("Files uploaded", commit a8b81f3)
import numpy as np
from sklearn.exceptions import NotFittedError
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

from ..config import DIABETES_MODEL_PATH, RANDOM_STATE, TEST_SIZE
from .base_model import BaseModel


class DiabetesModel(BaseModel):
    """Distance-weighted k-nearest-neighbours diabetes risk classifier.

    Besides a class label, ``predict`` returns the training cases closest to
    the query, so the training split is kept in memory after ``train``.
    """

    def __init__(self):
        super().__init__(DIABETES_MODEL_PATH)
        self.model = KNeighborsClassifier(
            n_neighbors=7,       # Increased neighbors for more robust prediction
            weights='distance',  # Weight points by distance
        )
        self.feature_names = [
            'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
            'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age',
            'GlucoseBMI', 'GlucoseAge',  # Added derived features
        ]
        # Training split retained by train(); predict() looks neighbours up here.
        self.X_train = None
        self.y_train = None
        # A weighted neighbour probability at or above this value yields class 1.
        self.high_risk_threshold = 0.6

    @staticmethod
    def _select_rows(data, positions):
        """Return the rows of ``data`` at integer ``positions``.

        Works for pandas objects (via ``.iloc``) as well as NumPy arrays and
        plain sequences, since ``train`` stores whatever container type
        ``train_test_split`` hands back.
        """
        if hasattr(data, 'iloc'):
            return data.iloc[positions]
        return np.asarray(data)[positions]

    def train(self, X, y):
        """Fit the classifier on a stratified train/test split of ``X`` and ``y``.

        Args:
            X: Feature matrix.
            y: Target labels; also used to stratify the split.

        Returns:
            ``(train_accuracy, test_accuracy)`` as computed by ``evaluate``.

        NOTE(review): no scaling is applied here although ``predict`` applies
        ``self.scaler`` -- presumably callers pass already-scaled features;
        confirm against the caller.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE,
            stratify=y,  # Ensure balanced split
        )
        self.X_train = X_train
        self.y_train = y_train
        self.model.fit(X_train, y_train)
        return self.evaluate(X_train, X_test, y_train, y_test)

    def predict(self, X):
        """Classify the first row of ``X`` and return its nearest training cases.

        Only row 0 of ``X`` is used; any further rows are ignored.

        Args:
            X: Query features, in a form accepted by ``self.scaler.transform``
                and ``KNeighborsClassifier.kneighbors``.

        Returns:
            A 4-tuple ``(prediction, similar_cases, similar_outcomes, distances)``:
            ``prediction`` is a one-element NumPy array holding 1 when the
            distance-weighted mean of the neighbours' outcomes reaches
            ``high_risk_threshold`` and 0 otherwise; ``similar_cases`` and
            ``similar_outcomes`` are the neighbouring training rows and their
            labels; ``distances`` are the distances to those neighbours.

        Raises:
            NotFittedError: if no training split is stored on this instance
                (``train`` has not been called). ``NotFittedError`` derives
                from both ``ValueError`` and ``AttributeError``.
        """
        if self.X_train is None or self.y_train is None:
            raise NotFittedError(
                "DiabetesModel has no stored training data; call train() "
                "before predict()."
            )

        # NOTE(review): ``scaler`` is not set in this class -- presumably it is
        # provided by BaseModel; confirm.
        if self.scaler:
            X = self.scaler.transform(X)

        # Get distances and indices of nearest neighbors
        distances, indices = self.model.kneighbors(X)
        neighbour_positions = indices[0]
        neighbour_distances = distances[0]

        # Get similar cases
        similar_cases = self._select_rows(self.X_train, neighbour_positions)
        similar_outcomes = self._select_rows(self.y_train, neighbour_positions)

        # Calculate weighted probability (assumes numeric 0/1 labels -- TODO confirm)
        weights = 1 / (neighbour_distances + 1e-6)  # Small constant avoids division by zero
        weighted_prob = np.sum(similar_outcomes * weights) / np.sum(weights)

        # Make prediction based on probability threshold
        prediction = np.array([1 if weighted_prob >= self.high_risk_threshold else 0])
        return prediction, similar_cases, similar_outcomes, neighbour_distances

    def evaluate(self, X_train, X_test, y_train, y_test):
        """Return ``(train_accuracy, test_accuracy)`` for the fitted classifier.

        NOTE(review): this scores the underlying classifier's own ``predict``,
        not the ``high_risk_threshold`` rule used by ``DiabetesModel.predict``,
        so the reported accuracy may differ from that of thresholded predictions.
        """
        train_accuracy = accuracy_score(y_train, self.model.predict(X_train))
        test_accuracy = accuracy_score(y_test, self.model.predict(X_test))
        return train_accuracy, test_accuracy