Spaces:
Sleeping
Sleeping
File size: 5,802 Bytes
a8b81f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from .base_model import BaseModel
from ..config import PARKINSONS_MODEL_PATH, RANDOM_STATE, TEST_SIZE
import numpy as np
import pandas as pd
class ParkinsonsModel(BaseModel):
    """Feature-weighted k-nearest-neighbours classifier over 22 named features.

    Every feature column is multiplied by a per-feature weight before the
    underlying ``KNeighborsClassifier`` is fitted or queried, so
    higher-weighted features contribute more to the Euclidean distance.
    ``predict`` handles a single sample (the first row of its input) and
    returns the neighbours it based its decision on.
    """

    # Fraction of a feature's range width tolerated beyond each bound during
    # input validation.
    _RANGE_TOLERANCE = 0.2

    def __init__(self):
        """Configure the estimator, feature names, valid ranges and weights."""
        super().__init__(PARKINSONS_MODEL_PATH)
        self.model = KNeighborsClassifier(
            n_neighbors=5,  # Increased for more robust predictions
            weights='distance',
            metric='euclidean',  # Changed to euclidean for better distance measurement
        )
        # Column order expected for every input passed to train()/predict().
        self.feature_names = [
            'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)',
            'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP',
            'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5',
            'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'RPDE', 'DFA',
            'spread1', 'spread2', 'D2', 'PPE',
        ]
        # Populated by train(); predict() reads them to report similar cases.
        self.X_train = None
        self.y_train = None
        # Never fitted in this class; predict() applies it only when set.
        self.scaler = None
        # Feature ranges from dataset analysis: feature -> (min, max).
        # Only the features listed here are range-checked.
        self.feature_ranges = {
            'MDVP:Fo(Hz)': (88.333, 260.105),
            'MDVP:Fhi(Hz)': (102.145, 592.030),
            'MDVP:Flo(Hz)': (65.476, 239.170),
            'MDVP:Jitter(%)': (0.001, 0.033),
            'MDVP:Shimmer': (0.009, 0.119),
            'HNR': (8.441, 33.047),
            'RPDE': (0.256, 0.685),
            'DFA': (0.574, 0.825),
            'spread1': (-7.968984, -2.434031),
            # NOTE(review): upper bound is identical to PPE's below -- confirm
            # this is intended and not a copy-paste slip.
            'spread2': (0.006, 0.527),
            'PPE': (0.044, 0.527),
        }
        # Multiplier applied to each feature column before distances are
        # computed.
        self.feature_weights = {
            'MDVP:Fo(Hz)': 1.0,
            'MDVP:Fhi(Hz)': 1.0,
            'MDVP:Flo(Hz)': 1.0,
            'MDVP:Jitter(%)': 2.0,
            'MDVP:Jitter(Abs)': 1.5,
            'MDVP:RAP': 1.5,
            'MDVP:PPQ': 1.5,
            'Jitter:DDP': 1.5,
            'MDVP:Shimmer': 2.0,
            'MDVP:Shimmer(dB)': 1.5,
            'Shimmer:APQ3': 1.5,
            'Shimmer:APQ5': 1.5,
            'MDVP:APQ': 1.5,
            'Shimmer:DDA': 1.5,
            'NHR': 1.8,
            'HNR': 1.8,
            'RPDE': 1.5,
            'DFA': 1.5,
            'spread1': 1.2,
            'spread2': 1.2,
            'D2': 1.2,
            'PPE': 1.8,
        }

    def _apply_feature_weights(self, X):
        """Return a copy of DataFrame *X* with each known column scaled by its weight."""
        weighted = X.copy()
        for feature, weight in self.feature_weights.items():
            if feature in weighted.columns:
                weighted[feature] = weighted[feature] * weight
        return weighted

    def is_input_valid(self, X):
        """Check if input values are within expected ranges.

        Only the first row of *X* is inspected. Each feature listed in
        ``self.feature_ranges`` must lie within its (min, max) range widened
        by ``_RANGE_TOLERANCE`` of the range width on both sides.

        Returns:
            ``(True, "")`` when every checked value is acceptable, otherwise
            ``(False, message)`` describing the first offending feature.
        """
        X_df = pd.DataFrame(X, columns=self.feature_names)
        for feature, (min_val, max_val) in self.feature_ranges.items():
            if feature not in X_df.columns:
                continue
            value = X_df[feature].iloc[0]
            margin = (max_val - min_val) * self._RANGE_TOLERANCE
            extended_min = min_val - margin
            extended_max = max_val + margin
            # Written as a negated chained comparison so that NaN, which
            # compares False against everything, is rejected as well.
            if not (extended_min <= value <= extended_max):
                return False, f"{feature} value ({value:.3f}) is outside expected range ({min_val:.3f} - {max_val:.3f})"
        return True, ""

    def predict(self, X):
        """Classify the first sample in *X* and return its nearest neighbours.

        Args:
            X: A single sample (one row) with columns in ``self.feature_names``
                order. Rows after the first are not used for the result.

        Returns:
            A 4-tuple ``(prediction, similar_cases, similar_outcomes,
            distances)``: a one-element array holding 0 or 1, the neighbouring
            training rows (feature-weighted, as stored by ``train``), their
            labels, and the distances to them.

        Raises:
            ValueError: If the sample fails ``is_input_valid``.
        """
        is_valid, message = self.is_input_valid(X)
        if not is_valid:
            raise ValueError(f"Invalid input: {message}")

        if self.scaler is not None:
            X = self.scaler.transform(X)

        X = self._apply_feature_weights(
            pd.DataFrame(X, columns=self.feature_names)
        )

        # Get nearest neighbors
        distances, indices = self.model.kneighbors(X)
        neighbor_idx = indices[0]
        neighbor_distances = distances[0]

        # Convert X_train to DataFrame if it's a numpy array
        if isinstance(self.X_train, np.ndarray):
            self.X_train = pd.DataFrame(self.X_train, columns=self.feature_names)

        # Get similar cases
        similar_cases = self.X_train.iloc[neighbor_idx]
        similar_outcomes = pd.Series(self.y_train).iloc[neighbor_idx]

        # Confidence falls linearly from 1 (distance 0) to 0 (largest distance).
        max_distance = np.max(distances)
        if max_distance > 0:
            confidence_scores = 1 - (neighbor_distances / max_distance)
        else:
            confidence_scores = np.ones_like(neighbor_distances)
        # When all neighbours are equidistant every score is zero and
        # np.average cannot normalise the weights; treat the neighbours as
        # equally informative instead.
        if not np.sum(confidence_scores) > 0:
            confidence_scores = np.ones_like(neighbor_distances)

        # Weight the predictions by confidence
        weighted_pred = np.average(similar_outcomes, weights=confidence_scores)

        # Make final prediction
        prediction = np.array([1 if weighted_pred >= 0.5 else 0])
        return prediction, similar_cases, similar_outcomes, neighbor_distances

    def train(self, X, y):
        """Fit the classifier on a stratified split of feature-weighted data.

        Args:
            X: Feature matrix; converted to a DataFrame with
                ``self.feature_names`` as columns when it is not one already.
            y: Class labels, also used to stratify the split.

        Returns:
            ``(train_accuracy, test_accuracy)`` as computed by ``evaluate``.
        """
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X, columns=self.feature_names)

        X_weighted = self._apply_feature_weights(X)

        X_train, X_test, y_train, y_test = train_test_split(
            X_weighted, y, test_size=TEST_SIZE, random_state=RANDOM_STATE,
            stratify=y,
        )

        # Keep the (weighted) training split so predict() can return the
        # neighbouring cases and their outcomes.
        self.X_train = pd.DataFrame(X_train, columns=self.feature_names)
        self.y_train = pd.Series(y_train)

        self.model.fit(X_train, y_train)
        return self.evaluate(X_train, X_test, y_train, y_test)

    def evaluate(self, X_train, X_test, y_train, y_test):
        """Return ``(train_accuracy, test_accuracy)`` for the fitted estimator.

        Uses the underlying estimator's own ``predict`` rather than this
        class's ``predict``, so inputs must already be feature-weighted.
        """
        train_accuracy = accuracy_score(y_train, self.model.predict(X_train))
        test_accuracy = accuracy_score(y_test, self.model.predict(X_test))
        return train_accuracy, test_accuracy