Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,300 +1,282 @@
|
|
1 |
-
import os
|
2 |
-
import requests
|
3 |
-
import joblib
|
4 |
-
import logging
|
5 |
-
import zipfile
|
6 |
-
import pandas as pd
|
7 |
-
import numpy as np
|
8 |
-
import warnings
|
9 |
-
from flask import Flask, request, jsonify
|
10 |
-
from flask_cors import CORS
|
11 |
-
|
12 |
-
# Suppress sklearn warnings
|
13 |
-
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
|
14 |
-
|
15 |
-
# Configure logging
|
16 |
-
logging.basicConfig(level=logging.INFO)
|
17 |
-
|
18 |
-
# Get model URLs from environment variables
|
19 |
-
DIABETES_MODEL_URL = os.getenv("DIABETES_MODEL_URL")
|
20 |
-
SCALER_URL = os.getenv("SCALER_URL")
|
21 |
-
MULTI_MODEL_URL = os.getenv("MULTI_MODEL_URL")
|
22 |
-
|
23 |
-
# Local paths for downloaded models
|
24 |
-
MODEL_PATHS = {
|
25 |
-
"DIABETES_MODEL": "finaliseddiabetes_model.zip",
|
26 |
-
"SCALER": "finalisedscaler.zip",
|
27 |
-
"MULTI_MODEL": "nodiabetes.zip",
|
28 |
-
}
|
29 |
-
|
30 |
-
# Extracted model names
|
31 |
-
EXTRACTED_MODELS = {
|
32 |
-
"DIABETES_MODEL": "finaliseddiabetes_model.joblib",
|
33 |
-
"SCALER": "finalisedscaler.joblib",
|
34 |
-
"MULTI_MODEL": "nodiabetes.joblib",
|
35 |
-
}
|
36 |
-
|
37 |
-
BASE_DIR = os.getcwd()
|
38 |
-
|
39 |
-
# Flask app initialization
|
40 |
-
app = Flask(__name__)
|
41 |
-
|
42 |
-
# Enable CORS
|
43 |
-
CORS(app, resources={
|
44 |
-
r"/*": {
|
45 |
-
"origins": [
|
46 |
-
"http://localhost:3000",
|
47 |
-
"https://carelog-diabetes-api.onrender.com",
|
48 |
-
"https://carelog-diabetes.vercel.app",
|
49 |
-
"http://localhost:5000"
|
50 |
-
],
|
51 |
-
"methods": ["GET", "POST", "OPTIONS"],
|
52 |
-
"allow_headers": ["Content-Type", "Authorization"],
|
53 |
-
"supports_credentials": True
|
54 |
-
}
|
55 |
-
})
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
logging.
|
83 |
-
return
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
logging.error(f"
|
101 |
-
return
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
if not family_history:
|
158 |
-
return 0.0
|
159 |
-
genetic_contribution = (first_degree * 0.5) + (second_degree * 0.25)
|
160 |
-
return min(genetic_contribution, 1.0)
|
161 |
-
|
162 |
-
def get_multi_condition_predictions(model, df):
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
'
|
170 |
-
'
|
171 |
-
|
172 |
-
|
173 |
-
}
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
return
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
if
|
203 |
-
return jsonify({'status': 'error', 'error': 'Invalid
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
'
|
230 |
-
'
|
231 |
-
'
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
df_scaled = models['SCALER'].transform(df_diabetes)
|
284 |
-
|
285 |
-
prediction, probability = get_diabetes_prediction(models['DIABETES_MODEL'], df_scaled)
|
286 |
-
|
287 |
-
return jsonify({
|
288 |
-
'status': 'success',
|
289 |
-
'model': 'diabetes',
|
290 |
-
'prediction': prediction,
|
291 |
-
'probability': probability,
|
292 |
-
'risk_level': 'HIGH' if probability > 70 else 'MODERATE' if probability > 40 else 'LOW'
|
293 |
-
})
|
294 |
-
|
295 |
-
except Exception as e:
|
296 |
-
logging.error(f"Error: {e}")
|
297 |
-
return jsonify({'status': 'error', 'error': str(e)}), 500
|
298 |
-
|
299 |
-
if __name__ == '__main__':
|
300 |
-
app.run(host="0.0.0.0", port=7860)
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
import joblib
|
4 |
+
import logging
|
5 |
+
import zipfile
|
6 |
+
import pandas as pd
|
7 |
+
import numpy as np
|
8 |
+
import warnings
|
9 |
+
from flask import Flask, request, jsonify
|
10 |
+
from flask_cors import CORS
|
11 |
+
|
12 |
+
# Ignore UserWarning messages whose originating module matches 'sklearn'.
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')

# Root logger reports INFO and above.
logging.basicConfig(level=logging.INFO)

# Download URLs for the zipped artifacts, read from the environment.
# Each value is None when its variable is not set.
DIABETES_MODEL_URL = os.getenv("DIABETES_MODEL_URL")
SCALER_URL = os.getenv("SCALER_URL")
MULTI_MODEL_URL = os.getenv("MULTI_MODEL_URL")

# Archive file name for each artifact key.
MODEL_PATHS = dict(
    DIABETES_MODEL="finaliseddiabetes_model.zip",
    SCALER="finalisedscaler.zip",
    MULTI_MODEL="nodiabetes.zip",
)

# File name each archive is expected to yield once extracted.
EXTRACTED_MODELS = dict(
    DIABETES_MODEL="finaliseddiabetes_model.joblib",
    SCALER="finalisedscaler.joblib",
    MULTI_MODEL="nodiabetes.joblib",
)

# Archives and extracted models live in the process working directory.
BASE_DIR = os.getcwd()

# The Flask application object the routes are registered on.
app = Flask(__name__)

# Cross-origin access for every route, limited to the listed front-end origins.
CORS(
    app,
    resources={
        r"/*": dict(
            origins=[
                "http://localhost:3000",
                "https://carelog-diabetes-api.onrender.com",
                "https://carelog-diabetes.vercel.app",
                "http://localhost:5000",
            ],
            methods=["GET", "POST", "OPTIONS"],
            allow_headers=["Content-Type", "Authorization"],
            supports_credentials=True,
        )
    },
)
|
56 |
+
|
57 |
+
# Landing page shown at the root of the Hugging Face Space
@app.route('/')
def index():
    """Return the static HTML welcome page that points callers at /predict."""
    welcome_html = """
    <h1>Welcome to the Diabetes Health Predictor API 👋</h1>
    <p>This Hugging Face Space provides health risk predictions including diabetes, hypertension, stroke, and cardiovascular conditions.</p>
    <p>Use the <code>/predict</code> endpoint via POST request to get started with your health insights!</p>
    """
    return welcome_html
|
65 |
+
|
66 |
+
def download_model(url, zip_filename, timeout=60):
    """Download a zipped model archive into BASE_DIR.

    Args:
        url: Source URL. A missing or empty value is logged and treated as failure.
        zip_filename: File name (relative to BASE_DIR) to store the archive under.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` argument,
            so a stalled server cannot block start-up indefinitely.

    Returns:
        True when the archive was fully written, False on any failure.
    """
    # Check the URL first: nothing else is worth doing without one.
    if not url:
        logging.error(f"URL for {zip_filename} is missing!")
        return False

    zip_path = os.path.join(BASE_DIR, zip_filename)
    # Write to a side file and rename at the end, so an interrupted download
    # never leaves a truncated archive under the final name.
    partial_path = zip_path + ".part"
    try:
        with requests.get(url, allow_redirects=True, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                logging.error(f"Failed to download {zip_filename}. HTTP Status: {response.status_code}")
                return False
            # Stream in chunks instead of holding the whole archive in memory.
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, zip_path)
        logging.info(f"Downloaded {zip_filename} successfully.")
        return True
    except Exception as e:
        logging.error(f"Error downloading {zip_filename}: {e}")
        # Best-effort cleanup of the partial file; the download already failed.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False
|
84 |
+
|
85 |
+
def extract_if_needed(zip_filename, extracted_filename):
    """Extract a model archive in BASE_DIR unless its model file already exists.

    Args:
        zip_filename: Archive file name, relative to BASE_DIR.
        extracted_filename: File name the archive is expected to produce.

    Returns:
        True when ``extracted_filename`` is present afterwards (already there,
        or freshly extracted); False when the archive is missing, cannot be
        extracted, or does not contain the expected file.
    """
    zip_path = os.path.join(BASE_DIR, zip_filename)
    extracted_path = os.path.join(BASE_DIR, extracted_filename)

    if os.path.exists(extracted_path):
        logging.info(f"{extracted_filename} already exists. Skipping extraction.")
        return True

    if not os.path.exists(zip_path):
        logging.error(f"Zip file missing: {zip_path}")
        return False

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(BASE_DIR)
    except Exception as e:
        logging.error(f"Error extracting {zip_filename}: {e}")
        return False

    # A successful extractall does not prove the archive held the file we need.
    if not os.path.exists(extracted_path):
        logging.error(f"Extracted {zip_filename} but {extracted_filename} was not found in it.")
        return False

    logging.info(f"Extracted {zip_filename}")
    return True
|
102 |
+
|
103 |
+
def load_model(model_filename):
    """Load a joblib-serialized object from BASE_DIR.

    Args:
        model_filename: File name of the ``.joblib`` artifact, relative to BASE_DIR.

    Returns:
        The deserialized object, or None when the file is missing or cannot be
        loaded (the failure is logged).
    """
    model_path = os.path.join(BASE_DIR, model_filename)
    if not os.path.exists(model_path):
        logging.error(f"Model file not found: {model_path}")
        return None

    try:
        # SECURITY: joblib.load unpickles the file, which can execute arbitrary
        # code. The artifact comes from a URL supplied via the environment, so
        # only point those variables at sources you trust.
        model = joblib.load(model_path)
    except Exception as e:
        # logging.exception keeps the traceback, which is needed to diagnose
        # unpickling failures.
        logging.exception(f"Error loading {model_filename}: {e}")
        return None

    logging.info(f"Loaded {model_filename} successfully.")
    return model
|
115 |
+
|
116 |
+
def initialize_models():
    """Download, extract and load every artifact listed in MODEL_PATHS.

    For each key the extracted file is reused when it already exists.
    Otherwise the archive is downloaded (if absent) from the module-level
    ``<KEY>_URL`` value and extracted.

    Returns:
        Dict mapping each MODEL_PATHS key to the loaded object, or to None
        when any step for that key failed.
    """
    loaded = {}
    for model_key, zip_filename in MODEL_PATHS.items():
        extracted_filename = EXTRACTED_MODELS[model_key]
        extracted_path = os.path.join(BASE_DIR, extracted_filename)
        zip_path = os.path.join(BASE_DIR, zip_filename)

        # Only fetch and unpack when the extracted model is not already on disk.
        if not os.path.exists(extracted_path):
            if not os.path.exists(zip_path):
                # .get avoids a KeyError if a matching *_URL global is absent.
                url = globals().get(f"{model_key}_URL")
                if not download_model(url, zip_filename):
                    loaded[model_key] = None
                    continue
            if not extract_if_needed(zip_filename, extracted_filename):
                loaded[model_key] = None
                continue

        loaded[model_key] = load_model(extracted_filename)
    return loaded
|
125 |
+
|
126 |
+
# Load every artifact once, at import time. Values may be None on failure.
models = initialize_models()

# Column order applied to the diabetes DataFrame before it is passed to the scaler.
FEATURE_ORDER = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
|
132 |
+
|
133 |
+
def validate_input(value, input_type=float, min_value=0, max_value=None):
    """Convert ``value`` with ``input_type`` and check it against a range.

    Args:
        value: Raw value to convert (for example a number or numeric string).
        input_type: Callable used for the conversion; defaults to ``float``.
        min_value: Inclusive lower bound.
        max_value: Inclusive upper bound, or None for no upper bound.

    Returns:
        The converted value, or None when conversion fails, the result is
        NaN or infinite, or it falls outside the range.
    """
    import math  # local import: the file's import block does not include math

    try:
        converted = input_type(value)
        # NaN compares False against every bound and infinity passes an open
        # upper bound, so both would otherwise slip through the range checks.
        if isinstance(converted, float) and not math.isfinite(converted):
            return None
        if converted < min_value:
            return None
        if max_value is not None and converted > max_value:
            return None
        return converted
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers int(float('inf')).
        return None
|
143 |
+
|
144 |
+
def validate_blood_pressure(systolic, diastolic):
    """Validate a systolic/diastolic pair.

    Systolic must convert to a float in [0, 300] and diastolic to a float in
    [0, 200].

    Returns:
        ``(systolic, diastolic)`` as floats, or ``(None, None)`` when either
        reading is invalid.
    """
    readings = (
        validate_input(systolic, float, 0, 300),
        validate_input(diastolic, float, 0, 200),
    )
    if any(reading is None for reading in readings):
        return None, None
    return readings
|
150 |
+
|
151 |
+
def validate_gender(gender):
    """Map a gender string to its numeric code.

    Returns:
        1 for 'male', 0 for 'female' (case-insensitive), or None for any
        other value, including non-strings.
    """
    if not isinstance(gender, str):
        return None
    codes = {'male': 1, 'female': 0}
    return codes.get(gender.lower())
|
155 |
+
|
156 |
+
def calculate_diabetes_pedigree(family_history, first_degree=0, second_degree=0):
    """Derive a pedigree score from counts of affected relatives.

    Each first-degree relative contributes 0.5 and each second-degree
    relative 0.25; the total is capped at 1.0.

    Returns:
        0.0 when ``family_history`` is falsy, otherwise the capped score.
    """
    if family_history:
        first_share = first_degree * 0.5
        second_share = second_degree * 0.25
        return min(first_share + second_share, 1.0)
    return 0.0
|
161 |
+
|
162 |
+
def get_multi_condition_predictions(model, df):
    """Run the multi-output model on ``df`` and summarize the first row.

    Hypertension is taken from the first predicted label. The other three
    values are read from ``predict_proba`` outputs 1, 2 and 3, using the
    second probability column of the first row.

    Returns:
        Dict with keys 'hypertension' (bool), 'cardiovascular', 'stroke' and
        'diabetes' (floats), or None when prediction raises (logged).
    """
    try:
        first_row_labels = model.predict(df)[0]
        per_output_probs = model.predict_proba(df)

        def second_column(output_index):
            # Second probability column for the first row of the given output.
            return float(per_output_probs[output_index][0][1])

        return {
            'hypertension': bool(first_row_labels[0]),
            'cardiovascular': second_column(1),
            'stroke': second_column(2),
            'diabetes': second_column(3),
        }
    except Exception as e:
        logging.error(f"Error in multi-condition prediction: {str(e)}")
        return None
|
175 |
+
|
176 |
+
def get_diabetes_prediction(model, df):
    """Classify the first row of ``df`` with the diabetes model.

    Returns:
        ``(label, probability)``: label is 'Diabetes' or 'No Diabetes', and
        probability is the second ``predict_proba`` column of the first row
        scaled to 0-100. Returns ``(None, 0.0)`` when prediction raises (logged).
    """
    try:
        label = model.predict(df)[0]
        positive_pct = float(model.predict_proba(df)[0][1] * 100)
        verdict = 'Diabetes' if label else 'No Diabetes'
        return verdict, positive_pct
    except Exception as e:
        logging.error(f"Error in diabetes prediction: {str(e)}")
        return None, 0.0
|
184 |
+
|
185 |
+
@app.route('/health', methods=['GET'])
def health_check():
    """Return a fixed JSON body reporting that the service is running."""
    payload = {'status': 'healthy', 'message': 'Service is running'}
    return jsonify(payload)
|
188 |
+
|
189 |
+
@app.route('/predict', methods=['POST'])
def predict_health():
    """Validate a JSON health record and return a model prediction.

    Request fields read from the JSON object: 'gender', 'systolic',
    'diastolic', 'age', 'glucose', 'bmi', and, for the diabetes model,
    'pregnancies', 'insulin', 'family_history', 'first_degree_relatives'
    and 'second_degree_relatives'.

    Routing: when systolic < 90 or diastolic < 60 the multi-condition model
    is used; otherwise the scaled diabetes model is used.

    Returns:
        A JSON response. 200 with 'status': 'success' on a prediction, 400
        for invalid input, 503 when a required model is not loaded, and 500
        when prediction fails or an unexpected error occurs.
    """
    def _error(message, status):
        # Uniform error envelope used by every failure path below.
        return jsonify({'status': 'error', 'error': message}), status

    try:
        # silent=True returns None for a malformed or non-JSON body instead of
        # raising, so bad requests get the 400 below rather than the 500 handler.
        data = request.get_json(silent=True)
        # The body must be a non-empty JSON object; lists and scalars have no .get.
        if not isinstance(data, dict) or not data:
            return _error('Invalid JSON payload', 400)
        # Log field names only: the values are personal health data.
        logging.info(f"Received fields: {sorted(data)}")

        gender = validate_gender(data.get('gender'))
        if gender is None:
            return _error('Invalid gender value. Must be "male" or "female"', 400)

        systolic, diastolic = validate_blood_pressure(data.get('systolic'), data.get('diastolic'))
        if systolic is None or diastolic is None:
            return _error('Invalid blood pressure values', 400)

        age = validate_input(data.get('age'), float, 0, 120)
        glucose = validate_input(data.get('glucose'), float, 0, 1000)
        bmi = validate_input(data.get('bmi'), float, 0, 100)
        if any(v is None for v in [age, glucose, bmi]):
            return _error('Invalid values for age, glucose, or BMI', 400)

        use_multi_condition = systolic < 90 or diastolic < 60

        if use_multi_condition:
            multi_model = models.get('MULTI_MODEL')
            if multi_model is None:
                return _error('Multi-condition model is not available', 503)

            df_multi = pd.DataFrame([{
                'Age': age,
                'Gender': gender,
                'Systolic_bp': systolic,
                'Diastolic_bp': diastolic,
                'Glucose': glucose,
                'BMI': bmi
            }])

            results = get_multi_condition_predictions(multi_model, df_multi)
            if results is None:
                return _error('Error in multi-condition prediction', 500)

            return jsonify({
                'status': 'success',
                'model': 'multi-condition',
                'predictions': {
                    'hypertension': results['hypertension'],
                    'cardiovascular_risk': results['cardiovascular'],
                    'stroke_risk': results['stroke'],
                    'diabetes_risk': results['diabetes']
                }
            })

        # Pregnancies defaults to 0 for male records (gender code 1); for
        # female records the field is required.
        pregnancies = validate_input(data.get('pregnancies', 0 if gender == 1 else None), float, 0, 20)
        insulin = validate_input(data.get('insulin'), float, 0, 1000)
        if any(v is None for v in [pregnancies, insulin]):
            return _error('Invalid values for pregnancies or insulin', 400)

        # NOTE(review): a JSON string such as "false" is truthy here - confirm
        # that clients always send a boolean.
        family_history = data.get('family_history', False)
        first_degree = validate_input(data.get('first_degree_relatives', 0), float, 0, 10)
        second_degree = validate_input(data.get('second_degree_relatives', 0), float, 0, 20)

        # Invalid relative counts are treated as 0 rather than rejected.
        diabetes_pedigree = calculate_diabetes_pedigree(
            family_history,
            first_degree if first_degree is not None else 0,
            second_degree if second_degree is not None else 0
        )

        scaler = models.get('SCALER')
        diabetes_model = models.get('DIABETES_MODEL')
        if scaler is None or diabetes_model is None:
            return _error('Diabetes model is not available', 503)

        # NOTE(review): 'BloodPressure' is fed the systolic reading - confirm
        # this matches how the model's training data defined that column.
        df_diabetes = pd.DataFrame([{
            'Pregnancies': pregnancies,
            'Glucose': glucose,
            'BloodPressure': systolic,
            'Insulin': insulin,
            'BMI': bmi,
            'DiabetesPedigreeFunction': diabetes_pedigree,
            'Age': age
        }])

        df_diabetes = df_diabetes[FEATURE_ORDER]
        df_scaled = scaler.transform(df_diabetes)
        prediction, probability = get_diabetes_prediction(diabetes_model, df_scaled)

        # get_diabetes_prediction signals failure with (None, 0.0); without this
        # check a failed prediction would be reported as a successful LOW risk.
        if prediction is None:
            return _error('Error in diabetes prediction', 500)

        return jsonify({
            'status': 'success',
            'model': 'diabetes',
            'prediction': prediction,
            'probability': probability,
            'risk_level': 'HIGH' if probability > 70 else 'MODERATE' if probability > 40 else 'LOW'
        })

    except Exception as e:
        # Keep the details in the server log; do not send exception text to clients.
        logging.exception(f"Error: {e}")
        return _error('Internal server error', 500)
|
280 |
+
|
281 |
+
# When executed as a script, run Flask's built-in server on all interfaces, port 7860.
if __name__ == '__main__':
    app.run(port=7860, host="0.0.0.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|