# ECG/utilities/utils.py
import os
import glob
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm
import wfdb
import ast
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
# EVALUATION STUFF
def generate_results(idxs, y_true, y_pred, thresholds):
return evaluate_experiment(y_true[idxs], y_pred[idxs], thresholds)
def evaluate_experiment(y_true, y_pred, thresholds=None):
results = {}
    if thresholds is not None:
# binary predictions
y_pred_binary = apply_thresholds(y_pred, thresholds)
# PhysioNet/CinC Challenges metrics
challenge_scores = challenge_metrics(y_true, y_pred_binary, beta1=2, beta2=2)
results['F_beta_macro'] = challenge_scores['F_beta_macro']
results['G_beta_macro'] = challenge_scores['G_beta_macro']
results['TP'] = challenge_scores['TP']
results['TN'] = challenge_scores['TN']
results['FP'] = challenge_scores['FP']
results['FN'] = challenge_scores['FN']
results['Accuracy'] = challenge_scores['Accuracy']
results['F1'] = challenge_scores['F1']
results['Precision'] = challenge_scores['Precision']
results['Recall'] = challenge_scores['Recall']
    # label-based metric
results['macro_auc'] = roc_auc_score(y_true, y_pred, average='macro')
df_result = pd.DataFrame(results, index=[0])
return df_result
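# Illustrative usage (a sketch with toy arrays, not part of the original pipeline):
#   y_true = np.array([[1, 0], [0, 1], [1, 1]])
#   y_score = np.array([[0.9, 0.2], [0.1, 0.8], [0.7, 0.6]])
#   evaluate_experiment(y_true, y_score)                          # macro AUC only
#   evaluate_experiment(y_true, y_score, thresholds=[0.5, 0.5])   # adds the challenge metrics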
def challenge_metrics(y_true, y_pred, beta1=2, beta2=2, single=False):
    f_beta = 0
    g_beta = 0
    # confusion-matrix counts accumulated over all classes
    TP, FP, TN, FN = 0., 0., 0., 0.
    if single:  # if evaluating a single class, e.g. during threshold optimization
        sample_weights = np.ones(y_true.sum(axis=1).shape)
    else:
        sample_weights = y_true.sum(axis=1)
    for classi in range(y_true.shape[1]):
        y_truei, y_predi = y_true[:, classi], y_pred[:, classi]
        # per-class counts, each record weighted by 1 / (number of its true labels)
        TP_i, FP_i, TN_i, FN_i = 0., 0., 0., 0.
        for i in range(len(y_predi)):
            sample_weight = sample_weights[i]
            if y_truei[i] == y_predi[i] == 1:
                TP_i += 1. / sample_weight
            if (y_predi[i] == 1) and (y_truei[i] != y_predi[i]):
                FP_i += 1. / sample_weight
            if y_truei[i] == y_predi[i] == 0:
                TN_i += 1. / sample_weight
            if (y_predi[i] == 0) and (y_truei[i] != y_predi[i]):
                FN_i += 1. / sample_weight
        f_beta_i = ((1 + beta1 ** 2) * TP_i) / ((1 + beta1 ** 2) * TP_i + FP_i + (beta1 ** 2) * FN_i)
        g_beta_i = TP_i / (TP_i + FP_i + beta2 * FN_i)
        f_beta += f_beta_i
        g_beta += g_beta_i
        TP += TP_i
        FP += FP_i
        TN += TN_i
        FN += FN_i
    Accuracy = (TP + TN) / (TP + TN + FP + FN)
    Precision = TP / (TP + FP) if (TP + FP) > 0 else 0.
    Recall = TP / (TP + FN) if (TP + FN) > 0 else 0.
    F1 = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0.
    return {'F_beta_macro': f_beta / y_true.shape[1], 'G_beta_macro': g_beta / y_true.shape[1], 'TP': TP, 'FP': FP,
            'TN': TN, 'FN': FN, 'Accuracy': Accuracy, 'F1': F1, 'Precision': Precision, 'Recall': Recall}
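# Note on the weighting above: every record contributes 1 / (its number of true labels) to each
# confusion-matrix count, so a record with two true labels adds 0.5 rather than 1.0 per hit;
# with single=True all records are weighted equally (used during per-class threshold tuning).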
def get_appropriate_bootstrap_samples(y_true, n_bootstraping_samples):
samples = []
while True:
ridxs = np.random.randint(0, len(y_true), len(y_true))
if y_true[ridxs].sum(axis=0).min() != 0:
samples.append(ridxs)
if len(samples) == n_bootstraping_samples:
break
return samples
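# Illustrative usage (a sketch; y_test and y_test_pred are assumed multi-hot / score arrays):
#   bootstrap_ids = get_appropriate_bootstrap_samples(y_test, 100)
#   boot_results = [generate_results(ids, y_test, y_test_pred, None) for ids in bootstrap_ids]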
def find_optimal_cutoff_threshold(target, predicted):
"""
    Find the optimal probability cutoff for a class by maximizing Youden's J statistic (TPR - FPR) on the ROC curve
"""
fpr, tpr, threshold = roc_curve(target, predicted)
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = threshold[optimal_idx]
return optimal_threshold
def find_optimal_cutoff_thresholds(y_true, y_pred):
return [find_optimal_cutoff_threshold(y_true[:, i], y_pred[:, i]) for i in range(y_true.shape[1])]
def find_optimal_cutoff_threshold_for_Gbeta(target, predicted, n_thresholds=100):
thresholds = np.linspace(0.00, 1, n_thresholds)
scores = [challenge_metrics(target, predicted > t, single=True)['G_beta_macro'] for t in thresholds]
optimal_idx = np.argmax(scores)
return thresholds[optimal_idx]
def find_optimal_cutoff_thresholds_for_Gbeta(y_true, y_pred):
print("optimize thresholds with respect to G_beta")
return [
find_optimal_cutoff_threshold_for_Gbeta(y_true[:, k][:, np.newaxis], y_pred[:, k][:, np.newaxis])
for k in tqdm(range(y_true.shape[1]))]
def apply_thresholds(preds, thresholds):
"""
    Apply class-wise thresholds to the prediction scores to obtain binary predictions.
    If no score exceeds its threshold, the class with the maximum score is set to 1;
    this avoids all-zero predictions, which some of the metrics above cannot handle.
"""
tmp = []
for p in preds:
tmp_p = (p > thresholds).astype(int)
if np.sum(tmp_p) == 0:
tmp_p[np.argmax(p)] = 1
tmp.append(tmp_p)
tmp = np.array(tmp)
return tmp
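# Illustrative threshold-tuning workflow (a sketch; the validation/test arrays are assumptions):
#   thresholds = find_optimal_cutoff_thresholds(y_val, y_val_pred)            # Youden's J per class
#   # or: thresholds = find_optimal_cutoff_thresholds_for_Gbeta(y_val, y_val_pred)
#   y_test_binary = apply_thresholds(y_test_pred, thresholds)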
# DATA PROCESSING STUFF
def load_dataset(path, sampling_rate, release=False):
if path.split('/')[-2] == 'ptbxl':
# load and convert annotation data
Y = pd.read_csv(path + 'ptbxl_database.csv', index_col='ecg_id')
Y.scp_codes = Y.scp_codes.apply(lambda x: ast.literal_eval(x))
# Load raw signal data
X = load_raw_data_ptbxl(Y, sampling_rate, path)
elif path.split('/')[-2] == 'ICBEB':
# load and convert annotation data
Y = pd.read_csv(path + 'icbeb_database.csv', index_col='ecg_id')
Y.scp_codes = Y.scp_codes.apply(lambda x: ast.literal_eval(x))
# Load raw signal data
X = load_raw_data_icbeb(Y, sampling_rate, path)
return X, Y
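# Illustrative usage (a sketch; assumes the standard PTB-XL layout with ptbxl_database.csv and
# records100/records500 inside a folder literally named 'ptbxl', and a trailing slash on the path):
#   X, Y = load_dataset('data/ptbxl/', sampling_rate=100)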
def load_raw_data_icbeb(df, sampling_rate, path):
if sampling_rate == 100:
if os.path.exists(path + 'raw100.npy'):
data = np.load(path + 'raw100.npy', allow_pickle=True)
else:
data = [wfdb.rdsamp(path + 'records100/' + str(f)) for f in tqdm(df.index)]
data = np.array([signal for signal, meta in data])
pickle.dump(data, open(path + 'raw100.npy', 'wb'), protocol=4)
elif sampling_rate == 500:
if os.path.exists(path + 'raw500.npy'):
data = np.load(path + 'raw500.npy', allow_pickle=True)
else:
data = [wfdb.rdsamp(path + 'records500/' + str(f)) for f in tqdm(df.index)]
data = np.array([signal for signal, meta in data])
pickle.dump(data, open(path + 'raw500.npy', 'wb'), protocol=4)
return data
def load_raw_data_ptbxl(df, sampling_rate, path):
if sampling_rate == 100:
if os.path.exists(path + 'raw100.npy'):
data = np.load(path + 'raw100.npy', allow_pickle=True)
else:
data = [wfdb.rdsamp(path + f) for f in tqdm(df.filename_lr)]
data = np.array([signal for signal, meta in data])
pickle.dump(data, open(path + 'raw100.npy', 'wb'), protocol=4)
elif sampling_rate == 500:
if os.path.exists(path + 'raw500.npy'):
data = np.load(path + 'raw500.npy', allow_pickle=True)
else:
data = [wfdb.rdsamp(path + f) for f in tqdm(df.filename_hr)]
data = np.array([signal for signal, meta in data])
pickle.dump(data, open(path + 'raw500.npy', 'wb'), protocol=4)
return data
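# Note: both raw-data loaders cache the parsed signals by pickling them to raw100.npy / raw500.npy
# next to the records; on later calls np.load(..., allow_pickle=True) falls back to unpickling
# those files instead of re-reading every WFDB record.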
def compute_label_aggregations(df, folder, ctype):
df['scp_codes_len'] = df.scp_codes.apply(lambda x: len(x))
aggregation_df = pd.read_csv(folder + 'scp_statements.csv', index_col=0)
if ctype in ['diagnostic', 'subdiagnostic', 'superdiagnostic']:
def aggregate_all_diagnostic(y_dic):
tmp = []
for key in y_dic.keys():
if key in diag_agg_df.index:
tmp.append(key)
return list(set(tmp))
def aggregate_subdiagnostic(y_dic):
tmp = []
for key in y_dic.keys():
if key in diag_agg_df.index:
c = diag_agg_df.loc[key].diagnostic_subclass
if str(c) != 'nan':
tmp.append(c)
return list(set(tmp))
def aggregate_diagnostic(y_dic):
tmp = []
for key in y_dic.keys():
if key in diag_agg_df.index:
c = diag_agg_df.loc[key].diagnostic_class
if str(c) != 'nan':
tmp.append(c)
return list(set(tmp))
diag_agg_df = aggregation_df[aggregation_df.diagnostic == 1.0]
if ctype == 'diagnostic':
df['diagnostic'] = df.scp_codes.apply(aggregate_all_diagnostic)
df['diagnostic_len'] = df.diagnostic.apply(lambda x: len(x))
elif ctype == 'subdiagnostic':
df['subdiagnostic'] = df.scp_codes.apply(aggregate_subdiagnostic)
df['subdiagnostic_len'] = df.subdiagnostic.apply(lambda x: len(x))
elif ctype == 'superdiagnostic':
df['superdiagnostic'] = df.scp_codes.apply(aggregate_diagnostic)
df['superdiagnostic_len'] = df.superdiagnostic.apply(lambda x: len(x))
elif ctype == 'form':
form_agg_df = aggregation_df[aggregation_df.form == 1.0]
def aggregate_form(y_dic):
tmp = []
for key in y_dic.keys():
if key in form_agg_df.index:
c = key
if str(c) != 'nan':
tmp.append(c)
return list(set(tmp))
df['form'] = df.scp_codes.apply(aggregate_form)
df['form_len'] = df.form.apply(lambda x: len(x))
elif ctype == 'rhythm':
rhythm_agg_df = aggregation_df[aggregation_df.rhythm == 1.0]
def aggregate_rhythm(y_dic):
tmp = []
for key in y_dic.keys():
if key in rhythm_agg_df.index:
c = key
if str(c) != 'nan':
tmp.append(c)
return list(set(tmp))
df['rhythm'] = df.scp_codes.apply(aggregate_rhythm)
df['rhythm_len'] = df.rhythm.apply(lambda x: len(x))
elif ctype == 'all':
df['all_scp'] = df.scp_codes.apply(lambda x: list(set(x.keys())))
return df
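# Illustrative usage (a sketch; 'folder' must contain scp_statements.csv, as in PTB-XL):
#   Y = compute_label_aggregations(Y, 'data/ptbxl/', ctype='superdiagnostic')
#   # adds a 'superdiagnostic' column (list of superclass labels) plus 'superdiagnostic_len'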
def select_data(XX, YY, ctype, min_samples, output_folder):
    # convert multi-label annotations to multi-hot encoding
mlb = MultiLabelBinarizer()
if ctype == 'diagnostic':
X = XX[YY.diagnostic_len > 0]
Y = YY[YY.diagnostic_len > 0]
mlb.fit(Y.diagnostic.values)
y = mlb.transform(Y.diagnostic.values)
elif ctype == 'subdiagnostic':
counts = pd.Series(np.concatenate(YY.subdiagnostic.values)).value_counts()
counts = counts[counts > min_samples]
YY.subdiagnostic = YY.subdiagnostic.apply(lambda x: list(set(x).intersection(set(counts.index.values))))
YY['subdiagnostic_len'] = YY.subdiagnostic.apply(lambda x: len(x))
X = XX[YY.subdiagnostic_len > 0]
Y = YY[YY.subdiagnostic_len > 0]
mlb.fit(Y.subdiagnostic.values)
y = mlb.transform(Y.subdiagnostic.values)
elif ctype == 'superdiagnostic':
counts = pd.Series(np.concatenate(YY.superdiagnostic.values)).value_counts()
counts = counts[counts > min_samples]
YY.superdiagnostic = YY.superdiagnostic.apply(lambda x: list(set(x).intersection(set(counts.index.values))))
YY['superdiagnostic_len'] = YY.superdiagnostic.apply(lambda x: len(x))
X = XX[YY.superdiagnostic_len > 0]
Y = YY[YY.superdiagnostic_len > 0]
mlb.fit(Y.superdiagnostic.values)
y = mlb.transform(Y.superdiagnostic.values)
elif ctype == 'form':
# filter
counts = pd.Series(np.concatenate(YY.form.values)).value_counts()
counts = counts[counts > min_samples]
YY.form = YY.form.apply(lambda x: list(set(x).intersection(set(counts.index.values))))
YY['form_len'] = YY.form.apply(lambda x: len(x))
# select
X = XX[YY.form_len > 0]
Y = YY[YY.form_len > 0]
mlb.fit(Y.form.values)
y = mlb.transform(Y.form.values)
elif ctype == 'rhythm':
# filter
counts = pd.Series(np.concatenate(YY.rhythm.values)).value_counts()
counts = counts[counts > min_samples]
YY.rhythm = YY.rhythm.apply(lambda x: list(set(x).intersection(set(counts.index.values))))
YY['rhythm_len'] = YY.rhythm.apply(lambda x: len(x))
# select
X = XX[YY.rhythm_len > 0]
Y = YY[YY.rhythm_len > 0]
mlb.fit(Y.rhythm.values)
y = mlb.transform(Y.rhythm.values)
elif ctype == 'all':
# filter
counts = pd.Series(np.concatenate(YY.all_scp.values)).value_counts()
counts = counts[counts > min_samples]
YY.all_scp = YY.all_scp.apply(lambda x: list(set(x).intersection(set(counts.index.values))))
YY['all_scp_len'] = YY.all_scp.apply(lambda x: len(x))
# select
X = XX[YY.all_scp_len > 0]
Y = YY[YY.all_scp_len > 0]
mlb.fit(Y.all_scp.values)
y = mlb.transform(Y.all_scp.values)
else:
pass
    # save the fitted MultiLabelBinarizer
with open(output_folder + 'mlb.pkl', 'wb') as tokenizer:
pickle.dump(mlb, tokenizer)
return X, Y, y, mlb
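# Illustrative usage (a sketch continuing the example above; the output folder must already exist):
#   X, Y, y, mlb = select_data(X, Y, ctype='superdiagnostic', min_samples=0, output_folder='output/')
#   # y is the multi-hot label matrix; the fitted MultiLabelBinarizer is also pickled to mlb.pkl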
def preprocess_signals(X_train, X_validation, X_test, outputfolder):
    # Standardize the signals to zero mean and unit variance (scaler fit on training data only)
    ss = StandardScaler()
    ss.fit(np.vstack(X_train).flatten()[:, np.newaxis].astype(float))
    # Save the fitted standardizer
    with open(outputfolder + 'standard_scaler.pkl', 'wb') as ss_file:
        pickle.dump(ss, ss_file)
    return (apply_standardizer(X_train, ss),
            apply_standardizer(X_validation, ss),
            apply_standardizer(X_test, ss))
def apply_standardizer(X, ss):
X_tmp = []
for x in X:
x_shape = x.shape
X_tmp.append(ss.transform(x.flatten()[:, np.newaxis]).reshape(x_shape))
X_tmp = np.array(X_tmp)
return X_tmp
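# Illustrative usage (a sketch; the train/validation/test splits are assumptions):
#   X_train, X_val, X_test = preprocess_signals(X_train, X_val, X_test, 'output/')
#   # the StandardScaler is fit on the training signals only and saved as standard_scaler.pkl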
# DOCUMENTATION STUFF
def generate_ptbxl_summary_table(selection=None, folder='/output/'):
exps = ['exp0', 'exp1', 'exp1.1', 'exp1.1.1', 'exp2', 'exp3']
metrics = ['macro_auc', 'Accuracy', 'TP', 'TN', 'FP', 'FN', 'Precision', 'Recall', 'F1']
    # referenced below by index, e.g. metrics[0] == 'macro_auc', metrics[8] == 'F1'
# get models
models = {}
for i, exp in enumerate(exps):
if selection is None:
exp_models = [m.split('/')[-1] for m in glob.glob(folder + str(exp) + '/models/*')]
else:
exp_models = selection
if i == 0:
models = set(exp_models)
else:
models = models.union(set(exp_models))
results_dic = {'Method': [],
'exp0_macro_auc': [],
'exp1_macro_auc': [],
'exp1.1_macro_auc': [],
'exp1.1.1_macro_auc': [],
'exp2_macro_auc': [],
'exp3_macro_auc': [],
'exp0_Accuracy': [],
'exp1_Accuracy': [],
'exp1.1_Accuracy': [],
'exp1.1.1_Accuracy': [],
'exp2_Accuracy': [],
'exp3_Accuracy': [],
'exp0_F1': [],
'exp1_F1': [],
'exp1.1_F1': [],
'exp1.1.1_F1': [],
'exp2_F1': [],
'exp3_F1': [],
'exp0_Precision': [],
'exp1_Precision': [],
'exp1.1_Precision': [],
'exp1.1.1_Precision': [],
'exp2_Precision': [],
'exp3_Precision': [],
'exp0_Recall': [],
'exp1_Recall': [],
'exp1.1_Recall': [],
'exp1.1.1_Recall': [],
'exp2_Recall': [],
'exp3_Recall': [],
'exp0_TP': [],
'exp1_TP': [],
'exp1.1_TP': [],
'exp1.1.1_TP': [],
'exp2_TP': [],
'exp3_TP': [],
'exp0_TN': [],
'exp1_TN': [],
'exp1.1_TN': [],
'exp1.1.1_TN': [],
'exp2_TN': [],
'exp3_TN': [],
'exp0_FP': [],
'exp1_FP': [],
'exp1.1_FP': [],
'exp1.1.1_FP': [],
'exp2_FP': [],
'exp3_FP': [],
'exp0_FN': [],
'exp1_FN': [],
'exp1.1_FN': [],
'exp1.1.1_FN': [],
'exp2_FN': [],
'exp3_FN': []
}
for m in models:
results_dic['Method'].append(m)
for e in exps:
try:
me_res = pd.read_csv(folder + str(e) + '/models/' + str(m) + '/results/te_results.csv', index_col=0)
mean1 = me_res.loc['point'][metrics[0]]
unc1 = max(me_res.loc['upper'][metrics[0]] - me_res.loc['point'][metrics[0]],
me_res.loc['point'][metrics[0]] - me_res.loc['lower'][metrics[0]])
acc = me_res.loc['point'][metrics[1]]
f1 = me_res.loc['point'][metrics[8]]
precision = me_res.loc['point'][metrics[6]]
recall = me_res.loc['point'][metrics[7]]
tp = me_res.loc['point'][metrics[2]]
tn = me_res.loc['point'][metrics[3]]
fp = me_res.loc['point'][metrics[4]]
fn = me_res.loc['point'][metrics[5]]
results_dic[e + '_macro_auc'].append("%.3f(%.2d)" % (np.round(mean1, 3), int(unc1 * 1000)))
results_dic[e + '_Accuracy'].append("%.3f" % acc)
results_dic[e + '_F1'].append("%.3f" % f1)
results_dic[e + '_Precision'].append("%.3f" % precision)
results_dic[e + '_Recall'].append("%.3f" % recall)
results_dic[e + '_TP'].append("%.3f" % tp)
results_dic[e + '_TN'].append("%.3f" % tn)
results_dic[e + '_FP'].append("%.3f" % fp)
results_dic[e + '_FN'].append("%.3f" % fn)
except FileNotFoundError:
results_dic[e + '_macro_auc'].append("--")
results_dic[e + '_Accuracy'].append("--")
results_dic[e + '_F1'].append("--")
results_dic[e + '_Precision'].append("--")
results_dic[e + '_Recall'].append("--")
results_dic[e + '_TP'].append("--")
results_dic[e + '_TN'].append("--")
results_dic[e + '_FP'].append("--")
results_dic[e + '_FN'].append("--")
df = pd.DataFrame(results_dic)
df_index = df[df.Method.isin(['naive', 'ensemble'])]
df_rest = df[~df.Method.isin(['naive', 'ensemble'])]
df = pd.concat([df_rest, df_index])
df.to_csv(folder + 'results_ptbxl.csv')
titles = [
'### 1. PTB-XL: all statements',
'### 2. PTB-XL: diagnostic statements',
'### 3. PTB-XL: Diagnostic subclasses',
'### 4. PTB-XL: Diagnostic superclasses',
'### 5. PTB-XL: Form statements',
'### 6. PTB-XL: Rhythm statements'
]
# helper output function for markdown tables
our_work = 'https://arxiv.org/abs/2004.13701'
our_repo = 'https://github.com/helme/ecg_ptbxl_benchmarking/'
md_source = ''
for i, e in enumerate(exps):
md_source += '\n ' + titles[i] + ' \n \n'
        md_source += '| Model | AUC |\n'
        md_source += '|---:|:---|\n'
        for row in df_rest[['Method', e + '_macro_auc']].sort_values(e + '_macro_auc', ascending=False).values:
            md_source += '| ' + row[0].replace('fastai_', '') + ' | ' + row[1] + ' |\n'
print(md_source)
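# Illustrative usage (a sketch; expects te_results.csv files with 'point'/'upper'/'lower' rows
# under <folder>/<exp>/models/<model>/results/):
#   generate_ptbxl_summary_table(folder='output/')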