Last active
January 28, 2018 13:14
-
-
Save skjerns/459c9c296ffbd87cff9545ab295dc83c to your computer and use it in GitHub Desktop.
Calculating Transfer Scores for the EDFx with AutoSleepScorer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import csv
import glob
import warnings

import numpy as np
from sklearn.metrics import f1_score
from sleepscorer import Scorer, SleepData
edfx_dir = "D:\\sleep\\edfx\\"

##################################
#%% First we convert the hypnograms
# The *.hyp files are read as raw bytes and picked apart by hand, since no
# working reader was found for the hypnogram EDFs.  This is a hack: the bytes
# are converted to their printable ``str(bytes)`` form and the annotations are
# located through their 'Sleep_stage_' prefix.
print('Converting hypnograms')
files = glob.glob(edfx_dir + '*.hyp')
for hyp_path in files:
    with open(hyp_path, mode='rb') as f:  # binary mode is essential here
        raw_text = str(f.read())

    # Whatever precedes the first 'Sleep_stage_' marker is not an annotation.
    annotations = raw_text.split('Sleep_stage_')[1:]

    hypnogram = []
    for annotation in annotations:
        # The stage label is the single character right after the marker.
        stage = annotation[0]
        # The text from offset 12 up to the first backslash is parsed as the
        # duration (presumably in seconds); dividing by 30 turns it into a
        # number of 30-second epochs.
        duration = int(annotation.split('\\')[0][12:])
        n_epochs = duration // 30
        hypnogram += [stage] * n_epochs

    # One stage label per line, with the same base name as the .hyp file.
    txt_path = hyp_path[:-4] + '.txt'
    with open(txt_path, "w") as f:
        csv.writer(f, lineterminator='\r').writerows(hypnogram)
################################## | |
#%% Here we classify.
# All recordings are collected first and the Scorer is created and run only
# once at the end, to prevent re-initialization of tensorflow.
edffiles = glob.glob(edfx_dir + '*.edf')
files = []
for filename in edffiles:
    print(filename)
    # Map each signal type to the channel label used for it in the recording.
    file = SleepData(filename,
                     channels={'EEG': 'EEG Fpz-Cz', 'EMG': 'EMG submental',
                               'EOG': 'EOG horizontal'},
                     preload=False)
    files.append(file)
scorer = Scorer(files, hypnograms=False, demo=True)
scorer.run()
################################## | |
#%% Compare the predictions with the ground truth
# Ground truth and predictions are paired by position, so both listings are
# sorted: glob itself returns the files in arbitrary order.
truth = sorted(glob.glob(edfx_dir + '*.txt'))
pred = sorted(glob.glob(edfx_dir + '*.csv'))
if len(truth) != len(pred):
    raise ValueError('Found {} ground truth files but {} prediction files in {}'
                     .format(len(truth), len(pred), edfx_dir))

# This is the conversion dict for the ground truth
# Note: The scorer only scores into Wake, S1, S2, SWS and REM
# According to the AASM manual, S4 is therefore combined with S3
# All 'other' stages such as movement etc are converted to wake.
conv_dict = {'1':1, '2':2, '3':3, '4':3,
             'W':0, 'S1':1, 'S2':2, 'S3':3, 'S4':3, 'SWS':3, 'REM':4, 'R':4,
             'A':0, 'M':0, '?':0}

# Number of epochs kept before the first and after the last non-wake epoch
# (60 epochs = 30 minutes).
margin = 60

accs = []
f1s = []
for truth_file, pred_file in zip(truth, pred):
    # Read ground truth file (one label per line, file ends with a newline)
    with open(truth_file, 'r') as f:
        y_true = f.read().split('\n')[:-1]
    y_true = np.array([conv_dict[x] for x in y_true])

    # Read predictions (one integer stage per line, file ends with a newline)
    with open(pred_file, 'r') as f:
        y_pred = f.read().split('\n')[:-1]
    y_pred = np.array([int(x) for x in y_pred])

    # The EDFx features excessive pre and post wake recordings.
    # It would be unfair to compare those: accuracies would be well in the 95%
    # then, as Wake is easy to recognize.  Therefore only the span from
    # `margin` epochs before the first non-wake epoch of the ground truth up
    # to `margin` epochs after the last one is scored.
    sleep_epochs = np.where(y_true != 0)[0]
    if sleep_epochs.size == 0:
        # Without any non-wake epoch there is no sleep period to trim to.
        warnings.warn('No sleep epochs in {}, skipping'.format(truth_file))
        continue
    # Clamp at 0: a negative start would make the slice count from the end.
    idx_start = max(sleep_epochs[0] - margin, 0)
    idx_stop = sleep_epochs[-1] + margin
    y_pred = y_pred[idx_start:idx_stop]
    y_true = y_true[idx_start:idx_stop]

    accs.append(np.mean(y_true == y_pred))
    f1s.append(f1_score(y_true, y_pred, average='macro'))

print('Prediction scores from a model trained on CCSHS50')
print('Mean transfer accuracy: {:.1f}%'.format(np.mean(accs)*100))
print('Mean transfer f1 score: {:.1f}%'.format(np.mean(f1s)*100))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment