Skip to content

Instantly share code, notes, and snippets.

@skjerns
Last active January 28, 2018 13:14
Show Gist options
  • Save skjerns/459c9c296ffbd87cff9545ab295dc83c to your computer and use it in GitHub Desktop.
Save skjerns/459c9c296ffbd87cff9545ab295dc83c to your computer and use it in GitHub Desktop.
Calculating Transfer Scores for the EDFx with AutoSleepScorer
# -*- coding: utf-8 -*-
import glob
import csv
import numpy as np
from sklearn.metrics import f1_score
from sleepscorer import Scorer, SleepData
edfx_dir = "D:\\sleep\\edfx\\"
##################################
#%% First we convert the hypnograms
# The hypnogram files are read as raw bytes and picked apart as text. This is
# quite a hack; it exists because no working reader for the hypnogram EDFs
# was found. Every '<name>.hyp' file produces a '<name>.txt' file next to it
# that holds one stage character per 30-second epoch.
print('Converting hypnograms')
for hyp_path in glob.glob(edfx_dir + '*.hyp'):
    # Binary mode is important: the content is not plain text.
    with open(hyp_path, mode='rb') as hyp_file:
        raw_bytes = hyp_file.read()
    # str() of a bytes object is its repr, in which non-printable bytes appear
    # as backslash escapes; the parsing below relies on that representation.
    # Everything in front of the first 'Sleep_stage_' marker is discarded.
    annotations = str(raw_bytes).split('Sleep_stage_')[1:]
    epoch_stages = []
    for annotation in annotations:
        stage_char = annotation[0]
        # The text from offset 12 up to the first backslash is parsed as the
        # duration of this stage (presumably in seconds -- TODO confirm
        # against the file format) and converted to whole 30-second epochs.
        duration_text = annotation.split('\\')[0][12:]
        n_epochs = int(duration_text) // 30
        epoch_stages.extend([stage_char] * n_epochs)
    txt_path = hyp_path[:-4] + '.txt'
    with open(txt_path, "w") as txt_file:
        # Each one-character stage is written as its own row, ended by '\r'.
        csv.writer(txt_file, lineterminator='\r').writerows(epoch_stages)
##################################
#%% Here we classify.
# One SleepData object is created per EDF recording found in edfx_dir.
# Scorer is only run at the end to prevent re-initialization of tensorflow
edffiles = glob.glob(edfx_dir + '*.edf')
files = []
for filename in edffiles:
    print(filename)
    # The channel mapping names the EDF signals to use for EEG, EMG and EOG.
    # preload=False is passed so the data is not loaded here (presumably it is
    # loaded later by the scorer -- TODO confirm in sleepscorer).
    sleepdata = SleepData(filename,
                          channels={'EEG': 'EEG Fpz-Cz',
                                    'EMG': 'EMG submental',
                                    'EOG': 'EOG horizontal'},
                          preload=False)
    files.append(sleepdata)
# A single Scorer handles every recording in one run.
scorer = Scorer(files, hypnograms=False, demo=True)
scorer.run()
##################################
#%% Compare the predictions with the ground truth
# Both listings are sorted because glob guarantees no particular order and the
# two lists are paired by position below.
# NOTE(review): this assumes that a hypnogram and its prediction end up at the
# same position after sorting -- verify against the actual file names.
truth = sorted(glob.glob(edfx_dir + '*.txt'))
pred = sorted(glob.glob(edfx_dir + '*.csv'))
if not truth or len(truth) != len(pred):
    raise ValueError('Expected one prediction per hypnogram, but found {} '
                     'hypnogram(s) and {} prediction(s) in {}'
                     .format(len(truth), len(pred), edfx_dir))
# This is the conversion dict for the ground truth
# Note: The scorer only scores into Wake, S1, S2, SWS and REM
# According to the AASM manual, S4 is therefore combined with S3
# All 'other' stages such as movement etc are converted to wake.
conv_dict = {'1':1, '2':2, '3':3, '4':3,
             'W':0, 'S1':1, 'S2':2, 'S3':3, 'S4':3, 'SWS':3, 'REM':4, 'R':4,
             'A':0, 'M':0, '?':0}
# Number of epochs kept around the sleep period (60 epochs = 30 minutes).
margin = 60
accs = []
f1s = []
for truth_file, pred_file in zip(truth, pred):
    # Read ground truth file; splitlines() keeps the last entry even when the
    # file does not end with a line terminator.
    with open(truth_file, 'r') as fh:
        y_true = np.array([conv_dict[x] for x in fh.read().splitlines()])
    # Read predictions
    with open(pred_file, 'r') as fh:
        y_pred = np.array([int(x) for x in fh.read().splitlines()])
    # The EDFx features excessive pre and post wake recordings.
    # It would be unfair to compare those. Accuracies would be well in the 95%
    # then, as Wake is easy to recognize.
    # The window starts `margin` epochs before the first non-wake entry of the
    # ground truth and its slice end is the last non-wake entry + `margin`.
    nonwake = np.where(y_true != 0)[0]
    if nonwake.size == 0:
        # Without any sleep stage there is no sleep period to trim to.
        print('Skipping {}: no sleep stages in ground truth'.format(truth_file))
        continue
    # Clamp at 0: a negative start would make the slice count from the end of
    # the array and select the wrong (usually empty) range.
    idx_start = max(nonwake[0] - margin, 0)
    # NOTE(review): the slice end is exclusive, so this keeps margin - 1
    # epochs after the last non-wake entry; left as is to keep the scores
    # comparable with earlier runs.
    idx_stop = nonwake[-1] + margin
    y_pred = y_pred[idx_start:idx_stop]
    y_true = y_true[idx_start:idx_stop]
    accs.append(np.mean(y_true == y_pred))
    f1s.append(f1_score(y_true, y_pred, average='macro'))
print('Prediction scores from a model trained on CCSHS50')
print('Mean transfer accuracy: {:.1f}%'.format(np.mean(accs)*100))
print('Mean transfer f1 score: {:.1f}%'.format(np.mean(f1s)*100))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment