skjerns · January 28, 2018 13:14
diff --git a/transfer_performance_edfx.py b/transfer_performance_edfx.py
 # -*- coding: utf-8 -*-
 import glob
 import csv
 import numpy as np
 from sklearn.metrics import f1_score
 from sleepscorer import Scorer, SleepData

 edfx_dir = "D:\\sleep\\edfx\\"

 ##################################
 #%% First we convert the hypnograms

 # The hypnogram EDFs are read with a hack: the raw bytes are turned into
 # their Python repr string and the 'Sleep_stage_' annotations are cut out
 # of that text. No working reader was found for the hypnogram EDFs.
 print('Converting hypnograms')
 files = glob.glob(edfx_dir + '*.hyp')
 for hyp_path in files:
     with open(hyp_path, mode='rb') as hyp_file:  # binary mode is required
         annotations = str(hyp_file.read()).split('Sleep_stage_')[1:]

     # One single-character stage label per 30-second epoch.
     epochs = []
     for annotation in annotations:
         label = annotation[0]
         # The text before the first backslash of the repr, from offset 12
         # on, is parsed as an integer and divided by 30 -- presumably a
         # duration in seconds turned into 30 s epochs; verify against the
         # actual annotation layout.
         n_epochs = int(annotation.split('\\')[0][12:]) // 30
         epochs.extend(label * n_epochs)

     # Same path with the last four characters replaced by '.txt';
     # one label per row, rows terminated by a bare carriage return.
     with open(hyp_path[:-4] + '.txt', "w") as txt_file:
         csv.writer(txt_file, lineterminator='\r').writerows(epochs)
        
 ##################################
 #%% Here we classify.
 #   Scorer is only run at the end to prevent re-initialization of tensorflow

 # Sorted so the recordings are processed in a reproducible order
 # (glob returns paths in arbitrary order).
 edffiles = sorted(glob.glob(edfx_dir + '*.edf'))

 # Wrap every recording in a SleepData object; preload=False is passed so
 # that creating the objects is kept separate from running the scorer.
 files = []
 for filename in edffiles:
     print(filename)
     file = SleepData(filename,
                      channels={'EEG':'EEG Fpz-Cz', 'EMG':'EMG submental',
                                'EOG':'EOG horizontal'}, preload=False)
     files.append(file)

 # A single Scorer instance handles all recordings in one run.
 scorer = Scorer(files, hypnograms=False, demo=True)
 scorer.run()

 ##################################
 #%% Compare the predictions with the ground truth

 # Both lists are sorted so that entry i of each belongs to the same
 # recording; glob itself returns paths in arbitrary order.
 # NOTE(review): this assumes the ground truth (.txt) and prediction (.csv)
 # files sort in the same recording order -- confirm the Scorer's output
 # file naming.
 truth = sorted(glob.glob(edfx_dir + '*.txt'))
 pred = sorted(glob.glob(edfx_dir + '*.csv'))
 if len(truth) != len(pred):
     raise ValueError(
         'Found {} ground truth files but {} prediction files in {}'.format(
             len(truth), len(pred), edfx_dir))

 # This is the conversion dict for the ground truth
 # Note: The scorer only scores into Wake, S1, S2, SWS and REM
 #       According to the AASM manual, S4 is therefore combined with S3
 #       All 'other' stages such as movement etc are converted to wake.
 conv_dict =  {'1':1, '2':2, '3':3, '4':3,
               'W':0, 'S1':1, 'S2':2, 'S3':3, 'S4':3, 'SWS':3, 'REM':4, 'R':4,
               'A':0, 'M':0, '?':0}

 # Number of epochs kept before the first and after the last non-wake epoch
 # (60 epochs of 30 seconds = 30 minutes).
 margin = 60

 accs = []
 f1s  = []

 for truth_path, pred_path in zip(truth, pred):

     # Read ground truth file (one stage label per line)
     with open(truth_path, 'r') as truth_file:
         y_true = np.array([conv_dict[x]
                            for x in truth_file.read().splitlines()])

     # Read predictions (one integer stage per line)
     with open(pred_path, 'r') as pred_file:
         y_pred = np.array([int(x) for x in pred_file.read().splitlines()])

     # The EDFx features excessive pre and post wake recordings.
     # It would be unfair to compare those: accuracies would be well in the
     # 95% range then, as Wake is easy to recognize.
     # The comparison therefore runs from `margin` epochs before the first
     # non-wake entry of the ground truth up to and including `margin`
     # epochs after the last one.
     asleep = np.where(y_true != 0)[0]
     if len(asleep) == 0:
         # No sleep at all: the window is undefined, so leave this one out.
         print('Skipping {}: no sleep epochs in ground truth'.format(truth_path))
         continue

     # Clamp at 0: a negative start would make the slice wrap around from
     # the end of the array. The stop index is exclusive, hence the +1.
     idx_start = max(asleep[0] - margin, 0)
     idx_stop  = asleep[-1] + margin + 1
     y_pred = y_pred[idx_start:idx_stop]
     y_true = y_true[idx_start:idx_stop]

     accs.append(np.mean(y_true==y_pred))
     f1s.append(f1_score(y_true, y_pred, average='macro'))

 print('Prediction scores from a model trained on CCSHS50')
 print('Mean transfer accuracy: {:.1f}%'.format(np.mean(accs)*100))
 print('Mean transfer f1 score: {:.1f}%'.format(np.mean(f1s)*100))
	# -*- coding: utf-8 -*-
	import glob
	import csv
	import numpy as np
	from sklearn.metrics import f1_score
	from sleepscorer import Scorer, SleepData

	edfx_dir = "D:\\sleep\\edfx\\"

	##################################
	#%% First we convert the hypnograms

	# The hypnogram EDFs are read with a hack: the raw bytes are turned into
	# their Python repr string and the 'Sleep_stage_' annotations are cut out
	# of that text. No working reader was found for the hypnogram EDFs.
	print('Converting hypnograms')
	files = glob.glob(edfx_dir + '*.hyp')
	for file in files:
	    hypnogram = []
	    with open(file, mode='rb') as f: # b is important -> binary
	        raw_hypno = str(f.read()).split('Sleep_stage_')[1:]
	        for h in raw_hypno:
	            # First character after the marker is the stage label.
	            stage = h[0]
	            # The text before the first backslash of the repr, from
	            # offset 12 on, is parsed as an integer and divided by 30 --
	            # presumably a duration in seconds turned into 30 s epochs;
	            # verify against the actual annotation layout.
	            repeat = int(h.split('\\')[0][12:])//30
	            # One single-character label per epoch.
	            hypnogram.extend(stage*repeat)
	    # Same path with the last four characters replaced by '.txt';
	    # one label per row, rows terminated by a bare carriage return.
	    with open(file[:-4] + '.txt', "w") as f:
	        writer = csv.writer(f, lineterminator='\r')
	        writer.writerows(hypnogram)

	##################################
	#%% Here we classify.
	# Scorer is only run at the end to prevent re-initialization of tensorflow

	# Sorted so the recordings are processed in a reproducible order
	# (glob returns paths in arbitrary order).
	edffiles = sorted(glob.glob(edfx_dir + '*.edf'))

	# Wrap every recording in a SleepData object; preload=False is passed so
	# that creating the objects is kept separate from running the scorer.
	files = []
	for filename in edffiles:
	    print(filename)
	    file = SleepData(filename,
	                     channels={'EEG':'EEG Fpz-Cz', 'EMG':'EMG submental',
	                               'EOG':'EOG horizontal'}, preload=False)
	    files.append(file)

	# A single Scorer instance handles all recordings in one run.
	scorer = Scorer(files, hypnograms=False, demo=True)
	scorer.run()

	##################################
	#%% Compare the predictions with the ground truth

	# Both lists are sorted so that entry i of each belongs to the same
	# recording; glob itself returns paths in arbitrary order.
	# NOTE(review): this assumes the ground truth (.txt) and prediction (.csv)
	# files sort in the same recording order -- confirm the Scorer's output
	# file naming.
	truth = sorted(glob.glob(edfx_dir + '*.txt'))
	pred = sorted(glob.glob(edfx_dir + '*.csv'))
	if len(truth) != len(pred):
	    raise ValueError(
	        'Found {} ground truth files but {} prediction files in {}'.format(
	            len(truth), len(pred), edfx_dir))

	# This is the conversion dict for the ground truth
	# Note: The scorer only scores into Wake, S1, S2, SWS and REM
	# According to the AASM manual, S4 is therefore combined with S3
	# All 'other' stages such as movement etc are converted to wake.
	conv_dict = {'1':1, '2':2, '3':3, '4':3,
	             'W':0, 'S1':1, 'S2':2, 'S3':3, 'S4':3, 'SWS':3, 'REM':4, 'R':4,
	             'A':0, 'M':0, '?':0}

	# Number of epochs kept before the first and after the last non-wake epoch
	# (60 epochs of 30 seconds = 30 minutes).
	margin = 60

	accs = []
	f1s = []

	for truth_path, pred_path in zip(truth, pred):

	    # Read ground truth file (one stage label per line)
	    with open(truth_path, 'r') as truth_file:
	        y_true = np.array([conv_dict[x]
	                           for x in truth_file.read().splitlines()])

	    # Read predictions (one integer stage per line)
	    with open(pred_path, 'r') as pred_file:
	        y_pred = np.array([int(x) for x in pred_file.read().splitlines()])

	    # The EDFx features excessive pre and post wake recordings.
	    # It would be unfair to compare those: accuracies would be well in the
	    # 95% range then, as Wake is easy to recognize.
	    # The comparison therefore runs from `margin` epochs before the first
	    # non-wake entry of the ground truth up to and including `margin`
	    # epochs after the last one.
	    asleep = np.where(y_true != 0)[0]
	    if len(asleep) == 0:
	        # No sleep at all: the window is undefined, so leave this one out.
	        print('Skipping {}: no sleep epochs in ground truth'.format(truth_path))
	        continue

	    # Clamp at 0: a negative start would make the slice wrap around from
	    # the end of the array. The stop index is exclusive, hence the +1.
	    idx_start = max(asleep[0] - margin, 0)
	    idx_stop = asleep[-1] + margin + 1
	    y_pred = y_pred[idx_start:idx_stop]
	    y_true = y_true[idx_start:idx_stop]

	    accs.append(np.mean(y_true==y_pred))
	    f1s.append(f1_score(y_true, y_pred, average='macro'))

	print('Prediction scores from a model trained on CCSHS50')
	print('Mean transfer accuracy: {:.1f}%'.format(np.mean(accs)*100))
	print('Mean transfer f1 score: {:.1f}%'.format(np.mean(f1s)*100))