-
-
Save skjerns/bc660ef59dca0dbd53f00ed38c42f6be to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*- | |
""" | |
Created on Wed Dec 5 12:56:31 2018 | |
@author: skjerns | |
Gist to save a mne.io.Raw object to an EDF file using pyEDFlib | |
(https://github.com/holgern/pyedflib) | |
Disclaimer: | |
- Saving your data this way will result in slight | |
loss of precision (magnitude +-1e-09). | |
- It is assumed that the data is presented in Volt (V), | |
it will be internally converted to microvolt | |
- BDF or EDF+ is selected based on the filename extension | |
- Annotations preserved | |
Update: Since 2021, MNE also supports exporting EDF via edfio: | |
https://mne.tools/stable/generated/mne.export.export_raw.html | |
""" | |
import pyedflib # pip install pyedflib | |
from pyedflib import highlevel # new high-level interface | |
from pyedflib import FILETYPE_BDF, FILETYPE_BDFPLUS, FILETYPE_EDF, FILETYPE_EDFPLUS | |
from datetime import datetime, timezone, timedelta | |
import mne | |
import os | |
def _stamp_to_dt(utc_stamp):
    """Convert a measurement timestamp to a timezone-aware UTC datetime.

    The result is built by adding a ``timedelta`` to the UTC epoch rather
    than by passing the stamp itself to ``datetime.fromtimestamp``; the
    original author describes this as the Windows-friendly way.

    Parameters
    ----------
    utc_stamp : datetime | sequence of numbers | int | float
        - a ``datetime`` is returned unchanged,
        - a sequence is read as ``(seconds, microseconds)`` since the epoch;
          the microseconds entry may be missing and is then taken as 0,
        - a single number is read as (possibly fractional) seconds since
          the epoch.

    Returns
    -------
    datetime
        The corresponding point in time (UTC for non-datetime input).

    Raises
    ------
    TypeError
        If ``utc_stamp`` is None or otherwise not convertible.
    ValueError
        If ``utc_stamp`` is an empty sequence.
    """
    # A real type check instead of searching for 'datetime' in the type name.
    if isinstance(utc_stamp, datetime):
        return utc_stamp
    if utc_stamp is None:
        raise TypeError('utc_stamp is None, cannot convert it to a datetime')

    epoch = datetime.fromtimestamp(0, tz=timezone.utc)
    try:
        parts = iter(utc_stamp)
    except TypeError:
        # Not iterable: treat it as a scalar number of seconds.
        return epoch + timedelta(seconds=float(utc_stamp))

    stamp = [int(s) for s in parts]
    if not stamp:
        raise ValueError('utc_stamp is empty, expected (seconds, microseconds)')
    if len(stamp) == 1:  # In case there is no microseconds information
        stamp.append(0)
    return epoch + timedelta(0, stamp[0], stamp[1])  # day, sec, μs
def write_mne_edf(mne_raw, fname, picks=None, tmin=0, tmax=None,
                  overwrite=False):
    """
    Save the content of an mne.io.Raw (or subclass) as EDF(+)/BDF(+) file.

    pyEDFlib is used to write the data to disk. The data is assumed to be
    in Volt and is scaled to microvolt before writing.

    Note (2021): EDF export is now also supported in MNE itself:
    https://mne.tools/stable/generated/mne.export.export_raw.html

    Parameters
    ----------
    mne_raw : mne.io.Raw
        An object with super class mne.io.BaseRaw that contains the data
        to save.
    fname : string
        File name of the new dataset. A name ending in ``.edf`` (any case)
        selects EDF/EDF+, every other extension selects BDF/BDF+. The
        "+" variant is chosen when ``mne_raw`` has at least one annotation.
    picks : array-like of int | None
        Indices of channels to include. If None all channels are kept.
    tmin : float | None
        Time in seconds of first sample to save. If None first sample
        is used.
    tmax : float | None
        Time in seconds of last sample to save. If None last sample
        is used.
    overwrite : bool
        If True, the destination file (if it exists) will be overwritten.
        If False (default), an error will be raised if the file exists.

    Returns
    -------
    bool
        True once the file has been written.

    Raises
    ------
    TypeError
        If ``mne_raw`` is not an mne.io.BaseRaw instance.
    OSError
        If ``fname`` exists and ``overwrite`` is False.
    """
    print('did you know EDF export is now supported in MNE via edfio? have a look at https://mne.tools/stable/generated/mne.export.export_raw.html')
    if not isinstance(mne_raw, mne.io.BaseRaw):
        raise TypeError('Must be mne.io.Raw type')
    if not overwrite and os.path.exists(fname):
        raise OSError('File already exists. No overwrite.')

    # static settings
    has_annotations = len(mne_raw.annotations) > 0
    if os.path.splitext(fname)[-1].lower() == '.edf':
        file_type = FILETYPE_EDFPLUS if has_annotations else FILETYPE_EDF
        dmin, dmax = -32768, 32767
    else:
        file_type = FILETYPE_BDFPLUS if has_annotations else FILETYPE_BDF
        dmin, dmax = -8388608, 8388607
    print('saving to {}, filetype {}'.format(fname, file_type))

    sfreq = mne_raw.info['sfreq']
    # The docstring allows tmin=None, meaning "start at the first sample".
    tmin = 0 if tmin is None else tmin

    # meas_date may be None (e.g. for hand-built Raw objects); in that case
    # no start time is set on the writer further down.
    meas_date = mne_raw.info['meas_date']
    date = None if meas_date is None else _stamp_to_dt(meas_date)
    if date is not None and tmin:
        date += timedelta(seconds=tmin)

    first_sample = int(sfreq * tmin)
    last_sample = int(sfreq * tmax) if tmax is not None else None

    # convert data
    channels = mne_raw.get_data(picks,
                                start=first_sample,
                                stop=last_sample)
    # convert to microvolts to scale up precision
    # NOTE(review): every picked channel is scaled, including non-voltage
    # ones (e.g. stim channels) -- confirm this is what you want.
    channels *= 1e6
    n_channels = len(channels)

    # Limits used when the Raw object carries no per-channel EDF header info.
    # Computed once here instead of once per channel.
    fallback_min, fallback_max = channels.min(), channels.max()
    if fallback_min == fallback_max:
        # Avoid a zero-width physical range for flat data.
        fallback_max = fallback_min + 1

    # Header information of the file the data was read from, if there is any.
    try:
        extras = mne_raw._raw_extras[0]
    except (AttributeError, IndexError, TypeError):
        extras = None

    # _orig_units is None for Raw objects that were not read from a file.
    orig_units = getattr(mne_raw, '_orig_units', None) or {}
    unit_keys = list(orig_units)
    limit_keys = ('physical_min', 'physical_max', 'digital_min', 'digital_max')

    # Build all signal headers BEFORE opening the writer, so that a failure
    # here does not leave an empty file behind.
    channel_info = []
    ch_idx = range(n_channels) if picks is None else picks
    for i in ch_idx:
        label = mne_raw.ch_names[i]

        # Prefer a lookup by channel name, fall back to the position, and
        # finally to microvolt (the unit the data was just scaled to).
        if label in orig_units:
            unit = orig_units[label]
        elif unit_keys and i < len(unit_keys):
            unit = orig_units[unit_keys[i]]
        else:
            unit = 'uV'
        # The micro sign is not plain ASCII and gets garbled when written
        # (reported as 'AuV' in the gist discussion), so use 'u' instead.
        unit = str(unit or 'uV').replace('µ', 'u')

        try:
            limits = {key: extras[key][i] for key in limit_keys}
        except (IndexError, KeyError, TypeError):
            limits = {'physical_min': fallback_min,
                      'physical_max': fallback_max,
                      'digital_min': dmin,
                      'digital_max': dmax}

        ch_dict = {'label': label,
                   'dimension': unit,
                   # All rows returned by get_data() share info['sfreq'];
                   # the stored 'n_samps' (samples per data record) is not
                   # the sampling rate of the data written here.
                   'sample_rate': sfreq,
                   'transducer': '',
                   'prefilter': ''}
        ch_dict.update(limits)
        channel_info.append(ch_dict)

    try:
        subject_info = extras['subject_info'] or {}
    except (KeyError, TypeError):
        subject_info = {}

    # Create the writer outside the try block: if the constructor fails there
    # is nothing to close, and the original error is not masked.
    f = pyedflib.EdfWriter(fname,
                           n_channels=n_channels,
                           file_type=file_type)
    try:
        f.setPatientCode(subject_info.get('id', '0'))
        f.setPatientName(subject_info.get('name', 'noname'))
        f.setTechnician('mne-gist-save-edf-skjerns')
        f.setSignalHeaders(channel_info)
        if date is not None:
            # no conversion necessary, as pyedflib can handle datetime.
            f.setStartdatetime(date)
        f.writeSamples(channels)
        for annotation in mne_raw.annotations:
            # The written data starts at tmin, so shift the onsets
            # accordingly and skip annotations outside the saved range.
            onset = annotation['onset'] - tmin
            if onset < 0:
                continue
            if tmax is not None and annotation['onset'] > tmax:
                continue
            f.writeAnnotation(onset,
                              annotation['duration'],
                              annotation['description'])
    finally:
        f.close()
    return True
- mne seems to do a rescaling of data when it's loaded. The output for this shows that mne's values are all way lower than pyedflib's. I'm not sure if it's correct or not. But related: when mne loads in the file, it converts the 'uV' unit (which pyedflib shows) to 'µV' (it also doesn't properly load non-"uV" units, which will be important in a moment). When saving the file afterwards, pyedflib seems to not like this, and converts it to 'AuV'. mne doesn't like that, and converts it to n / a. As a result the units get screwed up, and the values (which can be seen in the output to the code above) get messed up.
Yeah, the autoscaling of mne is indeed a problem and should be made optional imo. It also makes it more difficult to save data to disk again, as we lose precision.
This is going to get confusing (if it wasn't already): I say "fixed" because while this channel's values stay the same between the two mne loaded objects, it does not stay the same between the pyedflib signal arrays.
Does that mean pyedflib is making a mistake? Or mne? I'm not so experienced with the stim channel.
but it must be taking that information and transforming the stim channel values somehow (1900799 turns in to 65791, don't ask me how).
That sounds awfully close to uint16 + uint8, i.e. 2^16 + 2^8.
I feel like either of these issues could be far more easily solved by doing what I attempted - using the header and signal headers from pyedflib.highlevel.
That might indeed be a good approach :) Would loading and processing things with pyedflib be an option for you? If you run into problems there I might be able to help you; I'm actually one of the maintainers.
pyedflib has an option to load data as digital-ADC values. With this you can load the data as it is actually saved within the EDF file. This circumvents any errors due to unit scaling, precision, etc. When you need the values as physical values again, you can convert them with highlevel.dig2phys.
You can also try to load it with pyedflib and then plug your data manually into an mne.io.RawArray.
Does that mean pyedflib is making a mistake? Or mne? I'm not so experienced with the stim channel.
Pretty sure it's mne. As I mentioned, when loading the file with pyedflib it keeps all of the units, but loading with mne loses the non-voltage-based channel units (like the one for the stim channel). Losing those units, or something related to them, causes pyedflib's second load to not work correctly.
One way to check this would be to do a simple save/load with pyedflib
only, and see if the stim channel values and units stay the same.
That sounds awfully close to uint16 + uint8, i.e. 2^16 + 2^8.
Well that makes sense (because the channel starts high, and has 2^8 bits). I'm still not sure how it gets that initial high value (1900799), but it's probably a combination that I'm not familiar with.
That might indeed be a good approach :) Would loading and processing things with pyedflib be an option for you? If you run into problems there I might be able to help you; I'm actually one of the maintainers.
pyedflib has an option to load data as digital-ADC values. With this you can load the data as it is actually saved within the EDF file. This circumvents any errors due to unit scaling, precision, etc. When you need the values as physical values again, you can convert them with highlevel.dig2phys.
You can also try to load it with pyedflib and then plug your data manually into an mne.io.RawArray.
Really the only thing I'm doing right now is modifying the stim channel based on experimental information. That means I need the channel in a form I can interpret, and without knowing how 1900799 -> 65791 I can't reliably manipulate those values. mne
handles this right now. The variant of this gist/script I forked works for me, so I'm not overly concerned about changing it (at least until the person I'm working with tells me the output BDF isn't working in Matlab or something...).
In any case however that doesn't help with making this gist more robust. The changes I introduced help with BDF files, but may not be meaningful for EDF (or the + variants of either format).
Found another issue, I'm afraid.
Relevant code:
if __name__ == '__main__':
    # Round trip: read a BDF file, write it with write_mne_edf, read the
    # copy back, and report visible vs. total channel count for both files.
    original = '???/BDFtestfiles/Newtest17-2048.bdf'
    save = original[:-4] + '_save.bdf'

    mne_1 = mne.io.read_raw_bdf(input_fname=original, preload=True)
    write_mne_edf(mne_1, save, overwrite=True)
    mne_2 = mne.io.read_raw_bdf(input_fname=save, preload=True)

    for raw, path in ((mne_1, original), (mne_2, save)):
        n_visible = len(raw.get_data())
        n_total = raw._raw_extras[0]['nchan']
        print('visible: {} - total: {} ({})'.format(n_visible, n_total, path))
If one runs this with the above gist script, one gets the following output:
visible: 17 - total: 17 (???/BDFtestfiles/Newtest17-2048.bdf)
visible: 17 - total: 18 (???/BDFtestfiles/Newtest17-2048_save.bdf)
This is because pyedflib is saving an additional annotation channel. If one changes line 69 of the gist to simply be "pyedflib.FILETYPE_BDF" one gets 17 both times. The annotation channel is not created.
The problem here is that other packages, like EEGLab in Matlab, cannot properly handle the annotation channel. Using the latest version of EEGLab and attempting to load the annotated file with BioSig throws an error ("EDFANNOT2EVT.M is currently not supported!").
Additionally, it doesn't look like annotations are even being saved here at all. You have an EdfWriter.writeAnnotation
method, but it isn't being called here. In such a case, I think the best way to handle this would be to just change lines 66 and 69 to the non-plus versions (FILETYPE_EDF
and FILETYPE_BDF
).
If you wanted to include EDF+ and BDF+ in the future, you could check if mne_raw._raw_extras[0]['nchan']
is different from the total number of shown channels.
Is EEGLab in general unable to load BDF+? Then this might be an issue to post at https://github.com/sccn/eeglab ?
Else I can implement your change, even if it feels a bit hacky.
Can you try to call f.set_number_of_annotation_signals(0)
right after line 95? might fix the issue more gracefully.
(don't have a matlab licence anymore, post-student-life etc ;-) )
Is EEGLab in general unable to load BDF+? Then this might be an issue to post at https://github.com/sccn/eeglab ?
Else I can implement your change, even if it feels a bit hacky.
Can you try to call
f.set_number_of_annotation_signals(0)
right after line 95? might fix the issue more gracefully.(don't have a matlab licence anymore, post-student-life etc ;-) )
Basically yes. There are a couple different methods of handling BDF imports. One (BioSig) merely gives a warning, the other (FileIO) gives an error. Here are a couple of things that reference the issue indirectly:
https://sccn.ucsd.edu/bugzilla/show_bug.cgi?id=1865
https://sccn.ucsd.edu/bugzilla/show_bug.cgi?id=1726
I downloaded a test BDF+ file from here (https://www.teuniz.net/edf_bdf_testfiles/) and got the same error (I've tried to attach a picture to this comment showing that, but GitHub doesn't like it).
How about something like this?
# The file has an annotation channel exactly when the header lists one
# channel more than the data exposes. Every other case (equal counts or an
# unexpected mismatch -- ??? throw Exception?) is treated as "no annotations".
n_header_channels = mne_raw._raw_extras[0]['nchan']
n_data_channels = len(mne_raw.get_data())
annot = n_header_channels == (n_data_channels + 1)

# Pick the plain or the "+" file type and the matching digital range.
if os.path.splitext(fname)[-1] == '.edf':
    file_type = pyedflib.FILETYPE_EDFPLUS if annot else pyedflib.FILETYPE_EDF
    dmin, dmax = -32768, 32767
else:
    file_type = pyedflib.FILETYPE_BDFPLUS if annot else pyedflib.FILETYPE_BDF
    dmin, dmax = -8388608, 8388607
This allows one to set the file to all four types. It's the only way I can see to detect the + version. It's possible for an object to have an annotation channel, but 0 annotations, so one cannot depend upon the raw.annotations object.
Okay. I'll look into this.
Actually saving the file as XDF instead of XDF+ should be no problem as all the additional features of XDF+ are not used anyway in this implementation.
Edit: @GABowers can you check if it works now? Somehow my test files only produce garbage
I can confirm it works. Running:
if __name__ == '__main__':
    # Same round trip as before, run against the updated gist: the saved
    # copy should now report as many total channels as the source file.
    bdf = 'I:/???/BDFtestfiles/Newtest17-2048.bdf'
    save = bdf[:-4] + '_save.bdf'

    mne_1 = mne.io.read_raw_bdf(input_fname=bdf, preload=True)
    write_mne_edf(mne_1, save, overwrite=True)
    mne_2 = mne.io.read_raw_bdf(input_fname=save, preload=True)

    report = 'visible: {} - total: {} ({})'
    for raw, path in ((mne_1, bdf), (mne_2, save)):
        shown = len(raw.get_data())
        print(report.format(shown, raw._raw_extras[0]['nchan'], path))
...with the latest version of the gist above outputs the following:
visible: 17 - total: 17 (I:/???/BDFtestfiles/Newtest17-2048.bdf)
visible: 17 - total: 17 (I:/???/BDFtestfiles/Newtest17-2048_save.bdf)
And I can import it into Matlab just fine. Just for the record, that file can be downloaded here.
It should be noted that it is possible for a BDF+ file to not have any annotations, but still have an annotation channel. But one could easily argue that such a file should be BDF, not BDF+.
Good afternoon there.
I was wondering whether you managed to keep the annotations when saving raw data to an EDF file.
Thanks.
M.
@MM75CO yes it does that.
Hi,
Great work!! I really miss this option in the mne package :D
I am trying to use the gist to convert a csv file to edf. I have constructed my own mne raw object from the data in the csv file, but I get an error when trying to save the Raw object to edf. I get the following error and the output file is empty (size 0 KB):
'NoneType' object has no attribute 'keys'
The error seems to originate from line 103 and mne_raw._orig_units?
I also get this error when converting the timestamp. But I think solved this by omitting it. (But maybe that is messing something else up?)
'NoneType' object is not iterable
@LanaHub You might want to see what the mne field is for the example mne data and just copy from there.
@skjerns do you think there is any interest in making this a PR to mne? There is a lot of i/o structure in mne already, but it's all based around loading data; I think it would be a bit of a new direction to save data in non-fif formats. MNE-BIDS already uses pybv because fif is not one of the BIDS required formats for EEG.
@LanaHub if you just want to convert a csv
to edf
(and do not need mne
functionality), just use pyedflib
directly! the highlevel interface should make that quite easy. Load the CSV with numpy
or pandas
and save it with pyedflib
.
https://github.com/holgern/pyedflib#highlevel-interface
@alexrockhill I'd gladly do it, but the devs of mne do not want any third-party dependencies (which using pyedflib would introduce), that's why they told me to create a gist instead. That would mean that I'd have to create the edf creation from scratch in raw python. See also mne-tools/mne-python#5755
Thank you for getting back so soon!
I would like to use the mne functionality, but I also want to save the data as edf to be compliant with BIDS. By the way, I get the same error when trying to convert a gdf file to edf.
I will have another look at the example mne data and try to figure out a way to make it work.
@skjerns ahh, I see. Maybe worth thinking about whether this could be in mne-bids since there is already a pybv dependency. That does seem like when the data needs to be in edf, i.e. when it's in BIDS. @jasmainak what do you think?
Also, the gist is nice, thanks for doing that but it is a rather important part that can go very wrong if the file is exported incorrectly and it would be great to have some round trip tests that would be added if it were to be a PR.
I'm a bit hesitant to take on the responsibility of an edf writer. There are tons of corner cases in the edf format. Why not contribute to existing efforts such as pyedf: https://github.com/bids-standard/pyedf
That doesn't have any tests either haha
It seems like there is a lot of use of the gist and like people would probably like it to be officially supported by mne
and @skjerns said he would write the initial PR but I understand the hesitance and I get the message. I thought it was worth floating the idea.
Hi,
this gist is a really great idea! It is exactly the functionality I had been searching for in mne and did not find. A great supplement. Big thanks to you!
I tried to save the preprocessed mne raw object into a new edf, and got the following error
...
line 27, in _stamp_to_dt
stamp = [int(s) for s in utc_stamp]
TypeError: 'datetime.datetime' object is not iterable
raw.info['meas_date'] in my script gives me:
datetime.datetime(2020, 3, 12, 16, 34, 55, tzinfo=datetime.timezone.utc)
Have you encountered this problem?
Yes, I had the same problem. I solved it by changing line 76 to date = mne_raw.info['meas_date']. However, I then got a new problem and actually gave up :P Let me know if you succeed!
I've adapted the script.
@datalw can you check if it works now?
@skjerns Thanks a lot for the swift response! I tried the adapted version and got the following error:
----> 3 if 'datetime' in type(utc_stamp): return utc_stamp
4 # The min on windows is 86400
5 stamp = [int(s) for s in utc_stamp]
TypeError: argument of type 'type' is not iterable
So I changed if 'datetime' in type(utc_stamp):
to if 'datetime' in str(type(utc_stamp))
and it works :D Would you mind to adapt it again?
Thanks a lot for the great work!
@LunaHub it works for me so far, you could try it again : )
oops, yes didnt test it, thank's, I corrected it
I have used the gist for a while, it is really awesome, especially for saving the preprocessed data!
I am here again for a follow-up question. So far the files without annotations have been saved successfully. However for a file with annotations, the raw.times in the saved file was incorrect. I have looked into the code and found out the problem might be due to this line
'sample_rate': mne_raw._raw_extras[0]['n_samps'][i]
mne_raw.info['sfreq']
gives 256, but mne_raw._raw_extras[0]['n_samps'][i]
gives 2048 instead.
Since I have never dealt with _raw_extras, I tried to find some information about _raw_extras in mne documention, but failed.
Does anyone know how to solve this problem, or how I can proceed? Or is this a bug in _raw_extras?
I am not familiar with raw._raw_extras
you might be able to reach a wider audience that wrote that part of the code on Gitter unless someone on this thread knows https://gitter.im/mne-tools/mne-python
@datalw did you resample the data? was one of original sample frequencies 2048?
To be honest, I just hacked this snippet together and changed it often over time; I'm also not very familiar with the mne internals. If you find a solution, let me know! I'm still in favor of integrating edf writing compatibility inside mne, but currently they don't want to introduce optional dependencies. Maybe that changes, as a Python-native implementation of edflib was published last month :)
So this still isn't incorporated into mne-python, any chance it ever will? Been a few years.
Feel free to ask this directly at MNE: https://github.com/mne-tools/mne-python/issues or the gitter https://gitter.im/mne-tools/mne-python
The problem was the additional dependency on pyedflib
and that pyedflib
uses Cython to compile some C libraries, which they did not want to include (afaik, and which I somehow understand).
However, we could ask if it would be possible to include an optional dependency on pyedflib. Additionally, EDFlib has just released a Python-only version, which would solve some problems :) but it's quite slow (it's Python, after all).
@alexrockhill @skjerns Thanks a lot for the reply and sorry for my delayed answer. I plan to come back to this issue in the next couple of weeks and if I find a solution I will let you @skjerns know ; )
Hello, thanks a lot for the great work! I am having one problem when using this script. When I try to open the resulted edf file this message came up "Error, number of datarecords is 0, expected >0. You can fix this problem with the header editor, check the manual for details. File is not a valid EDF or BDF file.". I am not quite sure why is this the case and I wonder if anyone is having the same problem or has any solution.
Thanks a lot!
That's a bit hard to diagnose what's going on without sharing a minimally reproducible example that someone else can run on their machine.
It sounds like maybe your mne.io.Raw
object didn't have any data in it or any data of type eeg
, grad
, mag
or seeg
. See https://mne.tools/stable/generated/mne.io.Raw.html#mne.io.Raw.set_channel_types.
BioSemi actually has files to use - download here. They include a stim channel, which is important.
I was trying to just use the "highlevel" information (header and signal headers) and save the file with that instead, but ran into an issue with mne's parsing of the prefiltering--it seemed like it was recognizing an extra channel. I'm setting that aside for now, however, because I ran into a couple other extremely messy things. Pertinent code:
This can be fixed by changing the "dimension" key's value to "mne_raw._orig_units[keys[i]].replace('µ', 'u')" on your line 103.
Hence, I "fixed" this by commenting out your line 85 and added "channels[i] /= mne_raw._raw_extras[0]['units'][i]" in the loop (so line 111 or so).
This is going to get confusing (if it wasn't already): I say "fixed" because while this channel's values stay the same between the two mne loaded objects, it does not stay the same between the pyedflib signal arrays. I think this is because mne doesn't properly load non-voltage-based units. If you look at "mne_raw._orig_units" it has normal values for the electrode channels, but for the stim channel it just has n / a, while the pyedflib signal header shows a unit of "boolean." I haven't delved into the mne code, but it must be taking that information and transforming the stim channel values somehow (1900799 turns in to 65791, don't ask me how). Anyway, when that channel is saved, its "units" are just n / a. When that channel is loaded back in with pyedflib, it not having the correct units seems to mean that it isn't transformed again (it stays at 65791). But for mne this is fine: because it doesn't recognize the units (or possible something else that has changed), it doesn't do another conversion, it keeps the channel value as-is. Whew.
I feel like either of these issues could be far more easily solved by doing what I attempted - using the header and signal headers from pyedflib.highlevel.