Last active
October 21, 2024 09:08
-
-
Save rectified-evasion/dce33a21e947623fb2c6c77292bf7bc8 to your computer and use it in GitHub Desktop.
Estimate .edf block size to avoid zero padded data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Sun Oct 20 2024 | |
@author: J.-B. Kordaß | |
Gist to estimate .edf block size to avoid non-zero padded data files (at the expense of precision/skewing and | |
possibly truncation) | |
- If you are using skjerns/save_edf.py (https://gist.github.com/skjerns/bc660ef59dca0dbd53f00ed38c42f6be), | |
truncate the data to the sample size n returned and use f.setDatarecordDuration(R) for R as returned. | |
""" | |
import numpy as np | |
def edf_block_size(n, sfreq, skew_error_threshold = 1e-2, max_truncation = 1): | |
""" | |
Find some estimate of a "good" block size for EDF files without zero padding | |
More precisely, consider | |
k * R = t * 1e5 = (n / sfreq) * 1e5 | |
k * S = n | |
Parameters | |
---------- | |
n : int | |
Number of samples | |
sfreq : float | |
Sampling frequency | |
skew_error_threshold : float | |
Maximum skew error (in seconds), which is the difference between the last actual time point of the last sample | |
and the last time point of the last sample using the estimated block size | |
max_truncation : float | |
Maximum truncation of data (in seconds) that is acceptable (is definitely necessary, e.g. if n is prime..!) | |
Returns | |
------- | |
n : int | |
New number of samples (possibly after truncation) | |
S : int | |
Block size (number of samples per block) | |
R : int | |
Block time (in seconds, precise up to 10 microsecond units = 1e-5s) | |
k : int | |
Number of blocks | |
skew_error : float | |
Skew error (in seconds) | |
Default: 1e-2s (should probably be chosen according to n) | |
truncation_error : float | |
Truncation error (in seconds) | |
Default 1s | |
or None if truncation is too large | |
""" | |
t = n / sfreq # total duration | |
t_10micros = t * 1e6 # total duration has to be precise up to 10 microsecond units | |
n_init = n # original number of samples | |
skew_error = np.inf | |
# print("n, S, R, k, skew_error") | |
while skew_error > skew_error_threshold: | |
if (n_init - (n+1))/sfreq > max_truncation: | |
print(f"Error: Truncation of more than {max_truncation}s is not acceptable") | |
return None | |
# find all divisors of n | |
divisors = [i for i in range(1, int(np.sqrt(n))) if n % i == 0] | |
divisors = divisors + [n//i for i in divisors] | |
divisors = np.array(divisors) | |
# check distance to sampling frequency | |
diffs = np.abs(divisors - sfreq) | |
# order divisors by closeness to sampling frequency | |
divisors = divisors[np.argsort(diffs)] | |
# iterate through divisors in a range | |
for d in divisors: | |
if d < 100 or d > 5000: | |
continue | |
S = d # Sample frequency (i.e. number of samples for a block of data) | |
k = n // S # number of blocks | |
skew_error = (t_10micros % k) * 1e-6 | |
R = int(t_10micros // k) * 1e-6 | |
# print(n, S, R, k, skew_error) | |
if skew_error < skew_error_threshold: | |
break | |
# reduce total sample size | |
n -= 1 | |
print(f"{n_init - (n+1)} samples = {(n_init - (n+1))/sfreq}s of data truncated") | |
print(f"Data points are slighlty skewed, the last sample by {skew_error:.5f}s.") | |
return n+1, S, R, k, skew_error, (n_init - (n+1))/sfreq |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment