rectified-evasion · October 21, 2024 09:08
diff --git a/edf_block_size.py b/edf_block_size.py
 # -*- coding: utf-8 -*-
 """
 Created on Sun Oct 20 2024
 @author: J.-B. Kordaß
 Gist to estimate .edf block size to avoid non-zero padded data files (at the expense of precision/skewing and 
 possibly truncation)

 - If you are using skjerns/save_edf.py (https://gist.github.com/skjerns/bc660ef59dca0dbd53f00ed38c42f6be), 
  truncate the data to the sample size n returned and use f.setDatarecordDuration(R) for R as returned.
 """

 import numpy as np

 def edf_block_size(n, sfreq, skew_error_threshold = 1e-2, max_truncation = 1):
    """
    Find some estimate of a "good" block size for EDF files without zero padding

    More precisely, consider 
    k * R = t * 1e5 = (n / sfreq) * 1e5
    k * S = n

    Parameters
    ----------
    n : int
        Number of samples
    sfreq : float
        Sampling frequency
    skew_error_threshold : float
        Maximum skew error (in seconds), which is the difference between the last actual time point of the last sample 
        and the last time point of the last sample using the estimated block size
    max_truncation : float
        Maximum truncation of data (in seconds) that is acceptable (is definitely necessary, e.g. if n is prime..!)

    Returns
    -------
    n : int
        New number of samples (possibly after truncation)
    S : int
        Block size (number of samples per block)
    R : int
        Block time (in seconds, precise up to 10 microsecond units = 1e-5s)
    k : int
        Number of blocks
    skew_error : float
        Skew error (in seconds)
        Default: 1e-2s (should probably be chosen according to n)
    truncation_error : float
        Truncation error (in seconds)
        Default 1s
    or None if truncation is too large
    """

    t = n / sfreq # total duration
    t_10micros = t * 1e6 # total duration has to be precise up to 10 microsecond units

    n_init = n # original number of samples

    skew_error = np.inf

    # print("n, S, R, k, skew_error")

    while skew_error > skew_error_threshold:

        if (n_init - (n+1))/sfreq > max_truncation:
            print(f"Error: Truncation of more than {max_truncation}s is not acceptable")
            return None

        # find all divisors of n
        divisors = [i for i in range(1, int(np.sqrt(n))) if n % i == 0]
        divisors = divisors + [n//i for i in divisors]
        divisors = np.array(divisors)

        # check distance to sampling frequency
        diffs = np.abs(divisors - sfreq)

        # order divisors by closeness to sampling frequency
        divisors = divisors[np.argsort(diffs)]

        # iterate through divisors in a range
        for d in divisors:
            if d < 100 or d > 5000:
                continue
            S = d # Sample frequency (i.e. number of samples for a block of data)
            k = n // S  # number of blocks

            skew_error = (t_10micros % k) * 1e-6
            R = int(t_10micros // k) * 1e-6
            # print(n, S, R, k, skew_error)
            if skew_error < skew_error_threshold:
                break

        # reduce total sample size
        n -= 1

    print(f"{n_init - (n+1)} samples = {(n_init - (n+1))/sfreq}s of data truncated")
    print(f"Data points are slighlty skewed, the last sample by {skew_error:.5f}s.")
    return n+1, S, R, k, skew_error, (n_init - (n+1))/sfreq
	# -- coding: utf-8 --
	"""
	Created on Sun Oct 20 2024
	@author: J.-B. Kordaß
	Gist to estimate .edf block size to avoid non-zero padded data files (at the expense of precision/skewing and
	possibly truncation)

	- If you are using skjerns/save_edf.py (https://gist.github.com/skjerns/bc660ef59dca0dbd53f00ed38c42f6be),
	truncate the data to the sample size n returned and use f.setDatarecordDuration(R) for R as returned.
	"""

	import numpy as np

	def edf_block_size(n, sfreq, skew_error_threshold = 1e-2, max_truncation = 1):
	"""
	Find some estimate of a "good" block size for EDF files without zero padding

	More precisely, consider
	k * R = t * 1e5 = (n / sfreq) * 1e5
	k * S = n

	Parameters
	----------
	n : int
	Number of samples
	sfreq : float
	Sampling frequency
	skew_error_threshold : float
	Maximum skew error (in seconds), which is the difference between the last actual time point of the last sample
	and the last time point of the last sample using the estimated block size
	max_truncation : float
	Maximum truncation of data (in seconds) that is acceptable (is definitely necessary, e.g. if n is prime..!)

	Returns
	-------
	n : int
	New number of samples (possibly after truncation)
	S : int
	Block size (number of samples per block)
	R : int
	Block time (in seconds, precise up to 10 microsecond units = 1e-5s)
	k : int
	Number of blocks
	skew_error : float
	Skew error (in seconds)
	Default: 1e-2s (should probably be chosen according to n)
	truncation_error : float
	Truncation error (in seconds)
	Default 1s
	or None if truncation is too large
	"""

	t = n / sfreq # total duration
	t_10micros = t * 1e6 # total duration has to be precise up to 10 microsecond units

	n_init = n # original number of samples

	skew_error = np.inf

	# print("n, S, R, k, skew_error")

	while skew_error > skew_error_threshold:

	if (n_init - (n+1))/sfreq > max_truncation:
	print(f"Error: Truncation of more than {max_truncation}s is not acceptable")
	return None

	# find all divisors of n
	divisors = [i for i in range(1, int(np.sqrt(n))) if n % i == 0]
	divisors = divisors + [n//i for i in divisors]
	divisors = np.array(divisors)

	# check distance to sampling frequency
	diffs = np.abs(divisors - sfreq)

	# order divisors by closeness to sampling frequency
	divisors = divisors[np.argsort(diffs)]

	# iterate through divisors in a range
	for d in divisors:
	if d < 100 or d > 5000:
	continue
	S = d # Sample frequency (i.e. number of samples for a block of data)
	k = n // S # number of blocks

	skew_error = (t_10micros % k) * 1e-6
	R = int(t_10micros // k) * 1e-6
	# print(n, S, R, k, skew_error)
	if skew_error < skew_error_threshold:
	break

	# reduce total sample size
	n -= 1

	print(f"{n_init - (n+1)} samples = {(n_init - (n+1))/sfreq}s of data truncated")
	print(f"Data points are slighlty skewed, the last sample by {skew_error:.5f}s.")
	return n+1, S, R, k, skew_error, (n_init - (n+1))/sfreq