Last active
April 15, 2020 09:57
-
-
Save alonstern/b5fb6e63885c1ea17d8a88c6654e3e35 to your computer and use it in GitHub Desktop.
split to blocks with padding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _split_to_blocks(self, data, tags, block_size, padding_size):
    """Cut every file into consecutive blocks of at most ``block_size`` items.

    ``data`` and ``tags`` are walked in lockstep, one file at a time. For
    each block offset the data block is built by ``_get_padded_data`` (so it
    carries ``padding_size`` extra elements of context), while the matching
    tags block is the plain, unpadded slice of that file's tags.

    Returns a ``(data_blocks, tags_blocks)`` pair of lists whose entries
    correspond one-to-one, in file order and then offset order.
    """
    padded_chunks = []
    tag_chunks = []
    for contents, labels in zip(data, tags):
        # Start offsets of every block in this file; the final block may be
        # shorter than block_size when the file length is not a multiple.
        offsets = range(0, len(contents), block_size)
        padded_chunks.extend(
            self._get_padded_data(contents, offset, block_size, padding_size)
            for offset in offsets
        )
        tag_chunks.extend(
            labels[offset: offset + block_size] for offset in offsets
        )
    return padded_chunks, tag_chunks
def _get_padded_data(self, file_data, index, block_size, padding_size):
    """Return the block starting at ``index`` surrounded by context.

    ``padding_size`` elements of context are split around the block:
    ``int(padding_size / 2)`` before it and the remainder after it. Real
    neighbouring elements of ``file_data`` are used as context wherever they
    exist; whatever is missing is filled with ``FILE_START`` markers on the
    left and ``FILE_END`` markers on the right.
    """
    pad_before = int(padding_size / 2)
    pad_after = padding_size - pad_before

    # How many marker elements are needed on each side. A count that is zero
    # or negative makes the list repetition below produce an empty list,
    # meaning the file itself supplies all of the context on that side.
    missing_before = pad_before - index
    trailing_available = max(file_data.size - index - block_size, 0)
    missing_after = pad_after - trailing_available

    start_markers = numpy.array([FILE_START] * missing_before, dtype=int)
    end_markers = numpy.array([FILE_END] * missing_after, dtype=int)

    # Slice the block together with any real context around it; the lower
    # bound is clamped at 0 and the upper bound may run past the end of the
    # file, in which case slicing simply stops at the last element.
    window_start = max(index - pad_before, 0)
    window_stop = index + block_size + pad_after
    window = file_data[window_start:window_stop]

    return numpy.concatenate([start_markers, window, end_markers])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment