Skip to content

Instantly share code, notes, and snippets.

@alonstern
Last active April 15, 2020 09:57
Show Gist options
  • Save alonstern/b5fb6e63885c1ea17d8a88c6654e3e35 to your computer and use it in GitHub Desktop.
Save alonstern/b5fb6e63885c1ea17d8a88c6654e3e35 to your computer and use it in GitHub Desktop.
split to blocks with padding
def _split_to_blocks(self, data, tags, block_size, padding_size):
    """Cut every file into consecutive blocks and pair each with its tags.

    Files and their tag sequences are paired positionally with ``zip``.
    Each file is walked in steps of ``block_size``; for every step the
    padded data block is produced by ``self._get_padded_data`` and the
    matching tags are the plain slice ``[start, start + block_size)``.

    Args:
        data: Iterable of per-file data sequences (each must support ``len``).
        tags: Iterable of per-file tag sequences (each must support slicing).
        block_size: Number of elements per block, also the step between blocks.
        padding_size: Forwarded unchanged to ``self._get_padded_data``.

    Returns:
        A ``(data_blocks, tags_blocks)`` tuple of two lists with one entry
        per block, in file order and then block order.
    """
    # Build (padded block, tag slice) pairs in the same order as a nested loop would.
    paired_blocks = [
        (
            self._get_padded_data(samples, offset, block_size, padding_size),
            labels[offset: offset + block_size],
        )
        for samples, labels in zip(data, tags)
        for offset in range(0, len(samples), block_size)
    ]
    padded_blocks = [padded for padded, _ in paired_blocks]
    label_blocks = [labels for _, labels in paired_blocks]
    return padded_blocks, label_blocks
def _get_padded_data(self, file_data, index, block_size, padding_size):
    """Return the block starting at ``index`` with context padding on both sides.

    ``padding_size`` is split into a left part (``padding_size // 2``) and a
    right part (the remainder). Real neighbouring elements of ``file_data``
    are used as padding wherever they exist; any shortfall on the left is
    filled with ``FILE_START`` and any shortfall on the right with
    ``FILE_END``.

    If the block itself runs past the end of the file it is not extended:
    the result is then shorter than ``block_size + padding_size`` by the
    number of missing block elements.

    Args:
        file_data: 1-D sequence supporting ``len`` and slicing
            (presumably a numpy array; confirm against the caller).
        index: Start offset of the block inside ``file_data``.
        block_size: Nominal number of elements in the block.
        padding_size: Total number of padding elements (left + right).

    Returns:
        A numpy array: left fill, the sliced data, then right fill.
    """
    # Floor division keeps the split in exact integer arithmetic.
    left_padding_number = padding_size // 2
    right_padding_number = padding_size - left_padding_number

    # If there is data available before the block we use it for padding,
    # otherwise FILE_START fills the gap. Same for FILE_END after the block.
    available_after = max(len(file_data) - index - block_size, 0)
    left_fill = max(left_padding_number - index, 0)
    right_fill = max(right_padding_number - available_after, 0)

    left_padding = numpy.full(left_fill, FILE_START, dtype=int)
    right_padding = numpy.full(right_fill, FILE_END, dtype=int)

    # Slicing clips at the end of the file, so only the start needs clamping.
    block_start = max(index - left_padding_number, 0)
    block_stop = index + block_size + right_padding_number
    block = file_data[block_start:block_stop]
    return numpy.concatenate([left_padding, block, right_padding])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment