import numpy as np
# make a minibatch of time, batch, features
# time length 7
# batch size 2
# feature dimension 4:
# 1:4, 11:14, 21:24, 31:34, etc for the first minibatch element
# 6:9, 16:19, 26:29, etc for the second minibatch element
n_features = 4
n_timesteps = 7
base_mb1_features = np.arange(n_features) + 1
time_mb1_features = 10 * np.arange(n_timesteps)[:, None] + base_mb1_features[None]
base_mb2_features = np.arange(n_features) + 5 + 1
time_mb2_features = 10 * np.arange(n_timesteps)[:, None] + base_mb2_features[None]
data = np.concatenate((time_mb1_features[:, None], time_mb2_features[:, None]), axis=1)
time_len = data.shape[0]
minibatch_size = data.shape[1]
features = data.shape[2]
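# quick sanity check: these values follow directly from the construction above
assert data.shape == (n_timesteps, minibatch_size, n_features)
assert data[0, 0, 0] == 1 and data[1, 0, 0] == 11
assert data[0, 1, 0] == 6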
# for each sequence in the minibatch, we will make an autoregressive mask and equivalent targets for each step
# the new assumption is that the "feature" dimension is the one for autoregression
# which should be more natural compared to the previous example
# 1, 2, 3, 4 -> in: 0, 0, 0, 0 ; target: 1
# 1, 2, 3, 4 -> in: 1, 0, 0, 0 ; target: 2
# 1, 2, 3, 4 -> in: 1, 2, 0, 0 ; target: 3
# 1, 2, 3, 4 -> in: 1, 2, 3, 0 ; target: 4
# accomplished using np.triu with k=1
# [[1, 1, 1]     [[0, 1, 1]
#  [1, 1, 1]  ->  [0, 0, 1]
#  [1, 1, 1]]     [0, 0, 0]]
mask_array = np.triu(np.ones((features, features)), k=1)
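# sanity check: column j of this mask keeps exactly the first j features,
# matching the desired autoregressive inputs listed above
assert np.array_equal(mask_array[:, 0], [0, 0, 0, 0])
assert np.array_equal(mask_array[:, 3], [1, 1, 1, 0])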
# now we have a (4, 4) mask to broadcast against the (7, 2, 4) data
# (each desired input above is a column of this mask times the feature vector)
#
# results in 4, 4, 7, 2 which is basically features, "feature time" (autoregressive), time, minibatch
masked_and_copied = mask_array[:, :, None, None] * data.transpose(2, 0, 1)[:, None]
# there are still extra 0s at the end of each masked input, but we leave them alone for now
# now we transpose it to
# "feature_time", time, minibatch, features
masked_and_copied = masked_and_copied.transpose(1, 2, 3, 0)
# at the 0th timestep, 0th minibatch element, looping over the "autoregressive" axis (axis 0) we see
# masked_and_copied[0, 0, 0] = [0, 0, 0, 0]
# masked_and_copied[1, 0, 0] = [1, 0, 0, 0]
# masked_and_copied[2, 0, 0] = [1, 2, 0, 0]
# masked_and_copied[3, 0, 0] = [1, 2, 3, 0]
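# the same statements, as executable checks
assert masked_and_copied.shape == (n_features, n_timesteps, minibatch_size, n_features)
assert np.array_equal(masked_and_copied[0, 0, 0], [0, 0, 0, 0])
assert np.array_equal(masked_and_copied[1, 0, 0], [1, 0, 0, 0])
assert np.array_equal(masked_and_copied[2, 0, 0], [1, 2, 0, 0])
assert np.array_equal(masked_and_copied[3, 0, 0], [1, 2, 3, 0])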
ar_data = masked_and_copied.copy()
ar_data_shape = ar_data.shape
# make the targets
ar_targets = data.transpose(2, 0, 1)[..., None]
ar_targets_shape = ar_targets.shape
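# targets line up with the masked inputs: autoregressive step j predicts feature j
assert ar_targets.shape == (n_features, n_timesteps, minibatch_size, 1)
assert ar_targets[0, 0, 0, 0] == 1 and ar_targets[3, 0, 0, 0] == 4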
# we rearrange (and un-rearrange) with this function to get the "normal" training layout of minibatch, features
def ar_minibatch_conversion(arr, original_shape=None, inverse=False):
    # expects
    # "feature_time", time, minibatch, features
    # (we keep the all-zero first step; it could be skipped if unwanted)
    if not inverse:
        # ar, t, mb, f -> t, mb * ar, f
        # mb-major ordering keeps each element's masked steps contiguous,
        # so we can later do a reshape/structured sum to average them
        shp = arr.shape
        arr = arr.transpose(1, 2, 0, 3)
        arr = arr.reshape(shp[1], shp[2] * shp[0], shp[3])
        # now we have t, mb * ar, f
        # *each* of these can be fed through a network, effectively running all the steps in parallel (assuming no hidden state is passed)
        return arr
    else:
        # invert the forward procedure, original_shape is REQUIRED here
        shp = original_shape
        arr = arr.reshape(shp[1], shp[2], shp[0], shp[3])
        arr = arr.transpose(2, 0, 1, 3)
        return arr
flat_data = ar_minibatch_conversion(ar_data)
orig_data = ar_minibatch_conversion(flat_data, ar_data_shape, inverse=True)
flat_targets = ar_minibatch_conversion(ar_targets)
orig_targets = ar_minibatch_conversion(flat_targets, ar_targets_shape, inverse=True)
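# the conversion is lossless: forward then inverse is an exact round trip
assert np.array_equal(orig_data, ar_data)
assert np.array_equal(orig_targets, ar_targets)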
# normally you would do something like step_preds = f(flat_data)
# per_step_loss = (step_preds - flat_targets) ** 2
# loss = per_step_loss.sum() or loss = per_step_loss.mean()
# loss.backward()
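# illustrative only: a random linear map standing in for a real network f,
# just to show the shapes of the loss computation sketched above
rng = np.random.RandomState(0)
W = rng.randn(n_features, 1)
step_preds = flat_data.dot(W)  # (time, mb * ar, 1)
per_step_loss = (step_preds - flat_targets) ** 2
loss = per_step_loss.mean()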
for i in range(n_features):
    print("flat")
    # show that the flat axis is blocked per minibatch element:
    # indices 0:4 are the autoregressive steps of the first element
    print(flat_data[:, i])
    print(flat_targets[:, i])
# this should match the previous
# this setup is available if you wanted to do more structured losses than just an average per step
for i in range(n_features):
    print("orig")
    print(orig_data[i, :, 0])
    print(orig_targets[i, :, 0])
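# sketch of the structured-loss idea mentioned above: map the per-step losses
# back to (ar, time, mb, 1) and weight the autoregressive steps
# (the weights here are arbitrary, purely for illustration)
per_step_loss_orig = ar_minibatch_conversion(per_step_loss, ar_targets_shape, inverse=True)
ar_weights = np.linspace(0.5, 1.0, n_features)[:, None, None, None]
structured_loss = (ar_weights * per_step_loss_orig).mean()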