Created
March 2, 2024 21:23
-
-
Save wolfecameron/f9cb6645dc87a165ce3a7fae980610a4 to your computer and use it in GitHub Desktop.
Exploding activations from repeated matrix multiplications.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Exploding activations from repeated matrix multiplications.
#
# A random vector is multiplied by the same random square matrix `nlayers`
# times. With `normalize = False` nothing constrains the scale of the
# intermediate result between multiplications; with `normalize = True` the
# vector is standardized (mean subtracted, divided by its standard deviation)
# before every multiplication.
import torch

# experiment settings
d = 5  # length of the vector; the weight matrix is d x d
nlayers = 100  # number of times the vector is multiplied by the matrix
normalize = False  # set True to use normalization

# create vector with random entries in [-1, 1)
# (torch.rand samples uniformly from [0, 1), which is shifted and rescaled)
input_vector = (torch.rand(d) - 0.5) * 2.0

# create matrix with random entries in [-1, 1)
# by which we can repeatedly multiply the input vector
weight_matrix = (torch.rand(d, d) - 0.5) * 2.0

output = input_vector
for _ in range(nlayers):
    # optionally perform normalization; it is applied to the input of each
    # multiplication, so the final printed result is the raw matrix product
    if normalize:
        output = (output - torch.mean(output)) / torch.std(output)

    # repeatedly multiply the vector by the matrix
    output = weight_matrix @ output

# observe output values
print(output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment