Last active
September 9, 2020 02:44
-
-
Save kevkid/760bf3fde79b2bc55c9f6543a0343234 to your computer and use it in GitHub Desktop.
Simple multimodal compact bilinear pooling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import torch
# Seed both RNGs so the random hash/sign draws made later are reproducible.
seed = 2
torch.manual_seed(seed)
np.random.seed(seed)

# Toy "image" and "text" feature vectors (same length) used by the demo.
x_img = torch.tensor([8, 1, 3, 0, 2])
x_txt = torch.tensor([0, 7, 10, 1, 1])

# Dimensionality of the sketched (projected) vectors.
out_dim = 3
def psi(x, out_dim):
    """Project a 1-D tensor down to ``out_dim`` entries with a Count Sketch.

    This is the Ψ function of the compact bilinear pooling paper: every input
    position ``i`` is assigned a random output bin ``h[i]`` in
    ``[0, out_dim)`` and a random sign ``s[i]`` in ``{-1, +1}``, and the
    output is ``y[j] = sum(s[i] * x[i] for all i with h[i] == j)``.

    ``h`` and ``s`` are drawn afresh from torch's global RNG on every call,
    so seed it with ``torch.manual_seed`` for reproducible results.

    Args:
        x: 1-D tensor to sketch.
        out_dim: Number of output bins.

    Returns:
        A 1-D tensor of length ``out_dim`` in torch's default float dtype.

    Raises:
        ValueError: If ``x`` is not 1-dimensional.
    """
    if x.dim() != 1:
        raise ValueError(
            "psi expects a 1-D tensor, got shape {}".format(tuple(x.shape))
        )
    n = x.shape[0]
    # h has one entry per input element holding the index of its output bin,
    # e.g. out_dim = 3 so h can look like: [1, 1, 0, 2, 1]
    h = torch.empty(n, dtype=torch.long).random_(0, out_dim)
    # s has one entry per input element and is filled with -1s and 1s
    s = 2 * torch.empty(n, dtype=torch.long).random_(0, 2) - 1
    y = torch.zeros(out_dim)
    # index_add_ accumulates when several inputs land in the same bin, which
    # is exactly the per-element `y[h[i]] += s[i] * x[i]` update.
    y.index_add_(0, h, (s * x).to(y.dtype))
    return y
# Count-sketch each modality independently.
y_img = psi(x_img, out_dim)  # v1'
y_txt = psi(x_txt, out_dim)  # v2'
print(y_img)
print(y_txt)

# Multiplying the spectra element-wise and transforming back gives the
# circular convolution of the two sketches (convolution theorem).
y_img_prime = np.fft.fft(y_img.numpy())  # FFT(v1')
y_txt_prime = np.fft.fft(y_txt.numpy())  # FFT(v2')
# Both sketches are real, so their circular convolution is real as well; the
# imaginary part returned by ifft is only floating-point round-off.
phi = np.fft.ifft(y_img_prime * y_txt_prime).real
print(phi)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment