Skip to content

Instantly share code, notes, and snippets.

@kevkid
Last active September 9, 2020 02:44
Show Gist options
  • Save kevkid/760bf3fde79b2bc55c9f6543a0343234 to your computer and use it in GitHub Desktop.
Save kevkid/760bf3fde79b2bc55c9f6543a0343234 to your computer and use it in GitHub Desktop.
Simple multimodal compact bilinear pooling
import torch
import numpy as np
# Demo configuration: the sketch size and two toy integer feature vectors
# (named for the image and text modalities).
out_dim = 3
x_img = torch.tensor([8, 1, 3, 0, 2])
x_txt = torch.tensor([0, 7, 10, 1, 1])

# Seed both RNGs so the random bucket/sign draws made later are repeatable.
seed = 2
np.random.seed(seed)
torch.manual_seed(seed)
def psi(x, out_dim):
    """Project a 1-D tensor into ``out_dim`` buckets with a random count sketch.

    This is the Ψ function of compact bilinear pooling: each input position
    ``i`` gets a random bucket ``h[i]`` in ``[0, out_dim)`` and a random sign
    ``s[i]`` in ``{-1, +1}``, and ``s[i] * x[i]`` is accumulated into
    ``y[h[i]]``.

    ``h`` and ``s`` are drawn from torch's global RNG on every call (``h``
    first, then ``s``), so two calls use different hashes unless the RNG is
    re-seeded with ``torch.manual_seed`` in between.

    Args:
        x: 1-D tensor to sketch.
        out_dim: Number of output buckets.

    Returns:
        Tensor of shape ``(out_dim,)`` created by ``torch.zeros`` (default
        dtype) holding the signed bucket sums.

    Raises:
        ValueError: If ``x`` is not 1-D.
    """
    if x.dim() != 1:
        raise ValueError(f"psi expects a 1-D tensor, got {x.dim()} dimension(s)")

    in_dim = x.size()
    y = torch.zeros(out_dim)
    # h has the input's size and holds indices into the output,
    # e.g. out_dim = 3 so h can look like: [1, 1, 0, 2, 1]
    h = torch.empty(in_dim, dtype=torch.long).random_(0, out_dim)
    # s has the input's size and is filled with -1s and 1s
    s = 2 * torch.empty(in_dim, dtype=torch.long).random_(0, 2) - 1
    # Scatter-add every signed entry into its bucket in one call; the cast is
    # needed because index_add_ requires the source dtype to match y's.
    y.index_add_(0, h, (s * x).to(y.dtype))
    return y
# Sketch each modality. The image sketch is computed first; the order matters
# because every psi call draws fresh random values.
y_img = psi(x_img, out_dim)  # v1'
y_txt = psi(x_txt, out_dim)  # v2'
print(y_img)
print(y_txt)

# Multiply the two sketches element-wise in the frequency domain, then
# transform back: phi = IFFT(FFT(v1') * FFT(v2')).
y_img_prime = np.fft.fft(y_img)  # FFT(v1')
y_txt_prime = np.fft.fft(y_txt)  # FFT(v2')
phi = np.fft.ifft(y_img_prime * y_txt_prime)
print(phi)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment