Last active
April 20, 2018 16:55
-
-
Save anna-hope/4fda5256c1097bc8ee2ceda294533153 to your computer and use it in GitHub Desktop.
VDCNN (Conneau et al.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Very Deep Convolutional Network (VDCNN; Conneau et al.)
# Paper: http://aclweb.org/anthology/E17-1104
# PyTorch implementation by Anton Melnikov
from typing import Iterable, Tuple | |
import torch | |
from torch import nn | |
import torch.nn.functional as F | |
from torch.autograd import Variable | |
import numpy as np | |
class DynamicKMaxPooling(nn.Module):
    """Pool a sequence down to a layer-dependent length ``k_l``.

    This is not used in the rest of the module, but left for reference.

    The target length is ``k_l = max(k_top, ceil((L - l) / L * s))`` where
    ``s`` is the size of dimension 2 of the input.

    NOTE: pooling is delegated to ``F.adaptive_max_pool1d``, which takes the
    maximum over ``k_l`` adaptive windows; it does not select the ``k_l``
    largest activations of the whole sequence.

    Args:
        k_top: the fixed pooling parameter for the topmost convolutional
            layer; also the lower bound of every ``k_l``.
        L: the total number of convolutional layers in the network.
    """

    def __init__(self, k_top, L):
        super().__init__()
        self.k_top = k_top
        self.L = L

    def forward(self, X, l):
        """Pool ``X`` along its last dimension for convolutional layer ``l``.

        Args:
            X: the input sequence; its dimension 2 is treated as the
                sequence length.
            l: index of the current convolutional layer.

        Returns:
            ``X`` pooled to length ``k_l`` along dimension 2.
        """
        seq_len = X.size(2)
        scaled_len = ((self.L - l) / self.L) * seq_len
        # np.ceil yields a NumPy scalar; coerce to a built-in int so the
        # pooling op always receives a plain Python integer output size.
        k_l = int(round(max(self.k_top, np.ceil(scaled_len))))
        return F.adaptive_max_pool1d(X, k_l)
class ConvBlock(nn.Module):
    """Two stacked ``Conv1d -> BatchNorm1d -> ReLU`` stages.

    Args:
        in_channels: number of channels of the input to the first
            convolution.
        out_channels: number of channels produced by both convolutions.
        kernel_size: kernel size shared by both convolutions (keyword-only).
        stride: stride of the first convolution only; the second
            convolution always uses stride 1 (keyword-only).
    """

    def __init__(self, in_channels, out_channels, *,
                 kernel_size, stride=1):
        super().__init__()

        # "same" padding: with an odd kernel size and stride 1 this
        # preserves the original temporal dimension length
        padding = (kernel_size - 1) // 2

        self.conv_1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                                stride=stride, padding=padding)
        self.batch_norm_1 = nn.BatchNorm1d(out_channels)
        self.relu_1 = nn.ReLU()

        # only the first block's stride should be changed
        # p. 1111 (i)
        self.conv_2 = nn.Conv1d(out_channels, out_channels,
                                kernel_size, stride=1, padding=padding)
        self.batch_norm_2 = nn.BatchNorm1d(out_channels)
        self.relu_2 = nn.ReLU()

        self.init_conv()

    def init_conv(self):
        """Re-initialise both convolution weights with Kaiming uniform."""
        # kaiming_uniform_ is the in-place, non-deprecated spelling; it
        # operates on the parameter directly, so `.data` is not needed.
        nn.init.kaiming_uniform_(self.conv_1.weight)
        nn.init.kaiming_uniform_(self.conv_2.weight)

    def forward(self, X):
        """Apply conv/batch-norm/ReLU twice and return the result."""
        X = self.relu_1(self.batch_norm_1(self.conv_1(X)))
        X = self.relu_2(self.batch_norm_2(self.conv_2(X)))
        return X
class Flatten(nn.Module):
    """Flatten a 3d tensor to 2d by merging its last two dimensions.

    We need to wrap this into a module so it will be added to the
    network's children and can be applied automatically to the
    intermediate tensors when iterating over the children.
    """

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` reshaped from ``(d0, d1, d2)`` to ``(d0, d1 * d2)``.

        ``reshape`` with explicit sizes is used instead of ``view(-1, ...)``
        so that non-contiguous inputs (e.g. the result of ``permute``) and
        inputs whose first dimension is 0 are handled as well.
        """
        return X.reshape(X.size(0), X.size(1) * X.size(2))

    def __repr__(self):
        return self.__class__.__name__ + '()'
class VDCNN(nn.Module):
    """Very Deep Convolutional Network classifier.

    Child modules are registered in the order in which they must be
    applied: embedding, input convolution, the groups of conv blocks
    (each group followed by a max-pooling layer), adaptive max pooling to
    ``k_top`` positions, flattening, and three linear layers with ReLUs
    in between. ``forward`` relies on that registration order.

    Args:
        conv_blocks: ``(n_channels, depth)`` pairs; each pair adds ``depth``
            ``ConvBlock`` modules producing ``n_channels`` channels,
            followed by one ``nn.MaxPool1d(3, 2)``.
        vocab_size: number of rows of the embedding table.
        n_classes: output size of the final linear layer.
        input_conv_out_channels: output channels of the first convolution.
        kernel_size: kernel size of the input convolution and of every
            conv block.
        stride: stride handed to every conv block.
        embedding_dim: size of each embedding vector.
        padding_idx: forwarded to ``nn.Embedding``.
        k_top: output length of the adaptive max pooling placed before
            the fully connected layers.
    """

    def __init__(self, *, conv_blocks: Iterable[Tuple[int, int]],
                 vocab_size: int, n_classes: int,
                 input_conv_out_channels=64,
                 kernel_size=3, stride=1, embedding_dim=16,
                 padding_idx=None,
                 k_top=8):
        super().__init__()

        self.embed = nn.Embedding(vocab_size, embedding_dim,
                                  padding_idx=padding_idx)
        self.input_conv = nn.Conv1d(embedding_dim, input_conv_out_channels,
                                    kernel_size)

        # channel count flowing into the next module to be created
        channels = input_conv_out_channels
        for group_idx, (group_channels, depth) in enumerate(conv_blocks):
            for block_idx in range(depth):
                self.add_module(
                    f'conv_block_{group_idx}_{block_idx}',
                    ConvBlock(channels, group_channels,
                              kernel_size=kernel_size, stride=stride),
                )
                channels = group_channels
            # one pooling layer closes every group of conv blocks
            self.add_module(f'pooling_{group_idx}', nn.MaxPool1d(3, 2))

        self.k_max_pooling = nn.AdaptiveMaxPool1d(k_top)
        self.flatten = Flatten()

        # the fully connected (fc) layers see channels * k_top features,
        # e.g. 512 channels and k_top = 8 give an input dim of 4096
        self.fc1 = nn.Linear(channels * k_top, 2048)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(2048, 2048)
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(2048, n_classes)

    def forward(self, X):
        """Run the network on a batch of index sequences.

        Args:
            X: tensor of embedding indices; assumed to be 2-D
                ``(batch, sequence)`` since the embedded result is permuted
                as a 3-D tensor — confirm against the caller.

        Returns:
            The output of the last linear layer (no softmax is applied
            here).
        """
        # move the embedding dimension in front of the sequence dimension,
        # as expected by Conv1d
        X = self.embed(X).permute(0, 2, 1)

        remaining = iter(self.children())
        # the first child is the embedding, which we've already applied
        next(remaining)
        for layer in remaining:
            X = layer(X)
        return X
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment