anna-hope · April 20, 2018 16:55
diff --git a/vdcnn.py b/vdcnn.py
 # Very Deep Convolutional Network (http://aclweb.org/anthology/E17-1104)
 # PyTorch implementation by Anton Melnikov

 from typing import Iterable, Tuple

 import torch
 from torch import nn
 import torch.nn.functional as F
 from torch.autograd import Variable

 import numpy as np


 class DynamicKMaxPooling(nn.Module):
    """Dynamic k-max pooling along dimension 2 of the input.

    The pooled length shrinks linearly with the index of the current
    convolutional layer and never drops below ``k_top``.

    This is not used in the rest of the module, but left for reference.
    """

    def __init__(self, k_top, L):
        """Store the pooling hyper-parameters.

        Args:
            k_top: fixed pooling parameter for the topmost convolutional
                layer (lower bound of the pooled length).
            L: total number of convolutional layers in the network.
        """
        super().__init__()
        self.k_top = k_top
        self.L = L

    def forward(self, X, l):
        """Max-pool ``X`` down to ``max(k_top, ceil((L - l) / L * s))``.

        Args:
            X: input tensor; ``s`` is read from its size along dimension 2.
            l: index of the current convolutional layer.

        Returns:
            The result of ``F.adaptive_max_pool1d`` with that output size.
        """
        s = X.size(2)
        k_ll = ((self.L - l) / self.L) * s
        # np.ceil yields a NumPy float scalar and, depending on the NumPy
        # version, round() of it may not be a plain Python int; coerce
        # explicitly because the pooling size must be an integer.
        k_l = int(round(max(self.k_top, np.ceil(k_ll))))
        return F.adaptive_max_pool1d(X, k_l)
    
    
 class ConvBlock(nn.Module):
    """Two stacked Conv1d -> BatchNorm1d -> ReLU stages.

    Only the first convolution uses the caller-supplied ``stride``; the
    second one always uses stride 1 (p. 1111 (i) of the paper).
    """

    def __init__(self, in_channels, out_channels, *,
                 kernel_size, stride=1):
        """Build both stages and initialise the convolution weights.

        Args:
            in_channels: channels of the tensor fed to the first convolution.
            out_channels: channels produced by both convolutions.
            kernel_size: kernel width shared by both convolutions.
            stride: stride of the first convolution only.
        """
        super().__init__()
        # "same" padding: with an odd kernel and stride 1 the temporal
        # dimension length is preserved
        padding = (kernel_size - 1) // 2

        self.conv_1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                                stride=stride, padding=padding)
        self.batch_norm_1 = nn.BatchNorm1d(out_channels)
        self.relu_1 = nn.ReLU()

        # only the first convolution's stride should be changed
        # p. 1111 (i)
        self.conv_2 = nn.Conv1d(out_channels, out_channels,
                                kernel_size, stride=1, padding=padding)
        self.batch_norm_2 = nn.BatchNorm1d(out_channels)
        self.relu_2 = nn.ReLU()
        self.init_conv()

    def init_conv(self):
        """Re-initialise both convolution weights with Kaiming-uniform init."""
        # kaiming_uniform (without the trailing underscore) is the deprecated
        # alias; the in-place variant is applied to the parameter itself.
        nn.init.kaiming_uniform_(self.conv_1.weight)
        nn.init.kaiming_uniform_(self.conv_2.weight)

    def forward(self, X):
        """Apply conv -> batch norm -> ReLU twice and return the result."""
        X = self.relu_1(self.batch_norm_1(self.conv_1(X)))
        X = self.relu_2(self.batch_norm_2(self.conv_2(X)))
        return X
    
 class Flatten(nn.Module):
    """Module wrapper around a reshape from 3d to 2d.

    Wrapping the reshape in a module registers it among the network's
    children, so it is applied automatically to the intermediate tensor
    when the children are iterated over.
    """

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Merge dimensions 1 and 2 of ``X`` into a single trailing dimension."""
        flat_width = X.size(1) * X.size(2)
        return X.view(-1, flat_width)

    def __repr__(self):
        return f'{self.__class__.__name__}()'
   

 class VDCNN(nn.Module):
    """Very Deep Convolutional Network classifier.

    Sub-modules are registered in exactly the order in which they must be
    applied, because ``forward`` simply walks ``self.children()``:
    embedding, input convolution, the groups of conv blocks (each group
    followed by a max pooling), k-max pooling, flatten and the three
    fully connected layers.
    """

    def __init__(self, *, conv_blocks: Iterable[Tuple[int, int]],
                 vocab_size: int, n_classes: int,
                 input_conv_out_channels=64,
                 kernel_size=3, stride=1, embedding_dim=16,
                 padding_idx=None,
                 k_top=8):
        """Assemble the network.

        Args:
            conv_blocks: ``(n_channels, depth)`` pairs; each pair adds
                ``depth`` ConvBlocks producing ``n_channels`` channels,
                followed by one ``MaxPool1d(3, 2)``.
            vocab_size: number of rows of the embedding table.
            n_classes: size of the output layer.
            input_conv_out_channels: channels produced by the first
                convolution applied to the embedded input.
            kernel_size: kernel width of the input convolution and of
                every ConvBlock.
            stride: stride handed to every ConvBlock.
            embedding_dim: dimensionality of the embeddings.
            padding_idx: forwarded to ``nn.Embedding``.
            k_top: output length of the final adaptive max pooling.
        """
        super().__init__()

        self.embed = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.input_conv = nn.Conv1d(embedding_dim, input_conv_out_channels, kernel_size)

        conv_block_in_channels = input_conv_out_channels
        for n, (n_channels, depth) in enumerate(conv_blocks):
            for d in range(depth):
                conv_block = ConvBlock(conv_block_in_channels, n_channels,
                                       kernel_size=kernel_size, stride=stride)
                self.add_module(f'conv_block_{n}_{d}', conv_block)
                # the next block consumes what this one produced
                conv_block_in_channels = n_channels

            self.add_module(f'pooling_{n}', nn.MaxPool1d(3, 2))

        self.k_max_pooling = nn.AdaptiveMaxPool1d(k_top)
        self.flatten = Flatten()

        # the input dimension of the fully connected (fc) layer
        # = n_channels * k_top
        # e.g. if the last conv layer produces 512 channels
        # and k is 8, the input dim of fc will be 4096
        self.fc1 = nn.Linear(conv_block_in_channels*k_top, 2048)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(2048, 2048)
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(2048, n_classes)

    def forward(self, X):
        """Embed ``X`` and run it through every remaining child in order.

        Args:
            X: index tensor for the embedding; the embedded result is
                permuted with three axes, so ``X`` is expected to be 2d.

        Returns:
            The output of the final linear layer (no softmax is applied).
        """
        # move the embedding dimension to the channel position (dim 1)
        X = self.embed(X).permute(0, 2, 1)
        for child in self.children():
            # the embedding has already been applied above
            if child is self.embed:
                continue
            X = child(X)
        return X
	# Very Deep Convolutional Network (http://aclweb.org/anthology/E17-1104)
	# PyTorch implementation by Anton Melnikov

	from typing import Iterable, Tuple

	import torch
	from torch import nn
	import torch.nn.functional as F
	from torch.autograd import Variable

	import numpy as np


	class DynamicKMaxPooling(nn.Module):
	    """Dynamic k-max pooling along dimension 2 of the input.

	    The pooled length shrinks linearly with the index of the current
	    convolutional layer and never drops below ``k_top``.

	    This is not used in the rest of the module, but left for reference.
	    """

	    def __init__(self, k_top, L):
	        """Store the pooling hyper-parameters.

	        Args:
	            k_top: fixed pooling parameter for the topmost convolutional
	                layer (lower bound of the pooled length).
	            L: total number of convolutional layers in the network.
	        """
	        super().__init__()
	        self.k_top = k_top
	        self.L = L

	    def forward(self, X, l):
	        """Max-pool ``X`` down to ``max(k_top, ceil((L - l) / L * s))``.

	        Args:
	            X: input tensor; ``s`` is read from its size along dimension 2.
	            l: index of the current convolutional layer.

	        Returns:
	            The result of ``F.adaptive_max_pool1d`` with that output size.
	        """
	        s = X.size(2)
	        k_ll = ((self.L - l) / self.L) * s
	        # np.ceil yields a NumPy float scalar and, depending on the NumPy
	        # version, round() of it may not be a plain Python int; coerce
	        # explicitly because the pooling size must be an integer.
	        k_l = int(round(max(self.k_top, np.ceil(k_ll))))
	        return F.adaptive_max_pool1d(X, k_l)


	class ConvBlock(nn.Module):
	    """Two stacked Conv1d -> BatchNorm1d -> ReLU stages.

	    Only the first convolution uses the caller-supplied ``stride``; the
	    second one always uses stride 1 (p. 1111 (i) of the paper).
	    """

	    def __init__(self, in_channels, out_channels, *,
	                 kernel_size, stride=1):
	        """Build both stages and initialise the convolution weights.

	        Args:
	            in_channels: channels of the tensor fed to the first convolution.
	            out_channels: channels produced by both convolutions.
	            kernel_size: kernel width shared by both convolutions.
	            stride: stride of the first convolution only.
	        """
	        super().__init__()
	        # "same" padding: with an odd kernel and stride 1 the temporal
	        # dimension length is preserved
	        padding = (kernel_size - 1) // 2

	        self.conv_1 = nn.Conv1d(in_channels, out_channels, kernel_size,
	                                stride=stride, padding=padding)
	        self.batch_norm_1 = nn.BatchNorm1d(out_channels)
	        self.relu_1 = nn.ReLU()

	        # only the first convolution's stride should be changed
	        # p. 1111 (i)
	        self.conv_2 = nn.Conv1d(out_channels, out_channels,
	                                kernel_size, stride=1, padding=padding)
	        self.batch_norm_2 = nn.BatchNorm1d(out_channels)
	        self.relu_2 = nn.ReLU()
	        self.init_conv()

	    def init_conv(self):
	        """Re-initialise both convolution weights with Kaiming-uniform init."""
	        # kaiming_uniform (without the trailing underscore) is the deprecated
	        # alias; the in-place variant is applied to the parameter itself.
	        nn.init.kaiming_uniform_(self.conv_1.weight)
	        nn.init.kaiming_uniform_(self.conv_2.weight)

	    def forward(self, X):
	        """Apply conv -> batch norm -> ReLU twice and return the result."""
	        X = self.relu_1(self.batch_norm_1(self.conv_1(X)))
	        X = self.relu_2(self.batch_norm_2(self.conv_2(X)))
	        return X

	class Flatten(nn.Module):
	    """Module wrapper around a reshape from 3d to 2d.

	    Wrapping the reshape in a module registers it among the network's
	    children, so it is applied automatically to the intermediate tensor
	    when the children are iterated over.
	    """

	    def __init__(self):
	        super().__init__()

	    def forward(self, X):
	        """Merge dimensions 1 and 2 of ``X`` into a single trailing dimension."""
	        # X is a 3d tensor that we are flattening to 2d
	        return X.view(-1, X.size()[1] * X.size()[2])

	    def __repr__(self):
	        return self.__class__.__name__ + '()'


	class VDCNN(nn.Module):
	    """Very Deep Convolutional Network classifier.

	    Sub-modules are registered in exactly the order in which they must be
	    applied, because ``forward`` simply walks ``self.children()``:
	    embedding, input convolution, the groups of conv blocks (each group
	    followed by a max pooling), k-max pooling, flatten and the three
	    fully connected layers.
	    """

	    def __init__(self, *, conv_blocks: Iterable[Tuple[int, int]],
	                 vocab_size: int, n_classes: int,
	                 input_conv_out_channels=64,
	                 kernel_size=3, stride=1, embedding_dim=16,
	                 padding_idx=None,
	                 k_top=8):
	        """Assemble the network.

	        Args:
	            conv_blocks: ``(n_channels, depth)`` pairs; each pair adds
	                ``depth`` ConvBlocks producing ``n_channels`` channels,
	                followed by one ``MaxPool1d(3, 2)``.
	            vocab_size: number of rows of the embedding table.
	            n_classes: size of the output layer.
	            input_conv_out_channels: channels produced by the first
	                convolution applied to the embedded input.
	            kernel_size: kernel width of the input convolution and of
	                every ConvBlock.
	            stride: stride handed to every ConvBlock.
	            embedding_dim: dimensionality of the embeddings.
	            padding_idx: forwarded to ``nn.Embedding``.
	            k_top: output length of the final adaptive max pooling.
	        """
	        super().__init__()

	        self.embed = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
	        self.input_conv = nn.Conv1d(embedding_dim, input_conv_out_channels, kernel_size)

	        conv_block_in_channels = input_conv_out_channels
	        for n, (n_channels, depth) in enumerate(conv_blocks):
	            for d in range(depth):
	                conv_block = ConvBlock(conv_block_in_channels, n_channels,
	                                       kernel_size=kernel_size, stride=stride)
	                self.add_module(f'conv_block_{n}_{d}', conv_block)
	                # the next block consumes what this one produced
	                conv_block_in_channels = n_channels

	            self.add_module(f'pooling_{n}', nn.MaxPool1d(3, 2))

	        self.k_max_pooling = nn.AdaptiveMaxPool1d(k_top)
	        self.flatten = Flatten()

	        # the input dimension of the fully connected (fc) layer
	        # = n_channels * k_top
	        # e.g. if the last conv layer produces 512 channels
	        # and k is 8, the input dim of fc will be 4096
	        self.fc1 = nn.Linear(conv_block_in_channels*k_top, 2048)
	        self.relu1 = nn.ReLU()
	        self.fc2 = nn.Linear(2048, 2048)
	        self.relu2 = nn.ReLU()
	        self.out = nn.Linear(2048, n_classes)

	    def forward(self, X):
	        """Embed ``X`` and run it through every remaining child in order.

	        Args:
	            X: index tensor for the embedding; the embedded result is
	                permuted with three axes, so ``X`` is expected to be 2d.

	        Returns:
	            The output of the final linear layer (no softmax is applied).
	        """
	        # move the embedding dimension to the channel position (dim 1)
	        X = self.embed(X).permute(0, 2, 1)
	        for child in self.children():
	            # the embedding has already been applied above
	            if child is self.embed:
	                continue
	            X = child(X)
	        return X