This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from torch.autograd import Variable | |
class CNN_Text(nn.Module): | |
def __init__(self): | |
super(CNN_Text, self).__init__() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://www.kaggle.com/yekenot/2dcnn-textclassifier | |
def model_cnn(embedding_matrix): | |
filter_sizes = [1,2,3,5] | |
num_filters = 36 | |
inp = Input(shape=(maxlen,)) | |
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp) | |
x = Reshape((maxlen, embed_size, 1))(x) | |
maxpool_pool = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class BiLSTM(nn.Module): | |
def __init__(self): | |
super(BiLSTM, self).__init__() | |
self.hidden_size = 64 | |
drp = 0.1 | |
self.embedding = nn.Embedding(max_features, embed_size) | |
self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32)) | |
self.embedding.weight.requires_grad = False | |
self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# BiDirectional LSTM | |
def model_lstm_du(embedding_matrix): | |
inp = Input(shape=(maxlen,)) | |
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp) | |
''' | |
Here 64 is the size (dimension) of the hidden state vector, which is also the size of the output vector at each step. Setting return_sequences=True means we keep the output for every step of the sequence, not just the last one. So what is the dimension of the output of this layer? | |
70 (maxlen) steps x 128 features (64 * 2, because the forward and backward outputs are concatenated) | |
CuDNNLSTM is a fast implementation of the LSTM layer in Keras that runs only on GPU | |
''' | |
x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Attention(nn.Module): | |
def __init__(self, feature_dim, step_dim, bias=True, **kwargs): | |
super(Attention, self).__init__(**kwargs) | |
self.supports_masking = True | |
self.bias = bias | |
self.feature_dim = feature_dim | |
self.step_dim = step_dim | |
self.features_dim = 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dot_product(x, kernel): | |
""" | |
Wrapper for dot product operation, in order to be compatible with both | |
Theano and Tensorflow | |
Args: | |
x (): input | |
kernel (): weights | |
Returns: | |
""" | |
if K.backend() == 'tensorflow': |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Language-model data: test_df is used as the validation set for the language model | |
data_lm = TextLMDataBunch.from_df(path = "",train_df= train_df ,valid_df = test_df) | |
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Learning with discriminative fine-tuning: train one cycle at lr 1e-2, then unfreeze and train one more cycle at lr 1e-3 | |
learn.fit_one_cycle(1, 1e-2) | |
learn.unfreeze() | |
learn.fit_one_cycle(1, 1e-3) | |
# Save encoder Object | |
learn.save_encoder('ft_enc') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Creating Classification Data | |
data_clas = TextClasDataBunch.from_df(path ="", train_df=train, valid_df =valid, test_df=test_df, vocab=data_lm.train_ds.vocab, bs=32,label_cols = 'target') | |
# Creating Classifier Object | |
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5) | |
# Add weights of finetuned Language model | |
learn.load_encoder('ft_enc') | |
# Fitting Classifier Object | |
learn.fit_one_cycle(1, 1e-2) | |
# Fitting Classifier Object after freezing all but last 2 layers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import requests | |
import time | |
import argparse | |
import os | |
import json | |
from requests.compat import urljoin | |
class BotHandler(object): |