This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configure a training run: 7 epochs with a learning rate of 0.01. The data
# loaders and sequence length fall back to the defaults declared on Train.
train = Train(lr=0.01, epochs=7)

# Run training on the already-constructed model and keep whatever
# train_model returns for later inspection.
metrics = train.train_model(model)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the first CUDA device when one is available; otherwise stay on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Build the network from the embedding matrix and move it to the chosen
# device in one step (nn.Module.to returns the module itself).
model = Network(
    weight_matrix=embedding_matrix,
    hidden_dim=128,
    seq_len=440,
).to(device)

# Bare expression: shows the module summary when run as a notebook cell.
model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training configuration holder: stores the data loaders, hyperparameters and
# checkpoint locations on the instance.
# NOTE(review): only the start of the class is visible in this excerpt; the
# train_model method used elsewhere in the file is not shown here.
| class Train(): | |
# The train_loader/test_loader defaults are evaluated once, when this def
# statement runs, so they capture the module-level loaders that exist at that
# moment; rebinding those globals later does not change the defaults.
| def __init__(self,epochs,lr=0.01,train_loader=train_loader,test_loader=test_loader,seq_len=440): | |
| self.train_loader = train_loader | |
| self.test_loader = test_loader | |
| self.epochs = epochs | |
| self.lr = lr | |
| self.seq_len = seq_len | |
# Path prefix for checkpoints and full path for the best model; how they are
# used is not visible in this excerpt.
| self.checkpoint_path = 'model1/chkpoint1_' | |
| self.best_model_path = 'model1/bestmodel1.pt' | |
# Hard-coded starting value for the minimum test loss -- presumably the best
# loss from an earlier run that a new model must beat; TODO confirm.
| self.test_loss_min = 3.95275 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# nn.Module subclass whose layer sizes are derived from an embedding matrix.
# NOTE(review): only the beginning of __init__ is visible in this excerpt; the
# remaining layers and the forward pass are not shown.
| class Network(nn.Module): | |
# weight_matrix defaults to the module-level embedding_matrix, captured when
# this def statement is evaluated.
| def __init__(self,weight_matrix=embedding_matrix,hidden_dim=128,seq_len=440): | |
| super().__init__() | |
# Rows of weight_matrix give the vocabulary size, columns the vector width.
| vocab_size = weight_matrix.shape[0] | |
| vector_dim = weight_matrix.shape[1] | |
| self.seq_len = seq_len | |
| #text data | |
| self.hidden_dim = hidden_dim | |
# Embedding layer sized to match weight_matrix. The values of weight_matrix
# are not copied into the layer in the lines visible here -- presumably that
# happens further down; TODO confirm.
| self.embedding = nn.Embedding(vocab_size,vector_dim) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of samples per batch, shared by both loaders.
batch_size = 500

# Pair each NumPy feature matrix with its label values as a tensor dataset.
train_data = TensorDataset(
    torch.from_numpy(final_train),
    torch.from_numpy(y_train.values),
)
test_data = TensorDataset(
    torch.from_numpy(final_test),
    torch.from_numpy(y_test.values),
)

# Batched loaders; both splits are reshuffled on every pass.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the embedding matrix from disk.
embedding_matrix = np.load('embedding_matrix_2.npy')

# Column blocks of the model input, in order: the padded essays, one
# single-column array per categorical feature (reshape(-1, 1) turns each into
# a column), then the numeric features.
x_train_ = [
    essay_train_pad,
    lb_train_school_state.reshape(-1, 1),
    lb_train_teacher_prefix.reshape(-1, 1),
    lb_train_category.reshape(-1, 1),
    lb_train_sub_category.reshape(-1, 1),
    lb_train_grade_category.reshape(-1, 1),
    std_train_numeric,
]
x_test_ = [
    essay_test_pad,
    lb_test_school_state.reshape(-1, 1),
    lb_test_teacher_prefix.reshape(-1, 1),
    lb_test_category.reshape(-1, 1),
    lb_test_sub_category.reshape(-1, 1),
    lb_test_grade_category.reshape(-1, 1),
    std_test_numeric,
]

# Stitch the blocks side by side into one 2-D feature matrix per split.
final_train = np.concatenate(x_train_, axis=1)
final_test = np.concatenate(x_test_, axis=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One 300-wide row per corpus entry plus one extra row; entries without a
# GloVe vector keep their all-zero row.
# NOTE(review): rows are filled starting at index 0 even though the matrix has
# len(corpus) + 1 rows, so the last row always stays zero. If token ids start
# at 1 (0 reserved for padding) the rows would be shifted by one -- confirm
# against the code that produces the token ids.
embedding_matrix = np.zeros((len(corpus) + 1, 300))
for i, word in enumerate(corpus):
    if word not in glove_dict:
        continue
    embedding_vec = glove_dict[word]
    embedding_matrix[i] = embedding_vec

print(embedding_matrix.shape)

# Persist the matrix so later runs can load it instead of rebuilding it.
np.save('embedding_matrix_2.npy', embedding_matrix)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #code from https://stackoverflow.com/questions/37793118/load-pretrained-glove-vectors-in-python | |
# Parse a GloVe text file into a dict that maps each word to a NumPy vector.
# NOTE(review): the function continues beyond this excerpt; its return value
# and any cleanup are not visible here.
| def loadGloveModel(File): | |
| print("Loading Glove Model") | |
# Opened in text mode with the platform default encoding and without a `with`
# block. NOTE(review): confirm the handle is closed in the part of the
# function that is not shown.
| f = open(File,'r') | |
| gloveModel = {} | |
| for line in f: | |
# Each line is whitespace-separated: the first field is the word, every
# remaining field is one float component of its vector.
| splitLines = line.split() | |
| word = splitLines[0] | |
| wordEmbedding = np.array([float(value) for value in splitLines[1:]]) | |
| gloveModel[word] = wordEmbedding |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Length of every entry in essay_train_p.
rev_len = list(map(len, essay_train_p))

# Histogram of the lengths.
pd.Series(rev_len).hist()
plt.show()

# Bare expression: shows the summary statistics when run as a notebook cell.
pd.Series(rev_len).describe()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Left-pad (and truncate to seq_len) integer sequences into a fixed-width 2-D
# integer array with one row per sentence.
# NOTE(review): the excerpt ends inside the loop; the return statement is not
# visible here.
| def padding_(sentences, seq_len): | |
| """ | |
| do padding on left handside | |
| ie, if seq_len = 5 and input is [1,2,3] out will be [0,0,1,2,3] | |
| """ | |
# Start from an all-zero matrix, so zero acts as the padding value.
| features = np.zeros((len(sentences), seq_len),dtype=int) | |
| for ii, review in enumerate(sentences): | |
# Empty sequences are skipped and keep their all-zero row.
| if len(review) != 0: | |
# Right-align the first seq_len items of the review in its row. When the
# review is longer than seq_len, the negative slice start is clamped to the
# row start, so the whole row is overwritten.
| features[ii, -len(review):] = np.array(review)[:seq_len] |
Newer Older