This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configure a 7-epoch run with learning rate 0.01 and train `model`;
# whatever train_model() returns is kept in `metrics`.
train = Train(epochs=7, lr=0.01)
metrics = train.train_model(model)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Build the network from the embedding matrix and move it to that device.
model = Network(weight_matrix=embedding_matrix, hidden_dim=128, seq_len=440)
model = model.to(device)

# Bare expression with no effect in a script; presumably a notebook-cell echo
# that displays the module's layer summary.
model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Train():
    """Hold the data loaders, hyper-parameters and checkpoint paths of a run.

    NOTE(review): this view of the class ends inside ``__init__``; methods
    such as ``train_model`` (called elsewhere in this file) are not shown.
    """

    def __init__(self, epochs, lr=0.01, train_loader=train_loader,
                 test_loader=test_loader, seq_len=440):
        """Store the run configuration on the instance.

        Args:
            epochs: Stored as ``self.epochs``.
            lr: Stored as ``self.lr``.
            train_loader: Stored as ``self.train_loader``.
            test_loader: Stored as ``self.test_loader``.
            seq_len: Stored as ``self.seq_len``.

        The loader defaults are evaluated once, when this ``def`` executes,
        so they capture whatever module-level ``train_loader`` and
        ``test_loader`` exist at that moment; rebinding those globals later
        does not change the defaults.
        """
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.epochs = epochs
        self.lr = lr
        self.seq_len = seq_len
        # Checkpoint file prefix and best-model file path.
        self.checkpoint_path = 'model1/chkpoint1_'
        self.best_model_path = 'model1/bestmodel1.pt'
        # NOTE(review): hard-coded starting value — presumably the best test
        # loss reached by an earlier run; confirm before reusing.
        self.test_loss_min = 3.95275
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Network(nn.Module):
    """Model whose visible part sets up a word-embedding layer.

    NOTE(review): this view of the class ends inside ``__init__``; any
    further layers and the ``forward`` method are not shown here.
    """

    def __init__(self, weight_matrix=embedding_matrix, hidden_dim=128, seq_len=440):
        """Create the embedding layer sized from ``weight_matrix``.

        Args:
            weight_matrix: 2-D array; its first dimension is used as the
                vocabulary size and its second as the embedding width.
                The default is bound to the module-level
                ``embedding_matrix`` when this ``def`` executes.
            hidden_dim: Stored as ``self.hidden_dim``.
            seq_len: Stored as ``self.seq_len``.
        """
        super().__init__()
        vocab_size = weight_matrix.shape[0]
        vector_dim = weight_matrix.shape[1]
        self.seq_len = seq_len
        # text data
        self.hidden_dim = hidden_dim
        # NOTE(review): in the visible lines only the *shape* of
        # weight_matrix is used; its values are not copied into the layer.
        # Confirm the pretrained weights are loaded further down.
        self.embedding = nn.Embedding(vocab_size, vector_dim)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Wrap the feature matrices and the label arrays as paired tensor datasets.
train_data = TensorDataset(
    torch.from_numpy(final_train),
    torch.from_numpy(y_train.values),
)
test_data = TensorDataset(
    torch.from_numpy(final_test),
    torch.from_numpy(y_test.values),
)

# Batch both splits. shuffle=True makes each loader reshuffle its samples
# on every pass over the data.
batch_size = 500
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the embedding matrix saved earlier.
embedding_matrix = np.load('embedding_matrix_2.npy')

# Per-split feature groups, in column order: padded essays, five label-encoded
# columns (each reshaped to a single column), then the standardized numerics.
x_train_ = [
    essay_train_pad,
    lb_train_school_state.reshape(-1, 1),
    lb_train_teacher_prefix.reshape(-1, 1),
    lb_train_category.reshape(-1, 1),
    lb_train_sub_category.reshape(-1, 1),
    lb_train_grade_category.reshape(-1, 1),
    std_train_numeric,
]
x_test_ = [
    essay_test_pad,
    lb_test_school_state.reshape(-1, 1),
    lb_test_teacher_prefix.reshape(-1, 1),
    lb_test_category.reshape(-1, 1),
    lb_test_sub_category.reshape(-1, 1),
    lb_test_grade_category.reshape(-1, 1),
    std_test_numeric,
]

# Join the groups side by side into one 2-D matrix per split.
final_train = np.concatenate(x_train_, axis=1)
final_test = np.concatenate(x_test_, axis=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the embedding lookup table: row i receives the glove_dict vector of
# the i-th word yielded by `corpus`; words missing from glove_dict keep an
# all-zero row.
# NOTE(review): the table has len(corpus) + 1 rows but enumerate() starts at
# 0, so the last row is never written. If token ids start at 1 (with 0 used
# for padding) every row is shifted by one — confirm against the tokenizer.
embedding_matrix = np.zeros((len(corpus) + 1, 300))
for i, word in enumerate(corpus):
    if word in glove_dict:
        embedding_vec = glove_dict[word]
        embedding_matrix[i] = embedding_vec
print(embedding_matrix.shape)
np.save('embedding_matrix_2.npy', embedding_matrix)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#code from https://stackoverflow.com/questions/37793118/load-pretrained-glove-vectors-in-python
def loadGloveModel(File):
    """Parse a whitespace-separated word-vector text file into a dict.

    Each non-blank line is split on whitespace: the first field is the word
    and the remaining fields are parsed as floats.

    Args:
        File: Path of the text file to read.

    Returns:
        dict mapping each word to a 1-D ``numpy`` float array.
        NOTE(review): the previously visible body stopped after filling the
        dict without returning it; returning the mapping is assumed to be
        the intended contract — confirm.

    Raises:
        ValueError: If a field after the word cannot be parsed as a float.
    """
    print("Loading Glove Model")
    gloveModel = {}
    # The context manager closes the handle even if parsing fails; explicit
    # UTF-8 avoids depending on the platform's default encoding.
    with open(File, 'r', encoding='utf-8') as f:
        for line in f:
            splitLines = line.split()
            if not splitLines:
                # Blank line: nothing to index.
                continue
            word = splitLines[0]
            wordEmbedding = np.array([float(value) for value in splitLines[1:]])
            gloveModel[word] = wordEmbedding
    return gloveModel
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Length of every sequence in essay_train_p: plot the distribution, then
# evaluate its summary statistics.
rev_len = [len(essay) for essay in essay_train_p]
pd.Series(rev_len).hist()
plt.show()
# Bare expression with no effect in a script; presumably a notebook-cell echo.
pd.Series(rev_len).describe()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def padding_(sentences, seq_len):
    """Left-pad or truncate integer sequences to a fixed width.

    Sequences shorter than ``seq_len`` are right-aligned and filled with
    zeros on the left, e.g. with ``seq_len = 5`` the input ``[1, 2, 3]``
    becomes ``[0, 0, 1, 2, 3]``. Longer sequences keep only their first
    ``seq_len`` items. Empty sequences yield an all-zero row.

    Args:
        sentences: Sequence of integer sequences.
        seq_len: Width of every output row.

    Returns:
        ``numpy`` integer array of shape ``(len(sentences), seq_len)``.
        NOTE(review): the previously visible body filled this array but did
        not return it; the return is assumed from the docstring example —
        confirm.
    """
    features = np.zeros((len(sentences), seq_len), dtype=int)
    for row, review in enumerate(sentences):
        clipped = np.array(review)[:seq_len]
        # Guard on size: a slice starting at -0 would select the whole row.
        if clipped.size:
            features[row, -clipped.size:] = clipped
    return features
NewerOlder