This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# install docker | |
# https://docs.docker.com/engine/installation/linux/ubuntulinux/ | |
# install docker-compose | |
# https://docs.docker.com/compose/install/ | |
# install letsencrypt | |
# https://www.digitalocean.com/community/tutorials/how-to-secure-nginx-with-let-s-encrypt-on-ubuntu-16-04 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Main training requirements | |
numpy == 1.15.4 | |
progressbar2 | |
pandas | |
six | |
pyxdg | |
attrdict | |
absl-py | |
# Requirements for building native_client files |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find similar words | |
w2v.vec_sim("machine", 3) | |
class word2vec(): | |
## Removed## | |
# Input word, returns the top_n nearest word(s) | |
def vec_sim(self, word, top_n): | |
v_w1 = self.word_vec(word) | |
word_sim = {} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get vector for word | |
vec = w2v.word_vec("machine") | |
class word2vec(): | |
## Removed ## | |
# Get vector from word | |
def word_vec(self, word): | |
w_index = self.word_index[word] | |
v_w = self.w1[w_index] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class word2vec(): | |
##Removed## | |
for i in range(self.epochs): | |
self.loss = 0 | |
for w_t, w_c in training_data: | |
##Removed## | |
# Calculate error | |
# 1. For a target word, calculate difference between y_pred and each of the context words |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class word2vec(): | |
def train(self, training_data): | |
##Removed## | |
# Cycle through each epoch | |
for i in range(self.epochs): | |
# Initialise loss to 0 | |
self.loss = 0 | |
# Cycle through each training sample |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training | |
w2v.train(training_data) | |
class word2vec(): | |
def train(self, training_data): | |
# Initialising weight matrices | |
# Both w1 and w2 should be randomly initialised, but for this demo we use pre-determined arrays (getW1 and getW2) | |
# getW1 - shape (9x10) and getW2 - shape (10x9) | |
self.w1 = np.array(getW1) | |
self.w2 = np.array(getW2) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training | |
w2v.train(training_data) | |
class word2vec(): | |
def train(self, training_data): | |
# Initialising weight matrices | |
# Both w1 and w2 should be randomly initialised, but for this demo we use pre-determined arrays (getW1 and getW2) | |
# getW1 - shape (9x10) and getW2 - shape (10x9) | |
self.w1 = np.array(getW1) | |
self.w2 = np.array(getW2) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Initialise object | |
w2v = word2vec() | |
# Numpy ndarray with one-hot representation for [target_word, context_words] | |
training_data = w2v.generate_training_data(settings, corpus) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class word2vec(): | |
def __init__(self): | |
self.n = settings['n'] | |
self.lr = settings['learning_rate'] | |
self.epochs = settings['epochs'] | |
self.window = settings['window_size'] | |
def generate_training_data(self, settings, corpus): | |
# Find unique word counts using a dictionary | |
word_counts = defaultdict(int) |