Last active
May 9, 2019 18:45
-
-
Save evanmiltenburg/d7a5522148a0d0762a448eaf477c3aab to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import numpy as np | |
from gensim.models import Word2Vec | |
np.random.seed(1234) | |
from keras.models import Sequential | |
from keras.layers.core import Activation, Dense | |
from keras.callbacks import EarlyStopping | |
print("Loading the GoogleNews model...")
# Download the GoogleNews vectors and change the path.
# NOTE(review): Word2Vec.load_word2vec_format is the gensim 0.x/1.x API;
# gensim 2+ removed it in favour of
# gensim.models.KeyedVectors.load_word2vec_format — confirm the installed
# version before running.
googlenews = Word2Vec.load_word2vec_format('/Users/Emiel/Downloads/GoogleNews-vectors-negative300.bin.gz', binary=True)
print("Loaded!")
# See:
# Lynott, D., & Connell, L. (2013).
# Modality exclusivity norms for 400 nouns:
# The relationship between perceptual experience and surface word form.
# Behavior Research Methods, 45, 516-526.
#
# Downloaded from: http://www.lancaster.ac.uk/people/connelll/papers.html
# Each row of the tab-separated norms file becomes a dict keyed by its
# column headers (e.g. 'Noun', 'Auditory_mean', ...).
with open('./Lynott&Connell_NounModalityNorms_tabDelimited.txt') as f:
    reader = csv.DictReader(f, delimiter='\t')
    entries = list(reader)
def get_data(entries, googlenews):
    """Produce the input and output data for the neural network.

    Parameters
    ----------
    entries : list of dict
        Rows from the Lynott & Connell norms file; each dict must contain
        a 'Noun' key and the five ``*_mean`` modality-rating columns.
    googlenews : mapping
        Word-embedding model supporting ``googlenews[word]`` lookup and
        raising KeyError for out-of-vocabulary words.

    Returns
    -------
    tuple
        ``(nouns, inputs, outputs)`` where ``nouns`` is a tuple of the
        nouns found in the embedding model, ``inputs`` an array of their
        word vectors, and ``outputs`` an array of their five modality
        ratings scaled to [0, 1].

    Raises
    ------
    ValueError
        If none of the nouns are present in the embedding model.
    """
    data_points = []
    score_names = ['Auditory_mean', 'Gustatory_mean', 'Haptic_mean', 'Olfactory_mean', 'Visual_mean']
    for entry in entries:
        noun = entry['Noun']
        # Ratings are on a 0-5 scale; divide by 5 so the targets fit the
        # [0, 1] range of the network's sigmoid output layer.
        scores = [float(entry[score])/5 for score in score_names]
        try:
            word_vector = googlenews[noun]
        except KeyError:
            # Skip nouns that are not in the embedding vocabulary.
            continue
        data_points.append([noun, word_vector, scores])
    if not data_points:
        # Previously this fell through to zip(*[]) and crashed with an
        # opaque unpacking error; fail loudly with a clear message instead.
        raise ValueError("None of the nouns were found in the embedding model.")
    nouns, inputs, outputs = zip(*data_points)
    return nouns, np.array(inputs), np.array(outputs)
# Build the (noun, vector, ratings) dataset; nouns absent from the
# embedding model are silently dropped by get_data.
nouns, inputs, outputs = get_data(entries, googlenews)
# Feed-forward regressor: 300-d word vector -> 100 tanh units -> 5
# sigmoid outputs, one per modality rating scaled to [0, 1].
model = Sequential()
model.add(Dense(100, input_dim=300))  # input_dim matches the 300-d GoogleNews vectors
model.add(Activation('tanh'))
model.add(Dense(5))
model.add(Activation('sigmoid'))
# NOTE(review): 'accuracy' is not a meaningful metric for a continuous
# multi-output regression target; mean absolute error would be the
# informative metric here — confirm before relying on the final print.
model.compile(loss='mean_squared_error',
              optimizer='adagrad',
              metrics=['accuracy'])
# Fixed positional split: first 300 rows train, next 50 validate, rest test.
# NOTE(review): assumes the ~400-noun norms list and does not shuffle, so
# the split follows file order — confirm this is intended.
train_X = inputs[:300]
train_y = outputs[:300]
val_X = inputs[300:350]
val_y = outputs[300:350]
test_X = inputs[350:]
test_y = outputs[350:]
# Stop training once validation loss has not improved for 5 epochs.
stopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
# NOTE(review): nb_epoch is the Keras 1 spelling; Keras 2 renamed this
# parameter to epochs.
history = model.fit(train_X,train_y,
                    nb_epoch=500,
                    batch_size=10,
                    validation_data=(val_X,val_y),
                    callbacks=[stopper])
# Evaluate on the held-out test rows; score is the MSE loss, acc the
# (see note above compile) 'accuracy' metric.
score, acc = model.evaluate(test_X, test_y, batch_size=10)
print("The accuracy of the model is: ", acc)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment