Last active
May 9, 2019 18:45
-
-
Save evanmiltenburg/d7a5522148a0d0762a448eaf477c3aab to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import numpy as np | |
from gensim.models import Word2Vec | |
np.random.seed(1234) | |
from keras.models import Sequential | |
from keras.layers.core import Activation, Dense | |
from keras.callbacks import EarlyStopping | |
print("Loading the GoogleNews model...")
# Download the GoogleNews vectors and change the path.
# NOTE(review): Word2Vec.load_word2vec_format is the gensim 0.x/1.x API;
# gensim 2+ removed it in favour of
# gensim.models.KeyedVectors.load_word2vec_format — confirm the installed
# version before running.
googlenews = Word2Vec.load_word2vec_format('/Users/Emiel/Downloads/GoogleNews-vectors-negative300.bin.gz', binary=True)
print("Loaded!")
# See:
# Lynott, D., & Connell, L. (2013).
# Modality exclusivity norms for 400 nouns:
# The relationship between perceptual experience and surface word form.
# Behavior Research Methods, 45, 516-526.
#
# Downloaded from: http://www.lancaster.ac.uk/people/connelll/papers.html
# Each row of the tab-separated norms file becomes a dict keyed by its
# column headers (e.g. 'Noun', 'Auditory_mean', ...).
with open('./Lynott&Connell_NounModalityNorms_tabDelimited.txt') as f:
    reader = csv.DictReader(f, delimiter='\t')
    entries = list(reader)
def get_data(entries, googlenews):
    """Produce the input and output data for the neural network.

    Parameters
    ----------
    entries : list of dict
        Rows from the Lynott & Connell norms file; each dict must contain
        a 'Noun' key and the five ``*_mean`` modality-rating columns.
    googlenews : mapping
        Word-embedding model supporting ``googlenews[word]`` lookup and
        raising KeyError for out-of-vocabulary words.

    Returns
    -------
    tuple
        ``(nouns, inputs, outputs)`` where ``nouns`` is a tuple of the
        nouns found in the embedding model, ``inputs`` an array of their
        word vectors, and ``outputs`` an array of their five modality
        ratings scaled to [0, 1].

    Raises
    ------
    ValueError
        If none of the nouns are present in the embedding model.
    """
    data_points = []
    score_names = ['Auditory_mean', 'Gustatory_mean', 'Haptic_mean', 'Olfactory_mean', 'Visual_mean']
    for entry in entries:
        noun = entry['Noun']
        # Ratings are on a 0-5 scale; divide by 5 so the targets fit the
        # [0, 1] range of the network's sigmoid output layer.
        scores = [float(entry[score])/5 for score in score_names]
        try:
            word_vector = googlenews[noun]
        except KeyError:
            # Skip nouns that are not in the embedding vocabulary.
            continue
        data_points.append([noun, word_vector, scores])
    if not data_points:
        # Previously this fell through to zip(*[]) and crashed with an
        # opaque unpacking error; fail loudly with a clear message instead.
        raise ValueError("None of the nouns were found in the embedding model.")
    nouns, inputs, outputs = zip(*data_points)
    return nouns, np.array(inputs), np.array(outputs)
# Build the (noun, vector, ratings) dataset; nouns absent from the
# embedding model are silently dropped by get_data.
nouns, inputs, outputs = get_data(entries, googlenews)
# Feed-forward regressor: 300-d word vector -> 100 tanh units -> 5
# sigmoid outputs, one per modality rating scaled to [0, 1].
model = Sequential()
model.add(Dense(100, input_dim=300))  # input_dim matches the 300-d GoogleNews vectors
model.add(Activation('tanh'))
model.add(Dense(5))
model.add(Activation('sigmoid'))
# NOTE(review): 'accuracy' is not a meaningful metric for a continuous
# multi-output regression target; mean absolute error would be the
# informative metric here — confirm before relying on the final print.
model.compile(loss='mean_squared_error',
              optimizer='adagrad',
              metrics=['accuracy'])
# Fixed positional split: first 300 rows train, next 50 validate, rest test.
# NOTE(review): assumes the ~400-noun norms list and does not shuffle, so
# the split follows file order — confirm this is intended.
train_X = inputs[:300]
train_y = outputs[:300]
val_X = inputs[300:350]
val_y = outputs[300:350]
test_X = inputs[350:]
test_y = outputs[350:]
# Stop training once validation loss has not improved for 5 epochs.
stopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
# NOTE(review): nb_epoch is the Keras 1 spelling; Keras 2 renamed this
# parameter to epochs.
history = model.fit(train_X,train_y,
                    nb_epoch=500,
                    batch_size=10,
                    validation_data=(val_X,val_y),
                    callbacks=[stopper])
# Evaluate on the held-out test rows; score is the MSE loss, acc the
# (see note above compile) 'accuracy' metric.
score, acc = model.evaluate(test_X, test_y, batch_size=10)
print("The accuracy of the model is: ", acc)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment