Alan Nichol amn41

co-founder & CTO at @RasaHQ

amn41 / nn_classify_text.py

Last active October 17, 2017 01:31

	import numpy as np

	def sum_vecs(embed,text):

	tokens = text.split(' ')
	vec = np.zeros(embed.W.shape[1])

	for idx, term in enumerate(tokens):
	if term in embed.vocab:
	vec = vec + embed.W[embed.vocab[term], :]

amn41 / intent_svm.py

Created August 28, 2016 16:31

	import numpy as np
	from sklearn.svm import SVC
	from sklearn.decomposition import PCA
	from sklearn.cross_validation import train_test_split

	from sklearn.grid_search import GridSearchCV
	from sklearn.metrics import classification_report

	import matplotlib.pyplot as plt
	import pickle

amn41 / points_on_ball.tex

Created August 25, 2016 19:26

\emph{hello}

amn41 / train_ner.py

Last active August 31, 2016 13:55

	import sys, os
	from mitie import *
	# First training sentence, already split into tokens.
	sample = ner_training_instance(["I", "am", "looking", "for", "some", "cheap", "Mexican", "food", "."])

	# Label entities by token index range: token 5 is "cheap", token 6 is
	# "Mexican". range() is used instead of xrange() so the script also runs
	# on Python 3, where xrange no longer exists.
	sample.add_entity(range(5, 6), "pricerange")
	sample.add_entity(range(6, 7), "cuisine")

	# And we add another training example
	sample2 = ner_training_instance(["show", "me", "indian", "restaurants", "in", "the", "centre", "."])
	# Token 2 is "indian".
	sample2.add_entity(range(2, 3), "cuisine")

amn41 / train_text_categorizer.py

Last active May 9, 2017 12:34

	import sys, os
	from mitie import *

	trainer = text_categorizer_trainer("/path/to/total_word_feature_extractor.dat")

	data = {} # same as before - omitted for brevity

	for label in training_examples.keys():
	for text in training_examples[label]["examples"]:
	tokens = tokenize(text)

amn41 / test_distance_threshold.py

Last active February 10, 2018 22:21

	# Paths of the vocabulary file and the vectors file handed to Embedding.
	vocab_file = "/path/to/vocab_file"
	vectors_file = "/path/to/vectors_file"

	embed = Embedding(vocab_file, vectors_file)

	# Reference cuisine words and a numeric threshold.
	# NOTE(review): presumably passed on to find_similar_words(embed, text, refs, thresh) - confirm.
	cuisine_refs = ["mexican", "chinese", "french", "british", "american"]
	threshold = 0.2

	# Example input sentence.
	text = "I want to find an indian restaurant"

amn41 / pick_out_words.py

Last active December 30, 2016 02:04

	def find_similar_words(embed,text,refs,thresh):

	C = np.zeros((len(refs),embed.W.shape[1]))

	for idx, term in enumerate(refs):
	if term in embed.vocab:
	C[idx,:] = embed.W[embed.vocab[term], :]


	tokens = text.split(' ')

amn41 / embedding.py

Last active October 17, 2017 01:19

	class Embedding(object):
	def __init__(self,vocab_file,vectors_file):
	with open(vocab_file, 'r') as f:
	words = [x.rstrip().split(' ')[0] for x in f.readlines()]

	with open(vectors_file, 'r') as f:
	vectors = {}
	for line in f:
	vals = line.rstrip().split(' ')
	vectors[vals[0]] = [float(x) for x in vals[1:]]

amn41 / ensure_form_complete.py

Last active April 4, 2016 13:47

	# Loop until formData.is_complete() reports true. Each pass looks up the
	# first missing field and asks the question registered under that key.
	while not formData.is_complete():
		questionKey = formData.first_missing_field()
		ask(questions[questionKey])

amn41 / toothpaste_plot.py

Created March 1, 2016 09:03

	# I like using seaborn, but of course you can also just use this as a set of colours.

	import matplotlib.pyplot as plt
	import seaborn as sns
	import numpy as np

	# from seaborn docs
	def sinplot(flip=1):
	x = np.linspace(0, 14, 100)
	for i in range(1, 7):