Skip to content

Instantly share code, notes, and snippets.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import numpy as np
def sum_vecs(embed, text):
    """Sum the embedding vectors of every known token in *text*.

    Parameters
    ----------
    embed : object with ``W`` (2-D array of row vectors) and ``vocab``
        (dict mapping token -> row index).  # assumes GloVe-style layout — TODO confirm
    text : str
        Whitespace-separated utterance; split on single spaces only.

    Returns
    -------
    numpy.ndarray of length ``embed.W.shape[1]`` — the sum of the rows
    for tokens found in the vocab (zero vector if none are known).
    """
    vec = np.zeros(embed.W.shape[1])
    for term in text.split(' '):  # index from enumerate was unused; dropped
        if term in embed.vocab:
            vec = vec + embed.W[embed.vocab[term], :]
    # Bug fix: the original fell off the end and implicitly returned None,
    # discarding the accumulated vector.
    return vec
import numpy as np
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import pickle
\emph{hello}
import sys, os
from mitie import *

# First NER training utterance: pre-tokenized text plus labeled entity
# spans (half-open token ranges into the token list).
sample = ner_training_instance(
    ["I", "am", "looking", "for", "some", "cheap", "Mexican", "food", "."])
sample.add_entity(xrange(5, 6), "pricerange")  # token "cheap"
sample.add_entity(xrange(6, 7), "cuisine")     # token "Mexican"

# And we add another training example
sample2 = ner_training_instance(
    ["show", "me", "indian", "restaurants", "in", "the", "centre", "."])
sample2.add_entity(xrange(2, 3), "cuisine")    # token "indian"
import sys, os
from mitie import *

# Text categorizer backed by a pretrained MITIE word-feature extractor.
trainer = text_categorizer_trainer("/path/to/total_word_feature_extractor.dat")
data = {} # same as before - omitted for brevity

# Walk every labeled example, tokenizing each utterance.
# NOTE(review): `training_examples` is not defined in this excerpt —
# presumably it comes from the omitted section above.
for label in training_examples.keys():
    for text in training_examples[label]["examples"]:
        tokens = tokenize(text)
# Paths to the pretrained vocab / vectors files backing the Embedding.
vocab_file = "/path/to/vocab_file"
vectors_file = "/path/to/vectors_file"
embed = Embedding(vocab_file, vectors_file)

# Reference cuisine terms, the similarity cutoff, and a demo utterance.
cuisine_refs = ["mexican", "chinese", "french", "british", "american"]
threshold = 0.2
text = "I want to find an indian restaurant"
# Compare words of *text* against reference terms via their embeddings.
# NOTE(review): this fragment is truncated — the visible body only builds
# the reference matrix C and tokenizes the text; the similarity scoring
# that presumably uses `thresh` is missing from this excerpt.
# NOTE(review): indentation was lost in this paste; lines shown flush-left.
def find_similar_words(embed,text,refs,thresh):
# One embedding row per reference term; rows stay zero for OOV terms.
C = np.zeros((len(refs),embed.W.shape[1]))
for idx, term in enumerate(refs):
if term in embed.vocab:
C[idx,:] = embed.W[embed.vocab[term], :]
# Naive single-space tokenization, same as sum_vecs earlier in the file.
tokens = text.split(' ')
@amn41
amn41 / embedding.py
Last active October 17, 2017 01:19
# Word-embedding lookup loaded from GloVe-style text files.
# NOTE(review): this excerpt is truncated — __init__ stops after parsing
# the vectors file; the `vocab` index and weight matrix `W` that other
# snippets read (embed.vocab, embed.W) must be built in the missing
# remainder.
# NOTE(review): indentation was lost in this paste; lines shown flush-left.
class Embedding(object):
def __init__(self,vocab_file,vectors_file):
# Vocab file: first whitespace-separated token on each line is the word.
with open(vocab_file, 'r') as f:
words = [x.rstrip().split(' ')[0] for x in f.readlines()]
# Vectors file: word followed by its float components, space-separated.
with open(vectors_file, 'r') as f:
vectors = {}
for line in f:
vals = line.rstrip().split(' ')
vectors[vals[0]] = [float(x) for x in vals[1:]]
# Slot-filling loop: keep asking for whichever required field is still
# missing until the form reports itself complete.
# NOTE(review): `formData`, `questions`, and `ask` are defined outside
# this excerpt.
while not formData.is_complete():
    missing = formData.first_missing_field()
    ask(questions[missing])