Skip to content

Instantly share code, notes, and snippets.

import spacy
# Load the spaCy pipeline once; every document below is produced by it.
# NOTE(review): judging by the package name this is a vectors-only English
# model -- confirm it is installed for the spaCy version in use.
nlp = spacy.load('en_vectors_web_lg')

# Three sample sentences. Only the first two are run through the pipeline
# in the visible part of this script; text3 is defined but not yet used.
text1, text2, text3 = (
    'The medical field is moving forward rapidly.',
    'Medicine is vital to the industry.',
    'Reggie Miller is a basketball player.',
)

# Process text1 first, then text2 (same call order as before).
doc1, doc2 = nlp(text1), nlp(text2)
# Show a phrase together with ten characters of context on each side.
sentence = 'Reggie Miller grew up in Riverside before going to UCLA.'
phrase = 'Riverside'

# str.find returns the offset of the first match, or -1 when there is none.
start_index = sentence.find(phrase)
end_index = len(phrase) + start_index

# Bare expression: it only displays a value in an interactive session.
# NOTE: if the phrase began within the first ten characters (or were not
# found), the lower bound would be negative and count from the END of the
# string instead of clamping at 0.
sentence[slice(start_index - 10, end_index + 10)]
# 'rew up in Riverside before go'
# One-character string constants, each named after its own value.
a, b, c, d = 'a', 'b', 'c', 'd'
def test_empty():
    """Call ``check`` with an empty list and ``0``."""
    # NOTE(review): `check` is defined outside this excerpt; presumably its
    # second argument is the expected result for the list -- confirm there.
    check([], 0)
def test_all_one():
    """Call ``check`` with ``[a, a]`` and ``1``."""
    # NOTE(review): `check` and `a` are defined outside this function;
    # presumably the second argument is the expected result for a list whose
    # items are all the same value -- confirm against `check`.
    check([a, a], 1)
"""Make initial clusters of categories to bootstrap top-level categories."""
from collections import defaultdict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans, MiniBatchKMeans
from j_util import get_rows