Skip to content

Instantly share code, notes, and snippets.

View chyikwei's full-sized avatar

Chyi-Kwei Yau chyikwei

View GitHub Profile
import nltk
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
def print_top_words(model, feature_names, n_top_words):
    """Print the highest-weighted words of every topic in *model*.

    One line is written to standard output per row of ``model.components_``,
    in the form ``Topic #<idx>: <word> (<weight>) <word> (<weight>) ...``,
    with words ordered from largest to smallest weight and each weight
    rounded to two decimals.

    Args:
        model: Object exposing ``components_``, an iterable of per-topic
            weight arrays that support ``argsort()`` and integer indexing.
        feature_names: Sequence of strings indexed by the same positions
            as the entries of each weight array.
        n_top_words: Number of top-weighted words to show for each topic.
    """
    for topic_idx, topic in enumerate(model.components_):
        # argsort() is ascending, so walk the last n_top_words positions
        # backwards to get indices ordered by descending weight.
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        ranked_words = " ".join(
            feature_names[i] + " (" + str(round(topic[i], 2)) + ")"
            for i in top_indices
        )
        # Emit the line; previously the message was assembled but never output.
        print("Topic #%d: " % topic_idx + ranked_words)
presto> explain (type distributed, format json) select * from example.example.numbers;
Query Plan
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[ {
"id" : "5",
"name" : "Output",
"identifier" : "[text, value]",
"details" : "",
"children" : [ {
"id" : "63",