I wrote this four years ago; use this command instead:
$ docker rmi $(docker images -q -f dangling=true)
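Docker 1.13 and later also ship `docker image prune`, which removes dangling images in a single step.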
import nltk

with open('sample.txt', 'r') as f:
    sample = f.read()

sentences = nltk.sent_tokenize(sample)
tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
# batch_ne_chunk was renamed to ne_chunk_sents in NLTK 3
chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
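For reference, a minimal sketch of pulling the entity strings back out of the chunked trees; with binary=True every entity subtree is labeled NE, and extract_entities is just an illustrative helper name:

def extract_entities(tree):
    # collect the leaves of every 'NE' subtree as one entity string
    return [' '.join(word for word, tag in subtree.leaves())
            for subtree in tree.subtrees()
            if subtree.label() == 'NE']

for tree in chunked_sentences:
    print(extract_entities(tree))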
from geventwebsocket.handler import WebSocketHandler
from gevent.pywsgi import WSGIServer
from flask import Flask, request, render_template

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('index.html')
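The websocket imports only pay off once the app is actually served through gevent; a minimal sketch of the usual wiring (the port is an assumption):

# serve the Flask app through gevent so websocket upgrades work
http_server = WSGIServer(('', 5000), app, handler_class=WebSocketHandler)
http_server.serve_forever()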
(function() {

  //
  // Iterates over an array of numbers and returns the sum. Example:
  //
  //   _.sum([1, 2, 3]) => 6
  //
  _.sum = function(obj) {
    // use _.isArray so the mixin doesn't depend on jQuery
    if (!_.isArray(obj) || obj.length === 0) return 0;
    return _.reduce(obj, function(sum, n) {
      return sum + n;
    }, 0);
  };

})();
test:
	clear
	nosetests --with-coverage --cover-package name_utils test_name_utils.py

clean:
	find -regex '.*\.pyc' -exec rm {} \;
	find -regex '.*~' -exec rm {} \;

.PHONY: test clean
import os

PATH = 'path/to/my/blueprints/directory'
BLUEPRINT = 'the_blueprint'

def import_file(path, name=None):
    """Import a file with the given name and path."""
    # use the imp module to do the actual import
    import imp
    name = name or os.path.split(path)[-1].replace(".", "_")
    return imp.load_source(name, path)
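A sketch of how this might be used to register the blueprint on the app, assuming the imported module exposes a flask.Blueprint under the name stored in BLUEPRINT (the views.py filename is made up):

module = import_file(os.path.join(PATH, 'views.py'))
app.register_blueprint(getattr(module, BLUEPRINT))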
// works on selected text
var selection = window.getSelection().toString();
if (selection) {
    selection = selection.match(/[^\r\n]+/g)
        .map(function(a) { return "* " + a; })
        .join("\n");
    prompt("Here's your markdown:", selection);
}
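Paste it into the browser console or save it as a bookmarklet; anything that gives it a DOM selection works.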
import numpy as np
import scipy.sparse as sp
import hat_trie

from sklearn.feature_extraction.text import CountVectorizer, _make_int_array

class HatTrieCountVectorizer(CountVectorizer):

    def _count_vocab(self, raw_documents, fixed_vocab):
        """Create sparse feature matrix, and vocabulary where fixed_vocab=False"""
import elasticsearch
from math import log

def tfidf_matrix(es, index, doc_type, fields, size=10, bulk=500, query=dict(match_all={})):
    """Generate tfidf for `size` documents of `index`/`doc_type`.

    All `fields` need to have the mapping "term_vector": "yes".
    This is the consuming version (i.e. get everything at once).

    :param es: elasticsearch client
    """
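A sketch of calling it, assuming a local cluster and an index whose fields carry the "term_vector": "yes" mapping (the index, type, and field names are made up):

es = elasticsearch.Elasticsearch()  # defaults to localhost:9200
matrix = tfidf_matrix(es, index='articles', doc_type='article',
                      fields=['body'], size=100)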