Skip to content

Instantly share code, notes, and snippets.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@kmike
kmike / results.txt
Last active December 30, 2015 10:19
count
fit time: 3.4s
fit memory usage: 150.2MB
dump time: 5.3s
dump memory usage: 71.5MB
transform features: 130107
load time: 2.1s
load memory usage: 94.5MB
transform time: 1.8s
transform memory leak: 35.4MB
#!/usr/bin/env python
from __future__ import division, print_function
import os
import sys
import time
import psutil
from sklearn.externals import joblib
from sklearn import datasets
# the following imports are not needed, but if we don't import them
import marisa_trie
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
# hack to store vocabulary in MARISA Trie
class _MarisaVocabularyMixin(object):
def fit_transform(self, raw_documents, y=None):
super(_MarisaVocabularyMixin, self).fit_transform(raw_documents)
self._freeze_vocabulary()
#!/usr/bin/env python
from __future__ import division, print_function
import os
import sys
import time
import resource
import psutil
from sklearn import datasets
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer
# -*- coding: utf-8 -*-
"""
Utilities for measuring parse quality.
"""
import functools
from russian_tagsets.ruscorpora import from_opencorpora_int
# Memoized wrapper around from_opencorpora_int: results are kept in an LRU
# cache of up to 10000 entries (the original note reports a 4x speedup).
op2rnc = functools.lru_cache(maxsize=10000)(from_opencorpora_int)
@kmike
kmike / gist:5285124
Last active December 15, 2015 15:49
import numpy as np
from scipy.misc import logsumexp
def _forward(n_observations, n_components, log_startprob, log_transmat,
framelogprob):
fwdlattice = np.empty((n_observations, n_components))
fwdlattice[0] = log_startprob + framelogprob[0]
for t in range(1, n_observations):
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from libc.math cimport log2 # FIXME: can be unavailable in Windows
cimport cython
ctypedef double dtype_t
DEF _NINF = float('-inf')
def log_add(*values):
@kmike
kmike / gist:5259525
Created March 28, 2013 00:39
use_speedups_if_available
def use_speedups_if_available(original, module_name, func_name):
    """Return a replacement for ``original`` when one can be imported.

    Imports ``module_name`` and looks up ``func_name`` in it.

    Args:
        original: the object to fall back to when no replacement is found.
        module_name: dotted name of the module to import.
        func_name: name of the attribute to fetch from that module.

    Returns:
        A ``(func, original)`` tuple. ``func`` is the attribute found in the
        imported module, or ``original`` itself when the module cannot be
        imported or does not define ``func_name``. The second item is always
        ``original``.
    """
    try:
        # A non-empty fromlist makes __import__ return the leaf module of a
        # dotted name instead of the top-level package.
        mod = __import__(module_name, fromlist=[func_name])
    except ImportError:
        return original, original
    # A module that imports fine but lacks the attribute is treated like a
    # missing module: fall back instead of raising AttributeError.
    return getattr(mod, func_name, original), original
# ------------- Example usage ------------
def foo():