Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
count | |
fit time: 3.4s | |
fit memory usage: 150.2MB | |
dump time: 5.3s | |
dump memory usage: 71.5MB | |
transform features: 130107 | |
load time: 2.1s | |
load memory usage: 94.5MB | |
transform time: 1.8s | |
transform memory leak: 35.4MB |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import division, print_function | |
import os | |
import sys | |
import time | |
import psutil | |
from sklearn.externals import joblib | |
from sklearn import datasets | |
# the following imports are not needed, but if we won't import them |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import marisa_trie | |
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer | |
# hack to store vocabulary in MARISA Trie | |
class _MarisaVocabularyMixin(object): | |
def fit_transform(self, raw_documents, y=None): | |
super(_MarisaVocabularyMixin, self).fit_transform(raw_documents) | |
self._freeze_vocabulary() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import division, print_function | |
import os | |
import sys | |
import time | |
import resource | |
import psutil | |
from sklearn import datasets | |
from sklearn.externals import joblib | |
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Utilities for measuring parse quality. | |
""" | |
import functools | |
from russian_tagsets.ruscorpora import from_opencorpora_int | |
# memoize results for 4x speedup | |
op2rnc = functools.lru_cache(maxsize=10000)(from_opencorpora_int) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy.misc import logsumexp | |
def _forward(n_observations, n_components, log_startprob, log_transmat, | |
framelogprob): | |
fwdlattice = np.empty((n_observations, n_components)) | |
fwdlattice[0] = log_startprob + framelogprob[0] | |
for t in range(1, n_observations): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import absolute_import | |
from libc.math cimport log2 # FIXME: can be unavailable in Windows | |
cimport cython | |
ctypedef double dtype_t | |
DEF _NINF = float('-inf') | |
def log_add(*values): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def use_speedups_if_available(original, module_name, func_name):
    """Pick an optimized implementation of a function when one can be loaded.

    Tries to import ``module_name`` and look up ``func_name`` in it.

    :param original: the fallback (reference) callable.
    :param module_name: dotted name of the module that may provide a
        faster implementation.
    :param func_name: name of the attribute to fetch from that module.
    :returns: a 2-tuple ``(implementation, original)``. ``implementation``
        is the attribute loaded from ``module_name`` when available and
        ``original`` otherwise; the second item is always ``original``.
    """
    try:
        # A non-empty ``fromlist`` makes ``__import__`` return the leaf
        # module of a dotted path rather than the top-level package.
        mod = __import__(module_name, fromlist=[func_name])
        speedup = getattr(mod, func_name)
    except (ImportError, AttributeError):
        # The module is missing, or it imports but does not define
        # ``func_name``; in both cases fall back to the original.
        return original, original
    return speedup, original
# ------------- Example usage ------------ | |
def foo(): |