Skip to content

Instantly share code, notes, and snippets.

View jnothman's full-sized avatar

Joel Nothman jnothman

  • Canva
  • Sydney
View GitHub Profile
from __future__ import print_function
import random
from timeit import timeit
from functools import partial
import numpy as np
from sklearn.preprocessing import LabelEncoder
N_TRIALS = 3
N_SAMPLES = 10000
MULTILABEL = True
import fractions
import numpy as np
from sklearn.base import MetaEstimatorMixin, BaseEstimator
class DiscretelyWeightedTrainer(BaseEstimator, MetaEstimatorMixin):
"""Replicates samples according to `sample_weight` to fit an estimator
@jnothman
jnothman / ipy_dkc.py
Created June 6, 2013 23:13
Dictionary key completion for IPython
from __future__ import print_function
import re
import sys
import six
dict_key_re = re.compile(r'''(?x)
( # preceding string of '.'-delimited identifiers
(?!\d)\w+
(?:\.(?!\d)\w+)*
@jnothman
jnothman / gist:5734967
Created June 8, 2013 12:08
benchmarking sklearn.metrics.precision_recall_fscore_support
import numpy as np, timeit, sklearn.metrics, sklearn.preprocessing, functools
from joblib import Memory
N_TRIALS = 50
memory = Memory('/tmp', verbose=0)
@memory.cache
def gen_mc(s, N=1000000, K=10):
    """Generate a random integer label vector.

    Parameters
    ----------
    s : object
        Not used in the body.  NOTE(review): presumably acts as a key so
        that the caching decorator keeps distinct samples apart -- confirm.
    N : int, default 1000000
        Number of values to draw.
    K : int, default 10
        Exclusive upper bound; values are drawn from ``[0, K)``.

    Returns
    -------
    numpy.ndarray
        Array of ``N`` integers produced by ``np.random.randint``.
    """
    labels = np.random.randint(K, size=N)
    return labels
@jnothman
jnothman / csr_row_norms.pyx
Last active December 25, 2015 04:18
`csr_row_norms` for scikit-learn, using fused types and typed memory views.
from libc.math cimport sqrt
cimport cython
cimport numpy as np
import numpy as np
ctypedef fused my_fused_type:
cython.short
cython.int
cython.long
cython.float
@jnothman
jnothman / sklearn_multilabel_metrics_helper.py
Last active December 30, 2015 07:49
Polymorphic handling of metrics over multilabel formats in scikit-learn.
class _SparseMultiLabelHelper(object):
def __init__(self, y_true, y_pred):
    """Keep CSR versions of both inputs and remember the shape.

    Parameters
    ----------
    y_true, y_pred : objects exposing ``tocsr()``
        Each is converted with ``tocsr()`` and stored under the same name.
        NOTE(review): presumably scipy sparse label-indicator matrices of
        equal shape -- confirm against callers.

    Only ``y_true.shape`` is recorded as ``self.shape``; the shape of
    ``y_pred`` is not inspected here.
    """
    true_csr = y_true.tocsr()
    self.y_true = true_csr
    pred_csr = y_pred.tocsr()
    self.y_pred = pred_csr
    self.shape = y_true.shape
def count_union(self, axis=None):
    """Count non-zero entries of ``y_true + y_pred``.

    The two stored matrices are added and the sum is handed to
    ``self._count_nnz`` together with ``axis``.

    Parameters
    ----------
    axis : optional, default None
        Forwarded unchanged to ``self._count_nnz``.
        NOTE(review): presumably None/0/1 in the usual NumPy sense --
        confirm against ``_count_nnz``.

    Returns
    -------
    Whatever ``self._count_nnz`` returns for the summed matrix.

    Notes
    -----
    NOTE(review): treating the non-zeros of the sum as the union assumes
    entries cannot cancel (e.g. non-negative indicators) -- confirm.
    """
    # The original ``def`` line lacked its trailing colon (SyntaxError).
    return self._count_nnz(self.y_true + self.y_pred, axis)
def count_intersection(self, axis=None)
@jnothman
jnothman / sklearn_stacking.py
Last active January 1, 2016 01:39
Stacking in scikit-learn, a quick attempt
from sklearn.base import BaseEstimator, TransformerMixin
class Transformer(BaseEstimator, TransformerMixin):
def __init__(self, fn):
    """Record ``fn`` on the instance.

    Parameters
    ----------
    fn : object
        Stored unmodified as ``self.fn``.  NOTE(review): presumably the
        callable applied by ``transform`` -- its body is not visible here;
        confirm.
    """
    # Plain attribute assignment only: no validation or copying of ``fn``.
    self.fn = fn
def fit(self, X, y=None):
    """Do nothing and return ``self``.

    Parameters
    ----------
    X : object
        Ignored.
    y : object, optional
        Ignored.  Defaults to ``None`` so the method can be called as
        ``fit(X)``; a required ``y`` made that call raise ``TypeError``.

    Returns
    -------
    self
        The same instance, unchanged.
    """
    return self
def transform(self, X):
@jnothman
jnothman / slot_access.pyx
Created February 23, 2014 11:51
direct __slots__ attribute access in Cython
from cpython cimport PyObject
from libc.string cimport strcmp
cdef extern from "Python.h":
ctypedef struct PyTypeObject:
pass
cdef extern from "structmember.h":
ctypedef struct PyMemberDef:
char *name
@jnothman
jnothman / MaltGateway.java
Last active June 30, 2016 06:25
Reentrant Python wrapper to MaltParser using py4j server
import py4j.GatewayServer;
import java.io.File;
import java.net.URL;
import org.maltparser.concurrent.ConcurrentMaltParserModel;
import org.maltparser.concurrent.ConcurrentMaltParserService;
import org.maltparser.concurrent.graph.ConcurrentDependencyGraph;
import org.maltparser.concurrent.graph.ConcurrentDependencyNode;
@jnothman
jnothman / forkme.svg
Last active November 17, 2021 05:08
"Fork me on GitHub" ribbon in LaTeX/TikZ vector graphic
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.