Skip to content

Instantly share code, notes, and snippets.

View jnothman's full-sized avatar

Joel Nothman jnothman

  • Canva
  • Sydney
View GitHub Profile
@jnothman
jnothman / averaging.py
Created July 22, 2014 02:28
Illustration of P/R/F1 averaging methods
from __future__ import print_function
import numpy as np
from sklearn.metrics import precision_recall_fscore_support as prfs, confusion_matrix
from sklearn.preprocessing import label_binarize
# Reference labels: six samples drawn from the three classes 0, 1 and 2.
true = [0, 0, 0, 1, 1, 2]

# Named prediction scenarios as (description, predicted labels) pairs.
# Every predicted-label list has one entry per element of `true`.
preds = [
    ('under-generate 1', [0, 0, 0, 0, 1, 2]),
    ('under-generate 2', [0, 0, 0, 1, 1, 0]),
    ('over-generate 1', [0, 1, 1, 1, 1, 2]),
    ('confuse 1 and 2', [0, 0, 0, 1, 2, 1]),
]
@jnothman
jnothman / list-json-paths.py
Created September 5, 2014 05:16
Extract and list json paths
#!/usr/bin/env python
"""
Faced with a collection of JSON blobs, this script lists what
paths (i.e. sequences of nested keys) exist in the data from
root to leaf.
For example:
$ echo '[{"a": {"a1": 124}, "b": 111}, {"a": {"a2": 111}, "c": null}]' \
| list-json-paths.py
will output:
@jnothman
jnothman / sklearn_param_trans.py
Created November 17, 2014 09:36
Allow nested scikit-learn params to be renamed, or multiple parameters to be tied together so that they hold the same value
from abc import ABCMeta, abstractmethod
from .base import BaseEstimator
from .externals.six import iteritems, with_metaclass
class BaseParameterTranslator(with_metaclass(ABCMeta, BaseEstimator)):
@property
def fit(self):
@jnothman
jnothman / resamplers.py
Created November 27, 2014 13:31
examples of resamplers for scikit-learn
from __future__ import print_function, division
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.cluster import MiniBatchKMeans, SpectralClustering
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils.random import sample_without_replacement
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
@jnothman
jnothman / count
Created February 5, 2015 07:07
Count or sum Unix command
#!/usr/bin/env python
"""Count or sum, while uniquing rows, without full sort of data
By using --key-fields, can also show example row that has some particular fields.
(This was much simpler when it just counted!)
"""
import sys
import argparse
@jnothman
jnothman / modelbycluster.py
Created October 19, 2015 04:45
Generic scikit-learn estimator to cluster data and build predictive models for each cluster.
import numpy as np
from sklearn.base import BaseEstimator, clone
from sklearn.utils import safe_mask
class ModelByCluster(BaseEstimator):
def __init__(self, clusterer, estimator):
self.clusterer = clusterer
self.estimator = estimator
def fit(self, X, y):
self.clusterer_ = clone(self.clusterer)
clusters = self.clusterer_.fit_predict(X)
n_clusters = len(np.unique(clusters))
cdef cppclass myclass[T]:
T x
object foo(self):
return self.x
from cython cimport floating
def get(floating x):
@jnothman
jnothman / clean.sh
Created September 29, 2016 14:29
GitHub issues to spreadsheet
#!/bin/bash
jq -s 'add | map({user: .user.login,
milestone: .milestone.title,
labels: .labels | map(.name) | join("|"),
created_at,
closed_at,
updated_at,
state,
comments,
title,
@jnothman
jnothman / view-circle-changed.js
Last active January 25, 2017 03:19
Bookmarklet: view scikit-learn changed documentation on CircleCI from PR page
javascript:
var status_lists = document.getElementsByClassName('merge-status-list');
var ci_links = status_lists[status_lists.length - 1].getElementsByClassName('status-actions');
for (var i in ci_links) {
var ci_link = ci_links[i];
if (ci_link.tagName.toUpperCase() != 'A') {
ci_link = ci_link.getElementsByTagName('a')[0];
}
var url = ci_link.getAttribute('href');
var match = /circleci.com\/.*?([0-9]+)\?/.exec(url);
import numpy as np
from scipy import sparse
def flexible_concatenate(it, final_len=None):
"""Concatenate the elements of an iterable
Supports generators of arrays, lists, sparse matrices or tuples thereof
>>> import numpy as np