Joel Nothman jnothman

Software engineer specialising in NLP and applied data science

jnothman / averaging.py

Created July 22, 2014 02:28

Illustration of P/R/F1 averaging methods

	from __future__ import print_function
	import numpy as np
	from sklearn.metrics import precision_recall_fscore_support as prfs, confusion_matrix
	from sklearn.preprocessing import label_binarize
	# Reference (true) labels for six samples drawn from three classes: 0, 1, 2.
	true = [0, 0, 0, 1, 1, 2]
	# Named prediction scenarios as (description, predicted labels) pairs;
	# each one differs from `true` in a single, specific way.
	preds = [('under-generate 1', [0, 0, 0, 0, 1, 2]),  # one true 1 predicted as 0
	('under-generate 2', [0, 0, 0, 1, 1, 0]),  # the only true 2 predicted as 0
	('over-generate 1', [0, 1, 1, 1, 1, 2]),  # two true 0s predicted as 1
	('confuse 1 and 2', [0, 0, 0, 1, 2, 1])]  # a true 1 and the true 2 swapped

jnothman / list-json-paths.py

Created September 5, 2014 05:16

Extract and list JSON paths

	#!/usr/bin/env python
	"""
	Faced with a collection of JSON blobs, this script lists what
	paths (i.e. sequences of nested keys) exist in the data from
	root to leaf.

	For example:
	$ echo '[{"a": {"a1": 124}, "b": 111}, {"a": {"a2": 111}, "c": null}]' \
	| list-json-paths.py
	will output:

jnothman / sklearn_param_trans.py

Created November 17, 2014 09:36

Allow nested scikit-learn parameters to be renamed, or multiple parameters to be tied so that they hold the same value

	from abc import ABCMeta, abstractmethod

	from .base import BaseEstimator
	from .externals.six import iteritems, with_metaclass


	class BaseParameterTranslator(with_metaclass(ABCMeta, BaseEstimator)):

	@property
	def fit(self):

jnothman / resamplers.py

Created November 27, 2014 13:31

Examples of resamplers for scikit-learn

	from __future__ import print_function, division
	import numpy as np

	from sklearn.base import BaseEstimator
	from sklearn.cluster import MiniBatchKMeans, SpectralClustering
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.utils.random import sample_without_replacement
	from sklearn.svm import OneClassSVM
	from sklearn.linear_model import LogisticRegression
	from sklearn import datasets

jnothman / count

Created February 5, 2015 07:07

Unix command to count or sum rows

	#!/usr/bin/env python
	"""Count or sum, while uniquing rows, without full sort of data

	By using --key-fields, can also show example row that has some particular fields.

	(This was much simpler when it just counted!)
	"""

	import sys
	import argparse

jnothman / modelbycluster.py

Created October 19, 2015 04:45

Generic scikit-learn estimator to cluster data and build predictive models for each cluster.

	# sklearn.base exposes the lowercase function `clone` (there is no `Clone`);
	# fit() calls clone(self.clusterer) and np.unique(clusters), so both names
	# must be importable here.
	import numpy as np
	from sklearn.base import BaseEstimator, clone
	from sklearn.utils import safe_mask
	class ModelByCluster(BaseEstimator):
	def __init__(self, clusterer, estimator):
	self.clusterer = clusterer
	self.estimator = estimator
	def fit(self, X, y):
	self.clusterer_ = clone(self.clusterer)
	clusters = self.clusterer_.fit_predict(X)
	n_clusters = len(np.unique(clusters))

jnothman / cppclass_playing.pyx

Created August 2, 2016 00:03

	# C++ class template parameterised on T, declared with Cython's `cdef cppclass`.
	cdef cppclass myclass[T]:
	# Single data member of the template type T.
	T x

	# Return the stored member `x`; the declared return type is a Python object.
	object foo(self):
	return self.x


	from cython cimport floating

	def get(floating x):

jnothman / clean.sh

Created September 29, 2016 14:29

Export GitHub issues to a spreadsheet

jnothman / view-circle-changed.js

Last active January 25, 2017 03:19

Bookmarklet: view scikit-learn changed documentation on CircleCI from PR page

	javascript:
	var status_lists = document.getElementsByClassName('merge-status-list');
	var ci_links = status_lists[status_lists.length - 1].getElementsByClassName('status-actions');
	for (var i in ci_links) {
	var ci_link = ci_links[i];
	if (ci_link.tagName.toUpperCase() != 'A') {
	ci_link = ci_link.getElementsByTagName('a')[0];
	}
	var url = ci_link.getAttribute('href');
	var match = /circleci.com\/.*?([0-9]+)\?/.exec(url);

jnothman / flexible-concat.py

Last active January 9, 2017 03:51

	import numpy as np
	from scipy import sparse


	def flexible_concatenate(it, final_len=None):
	"""Concatenate the elements of an iterable

	Supports generators of arrays, lists, sparse matrices or tuples thereof

	>>> import numpy as np