Joel Nothman jnothman

Software engineer specialising in NLP and applied data science

jnothman / cached_transform_mixin.py

Last active August 17, 2017 01:15

Using a mixin to cache a transform method call in scikit-learn

	from sklearn.feature_extraction.text import CountVectorizer
	from joblib import Memory
	from sklearn.base import clone
	from sklearn.datasets import fetch_20newsgroups


	class CachedTransformMixin:
	memory = Memory('/tmp/cache')

	def transform(self, *args, **kwargs):

jnothman / auspoliticians-wikidata.rq

Created July 6, 2017 03:26

Australian politicians'/parliamentarians' history from Wikidata

jnothman / bench_semi_supervised_n_iter

Created July 5, 2017 08:24

Benchmarking `sklearn.semi_supervised` `n_iter_` as a function of model and data characteristics

	import numpy as np
	from sklearn import datasets
	from sklearn.semi_supervised import LabelPropagation, LabelSpreading
	###for n_samples in [20, 200, 2000, 20000]:
	### X, y = datasets.make_classification(n_samples=n_samples, n_classes=3, n_informative=3)
	for (X, y) in [datasets.load_iris(return_X_y=True)]:
	for model in [LabelPropagation(max_iter=1000),
	#LabelSpreading(alpha=0.01),
	#LabelSpreading(alpha=0.1),
	#LabelSpreading(alpha=0.3)

jnothman / doc2dash-userguide.py

Last active May 24, 2017 01:52

	from doc2dash.parsers.intersphinx import (InterSphinxParser,
	inv_entry_to_path,
	ParserEntry)
	import doc2dash.parsers

	class InterSphinxWithUserGuide(InterSphinxParser):
	    """InterSphinx parser that reports Sphinx documents as Dash guides."""

	    def convert_type(self, inv_type):
	        """Return the Dash entry type for the inventory type ``inv_type``.

	        ``'std:doc'`` entries are reported as ``'Guide'``; every other
	        inventory type is delegated to the parent class's ``convert_type``.
	        """
	        if inv_type == 'std:doc':  # sphinx type
	            return 'Guide'  # Dash type
	        # Anything else keeps the parent parser's mapping.
	        return super(InterSphinxWithUserGuide, self).convert_type(inv_type)

jnothman / meta.yaml

Created April 28, 2017 02:34

Conda recipe for cssdecl, produced by `conda skeleton pypi cssdecl`

	package:
	name: cssdecl
	version: "0.1.1"

	source:
	fn: cssdecl-0.1.1.tar.gz
	url: https://pypi.python.org/packages/c8/6a/5620e9f501f2332fe11fa3fc227a73458dc0d0ac43fce81d622906708789/cssdecl-0.1.1.tar.gz
	md5: b6f421becf8f14843de7bf821ccd80c2
	# patches:
	# List any patch files here

jnothman / onto-screen.scpt

Last active December 3, 2024 11:46

AppleScript to get windows back on screen

	#!/usr/bin/osascript

	on run argv
	set l to 0
	set t to 0
	repeat with j from 1 to (count argv)
	set a to item j of argv
	tell application a
	repeat with x from 1 to (count windows)
	set b to bounds of window x

jnothman / styler_excel_test.py

Last active April 8, 2017 11:12

Demonstration of https://github.com/pandas-dev/pandas/pull/15530


	# coding: utf-8

	import pandas as pd
	import numpy as np


	df = pd.DataFrame(np.random.rand(5, 10))

jnothman / count_ngrams.py

Created March 22, 2017 23:48

	from __future__ import print_function
	from collections import Counter

	import nltk


	def count_ngrams(tokens, min_unigram_freq=2, min_ngram_freq=5, max_n=5):
	n_tokens = len(tokens)
	print('Number of tokens:', n_tokens)
	unigram_freqs = Counter(tokens)

jnothman / bibtex.py

Created March 22, 2017 11:33

example/test for scikit-learn#7602

	"""
	============================
	Classifier Chain
	============================
	An ensemble of 10 logistic regression classifier chains trained on a
	multi-label dataset achieves a higher Jaccard similarity score than a set
	of independently trained logistic regression models.

	"""

jnothman / resample.py

Last active February 22, 2017 23:53

Scikit-learn resampling as CV wrapper

	import numpy as np

	class Resample(object):
	def __init__(self, cv, method='under'):
	self.cv = cv
	self.method = method

	def split(self, X, y, **kwargs):
	for train_idx, test_idx in self.cv.split(X, y, **kwargs):
	counts = np.bincount(y[train_idx]) # assumes y are from {0, 1..., n_classes-1}