Skip to content

Instantly share code, notes, and snippets.

View jnothman's full-sized avatar

Joel Nothman jnothman

  • Canva
  • Sydney
View GitHub Profile
@jnothman
jnothman / cached_transform_mixin.py
Last active August 17, 2017 01:15
Using a mixin to cache a transform method call in scikit-learn
from sklearn.feature_extraction.text import CountVectorizer
from joblib import Memory
from sklearn.base import clone
from sklearn.datasets import fetch_20newsgroups
class CachedTransformMixin:
memory = Memory('/tmp/cache')
def transform(self, *args, **kwargs):
@jnothman
jnothman / auspoliticians-wikidata.rq
Created July 6, 2017 03:26
Australian politicians'/parliamentarians' history from Wikidata
SELECT
?subj
?subjLabel
?prop
?position
?positionLabel
?start
?end
?district
?districtLabel
@jnothman
jnothman / bench_semi_supervised_n_iter
Created July 5, 2017 08:24
Benchmarking `sklearn.semi_supervised` `n_iter_` as a function of model and data characteristics
import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
###for n_samples in [20, 200, 2000, 20000]:
### X, y = datasets.make_classification(n_samples=n_samples, n_classes=3, n_informative=3)
for (X, y) in [datasets.load_iris(return_X_y=True)]:
for model in [LabelPropagation(max_iter=1000),
#LabelSpreading(alpha=0.01),
#LabelSpreading(alpha=0.1),
#LabelSpreading(alpha=0.3)
from doc2dash.parsers.intersphinx import (InterSphinxParser,
inv_entry_to_path,
ParserEntry)
import doc2dash.parsers
class InterSphinxWithUserGuide(InterSphinxParser):
    """InterSphinx parser that also exposes Sphinx documents as Dash guides."""

    def convert_type(self, inv_type):
        """Translate an inventory type into a Dash entry type.

        The Sphinx type ``'std:doc'`` is mapped to the Dash type ``'Guide'``;
        every other type is delegated to the parent implementation.
        """
        if inv_type != 'std:doc':
            # Not a Sphinx document entry: let the base parser decide.
            return super(InterSphinxWithUserGuide, self).convert_type(inv_type)
        return 'Guide'
@jnothman
jnothman / meta.yaml
Created April 28, 2017 02:34
Conda recipe for cssdecl, produced by `conda skeleton pypi cssdecl`
package:
name: cssdecl
version: "0.1.1"
source:
fn: cssdecl-0.1.1.tar.gz
url: https://pypi.python.org/packages/c8/6a/5620e9f501f2332fe11fa3fc227a73458dc0d0ac43fce81d622906708789/cssdecl-0.1.1.tar.gz
md5: b6f421becf8f14843de7bf821ccd80c2
# patches:
# List any patch files here
@jnothman
jnothman / onto-screen.scpt
Last active December 3, 2024 11:46
AppleScript to get windows back on screen
#!/usr/bin/osascript
on run argv
set l to 0
set t to 0
repeat with j from 1 to (count argv)
set a to item j of argv
tell application a
repeat with x from 1 to (count windows)
set b to bounds of window x
# coding: utf-8
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.rand(5, 10))
from __future__ import print_function
from collections import Counter
import nltk
def count_ngrams(tokens, min_unigram_freq=2, min_ngram_freq=5, max_n=5):
n_tokens = len(tokens)
print('Number of tokens:', n_tokens)
unigram_freqs = Counter(tokens)
@jnothman
jnothman / bibtex.py
Created March 22, 2017 11:33
example/test for scikit-learn#7602
"""
============================
Classifier Chain
============================
An ensemble of 10 logistic regression classifier chains trained on a
multi-label dataset achieves a higher Jaccard similarity score than a set
of independently trained logistic regression models.
"""
@jnothman
jnothman / resample.py
Last active February 22, 2017 23:53
Scikit-learn resampling as CV wrapper
import numpy as np
class Resample(object):
def __init__(self, cv, method='under'):
self.cv = cv
self.method = method
def split(self, X, y, **kwargs):
for train_idx, test_idx in self.cv.split(X, y, **kwargs):
counts = np.bincount(y[train_idx]) # assumes y are from {0, 1..., n_classes-1}