Skip to content

Instantly share code, notes, and snippets.

View twolodzko's full-sized avatar

Timothy Wolodzko twolodzko

View GitHub Profile
@twolodzko
twolodzko / matplotlib_identity_line.py
Created October 7, 2019 13:11
Identity line for matplotlib
import matplotlib.pyplot as plt
def identity_line(ax=None, ls='--', *args, **kwargs):
# see: https://stackoverflow.com/q/22104256/3986320
ax = ax or plt.gca()
identity, = ax.plot([], [], ls=ls, *args, **kwargs)
def callback(axes):
low_x, high_x = ax.get_xlim()
low_y, high_y = ax.get_ylim()
low = min(low_x, low_y)
@twolodzko
twolodzko / partial_correlation.py
Last active July 12, 2019 16:35
Partial correlation
import numpy as np
def pcor(X, rowvar=False):
"""
Partial correlation
Implemented as in pcor::pcor function in R.
Kim, S. (2015) ppcor: An R Package for a Fast Calculation to Semi-partial Correlation Coefficients.
Communications for Statistical Applications and Methods, 22(6), 665-674.
@twolodzko
twolodzko / sensivity_specifity_cutoff.py
Last active March 1, 2025 16:15
Use Youden index to determine cut-off for classification
import numpy as np
from sklearn.metrics import roc_curve
def sensivity_specifity_cutoff(y_true, y_score):
'''Find data-driven cut-off for classification
Cut-off is determined using Youden's index defined as sensitivity + specificity - 1.
Parameters
----------
@twolodzko
twolodzko / clustered_train_test_split.py
Last active October 1, 2018 10:36
Split to train and test samples by clusters
import numpy as np
def train_test_split(*arrays, test_size, random_state, clusters):
'''Split to train and test samples by clusters
Parameters
----------
test_size : float, 0 < test_size < 1
fraction of clusters to include in test set
@twolodzko
twolodzko / WeightedAverage_layer.py
Created September 20, 2018 08:57
WeightedAverage merging layer for Keras
from keras import backend as K
from keras.layers import Average
from keras.activations import softmax
class WeightedAverage(Average):
def build(self, input_shape):
self.kernel = self.add_weight(name='kernel',
shape=(1, len(input_shape)),
initializer='ones',
@twolodzko
twolodzko / lr_finder.py
Created September 12, 2018 08:16 — forked from jeremyjordan/lr_finder.py
Keras Callback for finding the optimal range of learning rates
from keras.callbacks import Callback
import matplotlib.pyplot as plt
class LRFinder(Callback):
'''
A simple callback for finding the optimal learning rate range for your model + dataset.
# Usage
```python
@twolodzko
twolodzko / one_hot_xxhash.py
Created September 7, 2018 12:19
One hot encoder using xxHash
import xxhash
from keras.preprocessing.text import hashing_trick
# one_hot and hashing_trick in Keras both use by default python's hash function
# it is unstable: https://stackoverflow.com/q/27522626/3986320
# alternatively, you could use md5, but it's not the fastest hashing function
# xxHash package offers a faster alternative
# PEP 8 (E731): use named `def`s instead of lambdas bound to names — same
# behavior, but with proper docstrings and readable tracebacks.
# NOTE(review): relies on the third-party `xxhash` package and Keras'
# `hashing_trick`, both imported at the top of this snippet.
def xxh(w):
    """Hash the string *w* to an unsigned 32-bit int via xxHash32.

    Unlike Python's builtin ``hash()`` (which is randomized per run, see the
    comment above), xxHash is stable across interpreter runs, so the encoding
    is reproducible.
    """
    return int(xxhash.xxh32(w.encode()).hexdigest(), 16)


def one_hot(x, n, **kwargs):
    """Encode text *x* into *n* hash buckets using the stable xxHash function.

    Drop-in replacement for Keras' ``one_hot``; extra keyword arguments are
    forwarded to ``hashing_trick``.
    """
    return hashing_trick(x, n, hash_function=xxh, **kwargs)
@twolodzko
twolodzko / data_cleaning.sh
Last active August 22, 2018 09:21
Handy command line data cleaning functions in bash / sed / awk
# Many of the functions are adapted from:
# https://www.polydesmida.info/cookbook/functions.html
nohead ()
{
if [[ $# -eq 0 || "$1" == "-h" ]] ; then
echo "Useage: nohead file [n=1]"
echo "Omit header (n rows) from a file."
@twolodzko
twolodzko / TopKTokenizer-class.py
Last active August 21, 2018 07:09
Tokenize Top K Words
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six import iteritems
from tqdm import tqdm
from collections import Counter
import re
class TopKTokenizer(object):
@twolodzko
twolodzko / tqdm_function_decorator.py
Last active August 16, 2018 12:55
tqdm Function decorator
from tqdm import tqdm
def tqdm_function_decorator(*args, **kwargs):
"""
Decorate a function by adding a progress bar
Parameters
----------
*args, **kwargs