Skip to content

Instantly share code, notes, and snippets.

@tianhuil
tianhuil / classifier_transform.py
Created November 9, 2019 05:20
Turns classifier's `predict_proba` into a transform
from sklearn.base import BaseEstimator, TransformerMixin
class ClassifierTransform(BaseEstimator, TransformerMixin):
    """Adapter that lets a classifier sit where a transformer is expected.

    Only the constructor and ``fit`` are defined in this excerpt; ``fit``
    simply trains the wrapped classifier.

    Parameters
    ----------
    clf : estimator
        The classifier to wrap. It must provide ``fit(X, y)``.
    """

    def __init__(self, clf):
        # Kept under the same name as the constructor argument, which is
        # what scikit-learn's BaseEstimator parameter introspection expects.
        self.clf = clf

    def fit(self, X, y=None):
        """Fit the wrapped classifier on ``(X, y)`` and return ``self``."""
        self.clf.fit(X, y)
        return self
@tianhuil
tianhuil / Residua Estimator.py
Last active November 8, 2019 22:54
This is a Residual Regressor and Residual Classifier
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
class _ResidualEstimator(BaseEstimator):
    """Two-stage estimator: a base model plus a model of its residuals.

    Parameters
    ----------
    base : estimator
        First-stage model; must provide ``fit(X, y)`` and ``predict(X)``.
    residual : estimator
        Second-stage model; fitted on ``y - base.predict(X)``.
    """

    def __init__(self, base, residual):
        self.base = base
        self.residual = residual

    def fit(self, X, y):
        """Fit ``base`` on ``(X, y)``, then ``residual`` on what is left over.

        ``y`` must support subtraction with the output of ``base.predict``
        (presumably a numeric array -- confirm against callers).

        Returns
        -------
        self
            Returned so calls can be chained, as scikit-learn estimators do.
        """
        self.base.fit(X, y)
        # The second stage only sees the part of y the base model missed.
        self.residual.fit(X, y - self.base.predict(X))
        return self
@tianhuil
tianhuil / update_branches.ipy
Last active December 3, 2017 18:25
Finds branches that can be safely merged
#!ipython
# to use this run these two steps
# curl https://gist.githubusercontent.com/tianhuil/a6675835a7a0c157fbcb296a743f52d4/raw/704a20201dd4362928f6e39ab0ab0bc0784b2af9/update_branches.ipy > update_branches.ipy
# ipython update_branches.ipy
# Switch to master first, so `git branch` lists it as the current branch ("* master").
!git checkout master
# IPython `!` capture: `branches` holds the lines printed by `git branch`.
branches = !git branch
# Drop the current-branch entry and strip the leading whitespace from the rest.
other_branches = [b.lstrip() for b in branches if b != '* master']
# Starts empty; presumably filled in per branch later in the script (not visible here).
results = {}
# cd into a new folder and observe the error in the last line.
# Start clean: remove any email.py* files left over from a previous run
# (`yes` answers any confirmation prompt rm may issue).
yes | rm email.py*
# foo.py imports from the `email` package and prints OK (Python 2 print syntax).
echo "from email.utils import formatdate; print 'OK'" > foo.py
# First run, with an empty PYTHONPATH and no local email.py present.
PYTHONPATH="" python foo.py
# Create an empty email.py next to foo.py ...
touch email.py
# ... and run again: this is the run expected to fail, presumably because the
# local email.py now shadows the standard-library package of the same name.
PYTHONPATH="" python foo.py
@tianhuil
tianhuil / Learning_React.md
Created December 27, 2016 03:56
Learning React
@tianhuil
tianhuil / dicewords.py
Created June 20, 2015 15:00
A script to choose high-entropy but memorable passphrases
#!/usr/bin/python
"""
Script to generate passphrases according to the vocabular in:
http://world.std.com/~reinhold/diceware.wordlist.asc
To see why this might be a better algorithm for choosing a password:
https://blog.agilebits.com/2011/06/21/toward-better-master-passwords/
Usage: ./dicewords.py n m
@tianhuil
tianhuil / multiprocessing_pickle_hack.py
Created October 10, 2014 20:02
When you encounter pickle errors in multiprocessing
# from http://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-pythons-multiprocessing-pool-ma
from multiprocessing import Pool, cpu_count
from multiprocessing.pool import ApplyResult
# --------- see Steven's solution above -------------
from copy_reg import pickle
from types import MethodType
def _pickle_method(method):
@tianhuil
tianhuil / install_hadoop.sh
Last active August 29, 2015 14:01
Installing Hadoop on Ubuntu
# Installing Hadoop (CDH4) on Ubuntu
# http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-Quick-Start/cdh4qs_topic_3_2.html
# While we're at it, let's install the JDK ...
# (-y makes apt-get assume "yes" at its prompts, instead of piping "y" into stdin)
sudo apt-get -y install openjdk-6-jdk
# ... and Yelp's mrjob
pip install mrjob
# Install maven
@tianhuil
tianhuil / setup_venv.sh
Created May 9, 2014 20:17
Setting up virtual environments
# If virtualenv is not installed yet, install it first:
# pip install virtualenv
# From inside the project folder, create an environment in ./venv ...
virtualenv venv
# ... and activate it by sourcing the script into the current shell.
. venv/bin/activate
# To leave the environment again, run:
# deactivate
@tianhuil
tianhuil / setup_digitalocean.sh
Last active March 16, 2021 10:04
Setup Digital Ocean Ubuntu Droplet with scientific python and mysql
# To run this command:
# curl https://gist.githubusercontent.com/tianhuil/0aa9b265f55413dc7198/raw > setup_digitalocean.sh
# . setup_digitalocean.sh
# Refresh apt's package lists before installing anything
sudo apt-get update
# Scientific Python stack: compiler toolchain, headers, NumPy/SciPy and ATLAS
sudo apt-get -y install --fix-missing \
    build-essential python-dev python-numpy python-setuptools python-scipy libatlas-dev
# scikit-learn, installed in a separate step
sudo apt-get -y install --fix-missing \
    build-essential python-sklearn