Last active
July 25, 2024 01:25
-
-
Save naught101/14042d91a2d0f18a6ae4 to your computer and use it in GitHub Desktop.
Python rank benchmarking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Benchmarking the methods at | |
http://stackoverflow.com/questions/5284646/rank-items-in-an-array-using-python-numpy | |
""" | |
import timeit | |
from scipy.stats import rankdata | |
import pandas as pd | |
import numpy as np | |
def rank_arange(array):
    """Return the 0-based rank of every element of a 1-D array.

    The smallest element gets rank 0, the next smallest rank 1, and so on.
    The ranks are produced with a single ``argsort`` followed by a scatter
    assignment.

    Parameters
    ----------
    array : array-like, 1-D
        Values to rank. Anything ``np.asarray`` accepts (ndarray, list, ...).

    Returns
    -------
    numpy.ndarray
        Integer array of the same length as ``array`` where element ``i`` is
        the rank of ``array[i]``. Equal values receive distinct ranks, in
        whatever order ``argsort`` places them.
    """
    order = np.asarray(array).argsort()
    ranks = np.empty(len(order), int)
    # order[k] is the index of the k-th smallest value, so scattering
    # 0..n-1 through it writes each element's rank at its own position.
    ranks[order] = np.arange(len(order))
    return ranks
def rank_argsort(array):
    """Return the 0-based rank of every element of a 1-D array.

    Applies ``argsort`` twice: the first call gives the indices that sort
    the input, the second call inverts that permutation, which yields the
    rank of each original element.

    Parameters
    ----------
    array : array-like, 1-D
        Values to rank. Anything ``np.asarray`` accepts (ndarray, list, ...).

    Returns
    -------
    numpy.ndarray
        Integer array where element ``i`` is the rank of ``array[i]``.
    """
    return np.asarray(array).argsort().argsort()
# Array sizes to benchmark and the names of the ranking functions to time.
ns = [5, 10, 50, 100, 500, 1000, 5000, 10000]
funcs = ['rank_arange', 'rank_argsort', 'rankdata']

# One row per array size; one timing column per function (NaN until measured).
results = pd.DataFrame(np.full((len(ns), 1 + len(funcs)), np.nan),
                       columns=['n'] + funcs)
results['n'] = ns

for n in ns:
    # Every function is timed on the same random input for a given size.
    array = np.random.rand(n)
    for f in funcs:
        # Passing globals() lets the timed statement see `array` and the
        # ranking functions whatever name this module is run under.
        r = timeit.timeit('%s(array)' % f, globals=globals(), number=1000)
        # `.ix` was removed from pandas; `.loc` does the same label/boolean
        # based assignment.
        results.loc[results.n == n, f] = r

# Total seconds for 1000 calls of each function at each size.
print(results)
"""Results: | |
n rank_arange rank_argsort rankdata | |
0 5 0.008286 0.002283 0.020194 | |
1 10 0.004828 0.001600 0.019356 | |
2 50 0.007560 0.002222 0.019348 | |
3 100 0.004734 0.003443 0.016481 | |
4 500 0.012260 0.022422 0.033053 | |
5 1000 0.035825 0.097287 0.062478 | |
6 5000 0.443329 0.754238 0.492829 | |
7 10000 0.951714 1.658155 1.028654 | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment