Last active
February 23, 2018 04:48
-
-
Save sklam/4979921 to your computer and use it in GitHub Desktop.
Numba-fy Literate Jenks http://macwright.org/2013/02/18/literate-jenks.html.
Original python code: https://gist.github.com/llimllib/4974446
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from pprint import pprint as pp | |
import numpy as np | |
from numba import autojit, typeof, int32 | |
INF = float('inf') | |
@autojit | |
def jenks_matrics_init(data, n_classes, ): | |
#fill the matrices with data+1 arrays of n_classes 0s | |
n_data = len(data) | |
lower_class_limits = np.zeros((n_data + 1, n_classes + 1), dtype=data.dtype) | |
variance_combinations = np.zeros((n_data + 1, n_classes + 1), dtype=data.dtype) | |
for i in xrange(1, n_classes + 1): | |
lower_class_limits[1, i] = 1. | |
variance_combinations[1, i] = 0. | |
for j in xrange(2, len(data) + 1): | |
variance_combinations[j, i] = INF | |
return lower_class_limits, variance_combinations | |
@autojit | |
def jenks_matrices(data, n_classes, lower_class_limits, variance_combinations): | |
variance = 0.0 | |
for l in range(2, len(data) + 1): | |
sum = 0.0 | |
sum_squares = 0.0 | |
w = 0.0 | |
for m in range(1, l + 1): | |
# `III` originally | |
lower_class_limit = l - m + 1 | |
val = data[lower_class_limit - 1] | |
# here we're estimating variance for each potential classing | |
# of the data, for each potential number of classes. `w` | |
# is the number of data points considered so far. | |
w += 1 | |
# increase the current sum and sum-of-squares | |
sum += val | |
sum_squares += val * val | |
# the variance at this point in the sequence is the difference | |
# between the sum of squares and the total x 2, over the number | |
# of samples. | |
variance = sum_squares - (sum * sum) / w | |
i4 = lower_class_limit - 1 | |
if i4 != 0: | |
for j in xrange(2, n_classes + 1): | |
if variance_combinations[l, j] >= (variance + variance_combinations[i4, j - 1]): | |
lower_class_limits[l, j] = lower_class_limit | |
variance_combinations[l, j] = variance + variance_combinations[i4, j - 1] | |
lower_class_limits[l, 1] = 1. | |
variance_combinations[l, 1] = variance | |
return lower_class_limits, variance_combinations | |
def get_jenks_breaks(data, lower_class_limits, n_classes): | |
k = int(len(data) - 1) | |
kclass = np.zeros(n_classes + 1, dtype=data.dtype) | |
kclass[n_classes] = data[len(data) - 1] | |
kclass[0] = data[0] | |
for countNum in xrange(n_classes, 1, -1): | |
elt = int(lower_class_limits[k, countNum] - 2) | |
kclass[countNum - 1] = data[elt] | |
k = int(lower_class_limits[k, countNum] - 1) | |
return kclass | |
def jenks(data, n_classes): | |
if n_classes > len(data): return | |
data.sort() | |
lower_class_limits, variance_combinations = jenks_matrics_init(data, n_classes) | |
jenks_matrices(data, n_classes, lower_class_limits, variance_combinations) | |
return get_jenks_breaks(data, lower_class_limits, n_classes) | |
def main(): | |
rawdata = json.load(open('test.json')) | |
data = np.array(rawdata) | |
pp(jenks(data, 5).tolist()) | |
if __name__ == "__main__": | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
jenks2$ time python numbajenks2.py | |
[0.0028109620325267315, | |
2.0935479691252112, | |
4.205495140049607, | |
6.178148351609707, | |
8.09175917180255, | |
9.997982932254672] | |
real 0m0.865s | |
user 0m0.735s | |
sys 0m0.127s | |
jenks2$ time python jenks2.py | |
[0.0028109620325267315, | |
2.0935479691252112, | |
4.205495140049607, | |
6.178148351609707, | |
8.09175917180255, | |
9.997982932254672] | |
real 0m4.390s | |
user 0m4.373s | |
sys 0m0.016s | |
Or, | |
%timeit jenks2.main() | |
1 loops, best of 3: 4.58 s per loop | |
%timeit numbajenks2.main() | |
1 loops, best of 3: 18.3 ms per loop | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment