rule of thumb for the number of dimensions above which a K-d tree is not an ideal container:
rough dimension limit = log base 2 of the number of data points (n)
import numpy as NP
def fnx(n):
    '''
    Rough K-d tree dimension limit for a data set.

    pass in: number of data points (n)
    returns: log base 2 of n; per the rule of thumb, a K-d tree is not an
        ideal container above roughly this many dimensions
    '''
    return NP.log2(n)
| import numpy as NP | |
| d = NP.random.randint(1, 10, 40).reshape(8, 5) | |
| mv = memoryview(d) | |
| def mem_addr(arr): | |
| ''' | |
| returns: location of a NumPy array in memory, or |
| ''' | |
| fizzbuzz refers to a quick test to filter applicants for programmer jobs who don't actually know how to code. | |
| (see, e.g., http://imranontech.com/2007/01/24/using-fizzbuzz-to-find-developers-who-grok-coding/) | |
| "pass in a sequence of integers from 1 to 100; | |
| for integers that are multiples of three: print "fizz" | |
| for integers that are multiples of five: print "buzz" | |
| for integers that are multiples of five AND three: print "fizzbuzz" | |
| for remaining integers, print the integer value | |
| ''' |
| >>> data = NP.arange(20) | |
| >>> data | |
| array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]) | |
| # use floor division to account for an odd number of items in the array, | |
| # ie, 20//2 and 21//2 return the *same* result | |
| >>> mp = data.shape[0] // 2 | |
| ''' |
def fnx(arr):
    '''
    Find the largest difference between any two elements of an array.

    pass in: NumPy 1d array (other array-likes are accepted and flattened)
    returns: 3-tuple (idx_min_val, idx_max_val, max_diff), ie, the index of
        the first minimum, the index of the first maximum, and max - min
    raises: ValueError if the array is empty
    '''
    arr = NP.asarray(arr).reshape(-1)
    # the largest pairwise difference is always max - min, so locate the two
    # extremes directly instead of building the full n x n difference matrix;
    # this needs O(n) rather than O(n^2) memory and cannot wrap around the way
    # pairwise subtraction does on unsigned dtypes
    idx_min = arr.argmin()
    idx_max = arr.argmax()
    return (idx_min, idx_max, arr[idx_max] - arr[idx_min])
| <!DOCTYPE html> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> | |
| <title>dynamic data in d3: data join & update-append-exit pattern</title> | |
| <meta name="description" content=""> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"><!-- Place favicon.ico and apple-touch-icon.png in the root directory --> |
rule of thumb for the number of dimensions above which a K-d tree is not an ideal container:
rough dimension limit = log base 2 of the number of data points (n)
import numpy as NP
def fnx(n):
    '''
    Rough K-d tree dimension limit for a data set.

    pass in: number of data points (n)
    returns: log base 2 of n; per the rule of thumb, a K-d tree is not an
        ideal container above roughly this many dimensions
    '''
    return NP.log2(n)
# Demo: report which module defines the class of a caught exception.
try:
    ...  # placeholder for the code under test
except Exception as xe:
    # __class__.__module__ is the name of the module where the exception's
    # class was defined
    print("caught exception defined in module {}".format(xe.__class__.__module__))
| #!/usr/local/bin/python2.7 | |
| # encoding: utf-8 | |
| import time | |
| from timeit import default_timer | |
| class Timer(object): | |
| def __init__(self, verbose=True): | |
| self.verbose = verbose |
def str2num(t):
    '''
    Convert the first two items of t to numbers.

    pass in: indexable with at least two items; t[0] must be accepted by
        int() and t[1] by float()
    returns: 2-tuple (int(t[0]), float(t[1]))
    '''
    return (int(t[0]), float(t[1]))
| def parse_line(line): | |
| ''' | |
| returns: | |
| (i) score (scalar); | |
| (ii) adjacency dict (one row in sparse 2D array) | |
| pass in: |
| require(zoo) | |
| require(xts) | |
| # TSR is an R time series comprised of a datetime index and a single column of data | |
| # created like so: | |
# mock a datetime index by creating a date vector that covers the previous 500 days
# (a Date may only appear on the left-hand side of `-`, so the day count is
# subtracted from today's date rather than the other way round)
nd = Sys.Date()
st = nd - 500
idx = seq(from=st, to=nd, by='days')