Rule of thumb for the number of dimensions above which a k-d tree is no longer an ideal container:
rough dimension limit = log base 2 of n, where n is the number of data points
import numpy as NP
def fnx(n):
    """Return the base-2 logarithm of *n*.

    Used as a rough upper limit on the number of dimensions for which a
    k-d tree is still a reasonable container, where *n* is the number of
    data points.

    *n* is passed straight to ``NP.log2``, so it may be a scalar or an
    array-like; the result has the corresponding shape.
    """
    return NP.log2(n)
| import time | |
| import numpy as NP | |
| from redis import StrictRedis as redis | |
| # a 2D array to serialize | |
| A = 10 * NP.random.randn(10000).reshape(1000, 10) | |
| # flatten the 2D NumPy array and save it as a binary string | |
| array_dtype = str(A.dtype) |
| >>> from scipy import misc as MSC | |
| >>> import os | |
| >>> df = os.path.expanduser("~/any-image.png") | |
| >>> # now read the image in as a multi-dimensional array (matrix) | |
| >>> # 'imread' is a wrapper over a PIL method | |
| >>> mg1 = MSC.imread(df) | |
| >>> mg1.shape |
| from datetime import datetime as DT | |
| #------------------ epoch time -----------------# | |
| >>> import time | |
| >>> time.time() | |
| 1411060580.205373 |
| df = '~/path/to/some/text/file.csv' | |
| df1 = 'path/to/some/text/file.gz' | |
| import zlib | |
| import gzip | |
| with open(df, 'r', encoding='utf-8') as fh: | |
| tx = fh.read() | |
| # compress a text file |
| require(zoo) | |
| require(xts) | |
| # TSR is an R time series comprised of a datetime index and a single column of data | |
| # created like so: | |
| # mock a datetime index, by creating a data vector comprised of the previous 500 days | |
| nd = Sys.Date() | |
| st = 500 - nd | |
| idx = seq(from=st, to = nd, by='days') |
| def str2num(t): | |
| return ( int(t[0]), float(t[1]) ) | |
| def parse_line(line): | |
| ''' | |
| returns: | |
| (i) score (scalar); | |
| (ii) adjacency dict (one row in sparse 2D array) | |
| pass in: |
| #!/usr/local/bin/python2.7 | |
| # encoding: utf-8 | |
| import time | |
| from timeit import default_timer | |
| class Timer(object): | |
| def __init__(self, verbose=True): | |
| self.verbose = verbose |
| try: | |
| ... | |
| except Exception as xe: | |
| print("caught exception defined in module {}".format(xe.__class__.__module__) |
Rule of thumb for the number of dimensions above which a k-d tree is no longer an ideal container:
rough dimension limit = log base 2 of n, where n is the number of data points
import numpy as NP
def fnx(n):
    """Return the base-2 logarithm of *n*.

    Used as a rough upper limit on the number of dimensions for which a
    k-d tree is still a reasonable container, where *n* is the number of
    data points.

    *n* is passed straight to ``NP.log2``, so it may be a scalar or an
    array-like; the result has the corresponding shape.
    """
    return NP.log2(n)