Per Vognsen pervognsen

Multi-dimensional array views for systems programmers

As C programmers, most of us think of pointer arithmetic for multi-dimensional arrays in a nested way:

The address for a 1-dimensional array is base + x. The address for a 2-dimensional array is base + x + y*x_size for row-major layout and base + y + x*y_size for column-major layout. The address for a 3-dimensional array is base + x + (y + z*y_size)*x_size for row-column-major layout. And so on.

	A = blocks(A, (w, h))
	B = blocks(B, (h, w))
	C = blocks(out, (w, w))
	for j in range(B.shape[0]):
	for k in range(B.shape[1]):
	np.copyto(column_panel, B[j, k])
	for i in range(A.shape[0]):
	np.copyto(row_panel, A[i, j])
	microkernel(C[i, k], row_panel, column_panel)

	# Version 0: Use slice views to access a matrix like a block matrix.
	def blocked_matrix_multiply0(A, B, block_size, panel_size):
	m, p, n = A.shape[0], A.shape[1], B.shape[1]
	w, h = block_size, panel_size
	C = np.zeros((m, n))
	for i in range(0, m, w):
	for j in range(0, p, h):
	row_panel = A[i:i+w, j:j+h]
	for k in range(0, n, w):
	column_panel = B[j:j+h, k:k+w]

	A = np.arange(8 * 4).reshape(8, 4)
	print(A)
	# [[ 0 1 2 3]
	# [ 4 5 6 7]
	# [ 8 9 10 11]
	# [12 13 14 15]
	# [16 17 18 19]
	# [20 21 22 23]
	# [24 25 26 27]
	# [28 29 30 31]]

	import numpy as np

	def plu_inplace(A):
	P = np.arange(len(A))
	for i in range(len(A)):
	j = i + np.argmax(np.abs(A[i:, i]))
	P[[i, j]], A[[i, j]] = P[[j, i]], A[[j, i]]
	A[i+1:, i] /= A[i, i]
	A[i+1:, i+1:] -= np.outer(A[i+1:, i], A[i, i+1:])
	return P, A, A

	# Tensor product contractions with Einstein's summation notation. Examples:
	# Matrix-matrix multiply is tensor(A, 'ij', B, 'jk')
	# Matrix-vector multiply is tensor(A, 'ij', v, 'j')
	# Matrix trace is tensor(A, 'ii')
	# Matrix transpose is tensor(A, 'ji')
	# Matrix diagonal is tensor('i', A, 'ii')
	# Inner product is tensor(v, 'i', w, 'i')
	# Outer product is tensor(v, 'i', w, 'j')
	def tensor(args, *kwargs):
	result = ''

	def cmp(x, y):
	return (x > y) - (x < y)

	nil = object()

	def sorted_zip(xs, ys, key=lambda x: x, cmp=cmp, nil=nil):
	xs = iter(xs)
	ys = iter(ys)
	x = next(xs, nil)
	y = next(ys, nil)

	class DiscreteDistribution:
	def __init__(self):
	self.total_weight = 0
	self.points = {}

	def add(self, point, weight):
	assert point not in self.points
	self.points[point] = weight
	self.total_weight += weight

	# Reverse-mode automatic differentiation

	import math

	# d(-x) = -dx
	def func_neg(x):
	return -x, [-1]

	# d(x + y) = dx + dy
	def func_add(x, y):

	from itertools import tee
	from random import randrange

	num_random_bits = 0

	def random_bit():
	global num_random_bits
	num_random_bits += 1
	return randrange(2)