Last active
June 5, 2017 07:31
-
-
Save hiropppe/88b01e27a1192796a0e656de5f15e00e to your computer and use it in GitHub Desktop.
Simple Cython dot product samples for nogil code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cython: boundscheck = False | |
# cython: wraparound = False | |
# cython: cdivision = True | |
import numpy as np | |
cimport numpy as np | |
from scipy.linalg cimport cython_blas | |
from libc.stdio cimport printf | |
from libc.stdlib cimport abort, malloc, free | |
cdef double cdot(int n, double dx[], double dy[]) nogil:
    """Return the dot product of the first ``n`` entries of ``dx`` and ``dy``.

    Runs as a plain C loop and does not need the GIL.  The result is 0.0
    when ``n`` is zero or negative, because the loop body never executes.
    """
    cdef double acc = 0.0
    cdef int k = 0
    while k < n:
        acc += dx[k] * dy[k]
        k += 1
    return acc
cdef double onehot_dot(int end, int onehot[], double w[]) nogil:
    """Sum the weights selected by a chained list of 1-based positions.

    ``onehot[0]`` holds the first position (0 means "nothing selected").
    For a visited position ``p`` the weight ``w[p - 1]`` is added and
    ``onehot[p]`` gives the next position; the walk stops at the first
    position that is not smaller than ``end``.
    """
    cdef int pos = onehot[0]
    cdef double total = 0.0
    if pos != 0:
        while pos < end:
            total = total + w[pos - 1]
            pos = onehot[pos]
    return total
def onehot_dot_sample():
    """Print ``onehot_dot`` for a small hand-built example.

    The weights are ``w[k] = k`` for k in 0..4 and the position chain is
    1 -> 3 -> 5 -> end, so the printed sum is w[0] + w[2] + w[4] = 6.0.
    """
    cdef int size = 5, end = size + 1
    cdef int onehot[6]
    cdef double w[5]
    # The index must be a C int: a bare ``cdef i`` declares a Python object
    # and drags the fill loop through the interpreter.
    cdef int i
    for i in range(size):
        w[i] = <double>i
    # Only the slots that lie on the chain are ever read, so only those
    # are initialised.
    onehot[0] = 1
    onehot[1] = 3
    onehot[3] = 5
    onehot[5] = end
    printf('onehot dot %7.3f', onehot_dot(end, onehot, w))
def blas_ddot_sample():
    """Print the BLAS ``ddot`` of two small strided vectors.

    ``x`` is visited with stride 2 and ``y`` with stride 1.  The five
    visited elements are 2.0 and 1.0 respectively, so the dot product
    is 10.0.

    Raises:
        MemoryError: if either buffer cannot be allocated.
    """
    cdef int n = 5, incx = 2, incy = 1, i
    # Smallest buffer that holds n elements at the given stride.
    cdef int len_x = 1 + (n - 1) * incx
    cdef int len_y = 1 + (n - 1) * incy
    cdef double *x = <double *>malloc(len_x * sizeof(double))
    cdef double *y = <double *>malloc(len_y * sizeof(double))
    try:
        if x == NULL or y == NULL:
            raise MemoryError()
        # Only the strided slots are written; ddot is given the same
        # strides, so the gaps in x are never read.
        for i in range(n):
            x[i * incx] = 2.0
            y[i * incy] = 1.0
        printf('ddot %7.3f', cython_blas.ddot(&n, x, &incx, y, &incy))
    finally:
        # free(NULL) is a no-op, so this is safe after a failed malloc.
        free(x)
        free(y)
def blas_ddot_2d_sample():
    """Print the sum of BLAS ``ddot`` over every row of a small matrix.

    ``dx`` is an r x c row-major matrix with ``dx[ri, ci] = ri * ci`` and
    ``dy`` is the vector ``dy[ci] = ci``.  Each row is dotted with ``dy``
    and the row results are added up (300.0 for r = c = 5).

    Raises:
        MemoryError: if either buffer cannot be allocated.
    """
    cdef int r = 5, c = 5, incx = 1, incy = 1
    cdef int len_r = c * incx      # elements per matrix row
    cdef int len_x = r * len_r
    cdef int len_y = c * incy
    cdef int ri, ci
    cdef double dot = 0
    cdef double *dx = <double *>malloc(len_x * sizeof(double))
    cdef double *dy = <double *>malloc(len_y * sizeof(double))
    try:
        if dx == NULL or dy == NULL:
            raise MemoryError()
        for ri in range(r):
            for ci in range(c):
                dx[ri * len_r + ci * incx] = ri * ci
        # dy is handed to ddot with stride incy, so it must be filled with
        # that same stride (the original indexed it with incx).
        for ci in range(c):
            dy[ci * incy] = ci
        for ri in range(r):
            dot += cython_blas.ddot(&c, &dx[ri * len_r], &incx, dy, &incy)
        printf('ddot %7.3f', dot)
    finally:
        # free(NULL) is a no-op, so this is safe after a failed malloc.
        free(dx)
        free(dy)
def npdot_speed():
    """Benchmark ``ndarray.dot`` on two 1000-element vectors.

    Runs 10000 timed calls, then prints the accumulated dot product and
    the mean time per call in microseconds.

    The vectors are float64 so that the measurement is comparable with the
    double-precision ``cdot`` / ``ddot`` benchmarks in this file, and so
    that the accumulated sum is not rounded to float32 precision.
    """
    # Highest-resolution wall clock available on the running interpreter.
    from timeit import default_timer as timer

    dx = np.arange(1000, dtype=np.float64)
    dy = 1000 - 1 - dx
    dot = 0
    elapsed = []
    for i in range(10000):
        s = timer()
        dot += dx.dot(dy)
        elapsed.append(timer() - s)
    # A single formatted argument prints the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print('npdot {}'.format(dot))
    print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
def cdot_speed():
    """Benchmark the hand-written ``cdot`` on two 1000-element vectors.

    Runs 10000 timed calls, then prints the accumulated dot product and
    the mean time per call in microseconds.
    """
    cdef double[1000] dx
    cdef double[1000] dy
    # Both loop indices are C ints; the original left ``j`` undeclared,
    # which made the fill loop iterate over Python objects.
    cdef int i, j
    cdef double dot = 0
    # Highest-resolution wall clock available on the running interpreter.
    from timeit import default_timer as timer

    for j in range(1000):
        dx[j] = j
        dy[j] = 1000 - 1 - j
    elapsed = []
    for i in range(10000):
        s = timer()
        dot += cdot(1000, dx, dy)
        elapsed.append(timer() - s)
    # A single formatted argument prints the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print('cdot {}'.format(dot))
    print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
def ddot_speed():
    """Benchmark BLAS ``ddot`` on two 1000-element vectors.

    Runs 10000 timed calls, then prints the accumulated dot product and
    the mean time per call in microseconds.

    Raises:
        MemoryError: if either buffer cannot be allocated.
    """
    cdef int n = 1000, incx = 1, incy = 1
    cdef int i
    # Typed as C ints so the size arithmetic stays in C; smallest buffer
    # that holds n elements at the given stride.
    cdef int len_x = 1 + (n - 1) * incx
    cdef int len_y = 1 + (n - 1) * incy
    cdef double dot = 0
    cdef double *dx = <double *>malloc(len_x * sizeof(double))
    cdef double *dy = <double *>malloc(len_y * sizeof(double))
    # Highest-resolution wall clock available on the running interpreter.
    from timeit import default_timer as timer

    try:
        if dx == NULL or dy == NULL:
            raise MemoryError()
        for i in range(n):
            dx[i * incx] = i
            dy[i * incy] = 1000 - 1 - i
        elapsed = []
        for i in range(10000):
            s = timer()
            dot += cython_blas.ddot(&n, dx, &incx, dy, &incy)
            elapsed.append(timer() - s)
        # A single formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('ddot {}'.format(dot))
        print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
    finally:
        # free(NULL) is a no-op, so this is safe after a failed malloc,
        # and the buffers are released even if timing or printing raises.
        free(dx)
        free(dy)
def onehot_dot_speed():
    """Benchmark ``onehot_dot`` on a 100000-element weight vector.

    The weights are random normal draws and the position chain is
    1 -> 2 -> 4 -> 8200 -> 50000 -> end.  Runs 10000 timed calls, then
    prints the accumulated result and the mean time per call in
    microseconds.

    Raises:
        MemoryError: if either buffer cannot be allocated.
    """
    cdef int size = 100000, end = size + 1
    cdef int i
    cdef double dot = 0
    # Heap allocation: as fixed-size locals these two arrays would take
    # roughly 1.2 MB of stack (with 4-byte int and 8-byte double), which
    # can exceed the stack limit on some platforms or threads.
    cdef int *onehot = <int *>malloc(end * sizeof(int))
    cdef double *w = <double *>malloc(size * sizeof(double))
    # Highest-resolution wall clock available on the running interpreter.
    from timeit import default_timer as timer

    try:
        if onehot == NULL or w == NULL:
            raise MemoryError()
        for i in range(size):
            w[i] = np.random.randn()
        # Only the slots that lie on the chain are ever read, so only
        # those are initialised.
        onehot[0] = 1
        onehot[1] = 2
        onehot[2] = 4
        onehot[4] = 8200
        onehot[8200] = 50000
        onehot[50000] = end
        elapsed = []
        for i in range(10000):
            s = timer()
            # Accumulate the result so the C compiler cannot discard the
            # call being timed as dead code.
            dot += onehot_dot(end, onehot, w)
            elapsed.append(timer() - s)
        # Printed like the other benchmarks in this file, which also keeps
        # ``dot`` observably used.
        print('onehot_dot {}'.format(dot))
        print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
    finally:
        # free(NULL) is a no-op, so this is safe after a failed malloc.
        free(onehot)
        free(w)
def ddot_2d_speed():
    """Benchmark row-by-row BLAS ``ddot`` over a 12 x 1000 matrix.

    ``dx`` is a row-major matrix with ``dx[ri, ci] = ri * ci`` and ``dy``
    is the vector ``dy[ci] = ci``.  Each of the 10000 timed iterations
    dots every row with ``dy``.  Afterwards the accumulated sum and the
    mean time per iteration in microseconds are printed.

    Raises:
        MemoryError: if either buffer cannot be allocated.
    """
    cdef int r = 12, c = 1000, incx = 1, incy = 1
    cdef int len_r = c * incx      # elements per matrix row
    cdef int len_x = r * len_r
    cdef int len_y = c * incy
    cdef int i, ri, ci
    cdef double dot = 0
    cdef double *dx = <double *>malloc(len_x * sizeof(double))
    cdef double *dy = <double *>malloc(len_y * sizeof(double))
    # Highest-resolution wall clock available on the running interpreter.
    from timeit import default_timer as timer

    try:
        if dx == NULL or dy == NULL:
            raise MemoryError()
        for ri in range(r):
            for ci in range(c):
                dx[ri * len_r + ci * incx] = ri * ci
        # dy is handed to ddot with stride incy, so it must be filled with
        # that same stride (the original indexed it with incx).
        for ci in range(c):
            dy[ci * incy] = ci
        elapsed = []
        for i in range(10000):
            s = timer()
            for ri in range(r):
                dot += cython_blas.ddot(&c, &dx[ri * len_r], &incx, dy, &incy)
            elapsed.append(timer() - s)
        # A single formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('ddot {}'.format(dot))
        print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
    finally:
        # free(NULL) is a no-op, so this is safe after a failed malloc,
        # and the buffers are released even if timing or printing raises.
        free(dx)
        free(dy)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment