Last active
August 26, 2020 20:42
-
-
Save ezietsman/226473 to your computer and use it in GitHub Desktop.
Python, Cython, Fortran f2py and OpenCL versions of a Deeming periodogram
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
f2py -c -m deeming periodogram.f90 -lgomp | |
f2py -c -m deemingomp periodogram.f90 --f90flags="-fopenmp " -lgomp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Cython version of the Deeming periodogram function. | |
# See deeming.py | |
# | |
import numpy as np | |
cimport numpy as np | |
cimport cython | |
cdef extern from 'math.h': | |
double sin(double theta) | |
double cos(double theta) | |
DTYPE = np.float | |
ctypedef np.float_t DTYPE_t | |
@cython.cdivision(True) | |
@cython.boundscheck(False) | |
def periodogram(np.ndarray[DTYPE_t, ndim=1] t not None, \ | |
np.ndarray[DTYPE_t, ndim=1] m not None,\ | |
np.ndarray[DTYPE_t, ndim=1] freqs not None): | |
cdef DTYPE_t realpart = 0.0 | |
cdef DTYPE_t imagpart = 0.0 | |
cdef DTYPE_t pi = np.pi | |
cdef int Nf = freqs.shape[0] | |
cdef int Nt = t.shape[0] | |
cdef np.ndarray[DTYPE_t, ndim=1] amps = np.zeros([Nf], dtype=DTYPE) | |
cdef int i, j | |
for i in range(Nf): | |
for j in range(Nt): | |
realpart = realpart + m[j]*cos(2.0*pi*freqs[i]*t[j]) | |
imagpart = imagpart + m[j]*sin(2.0*pi*freqs[i]*t[j]) | |
amps[i] = 2.0*(realpart**2 + imagpart**2)**0.5/Nt | |
realpart = 0.0 | |
imagpart = 0.0 | |
return amps | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
''' | |
Testing the efficiency of different implimentations of the Deeming | |
periodogram. Its an O(N*N) algorithm for calculating a periodogram but | |
it works for unevenly sampled data. Used frequently in astronomical | |
time-series. | |
cyperiodogram contains a cython implimentation | |
periodogram.f90 contains two Fortran 90 implimentations | |
1 : Using the same array syntax as numpy | |
2 : Hand coded loops (Much faster in this case) | |
The fortran subroutines use openmp and are wrapped using f2py. Look in | |
build_deeming.sh for the compilation commands. | |
The OpenCL version uses deeming_kernel.cl for computations on the opencl device | |
(A GPU in my case) | |
To run this example (in Linux): | |
sh build_deeming.sh | |
python deeming.py | |
The cython extension will be compiled when running the deeming.py file. | |
''' | |
import numpy as np | |
import deemingomp | |
import time | |
import pyximport; pyximport.install() | |
import cyperiodogram as cy | |
import pyopencl as cl | |
import matplotlib.pyplot as plt | |
import matplotlib.style | |
matplotlib.style.use('ggplot') | |
def timeit(method): | |
''' | |
From: http://www.samuelbosch.com/2012/02/timing-functions-in-python.html | |
''' | |
def timed(*args, **kw): | |
ts = time.time() | |
result = method(*args, **kw) | |
te = time.time() | |
# print('%r %2.2f sec' % (method.__name__, te-ts)) | |
return result, (te-ts) | |
return timed | |
# Deeming periodogram done with numpy | |
@timeit | |
def periodogram_numpy(t, m, freqs): | |
''' Calculate the Deeming periodogram using numpy | |
''' | |
pi = np.pi | |
amps = np.zeros(freqs.size, dtype='float') | |
cos = np.cos | |
sin = np.sin | |
for i, f in enumerate(freqs): | |
real = (m*cos(2*pi*f*t)).sum() | |
imag = (m*sin(2*pi*f*t)).sum() | |
amps[i] = real**2 + imag**2 | |
amps = 2.0*np.sqrt(amps)/t.size | |
return amps | |
@timeit | |
def periodogram_cython(t, m, f): | |
''' Calculate Deeming periodogram using cython: see cyperiodogram.pyx | |
''' | |
cyamps = cy.periodogram(t, m, f) | |
return cyamps | |
@timeit | |
def periodogram_fortran_openmp(t, m, f, threads): | |
''' Calculate the Deeming periodogram using Fortran with OpenMP | |
''' | |
ampsf90omp_2 = deemingomp.periodogram2(t, m, f, t.size, f.size, threads) | |
return ampsf90omp_2 | |
@timeit | |
def periodogram_opencl(t, m, f): | |
''' Calculate the Deeming periodogram using OpenCL on the GPU | |
''' | |
# create a context and a job queue | |
ctx = cl.create_some_context() | |
queue = cl.CommandQueue(ctx) | |
# create buffers to send to device | |
mf = cl.mem_flags | |
# input buffers | |
times_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=t) | |
mags_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=m) | |
freqs_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f) | |
# output buffers | |
amps_buffer = cl.Buffer(ctx, mf.WRITE_ONLY, f.nbytes) | |
amps_g = np.empty_like(f) | |
# read and compile the opencl kernel | |
with open('deeming_kernel.cl') as kernel: | |
prg = cl.Program(ctx, kernel.read()).build() | |
try: | |
prg.build() | |
except: | |
print("Error:") | |
print(prg.get_build_info(ctx.devices[0], | |
cl.program_build_info.LOG)) | |
raise | |
# call the function and copy the values from the buffer to a numpy array | |
prg.periodogram(queue, amps_g.shape, None, | |
times_g, | |
mags_g, | |
freqs_g, | |
amps_buffer, | |
np.int32(t.size)) | |
cl.enqueue_copy(queue, amps_g, amps_buffer) | |
return amps_g | |
@timeit | |
def run_benchmark(N, M): | |
''' Run the Deeming benchmark using all the implementations using N data | |
points and M frequencies | |
''' | |
pi = np.pi | |
# Data | |
t = np.linspace(0.0, 0.25, N) | |
m = 1.75*np.sin(2*pi*150*t) + 0.75*np.sin(2*pi*277*t) + np.sin(2*pi*333*t) | |
# Frequencies | |
freqs = np.linspace(0.0, 1000.0, M) | |
# Run the different versions | |
_times = {} | |
# numpy | |
amps, t_numpy = periodogram_numpy(t, m, freqs) | |
# cython | |
cyamps, t_cython = periodogram_cython(t, m, freqs) | |
assert(np.allclose(cyamps, amps)) | |
# Fortran with openmp | |
for threads in [1, 2, 4, 8]: | |
ampsf90omp_2, t_fortran = periodogram_fortran_openmp(t, | |
m, | |
freqs, | |
threads) | |
_times['fortran {}'.format(threads)] = t_fortran | |
assert(np.allclose(ampsf90omp_2, amps)) | |
# opencl | |
opencl_amps, t_opencl = periodogram_opencl(t, m, freqs) | |
assert(np.allclose(opencl_amps, amps)) | |
_times['numpy'] = t_numpy | |
_times['cython'] = t_cython | |
_times['opencl'] = t_opencl | |
return _times | |
def make_bar_plot(N, times_dict): | |
''' Make a bar plot using the timing dict obtained from run_benchmarks | |
''' | |
speedups = [] | |
times = [] | |
methods = ['numpy', 'cython', 'fortran 1', 'fortran 2', 'fortran 4', | |
'fortran 8', 'opencl'] | |
for i, key in enumerate(methods): | |
speedups.append(times_dict['numpy']/times_dict[key]) | |
times.append(times_dict[key]) | |
index = np.arange(len(methods)) | |
fig, ax = plt.subplots() | |
rect1 = ax.bar(index, speedups) | |
plt.xlabel('Method', fontsize=18) | |
plt.ylabel('Speedup (higher is better)', fontsize=18) | |
plt.xticks(index + 0.4, methods, fontsize=14) | |
def autolabel(rects, times): | |
# attach some text labels | |
for rect, _time in zip(rects, times): | |
height = rect.get_height() | |
ax.text(rect.get_x()+rect.get_width()/2., 1.02*height, '%1.1fs'% _time, | |
ha='center', va='bottom') | |
autolabel(rect1, times) | |
# save to file | |
plt.savefig('{}x{}-barchart.jpg'.format(N, N)) | |
if __name__ == "__main__": | |
for N in [1000, 2000, 4000, 8000, 16000, 32000, 64000]: | |
_times, _time_all = run_benchmark(N, N) | |
make_bar_plot(N, _times) | |
print("\n{} datapoints, {} frequencies in {}s".format(N, N, round(_time_all, 1))) | |
for key in sorted(_times): | |
print("{}: {} {} {}".format(key, | |
" "*(20-len(key)), | |
round(_times[key], 3), | |
round(_times['numpy']/_times[key], 1))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Kernel to compute the deeming periodogram for a given frequency over a set | |
// of data | |
#define PYOPENCL_DEFINE_CDOUBLE | |
#pragma OPENCL EXTENSION cl_khr_fp64: enable | |
__kernel void periodogram( | |
__global const double *times_g, | |
__global const double *mags_g, | |
__global const double *freqs_g, | |
__global double *amps_g, | |
const int datalength) { | |
int gid = get_global_id(0); | |
double this_frequency = freqs_g[gid]; | |
double realpart = 0.0; | |
double imagpart = 0.0; | |
double pi = 3.141592653589793; | |
for (int i=0; i < datalength; i++){ | |
realpart = realpart + mags_g[i]*cos(2.0*pi*this_frequency*times_g[i]); | |
imagpart = imagpart + mags_g[i]*sin(2.0*pi*this_frequency*times_g[i]); | |
} | |
amps_g[gid] = 2.0*sqrt(pow(realpart, 2) + pow(imagpart, 2))/datalength; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! | |
! See deeming.py for info. This file contains 2 Fortran implimentations | |
! of the deeming periodogram algorithm | |
subroutine periodogram(time, value, freqs, nt, nf , numprocs, amps) | |
! periodogram calculates the Deeming periodogram of the datapoints contained | |
! in the arrays time, value at the frequencies contained in freqs | |
! This subroutine is meant for f2py wrapping as well as a testbed of openMP | |
! within a python environment | |
use omp_lib | |
implicit none | |
! sizes of time and frequency arrays | |
integer :: nt, nf, id | |
! arrays | |
double precision time(nt), value(nt) | |
double precision freqs(nf), amps(nf) | |
integer :: numprocs, f | |
double precision :: pi, realpart, imagpart | |
pi = 3.1415926535897931D0 | |
realpart = 0.0D0 | |
imagpart = 0.0D0 | |
! Add compiler directives so f2py knows what to do | |
!f2py intent(in) time | |
!f2py intent(in) value | |
!f2py intent(in) nt | |
!f2py depend(nt) time | |
!f2py depend(nt) value | |
!f2py intent(in) freqs | |
!f2py intent(in) nf | |
!f2py depend(nf) freqs | |
!f2py intent(in) numprocs | |
!f2py intent(out) amps | |
call OMP_SET_NUM_THREADS(numprocs) | |
!$OMP PARALLEL DO & | |
!$OMP DEFAULT(SHARED) PRIVATE(f,realpart, imagpart) | |
do f = 1, nf | |
! calculate real and imaginary parts | |
realpart = sum((value*dcos(2.0D0*pi*freqs(f)*time))) | |
imagpart = sum((value*dsin(2.0D0*pi*freqs(f)*time))) | |
! calculate the amplitude | |
amps(f) = 2.0D0*dsqrt(realpart**2 + imagpart**2)/nt | |
end do | |
!$OMP END PARALLEL DO | |
end subroutine | |
subroutine periodogram2(time, value, freqs, nt, nf , numprocs, amps) | |
! periodogram calculates the Deeming periodogram of the datapoints contained | |
! in the arrays time, value at the frequencies contained in freqs | |
! This subroutine is meant for f2py wrapping as well as a testbed of openMP | |
! within a python environment | |
use omp_lib | |
implicit none | |
! sizes of time and frequency arrays | |
integer :: nt, nf, id | |
! arrays | |
double precision time(nt), value(nt) | |
double precision freqs(nf), amps(nf) | |
integer :: numprocs, f, d | |
double precision :: pi, realpart, imagpart | |
pi = 3.1415926535897931D0 | |
realpart = 0.0D0 | |
imagpart = 0.0D0 | |
! Add compiler directives so f2py knows what to do | |
!f2py threadsafe | |
!f2py intent(in) time | |
!f2py intent(in) value | |
!f2py intent(in) nt | |
!f2py depend(nt) time | |
!f2py depend(nt) value | |
!f2py intent(in) freqs | |
!f2py intent(in) nf | |
!f2py depend(nf) freqs | |
!f2py intent(in) numprocs | |
!f2py intent(out) amps | |
call OMP_SET_NUM_THREADS(numprocs) | |
!$OMP PARALLEL DO & | |
!$OMP DEFAULT(SHARED) PRIVATE(f,realpart, imagpart) | |
do f = 1, nf | |
! calculate real and imaginary parts | |
do d = 1, nt | |
realpart = realpart + value(d)*dcos(2.0D0*pi*freqs(f)*time(d)) | |
imagpart = imagpart + value(d)*dsin(2.0D0*pi*freqs(f)*time(d)) | |
end do | |
! calculate the amplitude | |
amps(f) = 2.0D0*dsqrt(realpart*realpart + imagpart*imagpart)/nt | |
realpart = 0.0D0 | |
imagpart = 0.0D0 | |
end do | |
!$OMP END PARALLEL DO | |
end subroutine |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment