ezietsman · August 26, 2020 20:42
diff --git a/build_deeming.sh b/build_deeming.sh
 #!/bin/bash
 f2py -c -m deeming periodogram.f90 -lgomp
 f2py -c -m deemingomp periodogram.f90 --f90flags="-fopenmp " -lgomp
diff --git a/cyperiodogram.pyx b/cyperiodogram.pyx
 #
 # Cython version of the Deeming periodogram function. 
 # See deeming.py
 #


 import numpy as np
 cimport numpy as np
 cimport cython

 cdef extern from 'math.h':
    double sin(double theta)
    double cos(double theta)


 DTYPE = np.float
 ctypedef np.float_t DTYPE_t

 @cython.cdivision(True)
 @cython.boundscheck(False)
 def periodogram(np.ndarray[DTYPE_t, ndim=1]  t not None, \
                np.ndarray[DTYPE_t, ndim=1]  m not None,\
                np.ndarray[DTYPE_t, ndim=1]  freqs not None):

    cdef DTYPE_t realpart = 0.0
    cdef DTYPE_t imagpart = 0.0
    cdef DTYPE_t pi = np.pi

    cdef int Nf = freqs.shape[0]
    cdef int Nt = t.shape[0]

    cdef np.ndarray[DTYPE_t, ndim=1]  amps = np.zeros([Nf], dtype=DTYPE)
    cdef int i, j

    for i in range(Nf):
        for j in range(Nt):
            realpart = realpart + m[j]*cos(2.0*pi*freqs[i]*t[j])
            imagpart = imagpart + m[j]*sin(2.0*pi*freqs[i]*t[j])

        amps[i] = 2.0*(realpart**2 + imagpart**2)**0.5/Nt

        realpart = 0.0
        imagpart = 0.0

    return amps

diff --git a/deeming.py b/deeming.py
 #!/usr/bin/env python

 '''
 Testing the efficiency of different implimentations of the Deeming
 periodogram. Its an O(N*N) algorithm for calculating a periodogram but
 it works for unevenly sampled data. Used frequently in astronomical
 time-series.

 cyperiodogram contains a cython implimentation

 periodogram.f90 contains two Fortran 90 implimentations
  1 : Using the same array syntax as numpy
  2 : Hand coded loops (Much faster in this case)

 The fortran subroutines use openmp and are wrapped using f2py. Look in
 build_deeming.sh for the compilation commands.

 The OpenCL version uses deeming_kernel.cl for computations on the opencl device
 (A GPU in my case)

 To run this example (in Linux):
  sh build_deeming.sh
  python deeming.py

 The cython extension will be compiled when running the deeming.py file.



 '''

 import numpy as np
 import deemingomp
 import time
 import pyximport; pyximport.install()
 import cyperiodogram as cy
 import pyopencl as cl
 import matplotlib.pyplot as plt
 import matplotlib.style

 matplotlib.style.use('ggplot')


 def timeit(method):
    '''
    From: http://www.samuelbosch.com/2012/02/timing-functions-in-python.html

    '''
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()

        # print('%r %2.2f sec' % (method.__name__, te-ts))
        return result, (te-ts)

    return timed


 # Deeming periodogram done with numpy
 @timeit
 def periodogram_numpy(t, m, freqs):
    ''' Calculate the Deeming periodogram using numpy
    '''
    pi = np.pi
    amps = np.zeros(freqs.size, dtype='float')
    cos = np.cos
    sin = np.sin
    for i, f in enumerate(freqs):
        real = (m*cos(2*pi*f*t)).sum()
        imag = (m*sin(2*pi*f*t)).sum()
        amps[i] = real**2 + imag**2

    amps = 2.0*np.sqrt(amps)/t.size
    return amps


 @timeit
 def periodogram_cython(t, m, f):
    ''' Calculate Deeming periodogram using cython: see cyperiodogram.pyx
    '''
    cyamps = cy.periodogram(t, m, f)
    return cyamps


 @timeit
 def periodogram_fortran_openmp(t, m, f, threads):
    ''' Calculate the Deeming periodogram using Fortran with OpenMP
    '''
    ampsf90omp_2 = deemingomp.periodogram2(t, m, f, t.size, f.size, threads)

    return ampsf90omp_2


 @timeit
 def periodogram_opencl(t, m, f):
    ''' Calculate the Deeming periodogram using OpenCL on the GPU
    '''

    # create a context and a job queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # create buffers to send to device
    mf = cl.mem_flags
    # input buffers
    times_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=t)
    mags_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=m)
    freqs_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f)

    # output buffers
    amps_buffer = cl.Buffer(ctx, mf.WRITE_ONLY, f.nbytes)
    amps_g = np.empty_like(f)

    # read and compile the opencl kernel
    with open('deeming_kernel.cl') as kernel:
        prg = cl.Program(ctx, kernel.read()).build()
        try:
            prg.build()
        except:
            print("Error:")
            print(prg.get_build_info(ctx.devices[0],
                                     cl.program_build_info.LOG))
            raise

    # call the function and copy the values from the buffer to a numpy array
    prg.periodogram(queue, amps_g.shape, None,
                    times_g,
                    mags_g,
                    freqs_g,
                    amps_buffer,
                    np.int32(t.size))
    cl.enqueue_copy(queue, amps_g, amps_buffer)

    return amps_g


 @timeit
 def run_benchmark(N, M):
    ''' Run the Deeming benchmark using all the implementations using N data
    points and M frequencies

    '''

    pi = np.pi
    # Data
    t = np.linspace(0.0, 0.25, N)
    m = 1.75*np.sin(2*pi*150*t) + 0.75*np.sin(2*pi*277*t) + np.sin(2*pi*333*t)

    # Frequencies
    freqs = np.linspace(0.0, 1000.0, M)

    # Run the different versions

    _times = {}

    # numpy
    amps, t_numpy = periodogram_numpy(t, m, freqs)

    # cython
    cyamps, t_cython = periodogram_cython(t, m, freqs)
    assert(np.allclose(cyamps, amps))

    # Fortran with openmp
    for threads in [1, 2, 4, 8]:
        ampsf90omp_2, t_fortran = periodogram_fortran_openmp(t,
                                                             m,
                                                             freqs,
                                                             threads)
        _times['fortran {}'.format(threads)] = t_fortran
        assert(np.allclose(ampsf90omp_2, amps))

    # opencl
    opencl_amps, t_opencl = periodogram_opencl(t, m, freqs)
    assert(np.allclose(opencl_amps, amps))

    _times['numpy'] = t_numpy
    _times['cython'] = t_cython
    _times['opencl'] = t_opencl

    return _times


 def make_bar_plot(N, times_dict):
    ''' Make a bar plot using the timing dict obtained from run_benchmarks
    '''
    speedups = []
    times = []
    methods = ['numpy', 'cython', 'fortran 1', 'fortran 2', 'fortran 4',
               'fortran 8', 'opencl']

    for i, key in enumerate(methods):
        speedups.append(times_dict['numpy']/times_dict[key])
        times.append(times_dict[key])

    index = np.arange(len(methods))
    fig, ax = plt.subplots()
    rect1 = ax.bar(index, speedups)
    plt.xlabel('Method', fontsize=18)
    plt.ylabel('Speedup (higher is better)', fontsize=18)
    plt.xticks(index + 0.4, methods, fontsize=14)

    def autolabel(rects, times):
        # attach some text labels
        for rect, _time in zip(rects, times):
            height = rect.get_height()
            ax.text(rect.get_x()+rect.get_width()/2., 1.02*height, '%1.1fs'% _time,
                    ha='center', va='bottom')

    autolabel(rect1, times)
    # save to file
    plt.savefig('{}x{}-barchart.jpg'.format(N, N))


 if __name__ == "__main__":

    for N in [1000, 2000, 4000, 8000, 16000, 32000, 64000]:
        _times, _time_all = run_benchmark(N, N)
        make_bar_plot(N, _times)
        print("\n{} datapoints, {} frequencies in {}s".format(N, N, round(_time_all, 1)))
        for key in sorted(_times):
            print("{}: {} {} {}".format(key,
                                        " "*(20-len(key)),
                                        round(_times[key], 3),
                                        round(_times['numpy']/_times[key], 1)))
diff --git a/deeming_kernel.cl b/deeming_kernel.cl
 // Kernel to compute the deeming periodogram for a given frequency over a set
 // of data

 #define PYOPENCL_DEFINE_CDOUBLE
 #pragma OPENCL EXTENSION cl_khr_fp64: enable

 __kernel void periodogram(
        __global const double *times_g,
        __global const double *mags_g,
        __global const double *freqs_g,
        __global double *amps_g,
        const int datalength) {

    int gid = get_global_id(0);
    double this_frequency = freqs_g[gid];
    double realpart = 0.0;
    double imagpart = 0.0;
    double pi = 3.141592653589793;

    for (int i=0; i < datalength; i++){
        realpart = realpart + mags_g[i]*cos(2.0*pi*this_frequency*times_g[i]);
        imagpart = imagpart + mags_g[i]*sin(2.0*pi*this_frequency*times_g[i]);
    }
    amps_g[gid] = 2.0*sqrt(pow(realpart, 2) + pow(imagpart, 2))/datalength;
 }
diff --git a/periodogram.f90 b/periodogram.f90
 !
 !  See deeming.py for info. This file contains 2 Fortran implimentations
 !  of the deeming periodogram algorithm
 subroutine periodogram(time, value, freqs, nt, nf , numprocs, amps)

 ! periodogram calculates the Deeming periodogram of the datapoints contained
 ! in the arrays time, value at the frequencies contained in freqs
 ! This subroutine is meant for f2py wrapping as well as a testbed of openMP
 ! within a python environment

    use omp_lib
    implicit none


    ! sizes of time and frequency arrays
    integer :: nt, nf, id
    ! arrays
    double precision time(nt), value(nt)
    double precision freqs(nf), amps(nf)
    integer :: numprocs, f
    double precision :: pi, realpart, imagpart

    pi = 3.1415926535897931D0
    realpart = 0.0D0
    imagpart = 0.0D0

 ! Add compiler directives so f2py knows what to do

 !f2py intent(in) time
 !f2py intent(in) value

 !f2py intent(in) nt
 !f2py depend(nt) time
 !f2py depend(nt) value

 !f2py intent(in) freqs
 !f2py intent(in) nf
 !f2py depend(nf) freqs

 !f2py intent(in) numprocs
 !f2py intent(out) amps

    call OMP_SET_NUM_THREADS(numprocs)

    !$OMP PARALLEL DO &
    !$OMP DEFAULT(SHARED) PRIVATE(f,realpart, imagpart)
    do f = 1, nf
        ! calculate real and imaginary parts
        realpart = sum((value*dcos(2.0D0*pi*freqs(f)*time)))
        imagpart = sum((value*dsin(2.0D0*pi*freqs(f)*time)))

        ! calculate the amplitude
        amps(f) = 2.0D0*dsqrt(realpart**2 + imagpart**2)/nt
    end do
    !$OMP END PARALLEL DO



 end subroutine


 subroutine periodogram2(time, value, freqs, nt, nf , numprocs, amps)

 ! periodogram calculates the Deeming periodogram of the datapoints contained
 ! in the arrays time, value at the frequencies contained in freqs
 ! This subroutine is meant for f2py wrapping as well as a testbed of openMP
 ! within a python environment

    use omp_lib
    implicit none


    ! sizes of time and frequency arrays
    integer :: nt, nf, id
    ! arrays
    double precision time(nt), value(nt)
    double precision freqs(nf), amps(nf)
    integer :: numprocs, f, d
    double precision :: pi, realpart, imagpart

    pi = 3.1415926535897931D0
    realpart = 0.0D0
    imagpart = 0.0D0

 ! Add compiler directives so f2py knows what to do

 !f2py threadsafe
 !f2py intent(in) time
 !f2py intent(in) value

 !f2py intent(in) nt
 !f2py depend(nt) time
 !f2py depend(nt) value

 !f2py intent(in) freqs
 !f2py intent(in) nf
 !f2py depend(nf) freqs

 !f2py intent(in) numprocs
 !f2py intent(out) amps

    call OMP_SET_NUM_THREADS(numprocs)

    !$OMP PARALLEL DO &
    !$OMP DEFAULT(SHARED) PRIVATE(f,realpart, imagpart)
    do f = 1, nf
        ! calculate real and imaginary parts
        do d = 1, nt
            realpart = realpart + value(d)*dcos(2.0D0*pi*freqs(f)*time(d))
            imagpart = imagpart + value(d)*dsin(2.0D0*pi*freqs(f)*time(d))
        end do

        ! calculate the amplitude
        amps(f) = 2.0D0*dsqrt(realpart*realpart + imagpart*imagpart)/nt

        realpart = 0.0D0
        imagpart = 0.0D0
    end do
    !$OMP END PARALLEL DO

 end subroutine
	#!/bin/bash
	f2py -c -m deeming periodogram.f90 -lgomp
	f2py -c -m deemingomp periodogram.f90 --f90flags="-fopenmp " -lgomp
	#
	# Cython version of the Deeming periodogram function.
	# See deeming.py
	#


	import numpy as np
	cimport numpy as np
	cimport cython

	cdef extern from 'math.h':
	double sin(double theta)
	double cos(double theta)


	DTYPE = np.float
	ctypedef np.float_t DTYPE_t

	@cython.cdivision(True)
	@cython.boundscheck(False)
	def periodogram(np.ndarray[DTYPE_t, ndim=1] t not None, \
	np.ndarray[DTYPE_t, ndim=1] m not None,\
	np.ndarray[DTYPE_t, ndim=1] freqs not None):

	cdef DTYPE_t realpart = 0.0
	cdef DTYPE_t imagpart = 0.0
	cdef DTYPE_t pi = np.pi

	cdef int Nf = freqs.shape[0]
	cdef int Nt = t.shape[0]

	cdef np.ndarray[DTYPE_t, ndim=1] amps = np.zeros([Nf], dtype=DTYPE)
	cdef int i, j

	for i in range(Nf):
	for j in range(Nt):
	realpart = realpart + m[j]cos(2.0pifreqs[i]t[j])
	imagpart = imagpart + m[j]sin(2.0pifreqs[i]t[j])

	amps[i] = 2.0(realpart2 + imagpart2)*0.5/Nt

	realpart = 0.0
	imagpart = 0.0

	return amps
	#!/usr/bin/env python

	'''
	Testing the efficiency of different implimentations of the Deeming
	periodogram. Its an O(N*N) algorithm for calculating a periodogram but
	it works for unevenly sampled data. Used frequently in astronomical
	time-series.

	cyperiodogram contains a cython implimentation

	periodogram.f90 contains two Fortran 90 implimentations
	1 : Using the same array syntax as numpy
	2 : Hand coded loops (Much faster in this case)

	The fortran subroutines use openmp and are wrapped using f2py. Look in
	build_deeming.sh for the compilation commands.

	The OpenCL version uses deeming_kernel.cl for computations on the opencl device
	(A GPU in my case)

	To run this example (in Linux):
	sh build_deeming.sh
	python deeming.py

	The cython extension will be compiled when running the deeming.py file.



	'''

	import numpy as np
	import deemingomp
	import time
	import pyximport; pyximport.install()
	import cyperiodogram as cy
	import pyopencl as cl
	import matplotlib.pyplot as plt
	import matplotlib.style

	matplotlib.style.use('ggplot')


	def timeit(method):
	'''
	From: http://www.samuelbosch.com/2012/02/timing-functions-in-python.html

	'''
	def timed(args, *kw):
	ts = time.time()
	result = method(args, *kw)
	te = time.time()

	# print('%r %2.2f sec' % (method.__name__, te-ts))
	return result, (te-ts)

	return timed


	# Deeming periodogram done with numpy
	@timeit
	def periodogram_numpy(t, m, freqs):
	''' Calculate the Deeming periodogram using numpy
	'''
	pi = np.pi
	amps = np.zeros(freqs.size, dtype='float')
	cos = np.cos
	sin = np.sin
	for i, f in enumerate(freqs):
	real = (mcos(2pift)).sum()
	imag = (msin(2pift)).sum()
	amps[i] = real2 + imag2

	amps = 2.0*np.sqrt(amps)/t.size
	return amps


	@timeit
	def periodogram_cython(t, m, f):
	''' Calculate Deeming periodogram using cython: see cyperiodogram.pyx
	'''
	cyamps = cy.periodogram(t, m, f)
	return cyamps


	@timeit
	def periodogram_fortran_openmp(t, m, f, threads):
	''' Calculate the Deeming periodogram using Fortran with OpenMP
	'''
	ampsf90omp_2 = deemingomp.periodogram2(t, m, f, t.size, f.size, threads)

	return ampsf90omp_2


	@timeit
	def periodogram_opencl(t, m, f):
	''' Calculate the Deeming periodogram using OpenCL on the GPU
	'''

	# create a context and a job queue
	ctx = cl.create_some_context()
	queue = cl.CommandQueue(ctx)

	# create buffers to send to device
	mf = cl.mem_flags
	# input buffers
	times_g = cl.Buffer(ctx, mf.READ_ONLY \| mf.COPY_HOST_PTR, hostbuf=t)
	mags_g = cl.Buffer(ctx, mf.READ_ONLY \| mf.COPY_HOST_PTR, hostbuf=m)
	freqs_g = cl.Buffer(ctx, mf.READ_ONLY \| mf.COPY_HOST_PTR, hostbuf=f)

	# output buffers
	amps_buffer = cl.Buffer(ctx, mf.WRITE_ONLY, f.nbytes)
	amps_g = np.empty_like(f)

	# read and compile the opencl kernel
	with open('deeming_kernel.cl') as kernel:
	prg = cl.Program(ctx, kernel.read()).build()
	try:
	prg.build()
	except:
	print("Error:")
	print(prg.get_build_info(ctx.devices[0],
	cl.program_build_info.LOG))
	raise

	# call the function and copy the values from the buffer to a numpy array
	prg.periodogram(queue, amps_g.shape, None,
	times_g,
	mags_g,
	freqs_g,
	amps_buffer,
	np.int32(t.size))
	cl.enqueue_copy(queue, amps_g, amps_buffer)

	return amps_g


	@timeit
	def run_benchmark(N, M):
	''' Run the Deeming benchmark using all the implementations using N data
	points and M frequencies

	'''

	pi = np.pi
	# Data
	t = np.linspace(0.0, 0.25, N)
	m = 1.75np.sin(2pi150t) + 0.75np.sin(2pi277t) + np.sin(2pi333*t)

	# Frequencies
	freqs = np.linspace(0.0, 1000.0, M)

	# Run the different versions

	_times = {}

	# numpy
	amps, t_numpy = periodogram_numpy(t, m, freqs)

	# cython
	cyamps, t_cython = periodogram_cython(t, m, freqs)
	assert(np.allclose(cyamps, amps))

	# Fortran with openmp
	for threads in [1, 2, 4, 8]:
	ampsf90omp_2, t_fortran = periodogram_fortran_openmp(t,
	m,
	freqs,
	threads)
	_times['fortran {}'.format(threads)] = t_fortran
	assert(np.allclose(ampsf90omp_2, amps))

	# opencl
	opencl_amps, t_opencl = periodogram_opencl(t, m, freqs)
	assert(np.allclose(opencl_amps, amps))

	_times['numpy'] = t_numpy
	_times['cython'] = t_cython
	_times['opencl'] = t_opencl

	return _times


	def make_bar_plot(N, times_dict):
	''' Make a bar plot using the timing dict obtained from run_benchmarks
	'''
	speedups = []
	times = []
	methods = ['numpy', 'cython', 'fortran 1', 'fortran 2', 'fortran 4',
	'fortran 8', 'opencl']

	for i, key in enumerate(methods):
	speedups.append(times_dict['numpy']/times_dict[key])
	times.append(times_dict[key])

	index = np.arange(len(methods))
	fig, ax = plt.subplots()
	rect1 = ax.bar(index, speedups)
	plt.xlabel('Method', fontsize=18)
	plt.ylabel('Speedup (higher is better)', fontsize=18)
	plt.xticks(index + 0.4, methods, fontsize=14)

	def autolabel(rects, times):
	# attach some text labels
	for rect, _time in zip(rects, times):
	height = rect.get_height()
	ax.text(rect.get_x()+rect.get_width()/2., 1.02*height, '%1.1fs'% _time,
	ha='center', va='bottom')

	autolabel(rect1, times)
	# save to file
	plt.savefig('{}x{}-barchart.jpg'.format(N, N))


	if __name__ == "__main__":

	for N in [1000, 2000, 4000, 8000, 16000, 32000, 64000]:
	_times, _time_all = run_benchmark(N, N)
	make_bar_plot(N, _times)
	print("\n{} datapoints, {} frequencies in {}s".format(N, N, round(_time_all, 1)))
	for key in sorted(_times):
	print("{}: {} {} {}".format(key,
	" "*(20-len(key)),
	round(_times[key], 3),
	round(_times['numpy']/_times[key], 1)))
	// Kernel to compute the deeming periodogram for a given frequency over a set
	// of data

	#define PYOPENCL_DEFINE_CDOUBLE
	#pragma OPENCL EXTENSION cl_khr_fp64: enable

	__kernel void periodogram(
	__global const double *times_g,
	__global const double *mags_g,
	__global const double *freqs_g,
	__global double *amps_g,
	const int datalength) {

	int gid = get_global_id(0);
	double this_frequency = freqs_g[gid];
	double realpart = 0.0;
	double imagpart = 0.0;
	double pi = 3.141592653589793;

	for (int i=0; i < datalength; i++){
	realpart = realpart + mags_g[i]cos(2.0pithis_frequencytimes_g[i]);
	imagpart = imagpart + mags_g[i]sin(2.0pithis_frequencytimes_g[i]);
	}
	amps_g[gid] = 2.0*sqrt(pow(realpart, 2) + pow(imagpart, 2))/datalength;
	}
	!
	! See deeming.py for info. This file contains 2 Fortran implimentations
	! of the deeming periodogram algorithm
	subroutine periodogram(time, value, freqs, nt, nf , numprocs, amps)

	! periodogram calculates the Deeming periodogram of the datapoints contained
	! in the arrays time, value at the frequencies contained in freqs
	! This subroutine is meant for f2py wrapping as well as a testbed of openMP
	! within a python environment

	use omp_lib
	implicit none


	! sizes of time and frequency arrays
	integer :: nt, nf, id
	! arrays
	double precision time(nt), value(nt)
	double precision freqs(nf), amps(nf)
	integer :: numprocs, f
	double precision :: pi, realpart, imagpart

	pi = 3.1415926535897931D0
	realpart = 0.0D0
	imagpart = 0.0D0

	! Add compiler directives so f2py knows what to do

	!f2py intent(in) time
	!f2py intent(in) value

	!f2py intent(in) nt
	!f2py depend(nt) time
	!f2py depend(nt) value

	!f2py intent(in) freqs
	!f2py intent(in) nf
	!f2py depend(nf) freqs

	!f2py intent(in) numprocs
	!f2py intent(out) amps

	call OMP_SET_NUM_THREADS(numprocs)

	!$OMP PARALLEL DO &
	!$OMP DEFAULT(SHARED) PRIVATE(f,realpart, imagpart)
	do f = 1, nf
	! calculate real and imaginary parts
	realpart = sum((valuedcos(2.0D0pifreqs(f)time)))
	imagpart = sum((valuedsin(2.0D0pifreqs(f)time)))

	! calculate the amplitude
	amps(f) = 2.0D0dsqrt(realpart2 + imagpart*2)/nt
	end do
	!$OMP END PARALLEL DO



	end subroutine


	subroutine periodogram2(time, value, freqs, nt, nf , numprocs, amps)

	! periodogram calculates the Deeming periodogram of the datapoints contained
	! in the arrays time, value at the frequencies contained in freqs
	! This subroutine is meant for f2py wrapping as well as a testbed of openMP
	! within a python environment

	use omp_lib
	implicit none


	! sizes of time and frequency arrays
	integer :: nt, nf, id
	! arrays
	double precision time(nt), value(nt)
	double precision freqs(nf), amps(nf)
	integer :: numprocs, f, d
	double precision :: pi, realpart, imagpart

	pi = 3.1415926535897931D0
	realpart = 0.0D0
	imagpart = 0.0D0

	! Add compiler directives so f2py knows what to do

	!f2py threadsafe
	!f2py intent(in) time
	!f2py intent(in) value

	!f2py intent(in) nt
	!f2py depend(nt) time
	!f2py depend(nt) value

	!f2py intent(in) freqs
	!f2py intent(in) nf
	!f2py depend(nf) freqs

	!f2py intent(in) numprocs
	!f2py intent(out) amps

	call OMP_SET_NUM_THREADS(numprocs)

	!$OMP PARALLEL DO &
	!$OMP DEFAULT(SHARED) PRIVATE(f,realpart, imagpart)
	do f = 1, nf
	! calculate real and imaginary parts
	do d = 1, nt
	realpart = realpart + value(d)dcos(2.0D0pifreqs(f)time(d))
	imagpart = imagpart + value(d)dsin(2.0D0pifreqs(f)time(d))
	end do

	! calculate the amplitude
	amps(f) = 2.0D0dsqrt(realpartrealpart + imagpart*imagpart)/nt

	realpart = 0.0D0
	imagpart = 0.0D0
	end do
	!$OMP END PARALLEL DO

	end subroutine