Last active
February 1, 2017 20:16
-
-
Save lgarrison/8c7912fc1e594f67cb2c1fff29ec1c3e to your computer and use it in GitHub Desktop.
swot vs corrfunc: timing & accuracy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
swot: https://github.com/jcoupon/swot/ | |
corrfunc: https://github.com/manodeep/Corrfunc | |
To test the speed and agreement of swot vs corrfuc, we generated random | |
datasets of increasing size and measured the w(theta) autocorrelation function. | |
Swot has an approximation parameter 'OA' (opening angle) that critically impacts its runtime. | |
We tried OA=0.03 and OA='no'. However, even the conservative value of OA=0.03 | |
caused >10% errors in wtheta for this randomized configuration. Thus, it should | |
be used with caution. | |
Corrfunc makes no approximations, and is thus equivalent to OA='no'. | |
Corrfunc is at least two orders of magnitude faster than swot(OA='no'). | |
500k points takes 2 hours on swot and 10 seconds on corrfunc. | |
Corrfunc is a factor of a few faster than swot(OA=0.03) for problems | |
smaller than 5M points, at which point swot becomes faster. By 8M points, | |
swot is twice as fast. This is probably because Corrfunc has to do O(N^2) | |
comparisons, while swot can bypass this with the OA approximation. | |
This is especially true for the largest bin, which contains the most points, | |
and is also the most likely to benefit from the OA speedup | |
(since the opening angle of a distant leaf will be small). | |
It would be interesting to see if this is the correct explanation and | |
if it holds for 3D and/or periodic xi(r). | |
The results presented in this note are for a 20 core machine, using 20 threads. | |
The single threaded version shows similar advantages for Corrfunc in the small | |
problem sizes that were accessible. | |
The binning was not varied, with the bins spanning ~1e-3 and ~1e-1 times the | |
domain size. | |
The only modification to swot was to add '-O3' to the Makefile. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configuration file for swot | |
# ---------------------------------------------------------- # | |
# Input catalogues # | |
# ---------------------------------------------------------- # | |
data1 swot_test_data.txt # input data catalogue #1 | |
cols1 1,2 # Column ids for data1 | |
#data2 swot_test_data.txt # input data catalogue #2 | |
#cols2 1,2 # Column ids for data2 | |
ran1 swot_test_random.txt # input random catalogue #1 | |
rancols1 1,2 # Column ids for ran1 | |
#ran2 swot_test_ran2.txt # input random catalogue #2 | |
#rancols2 1,2 # Column ids for ran2 | |
fits no # input files in fits format? [auto,yes,no(ascii)]. | |
# ---------------------------------------------------------- # | |
# Correlation options # | |
# ---------------------------------------------------------- # | |
corr auto # Type of correlation: | |
# [auto,cross,gglens,auto_wp,cross_wp,auto_3D,cross_3D,number] | |
est ls # Estimator [ls,nat,ham,peebles] | |
range {BMIN},{BMAX} # Correlation range. Dimension same as "proj": | |
nbins {NBINS} # Number of bins | |
nbins_pi 100 # Number of bins for pi (for wp) | |
log no # Logarithmic bins [yes,no] | |
err jackknife # Resampling method [bootstrap,jackknife,subsample] | |
# or [bootstrap2D,jackknife2D] | |
nsub 0 # Number of subvolumes for resampling (power of 2) | |
nsamples 0 # Number of samples for resampling (= nsub for jackknife) | |
seed time # random seed for bootstrap | |
OA {OA} # Open angle for approximation (value or "no") | |
calib no # Calibration factor [yes,no] (for lensing). Replace e1 by 1+m or c. | |
RR_in no # file for pre-computed RR pairs (only for clustering) | |
RR_out no # file for saving RR pairs (only for clustering) | |
# ---------------------------------------------------------- # | |
# Cosmology (for gal-gal correlations, w(R) and xi(rp,PI)) # | |
# ---------------------------------------------------------- # | |
H0 72 # Hubble parameter | |
Omega_M 0.258 # Relative matter density | |
Omega_L 0.742 # Relative energy density (Lambda) | |
deltaz 0.03 # For gg lensing: Zsource > Zlens + deltaz | |
pi_max 40 # For wp: limit for pi integration | |
# ---------------------------------------------------------- # | |
# Output options # | |
# ---------------------------------------------------------- # | |
proj theta # Axis projection [theta, como, phys] | |
# for auto, cross: default: theta (deg) | |
# for auto_[3d,wp], cross_[3d,wp], gglens: default: como (Mpc) | |
out corr.out # Output file in ascii format | |
cov no # Covariance matrix of errors [yes,no] (in "out".cov) | |
xi no # xi(rp, pi) for auto_wp [yes,no] (in "out".xi) | |
printTree no # Output tree in "out".data[1,2]_tree.[ascii,fits] | |
printTreeAndExit no # Same as above but exits after | |
printSamples no # Ouput samples in "out".samples |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lgarrison@alan:~/swot$ ./timing_tests.py | |
*** wtheta autocorrelation test *** | |
nthreads: 20 | |
## Npts swot(OA=no) swot(OA=0.03) Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 4.098 1.069 0.067 0.029 0.028 | |
20000 13.945 2.205 0.063 0.071 0.081 | |
50000 82.725 5.969 0.250 0.355 0.305 | |
100000 320.544 12.493 0.673 0.885 0.872 | |
200000 1271.339 25.263 2.077 2.638 2.663 | |
500000 7911.312 66.514 10.552 14.514 14.511 | |
Now, the same test repeated for larger Npts without the very slow OA=no: | |
lgarrison@alan:~/swot$ ./timing_tests.py | |
*** wtheta autocorrelation test *** | |
nthreads: 20 | |
## Npts swot(OA=0.03) Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 1.250 0.058 0.030 0.030 | |
20000 2.342 0.058 0.061 0.060 | |
50000 5.948 0.210 0.257 0.257 | |
100000 12.556 0.646 0.884 0.844 | |
200000 26.248 2.012 2.703 2.540 | |
500000 67.472 11.142 14.567 14.439 | |
1000000 139.649 40.367 56.611 55.431 | |
2000000 285.077 156.884 222.501 218.995 | |
5000000 779.451 962.195 1367.952 1344.129 | |
Even larger problems: | |
lgarrison@alan:~/swot$ ./timing_tests.py | |
*** wtheta autocorrelation test *** | |
nthreads: 20 | |
## Npts swot(OA=0.03) Corrfunc(AVX) | |
2000000 292.563 157.972 | |
5000000 778.405 966.364 | |
8000000 1262.958 2465.033 | |
10000000 1595.865 3857.273 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
''' | |
Code to test the timing and accuracy of Corrfunc vs swot. | |
Based on @manodeep's Corrfunc timing code. | |
''' | |
from __future__ import print_function | |
import numpy as np | |
import subprocess, os | |
#from Corrfunc.mocks import DD | |
from os.path import dirname, abspath, join as pjoin | |
import Corrfunc | |
from Corrfunc.mocks.DDtheta_mocks import DDtheta_mocks | |
from Corrfunc.io import read_catalog | |
from Corrfunc.utils import convert_3d_counts_to_cf | |
import time | |
def swot_timing(data, random, theta_bins, nthreads=1, OA=0.01, **kwargs): | |
# swot only supports evenly spaced bins | |
assert (theta_bins == np.linspace(theta_bins.min(), theta_bins.max(), len(theta_bins))).all() | |
# Generate the config file from the template | |
with open('config.template', 'rb') as template_fp: | |
template = template_fp.read() | |
template = template.format(NBINS=len(theta_bins)-1, | |
OA=OA, | |
BMIN=theta_bins.min(), | |
BMAX=theta_bins.max()) | |
with open('config.autogen', 'wb') as autogen_fp: | |
autogen_fp.write(template) | |
mpi = ['mpirun', '-np', str(nthreads)] | |
exec_path = os.getenv('HOME') + '/swot/bin/swot' | |
options = ['-c', 'config.autogen'] | |
output = subprocess.check_output(mpi + [exec_path] + options, stderr=subprocess.STDOUT) | |
wtheta = np.loadtxt('corr.out') | |
return wtheta[:,1], wtheta[:,5] | |
def corrfunc_timing(data, random, theta_bins, isa='avx', nthreads=1): | |
RA = data[:, 0] | |
DEC = data[:, 1] | |
rand_RA = random[:, 0] | |
rand_DEC = random[:, 1] | |
# The following taken from http://corrfunc.readthedocs.io/en/master/modules/converting_ddtheta_mocks.html | |
# Auto pair counts in DD | |
autocorr=1 | |
DD_counts = DDtheta_mocks(autocorr, nthreads, theta_bins, RA, DEC, isa=isa, output_thetaavg=True) | |
# Cross pair counts in DR | |
autocorr=0 | |
DR_counts = DDtheta_mocks(autocorr, nthreads, theta_bins, RA, DEC, RA2=rand_RA, DEC2=rand_DEC, isa=isa) | |
# Auto pairs counts in RR | |
autocorr=1 | |
RR_counts = DDtheta_mocks(autocorr, nthreads, theta_bins, rand_RA, rand_DEC, isa=isa) | |
# All the pair counts are done, get the angular correlation function | |
wtheta = convert_3d_counts_to_cf(len(data), len(data), len(random), len(random), DD_counts, DR_counts, DR_counts, RR_counts) | |
np.savetxt('corrfunc_wtheta.out', np.array([wtheta, DD_counts['npairs'], DR_counts['npairs'], RR_counts['npairs']]).T) | |
return wtheta, DD_counts['thetaavg'] | |
corrfunc_timing_sse = lambda *args, **kwargs: corrfunc_timing(*args, isa='sse42', **kwargs) | |
corrfunc_timing_fallback = lambda *args, **kwargs: corrfunc_timing(*args, isa='fallback', **kwargs) | |
def main(): | |
functions_to_test = [] | |
function_names = [] | |
swot_OAno = lambda *args, **kwargs: swot_timing(*args, OA='no', **kwargs) | |
functions_to_test.append(swot_OAno) | |
function_names.append('swot(OA=no)') | |
OA = 0.03 | |
swot_OA = lambda *args, **kwargs: swot_timing(*args, OA=OA, **kwargs) | |
functions_to_test.append(swot_OA) | |
function_names.append('swot(OA={})'.format(OA)) | |
functions_to_test.append(corrfunc_timing) | |
function_names.append("Corrfunc(AVX)") | |
functions_to_test.append(corrfunc_timing_sse) | |
function_names.append("Corrfunc(SSE42)") | |
functions_to_test.append(corrfunc_timing_fallback) | |
function_names.append("Corrfunc(fallback)") | |
# Read the supplied galaxies on a periodic box | |
#galaxy_catalog=pjoin(dirname(abspath(Corrfunc.__file__)), "../mocks/tests/data", "Mr19_mock_northonly.rdcz.ff") | |
#RA, DEC, _ = read_catalog(galaxy_catalog) | |
RADEC_min = np.array([[100., -45.]]) | |
RADEC_max = np.array([[300., 45.]]) | |
nthreads = 20 | |
check_accuracy = False | |
rtol = 1e-4 # relative tolerance, if checking accuracy | |
npts = [1e4, 2e4, 5e4, 1e5, 2e5, 5e5, 1e6, 2e6, 5e6, 1e7, 2e7, 4e7] | |
npts = [int(i) for i in npts] | |
print('*** wtheta autocorrelation test ***') | |
print('nthreads: {}'.format(nthreads)) | |
print('check_accuracy: {}'.format(check_accuracy)) | |
print("## Npts ", end="") | |
for ifunc in function_names: | |
print(" {0:10s}".format(ifunc), end="") | |
print("") | |
for n in npts: | |
numd = n | |
numr = n | |
data = np.random.rand(numd,2)*(RADEC_max - RADEC_min) + RADEC_min | |
random = np.random.rand(numr,2)*(RADEC_max - RADEC_min) + RADEC_min | |
nbins = 16 | |
theta_bins = np.linspace(0.1, 10, nbins+1) | |
with open('binfile', 'w') as f: | |
for i in xrange(nbins): | |
f.write("{0} {1}\n".format(theta_bins[i], theta_bins[i+1])) | |
np.savetxt('swot_test_data.txt', data) | |
np.savetxt('swot_test_random.txt', random) | |
print("{0:8d}".format(n), end="") | |
for ii, func_name in enumerate(function_names): | |
t0 = time.time() | |
wtheta, thetaavg = functions_to_test[ii](data, random, theta_bins, nthreads=nthreads) | |
t1 = time.time() | |
print(" {0:7.3f} ".format(t1-t0), end="") | |
if not check_accuracy: | |
continue | |
try: | |
# For random catalogs, OA > 0 is very inaccurate and will always fail these tests | |
assert np.allclose(wtheta, ref_wtheta, rtol=rtol) | |
assert np.allclose(thetaavg, ref_thetaavg, rtol=rtol) | |
except NameError: # set the reference values | |
ref_wtheta = wtheta | |
ref_thetaavg = thetaavg | |
except AssertionError: | |
print('\nGot:') | |
print(wtheta) | |
print(thetaavg) | |
print('Expected:') | |
print(ref_wtheta) | |
print(ref_thetaavg) | |
raise | |
if check_accuracy: | |
del ref_wtheta, ref_thetaavg | |
print("") | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment