Created
March 6, 2012 14:07
-
-
Save cvanweelden/1986460 to your computer and use it in GitHub Desktop.
Files needed to get cpmc_release1 working on a 64-bit Intel Mac with Matlab R2011a.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the segm_overlap / segm_intersection MEX files against a local
# MATLAB installation. Adjust MATLABDIR (and MEXEXT / LDIRS for another
# platform) before running `make`.
CC = gcc
MATLABDIR = /Applications/MATLAB_R2011a.app
INCLUDES = -I$(MATLABDIR)/extern/include
LDIRS = -L$(MATLABDIR)/bin/maci64

# Extension of the MEX files that the link rules really produce. The rule
# targets must carry the same name as the file written by the recipe,
# otherwise make relinks on every run and `make clean` misses the outputs.
MEXEXT = mexmaci64

EXE_TARGETS = segm_overlap_mex.$(MEXEXT) segm_intersection_mex.$(MEXEXT)

.PHONY: all clean

all: $(EXE_TARGETS)

overlap.o: overlap.c
	$(CC) -D__MAIN__ -O3 -fPIC -c $(INCLUDES) -fopenmp -o $@ $<

intersection.o: intersection.c
	$(CC) -D__MAIN__ -O3 -fPIC -c $(INCLUDES) -fopenmp -o $@ $<

segm_overlap_mex.o: segm_overlap_mex.c
	$(CC) -O3 -c $(INCLUDES) -o $@ $< -fPIC

segm_intersection_mex.o: segm_intersection_mex.c
	$(CC) -O3 -c $(INCLUDES) -o $@ $< -fPIC

segm_overlap_mex.$(MEXEXT): segm_overlap_mex.o overlap.o
	$(CC) segm_overlap_mex.o $(LDIRS) -lmx -lmex -fopenmp -shared -o $@ overlap.o

segm_intersection_mex.$(MEXEXT): segm_intersection_mex.o intersection.o
	$(CC) segm_intersection_mex.o $(LDIRS) -lmx -lmex -fopenmp -shared -o $@ intersection.o

clean:
	rm -f *.o $(EXE_TARGETS)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%
% Compiles the MEX files used by this package.
%
% Prebuilt 32-bit and 64-bit MEX files and Makefiles are provided for
% chi2_mex, and likewise for segm_overlap_mex.c. To recompile those, set
% the correct MATLAB path in the Makefiles first. They are built with
% make because they use multiple cores through OpenMP.
%
% Everything else is compiled directly with mex below.

mex('-O', 'code/cartprod_mex.c', '-o', 'code/cartprod_mex');

% run the Makefile that lives in ./code/
cd('./code/');
!make
cd('..');

mex('-O', 'code/int_hist.c', '-o', 'code/int_hist');
mex('-O', 'code/intens_pixel_diff_mex.c', '-o', 'code/intens_pixel_diff_mex');

% paraFmex ships its own build function
cd('./external_code/paraFmex/');
make_pseudo();
cd('../..');

% these two files contributed by andreas mueller
% (my_phog_desc_mex requires the boost development files)
mex('-cxx', '-I/usr/local/include', '-O', ...
    'external_code/my_phog_desc_mex.cpp', '-o', 'external_code/my_phog_desc_mex');
mex('-O', 'external_code/overlap_care.c', '-o', 'external_code/overlap_care');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function cpmc_example()
% CPMC_EXAMPLE  Run cpmc on one example image and display the best segments.
%
%   Adds the required code directories to the path, makes sure a
%   matlabpool with N_THREADS workers is open, calls cpmc on one image
%   from ./data/, and then scores and displays the segments twice: once
%   for the whole pool of masks and once for the first (up to) 200 masks.
%
%   Side effects: modifies the MATLAB path, may (re)open the matlabpool,
%   writes 'duh_32.mat' to the current directory and opens figures.

  addpath('./code/');
  addpath('./external_code/');
  addpath('./external_code/paraFmex/');
  addpath('./external_code/imrender/vgg/');
  addpath('./external_code/immerge/');
  addpath('./external_code/color_sift/');
  addpath('./external_code/vlfeats/toolbox/');
  vl_setup;
  addpath('./external_code/globalPb/lib/');
  addpath('./external_code/mpi-chi2-v1_5/');

  % create multiple threads (set how many you have)
  N_THREADS = 2;
  pool_size = matlabpool('size');
  if pool_size ~= N_THREADS
    % A pool of a different size must be closed first: opening a second
    % pool while one is already open is an error.
    if pool_size > 0
      matlabpool('close');
    end
    matlabpool('open', N_THREADS);
  end

  exp_dir = './data/';
  %img_name = '2010_000238'; % airplane and people
  img_name = '2007_009084'; % dogs, motorbike, chairs, people
  %img_name = '2010_002868'; % buses
  %img_name = '2010_003781'; % cat, bottle, potted plants

  [masks, scores] = cpmc(exp_dir, img_name);
  I = imread([exp_dir '/JPEGImages/' img_name '.jpg']);

  % visualization and ground truth score for whole pool
  n_masks = size(masks, 3);
  fprintf('Best segments from initial pool of %d\n', n_masks);
  Q = SvmSegm_segment_quality(img_name, exp_dir, masks, 'overlap');
  save('duh_32.mat', 'Q');
  avg_best_overlap = mean(max([Q.q]))   % no semicolon: value is displayed
  SvmSegm_show_best_segments(I, Q, masks);

  % visualization and ground truth score for the top 200 segments
  % (presumably cpmc returns the masks ordered by score - TODO confirm).
  % Clamp to the pool size so that small pools do not cause an
  % out-of-range index.
  n_top = min(200, n_masks);
  top_masks = masks(:, :, 1:n_top);
  figure;
  fprintf('Best %d segments after filtering\n', n_top);
  Q = SvmSegm_segment_quality(img_name, exp_dir, top_masks, 'overlap');
  avg_best_overlap = mean(max([Q.q]))   % no semicolon: value is displayed
  SvmSegm_show_best_segments(I, Q, top_masks);
  fprintf('Best among top %d after filtering\n\n', n_top);
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// fast chi-squared distance function in x86 compiler intrinsics | |
// (C) 2007-2008 Christoph Lampert <[email protected]> | |
#include <stdio.h> | |
#include <limits.h> | |
#include <float.h> // for FLT_MIN | |
/* We calculate chi2=(a-b)**2/(a+b+DBL_MIN) to avoid division-by-zero:
   If a+b != 0, then (a+b+DBL_MIN)==(a+b) and nothing changed.
   If a+b == 0, then the numerator is 0 as well, and we don't divide by 0.
*/
/* Using compiler intrinsics (for SSE >=2) can have a huge speedup effect: | |
8x for float and 3.5x for double on Intel Core2. | |
You have to compile with the right CPU setting, e.g. gcc -march=k8 or -march=nocona */ | |
#ifdef __SSE2__ | |
#include <emmintrin.h> // for float | |
#endif | |
/* OpenMP allows to achieve almost linear speedup on multiCore CPUs: use gcc-4.2 -fopenmp */ | |
/*#ifdef _OPENMP*/ | |
#include <omp.h> | |
/*#endif*/ | |
/* Plain scalar chi2 distance: sum over k of (x[k]-y[k])^2 / (x[k]+y[k]+DBL_MIN).
   Returns 0 when n <= 0. */
static inline double chi2_baseline_double(const int n, const double* const x, const double* const y) {
    double acc = 0.f;
    int k = 0;
    while (k < n) {
        const double diff = x[k] - y[k];
        /* DBL_MIN keeps the denominator non-zero when both entries are 0 */
        const double inv_sum = 1. / (x[k] + y[k] + DBL_MIN);
        acc += diff * diff * inv_sum;
        ++k;
    }
    return acc;
}
#ifdef __SSE2__
/* chi2 distance using SSE2 intrinsics, processing 2 doubles per iteration.
   Unaligned loads are used, so x and y need no particular alignment.
   A trailing odd element is handled by chi2_baseline_double.

   Only compiled when SSE2 is available, matching the __SSE2__ guard around
   the <emmintrin.h> include and around every use of this function.
   No MMX registers are touched here, so no _mm_empty() (EMMS) is needed. */
static inline double chi2_intrinsic_double(int n, const double* x, const double* y) {
    double result=0;
    const __m128d eps = _mm_set1_pd(DBL_MIN);
    __m128d chi2 = _mm_setzero_pd();

    for ( ; n>1; n-=2) {
        const __m128d a = _mm_loadu_pd(x);
        const __m128d b = _mm_loadu_pd(y);
        x+=2;
        y+=2;
        const __m128d a_plus_b = _mm_add_pd(a,b);
        const __m128d a_plus_b_plus_eps = _mm_add_pd(a_plus_b,eps);
        const __m128d a_minus_b = _mm_sub_pd(a,b);
        const __m128d a_minus_b_sq = _mm_mul_pd(a_minus_b, a_minus_b);
        const __m128d quotient = _mm_div_pd(a_minus_b_sq, a_plus_b_plus_eps);
        chi2 = _mm_add_pd(chi2, quotient);
    }

    /* horizontal add of the two lanes:
       swap the lanes, add, and read back the low lane */
    const __m128d shuffle = _mm_shuffle_pd(chi2, chi2, _MM_SHUFFLE2(0,1));
    const __m128d sum = _mm_add_pd(chi2, shuffle);
    // with SSE3, we could use hadd_pd, but the difference is negligible
    _mm_store_sd(&result,sum);

    if (n)
        result += chi2_baseline_double(n, x, y); // remaining entries
    return result;
}
#endif /* __SSE2__ */
/* chi2 distance between two dim-element vectors/histograms x and y.
   Dispatches at compile time: the SSE2 implementation when __SSE2__ is
   defined, the scalar baseline otherwise. */
double chi2_double(const int dim, const double* const x, const double* const y) {
#ifdef __SSE2__
    return chi2_intrinsic_double(dim, x, y);
#else
    return chi2_baseline_double(dim, x, y);
#endif
}
/* calculate the chi2-measure between two sets of vectors/histograms */ | |
double chi2sym_distance_double(const int dim, const int nx, const double* const x, | |
double* const K) { | |
double (*chi2_double)(const int, const double*, const double*) = chi2_baseline_double; | |
#ifdef __SSE2__ | |
chi2_double = chi2_intrinsic_double; | |
#endif | |
double sumK=0.; | |
#pragma omp parallel | |
{ | |
int i,j; | |
#pragma omp for reduction (+:sumK) schedule (dynamic, 2) | |
for (i=0;i<nx;i++) { | |
K[i*nx+i]=0.; | |
for (j=0;j<i;j++) { | |
const double chi2 = chi2_double(dim, &x[i*dim], &x[j*dim]); | |
K[i*nx+j] = chi2; | |
K[j*nx+i] = chi2; | |
sumK += 2*chi2; | |
} | |
} | |
} | |
return sumK/((float)(nx*nx)); | |
} | |
/* calculate the chi2-measure between two sets of vectors/histograms */ | |
double chi2_distance_double(const int dim, const int nx, const double* const x, | |
const int ny, const double* const y, double* const K) { | |
double (*chi2_double)(const int, const double*, const double*) = chi2_baseline_double; | |
#ifdef __SSE2__ | |
chi2_double = chi2_intrinsic_double; | |
#endif | |
double sumK=0.; | |
#pragma omp parallel | |
{ | |
int i,j; | |
#pragma omp for reduction (+:sumK) | |
for (i=0;i<nx;i++) | |
for (j=0;j<ny;j++) { | |
const double chi2 = chi2_double(dim, &x[i*dim], &y[j*dim]); | |
K[i*ny+j] = chi2; | |
sumK += chi2; | |
} | |
} | |
return sumK/((float)(nx*ny)); | |
} | |
#ifdef __MAIN__
#include <stdlib.h>
#include <time.h>

/* Stand-alone entry point, compiled only with -D__MAIN__.
   The timing benchmark for chi2_distance_double is disabled, so the
   program currently just exits with status 0. The benchmark is kept under
   "#if 0" for reference; it relies on memalign(), which is presumably why
   it was switched off for the Mac build - TODO confirm. */
int main()
{
#if 0
    const int dim=3000;
    const int n1=1000;
    const int n2=2000;
    int i,j;

    /* test calculating a kernel with double entries */
    double *data1 = (double*)memalign(16,dim*n1*sizeof(double));
    double *data2 = (double*)memalign(16,dim*n2*sizeof(double));
    double *K = (double*)malloc(n1*n2*sizeof(double));
    if ((!data1) || (!data2) || (!K)) {
        free(data1);
        free(data2);
        free(K);
        return 1;
    }

    const clock_t before_init=clock();
    for (i=0;i<n1*dim;i++)
        data1[i]=1./(double)(i+1.);
    for (i=0;i<n2*dim;i++)
        data2[i]=1./(double)(i+1.);
    const clock_t after_init=clock();
    printf("init time: %8.4f\n",(after_init-before_init)*1./CLOCKS_PER_SEC);

    const clock_t before_chi2=clock();
    const double mean_K = chi2_distance_double(dim, n1, data1, n2, data2, K);
    const clock_t after_chi2=clock();
    printf("chi2 time: %8.4f\n",(after_chi2-before_chi2)*1./CLOCKS_PER_SEC);
    printf("result: %e\n",mean_K);

    free(data1);
    free(data2);
    free(K);
#endif
    return 0;
}
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// fast chi-squared distance function in x86 compiler intrinsics | |
// (C) 2007-2008 Christoph Lampert <[email protected]> | |
#include <stdio.h> | |
#include <limits.h> | |
#include <float.h>// for FLT_MIN | |
/* We calculate chi2=(a-b)**2/(a+b+FLT_MIN) to avoid division-by-zero:
If a+b != 0, then (a+b+FLT_MIN)==(a+b) and nothing changed. | |
If a+b == 0, then the numerator is 0 as well, and we don't divide by 0. | |
*/ | |
/* Using SSE compiler intrinsics can have a huge speedup effect: | |
8x for float and 3.5x for double on Intel Core2. | |
You have to compile with the right CPU setting, e.g. gcc -march=k8 or -march=nocona */ | |
#ifdef __SSE__ | |
#include <xmmintrin.h> // for float | |
#endif | |
/* OpenMP allows to achieve almost linear speedup on multiCore CPUs: use gcc-4.2 -fopenmp */ | |
#ifdef _OPENMP | |
#include <omp.h> | |
#endif | |
/* Plain scalar chi2 distance: sum over k of (x[k]-y[k])^2 / (x[k]+y[k]+FLT_MIN).
   Returns 0 when n <= 0. */
static inline float chi2_baseline_float(const int n, const float* x, const float* y) {
    float acc = 0.f;
    int k = 0;
    while (k < n) {
        const float diff = x[k] - y[k];
        /* FLT_MIN keeps the denominator non-zero when both entries are 0 */
        const float inv_sum = 1. / (x[k] + y[k] + FLT_MIN);
        acc += diff * diff * inv_sum;
        ++k;
    }
    return acc;
}
#ifdef __SSE__
/* chi2 distance using SSE intrinsics, processing 4 floats per iteration.
   Unaligned loads are used, so x and y need no particular alignment.
   The trailing 1-3 elements are handled by chi2_baseline_float.

   Only compiled when SSE is available, matching the __SSE__ guard around
   the <xmmintrin.h> include and around every use of this function.
   No MMX registers are touched here, so no _mm_empty() (EMMS) is needed. */
static inline float chi2_intrinsic_float(int n, const float* x, const float* y) {
    float result=0;
    const __m128 eps = _mm_set1_ps(FLT_MIN);
    __m128 chi2 = _mm_setzero_ps();

    for (; n>3; n-=4) {
        const __m128 a = _mm_loadu_ps(x);
        const __m128 b = _mm_loadu_ps(y);
        const __m128 a_plus_eps = _mm_add_ps(a,eps);
        const __m128 a_plus_b_plus_eps = _mm_add_ps(a_plus_eps,b);
        const __m128 a_minus_b = _mm_sub_ps(a,b);
        const __m128 a_minus_b_sq = _mm_mul_ps(a_minus_b, a_minus_b);
        const __m128 prod = _mm_div_ps(a_minus_b_sq, a_plus_b_plus_eps);
        chi2 = _mm_add_ps(chi2, prod);
        x+=4;
        y+=4;
    }

    /* horizontal add of the four lanes in two shuffle+add steps;
       afterwards every lane holds the total and the low lane is read */
    const __m128 shuffle1 = _mm_shuffle_ps(chi2, chi2, _MM_SHUFFLE(1,0,3,2));
    const __m128 sum1 = _mm_add_ps(chi2, shuffle1);
    const __m128 shuffle2 = _mm_shuffle_ps(sum1, sum1, _MM_SHUFFLE(2,3,0,1));
    const __m128 sum2 = _mm_add_ps(sum1, shuffle2);
    // with SSE3, we could use hadd_ps, but the difference is negligible
    _mm_store_ss(&result,sum2);

    if (n)
        result += chi2_baseline_float(n, x, y); // remaining 1-3 entries
    return result;
}
#endif /* __SSE__ */
/* chi2 distance between two dim-element vectors/histograms x and y.
   Dispatches at compile time: the SSE implementation when __SSE__ is
   defined, the scalar baseline otherwise. */
float chi2_float(const int dim, const float* const x, const float* const y) {
#ifdef __SSE__
    return chi2_intrinsic_float(dim, x, y);
#else
    return chi2_baseline_float(dim, x, y);
#endif
}
/* calculate the chi2-distance matrix between a sets of vectors/histograms. */ | |
float chi2sym_distance_float(const int dim, const int nx, const float* const x, | |
float* const K) { | |
float (*chi2_float)(const int, const float*, const float*) = chi2_baseline_float; | |
#ifdef __SSE__ | |
chi2_float = chi2_intrinsic_float; | |
#endif | |
float sumK=0.f; | |
#pragma omp parallel | |
{ | |
int i,j; | |
#pragma omp for reduction (+:sumK) schedule (dynamic,2) | |
for (i=0;i<nx;i++) { | |
K[i*nx+i]=0.; | |
for (j=0;j<i;j++) { | |
const float chi2 = (*chi2_float)(dim, &x[i*dim], &x[j*dim]); | |
K[i*nx+j] = chi2; | |
K[j*nx+i] = chi2; | |
sumK += 2*chi2; | |
} | |
} | |
} | |
return sumK/((float)(nx*nx)); | |
} | |
/* calculate the chi2-distance matrix between two sets of vectors/histograms. */ | |
float chi2_distance_float(const int dim, const int nx, const float* const x, | |
const int ny, const float* const y, float* const K) { | |
float (*chi2_float)(const int, const float*, const float*) = chi2_baseline_float; | |
#ifdef __SSE__ | |
chi2_float = chi2_intrinsic_float; | |
#endif | |
float sumK=0.f; | |
#pragma omp parallel | |
{ | |
int i,j; | |
#pragma omp for reduction (+:sumK) schedule (dynamic,2) | |
for (i=0;i<nx;i++) { | |
for (j=0;j<ny;j++) { | |
float chi2 = (*chi2_float)(dim, &x[i*dim], &y[j*dim]); | |
K[i*ny+j] = chi2; | |
sumK += chi2; | |
} | |
} | |
} | |
return sumK/((float)(nx*ny)); | |
} | |
#ifdef __MAIN__
#include <stdlib.h>
#include <time.h>

/* Stand-alone entry point, compiled only with -D__MAIN__.
   The timing benchmark for chi2_distance_float is disabled, so the
   program currently just exits with status 0. The benchmark is kept under
   "#if 0" for reference; it relies on memalign(), which is presumably why
   it was switched off for the Mac build - TODO confirm. */
int main()
{
#if 0
    const int dim=3000;
    const int n1=1000;
    const int n2=2000;
    int i,j;

    /* test calculating a kernel with float entries */
    float *data1 = (float*)memalign(16,dim*n1*sizeof(float));
    float *data2 = (float*)memalign(16,dim*n2*sizeof(float));
    float *K = (float*)malloc(n1*n2*sizeof(float));
    if ((!data1) || (!data2) || (!K)) {
        free(data1);
        free(data2);
        free(K);
        return 1;
    }

    const clock_t before_init=clock();
    for (i=0;i<n1*dim;i++)
        data1[i]=1./(float)(i+1.);
    for (i=0;i<n2*dim;i++)
        data2[i]=1./(float)(i+1.);
    const clock_t after_init=clock();
    printf("init time: %8.4f\n",(after_init-before_init)*1./CLOCKS_PER_SEC);

    const clock_t before_chi2=clock();
    const float mean_K = chi2_distance_float(dim, n1, data1, n2, data2, K);
    const clock_t after_chi2=clock();
    printf("chi2 time: %8.4f\n",(after_chi2-before_chi2)*1./CLOCKS_PER_SEC);
    printf("result: %e\n",mean_K);

    free(data1);
    free(data2);
    free(K);
#endif
    return 0;
}
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the chi2 benchmark programs (chi2float, chi2double), libchi2 and
# the chi2_mex MEX file against a local MATLAB installation.
CC = gcc-4.2
CFLAGS = -O3 -fPIC -march=nocona -ffast-math -fomit-frame-pointer
# Alternative toolchain (Intel compiler):
#CC=icc
#CFLAGS = -xP -fast -fPIC
OMPFLAGS = -fopenmp

MATLABDIR=/Applications/MATLAB_R2011a.app
INCLUDES=-I$(MATLABDIR)/extern/include
# For 32-bit Linux use -L$(MATLABDIR)/bin/glnx86 here and chi2_mex.mexglx
# in EXE_TARGETS instead.
LDIRS= -L$(MATLABDIR)/bin/maci64

EXE_TARGETS = chi2float chi2double chi2_mex.mexmaci64
LIB_TARGETS = libchi2.a

.PHONY: all clean timing

all: $(EXE_TARGETS) $(LIB_TARGETS)

# Stand-alone programs; -D__MAIN__ enables main() in the sources.
chi2float: chi2float.c chi2float.h Makefile
	$(CC) -D__MAIN__ $(CFLAGS) $(OMPFLAGS) -o $@ chi2float.c

chi2double: chi2double.c chi2double.h Makefile
	$(CC) -D__MAIN__ $(CFLAGS) $(OMPFLAGS) -o $@ chi2double.c

# NOTE: despite the .a name this is linked with -shared, i.e. it is a
# shared object, not a static archive.
# default installation of libomp cannot be opened using dlopen() as would be required e.g. for Python
libchi2.a: chi2double.c chi2double.h chi2float.c chi2float.h Makefile
	$(CC) $(CFLAGS) $(OMPFLAGS) -shared -fPIC chi2double.c chi2float.c -o $@

chi2double.o: chi2double.c chi2double.h Makefile
	$(CC) -D__MAIN__ $(CFLAGS) -c $(OMPFLAGS) -o $@ chi2double.c

# depends on chi2float.h (the original listed chi2double.h by mistake)
chi2float.o: chi2float.c chi2float.h Makefile
	$(CC) -D__MAIN__ $(CFLAGS) -c $(OMPFLAGS) -o $@ chi2float.c

chi2_mex.o: chi2_mex.c
	$(CC) $(CFLAGS) -c $(INCLUDES) $(OMPFLAGS) -o $@ $<

# One link recipe for both supported MEX extensions; each invocation
# writes exactly the requested target ($@).
chi2_mex.mexglx chi2_mex.mexmaci64: chi2_mex.o chi2float.o
	$(CC) $(OMPFLAGS) chi2_mex.o $(LDIRS) $(CFLAGS) -lmx -lmex -shared -o $@ chi2float.o

clean:
	rm -f *.o $(EXE_TARGETS) $(LIB_TARGETS)

timing: $(EXE_TARGETS)
	time ./chi2float
	time ./chi2double
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Put each file in the correct directories (replace the " " in the filenames with "/").