hunandy14 · January 20, 2018 13:31
diff --git a/interp_extremum.cpp b/interp_extremum.cpp
 class fpoint
 {
 	public:
 		fpoint() 
 		{
 			x = 0.0;
 			y = 0.0;
 		};
 		fpoint(float p_x, float p_y) : x(p_x), y(p_y) {}
 		~fpoint() {};
 		float x;
 		float y;
 };

 /*特徵點定位 DoG函數*/
 static struct feature* interp_extremum(vector<vector<vector<float>>> &dog_pyr, vector<int> &COL, vector<int> &ROW, int octv, int intvl, int r, int c, int intvls, double contr_thr)
 {
 	struct feature* feat = NULL;
 	struct detection_data* ddata = NULL;
 	float xi = 0.0, xr = 0.0, xc = 0.0, contr = 0.0;
 	int i = 0;

 	while (i < SIFT_MAX_INTERP_STEPS)
 	{
 		interp_step(dog_pyr, COL, ROW, octv, intvl, r, c, xi, xr, xc);
 		if (fabs(xi) < 0.5  &&  fabs(xr) < 0.5  &&  fabs(xc) < 0.5)
 			break;

 		c	  += (int)round(xc);
 		r	  += (int)round(xr);
 		intvl += (int)round(xi);

 		if (intvl < 1				|| intvl >  intvls						||
 			c	  < SIFT_IMG_BORDER || c	 >= COL[octv] - SIFT_IMG_BORDER ||
 			r	  < SIFT_IMG_BORDER || r	 >= ROW[octv] - SIFT_IMG_BORDER )
 		{
 			return NULL;
 		}

 		i++;
 	}

 	/* ensure convergence of interpolation */
 	if (i >= SIFT_MAX_INTERP_STEPS)
 		return NULL;

 	contr = interp_contr(dog_pyr, COL, ROW, octv, intvl, r, c, xi, xr, xc);
 	//if (fabs(contr) < (float)(contr_thr / intvls))
 	if (fabs(contr) < (float)(contr_thr))
 		return NULL;

 	feat = new_feature();
 	ddata = feat_detection_data(feat);
 	feat->img_pt.x = feat->x = ((float)c + xc) * (float)pow(2.0, octv);
 	feat->img_pt.y = feat->y = ((float)r + xr) * (float)pow(2.0, octv);
 	ddata->r = r;
 	ddata->c = c;
 	ddata->octv = octv;
 	ddata->intvl = intvl;
 	ddata->subintvl = xi;

 	return feat;
 }
diff --git a/sift.cpp b/sift.cpp
 #include "sift.hpp"
 #include "gaussian.hpp"
 #include "matrix.hpp"
 #include "First_order_GPU.hpp"

 #include <algorithm>
 #include <iostream>
 #include <time.h>

 // CUDA runtime
 #include <cuda_runtime.h>

 // Utilities and system includes
 #include <helper_functions.h>
 #include <helper_cuda.h>

 #define M_PI 3.14159265358979323846f
 #define interp_hist_peak( l, c, r ) ( 0.5f * ((l)-(r)) / ((l) - 2.f * (c) + (r)) )

 using namespace std;
 /************************* Local Function Prototypes *************************/
 static Image_Data create_init_img(Image_Data&, bool, double);
 static pyramid_ladder* build_gauss_pyr(Image_Data&, int, int, double);
 static pyramid_ladder* build_dog_pyr(pyramid_ladder*, int, int);
 static struct feature* scale_space_extrema(vector<vector<vector<float>>> &, vector<int> &, vector<int> &, int, int, double, int, int&);
 static bool is_extremum(vector<vector<vector<float>>>&, vector<int>&, vector<int>&, int, int, int, int);
 static struct feature* interp_extremum(vector < vector<vector<float>>>&, vector<int>&, vector<int>&, int, int, int, int, int, double);
 static void interp_step(vector < vector<vector<float>>>&, vector<int>&, vector<int>&, int, int, int, int, float &, float &, float &);
 static float interp_contr(vector < vector<vector<float>>>&, vector<int>&, vector<int>&, int, int, int, int, float, float, float);
 static struct feature* new_feature(void);
 static bool is_too_edge_like(vector<float>&, int&, int, int, int);
 static void calc_feature_scales(struct feature*, double, int);
 static void adjust_for_img_dbl(struct feature*);
 static struct feature* calc_feature_oris(struct feature *, vector < vector<vector<float>>>&, vector<int>&, vector<int>&, int &);
 static vector<float> ori_hist(vector<float> &, int&, int&, int, int, int, int, float);
 static bool calc_grad_mag_ori(vector<float> &, int&, int&, int, int, float &, float &);
 static void smooth_ori_hist(vector<float>&, int);
 static float dominant_ori(vector<float>&, int);
 static struct feature* add_good_ori_features(vector<float>&, int, float, struct feature*, int &, int &);
 static struct feature* clone_feature(struct feature*);
 static void compute_descriptors(struct feature*, vector < vector<vector<float>>>&, vector<int>&, vector<int>&, int, int);
 static vector<vector<vector<float>>> descr_hist(vector<float> &, int&, int&, int, int, float, float, int, int);
 static void interp_hist_entry(vector<vector<vector<float>>>&, float, float, float, float, int, int);
 static void hist_to_descr(vector<vector<vector<float>>>&, int, int, struct feature*);
 static void normalize_descr(struct feature*);
 static void release_pyr(vector<vector<vector<float>>>, int, int);
 /********************** Local Function Prototypes GPU ************************/
 static void create_init_img_G(float*, vector<vector<float*>> &, double, int, int);
 static void build_gauss_pyr_G(vector<vector<float*>> &, int, int, int, int, double);
 static void build_dog_pyr_G(vector<vector<float*>> &, vector<vector<float*>> &, int, int, int, int);
 static void release_pyr_G(vector<vector<float*>> &, int, int);
 /*********************** Functions prototyped in sift.h **********************/
 /*
 int sift_features(Image_Data &img, struct feature **feat)
 {
 	return _sift_features(img, feat, SIFT_INTVLS, SIFT_SIGMA, SIFT_CONTR_THR,
 		SIFT_CURV_THR, SIFT_IMG_DBL, SIFT_DESCR_WIDTH,
 		SIFT_DESCR_HIST_BINS);
 }

 int _sift_features(Image_Data &img, struct feature **feat, int intvls,
 	double sigma, double contr_thr, int curv_thr,
 	bool img_dbl, int descr_width, int descr_hist_bins)
 {
 	clock_t start, end;
 	start = clock();
 	//---------------------------------------
 	Image_Data init_img;
 	pyramid_ladder *gauss_pyr, *dog_pyr;
 	struct feature* features = NULL;
 	struct feature* features_t = NULL;
 	int octvs, n = 0, kpt = 0, i = 0;
 	//---------------------------------------
 	// 建立第一圖層
 	init_img = create_init_img(img, img_dbl, sigma);
 	//---------------------------------------
 	// 由長寬去計算層數
 	octvs = (int)(log(min(init_img.getCol(), init_img.getRow())) / log(2.0) - 2);
 	cout << "Octvs : " << octvs << endl;
 	//---------------------------------------
 	// 建立高斯圖層
 	gauss_pyr = build_gauss_pyr(init_img, octvs, intvls, sigma);
 	//---------------------------------------
 	// 建立DoG圖層
 	dog_pyr = build_dog_pyr(gauss_pyr, octvs, intvls);
 	//---------------------------------------
 	// 計算特徵點
 	features = scale_space_extrema(dog_pyr, octvs, intvls, contr_thr, curv_thr, kpt);
 	cout << "KeyPoint Total : " << kpt << endl;
 	calc_feature_scales(features, sigma, intvls);
 	if (img_dbl)
 		adjust_for_img_dbl(features);
 	//---------------------------------------
 	// 統計副方向，峰值超過最大峰值的 80 % 為副方向
 	features_t = calc_feature_oris(features, gauss_pyr, n);
 	cout << "KeyPoint Total(Including the secondary direction) : " << n << endl;
 	//---------------------------------------
 	//計算特徵點的128維
 	compute_descriptors(features_t, gauss_pyr, descr_width, descr_hist_bins);

 	*feat = new struct feature[n];

 	for (struct feature *ptr = features_t; ptr != NULL; ptr = ptr->next)
 	{
 		(*feat)[i] = *ptr;
 		delete((*feat)[i].feature_data);
 		(*feat)[i].feature_data = NULL;
 		(*feat)[i].next = NULL;
 		i++;
 	}

 	release_pyr(&gauss_pyr, octvs, intvls + 3);
 	release_pyr(&dog_pyr  , octvs, intvls + 2);
 	end = clock();
 	std::cout << "SIFT " << "Runing Time : " << (double)(end - start) / CLOCKS_PER_SEC << " seconds" << endl;
 	return n;
 }*/

 /*
 static double ConvolveLocWidth(vector<double>::iterator kernel, int dim, vector<float> &src, int x, int y, int Xsize, int Ysize)
 {
 	double pixel = 0.0;
 	int cen = dim / 2;
 	for (int i = 0; i < dim; ++i)
 	{
 		int col = x + (i - cen);
 		if (col < 0)
 			col = 0;
 		if (col >= Xsize)
 			col = Xsize - 1;

 		pixel += kernel[i] * src[y * Xsize + col];
 	}

 	if (pixel > 1.0)
 		pixel = 1.0;
 	return pixel;
 }
 //x方向作捲積
 static void Convolve1DWidth(vector<double>::iterator kern, int dim, vector<float> &src, vector<float> &dst, int Xsize, int Ysize)
 {
 	for (int j = 0; j < Ysize; ++j)
 	{
 		for (int i = 0; i < Xsize; ++i)
 		{
 			dst[j * Xsize + i] = ConvolveLocWidth(kern, dim, src, i, j, Xsize, Ysize);
 		}
 	}
 }
 static double ConvolveLocHeight(vector<double>::iterator kernel, int dim, vector<float> &src, int x, int y, int Xsize, int Ysize)
 {
 	double pixel = 0.0;
 	int cen = dim / 2;
 	for (int j = 0; j < dim; ++j)
 	{
 		int row = y + (j - cen);
 		if (row < 0)
 			row = 0;
 		if (row >= Ysize)
 			row = Ysize - 1;

 		pixel += kernel[j] * src[row * Xsize + x];
 	}
 	if (pixel > 1.0)
 		pixel = 1.0;
 	return pixel;
 }
 //y方向作捲積  
 static void Convolve1DHeight(vector<double>::iterator kern, int dim, vector<float> &src, vector<float> &dst, int Xsize, int Ysize)
 {
 	for (int j = 0; j < Ysize; ++j)
 	{
 		for (int i = 0; i < Xsize; ++i)
 		{
 			dst[j * Xsize + i] = ConvolveLocHeight(kern, dim, src, i, j, Xsize, Ysize);
 		}
 	}
 }
 static vector<double> GaussianKernel1D(double sigma, int dim)
 {
 	vector<double> kern(dim);
 	double s2 = sigma * sigma;
 	int c = (int)(dim / 2.0);
 	double m = 1.0 / (sqrt(2.0 * M_PI) * sigma);
 	double v;
 	for (int i = 0; i < (dim + 1) / 2; ++i)
 	{
 		v = m * exp(-(1.0 * i * i) / (2.0 * s2));
 		kern[c + i] = v;
 		kern[c - i] = v;
 	}
 	double total = 0.0;
 	for (int i = 0; i < dim; ++i)
 	{
 		total += kern[i];
 	}
 	for (int i = 0; i < dim; ++i)
 	{
 		kern[i] /= total;
 	}

 	return kern;
 }
 void BlurImage(vector<float> &output, vector<float> &input, double sigma, int Xsize, int Ysize)
 {
 	int dim;
 	dim = (int)(((sigma - 0.8) / 0.3 + 1.0) * 2.0);

 	if (dim % 2 == 0)
 		dim++;
 	vector<float> Temp(Xsize * Ysize, 0.f);
 	vector<double> convkernel = GaussianKernel1D(sigma, dim);
 	vector<double>::iterator kernal;
 	kernal = convkernel.begin();

 	Convolve1DWidth(kernal, dim, input, Temp, Xsize, Ysize);
 	Convolve1DHeight(kernal, dim, Temp, output, Xsize, Ysize);

 	Temp.clear();
 	convkernel.clear();
 }*/

 /*void sub(vector<float> &d_dst, vector<float> &Minuend, vector<float> &Subtrahend, int Xsize, int Ysize)
 {
 	int i = 0, j = 0;
 	float *img_r, *img_M, *img_S;
 	for (i = 0; i < Ysize; i++)
 	{
 		img_r = &d_dst[i * Xsize];
 		img_M = &Minuend[i * Xsize];
 		img_S = &Subtrahend[i * Xsize];
 		for (j = 0; j < Xsize; j++)
 		{
 			if (i < Ysize && j < Xsize)
 				img_r[j] = img_M[j] - img_S[j];
 			else
 				img_r[j] = 0.f;
 		}
 	}
 }*/
 /***************************** GPU **************************/

 int sift_features_GPUandCPU(float *img, vector<vector<float*>> &G_Gaussian, vector<vector<float*>> &G_DoG, int Xsize, int Ysize, struct feature **feat)
 {
 	return _sift_features_GPUandCPU(img, G_Gaussian, G_DoG, Xsize, Ysize, feat, SIFT_INTVLS, SIFT_SIGMA, SIFT_CONTR_THR,
 		SIFT_CURV_THR, SIFT_IMG_DBL, SIFT_DESCR_WIDTH,
 		SIFT_DESCR_HIST_BINS);
 }

 int _sift_features_GPUandCPU(float *img, vector<vector<float*>> &G_Gaussian, vector<vector<float*>> &G_DoG, int Xsize, int Ysize, struct feature **feat, int intvls,
 	double sigma, double contr_thr, int curv_thr,
 	bool img_dbl, int descr_width, int descr_hist_bins)
 {
 	clock_t start, end;
 	start = clock();
 	//---------------------------------------
 	struct feature* features = NULL;
 	struct feature* features_t = NULL;
 	int octvs, n = 0, kpt = 0, i = 0;
 	int Up_Xsize = 2 * Xsize, Up_Ysize = 2 * Ysize;
 	//---------------------------------------
 	// 建立第一圖層
 	cout << sigma << endl;
 	create_init_img_G(img, G_Gaussian, sigma, Xsize, Ysize);
 	//---------------------------------------
 	// 由長寬去計算層數
 	octvs = (int)(log(min(Up_Xsize, Up_Ysize)) / log(2.0) - 2);
 	//cout << "Octvs : " << octvs << endl;
 	//---------------------------------------
 	// 建立高斯圖層
 	build_gauss_pyr_G(G_Gaussian, octvs, intvls, Up_Xsize, Up_Ysize, sigma);
 	//---------------------------------------
 	// 建立DoG圖層
 	build_dog_pyr_G(G_Gaussian, G_DoG, octvs, intvls, Up_Xsize, Up_Ysize);
 	//---------------------------------------
 	// 計算特徵點
 	vector<vector<vector<float>>> dog_pyr_CPU(octvs), gauss_pyr_CPU(octvs);
 	vector<int> COL(octvs, 0), ROW(octvs, 0);
 	COL[0] = Up_Xsize;
 	ROW[0] = Up_Ysize;
 	for (int o = 1; o < octvs; o++)
 	{
 		COL[o] = COL[o - 1] / 2;
 		ROW[o] = ROW[o - 1] / 2;
 	}
 	
 	for (int o = 0; o < octvs; o++)
 	{
 		dog_pyr_CPU[o].resize(intvls + 2);
 		gauss_pyr_CPU[o].resize(intvls + 3);
 		for (int i = 0; i < intvls + 3; i++)
 		{
 			if (i < intvls + 2)
 			{
 				dog_pyr_CPU[o][i].resize(COL[o] * ROW[o], 0.f);
 				//FloatToShort(GPUData_reg, G_DoG[o][i], COL[o], ROW[o]);
 				//checkCudaErrors(cudaMemcpy(CPUData_reg, GPUData_reg, COL[o] * ROW[o] * sizeof(short), cudaMemcpyDeviceToHost));
 				checkCudaErrors(cudaMemcpy((float*)&dog_pyr_CPU[o][i][0], G_DoG[o][i], COL[o] * ROW[o] * sizeof(float), cudaMemcpyDeviceToHost));
 			}

 			gauss_pyr_CPU[o][i].resize(COL[o] * ROW[o], 0.f);
 			checkCudaErrors(cudaMemcpy((float*)&gauss_pyr_CPU[o][i][0], G_Gaussian[o][i], COL[o] * ROW[o] * sizeof(float), cudaMemcpyDeviceToHost));
 		}
 	}
 	/*start = clock();
 	for (int i = 0; i < 1000; i++)
 	{
 		sub_G(G_DoG[1][0], G_Gaussian[1][1], G_Gaussian[1][0], COL[1], ROW[1]);
 	}
 	end = clock();
 	std::cout << "GPU Time : " << (double)(end - start) / CLOCKS_PER_SEC << " seconds" << endl;

 	vector<float> d_ddd(COL[1] * ROW[1], 0.f);
 	start = clock();
 	for (int i = 0; i < 1000; i++)
 	{
 		sub(d_ddd, gauss_pyr_CPU[1][1], gauss_pyr_CPU[1][0], COL[1], ROW[1]);
 	}
 	end = clock();
 	std::cout << "CPU Time : " << (double)(end - start) / CLOCKS_PER_SEC << " seconds" << endl;

 	system("pause");*/

 	/*aaa = clock();
 	vector<vector<unsigned char*>> pyr_test;
 	pyr_test.resize(octvs);
 	for (i = 0; i < octvs; i++)
 	{
 		pyr_test[i].resize(intvls + 2);
 	}
 	start = clock();
 	for (int o = 0; o < octvs; o++)
 	{
 		for (i = 0; i < intvls + 2; i++)
 		{
 			pyr_test[o][i] = new unsigned char[ROW[o] * COL[o]];
 			FloatToUchar(GPU_reg, dog_pyr[o][i], COL[o], ROW[o]);
 			checkCudaErrors(cudaMemcpy(pyr_test[o][i], GPU_reg, COL[o] * ROW[o] * sizeof(unsigned char), cudaMemcpyDeviceToHost));
 		}
 	}
 	aaab = clock();
 	std::cout << "dddd : " << (double)(aaab - aaa) / CLOCKS_PER_SEC << " seconds" << endl;
 	fstream gaussian;
 	gaussian.open("G_test.raw", ios::binary | ios::out);
 	//輸出高斯
 	int Col_t = Up_Xsize * (SIFT_INTVLS + 2);
 	int Row_t = 0;
 	for (int i = 0; i < octvs; ++i)
 	{
 		Row_t += ROW[i];
 		for (int y = 0; y < ROW[i]; ++y)
 		{
 			int wide = 0;
 			for (int j = 0; j < SIFT_INTVLS + 2; ++j)
 			{
 				for (int x = 0; x < COL[i]; ++x)
 				{
 					gaussian << pyr_test[i][j][y * COL[i] + x];
 				}
 				wide += COL[i];
 			}
 			if (wide < Col_t)
 			{
 				for (int t = wide; t < Col_t; ++t)
 				{
 					gaussian << 0x00;
 				}
 			}
 		}
 	}
 	cout << "GaussianImg Size : " << Col_t << " X " << Row_t << endl;
 	gaussian.close();
 	system("G_test.raw");*/

 	features = scale_space_extrema(dog_pyr_CPU, COL, ROW, octvs, intvls, contr_thr, curv_thr, kpt);
 	//cout << "KeyPoint Total : " << kpt << endl;
 	calc_feature_scales(features, sigma, intvls);
 	if (img_dbl)
 		adjust_for_img_dbl(features);
 	//---------------------------------------
 	// 統計副方向，峰值超過最大峰值的 80 % 為副方向
 	features_t = calc_feature_oris(features, gauss_pyr_CPU, COL, ROW, n);
 	//cout << "KeyPoint Total(Including the secondary direction) : " << n << endl;
 	//---------------------------------------
 	//計算特徵點的128維
 	compute_descriptors(features_t, gauss_pyr_CPU, COL, ROW, descr_width, descr_hist_bins);



 	// 把資料變成連續記憶體(搭配rob函式)
 	*feat = new struct feature[n];
 	for (struct feature *ptr = features_t; ptr != NULL; ptr = ptr->next)
 	{
 		(*feat)[i] = *ptr;
 		delete((*feat)[i].feature_data);
 		(*feat)[i].feature_data = NULL;
 		(*feat)[i].next = NULL;
 		i++;
 	}
 	end = clock();
 	std::cout << "SIFT " << "Runing GPU Time : " << (double)(end - start) / CLOCKS_PER_SEC << " seconds" << endl;
 	release_pyr(gauss_pyr_CPU, octvs, intvls + 3);
 	release_pyr(dog_pyr_CPU, octvs, intvls + 2);
 	return n;
 }


 /************************ Functions prototyped here **************************/
 /*****************************************************************************/
 /*                                                                           */
 /*                                                                           */
 /*                                                                           */
 /*                 ****************************************                  */
 /*                                                                           */
 /*                                                                           */
 /*                                                                           */
 /*****************************************************************************/
 //第一步驟 建立第一層

 /*創建第一層*/
 static Image_Data create_init_img(Image_Data &img, bool img_dbl, double sigma)
 {
 	float sig_diff = 0.f;

 	Image_Data gray = ConvertScale(img, /*(1.f / 255.f)*/ 1.f);

 	if (img_dbl)
 	{
 		sig_diff = (float)sqrt(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4.0);
 		Image_Data dbl = First_order(gray, 2.f);
 		dbl = BlurImage(dbl, sig_diff);
 		return dbl;
 	}
 	else
 	{
 		sig_diff = (float)sqrt(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA);
 		gray = BlurImage(gray, sig_diff);
 		return gray;
 	}
 }
 /***************************** GPU **************************/
 /*創建第一層*/
 static void create_init_img_G(float *img, vector<vector<float*>> &G_Gaussian, double sigma, int Xsize, int Ysize)
 {
 	float sig_diff = 0.f;

 	sig_diff = (float)sqrt(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4.0);
 	sig_diff = (float)sqrt(sigma * sigma + SIFT_INIT_SIGMA * SIFT_INIT_SIGMA);
 	cout << sig_diff << endl;
 	system("pause");
 	//float *Buffer;
 	//checkCudaErrors(cudaMalloc((void **)&Buffer, Xsize * Ysize * 4 * sizeof(float)));
 	Gpu_First_order(G_Gaussian[0][1], img, Xsize, Ysize, UP);
 	BlurImage_G(G_Gaussian[0][0], G_Gaussian[0][1], sig_diff, Xsize * 2, Ysize * 2);
 	//checkCudaErrors(cudaFree(Buffer));
 }
 /*****************************************************************************/
 /*                                                                           */
 /*                    *********************************                      */
 /*                                                                           */
 /*                                                                           */
 /*                                                                           */
 /*              **********************************************               */
 /*                                                                           */
 /*****************************************************************************/
 //第二步驟 建立高斯金字塔及DoG金字塔

 /*建立高斯金字塔*/
 static pyramid_ladder* build_gauss_pyr(Image_Data &base, int octvs, int intvls, double sigma)
 {
 	pyramid_ladder *gauss_pyr;
 	vector<float> sig((intvls + 3), 0.0);
 	float k;
 	int i, o;

 	gauss_pyr = new pyramid_ladder[octvs];

 	for (i = 0; i < octvs; i++)
 	{
 		gauss_pyr[i].Intvls = new pyramid_floors[intvls + 3];
 	}
 	k = (float)pow(2.0, 1.0 / intvls);
 	sig[0] = (float)sigma;
 	sig[1] = (float)(sigma * sqrt(k*k - 1.0));
 	for (i = 2; i < intvls + 3; i++)
 		sig[i] = sig[i - 1] * k;

 	for (o = 0; o < octvs; o++)
 	{
 		for (i = 0; i < intvls + 3; i++)
 		{
 			if (o == 0 && i == 0)
 			{
 				gauss_pyr[o].Intvls[i].Level = base;
 			}
 			else if (i == 0)
 			{
 				gauss_pyr[o].Intvls[i].Level = First_order(gauss_pyr[o - 1].Intvls[intvls].Level, 0.5);
 			}
 			else
 			{
 				gauss_pyr[o].Intvls[i].Level = BlurImage(gauss_pyr[o].Intvls[i - 1].Level, sig[i]);
 			}
 		}
 	}
 	sig.clear();
 	return gauss_pyr;
 }
 /*建立DoG金字塔*/
 static pyramid_ladder* build_dog_pyr(pyramid_ladder *gauss_pyr, int octvs, int intvls)
 {
 	pyramid_ladder* dog_pyr;
 	int i, o;

 	dog_pyr = new pyramid_ladder[octvs];
 	for (i = 0; i < octvs; i++)
 	{
 		dog_pyr[i].Intvls = new pyramid_floors[intvls + 2];
 	}

 	for (o = 0; o < octvs; o++)
 	{
 		for (i = 0; i < intvls + 2; i++)
 		{
 			dog_pyr[o].Intvls[i].Level = sub(gauss_pyr[o].Intvls[i + 1].Level, gauss_pyr[o].Intvls[i].Level);
 		}
 	}
 	return dog_pyr;
 }
 /*GPU*/
 /*建立高斯金字塔*/
 static void build_gauss_pyr_G(vector<vector<float*>> &G_Gaussian, int octvs, int intvls, int Xsize, int Ysize, double sigma)
 {
 	//vector<vector<float*>> gauss_pyr;
 	vector<float> sig((intvls + 3), 0.0);
 	float k;
 	int i, o;

 	/*gauss_pyr.resize(octvs);

 	for (i = 0; i < octvs; i++)
 	{
 		gauss_pyr[i].resize(intvls + 3);
 	}*/
 	k = (float)pow(2.0, 1.0 / intvls);
 	sig[0] = (float)sigma;
 	sig[1] = (float)(sigma * sqrt(k*k - 1.0));
 	for (i = 2; i < intvls + 3; i++)
 		sig[i] = sig[i - 1] * k;

 	vector<int> COL(octvs, 0);
 	vector<int> ROW(octvs, 0);
 	COL[0] = Xsize;
 	ROW[0] = Ysize;
 	for (o = 1; o < octvs; o++)
 	{
 		COL[o] = COL[o - 1] / 2;
 		ROW[o] = ROW[o - 1] / 2;
 	}

 	for (o = 0; o < octvs; o++)
 	{
 		for (i = 0; i < intvls + 3; i++)
 		{
 			if (o == 0 && i == 0)
 			{
 				//gauss_pyr[o][i] = base;
 			}
 			else if (i == 0)
 			{
 				//checkCudaErrors(cudaMalloc((void **)&gauss_pyr[o][i], COL[o] * ROW[o] * sizeof(float)));
 				Gpu_First_order(G_Gaussian[o][i], G_Gaussian[o - 1][intvls], COL[o - 1], ROW[o - 1], DOWN);
 			}
 			else
 			{
 				//checkCudaErrors(cudaMalloc((void **)&gauss_pyr[o][i], COL[o] * ROW[o] * sizeof(float)));
 				BlurImage_G(G_Gaussian[o][i], G_Gaussian[o][i - 1], sig[i], COL[o], ROW[o]);
 			}
 		}
 	}
 	/*clock_t aaa, bbb;
 	aaa = clock();
 	for (int i = 0; i < 100; i++)
 	{
 		BlurImage_G(G_Gaussian[1][1], G_Gaussian[1][0], sig[1], COL[1], ROW[1]);
 	}
 	bbb = clock();
 	std::cout << "GPU Runing Time : " << (double)(bbb - aaa) / CLOCKS_PER_SEC << " seconds" << endl;

 	vector<float> CPU_1(COL[1] * ROW[1], 0.f), CPU_2(COL[1] * ROW[1], 0.f);
 	checkCudaErrors(cudaMemcpy((float*)&CPU_1[0], G_Gaussian[1][0], COL[1] * ROW[1] * sizeof(float), cudaMemcpyDeviceToHost));

 	aaa = clock();
 	for (int i = 0; i < 100; i++)
 	{
 		BlurImage(CPU_2, CPU_1, sig[1], COL[1], ROW[1]);
 	}
 	bbb = clock();
 	std::cout << "CPU Runing Time : " << (double)(bbb - aaa) / CLOCKS_PER_SEC << " seconds" << endl;

 	system("pause");*/

 	sig.clear();
 	COL.clear();
 	ROW.clear();
 	//return gauss_pyr;
 }
 /*建立DoG金字塔*/
 static void build_dog_pyr_G(vector<vector<float*>> &G_Gaussian, vector<vector<float*>> &G_DoG, int octvs, int intvls, int Xsize, int Ysize)
 {
 	int i, o;
 	vector<int> COL(octvs, 0);
 	vector<int> ROW(octvs, 0);
 	COL[0] = Xsize;
 	ROW[0] = Ysize;
 	for (o = 1; o < octvs; o++)
 	{
 		COL[o] = COL[o - 1] / 2;
 		ROW[o] = ROW[o - 1] / 2;
 	}
 	/*vector<vector<float*>> dog_pyr;

 	dog_pyr.resize(octvs);
 	for (i = 0; i < octvs; i++)
 	{
 		dog_pyr[i].resize(intvls + 2);
 	}*/

 	for (o = 0; o < octvs; o++)
 	{
 		for (i = 0; i < intvls + 2; i++)
 		{
 			//checkCudaErrors(cudaMalloc((void **)&dog_pyr[o][i], COL[o] * ROW[o] * sizeof(float)));
 			sub_G(G_DoG[o][i], G_Gaussian[o][i + 1], G_Gaussian[o][i], COL[o], ROW[o]);
 		}
 	}
 }
 /*****************************************************************************/
 /*                                                                           */
 /*                    *********************************                      */
 /*                                                                           */
 /*                            *****************                              */
 /*                                                                           */
 /*              **********************************************               */
 /*                                                                           */
 /*****************************************************************************/
 //第三步驟 尋找特徵點
 /*尋找特徵點*/
 static struct feature* scale_space_extrema(vector<vector<vector<float>>> &dog_pyr, vector<int> &COL, vector<int> &ROW, int octvs, int intvls, double contr_thr, int curv_thr, int &cnt)
 {
 	struct feature* features = NULL;
 	//double prelim_contr_thr = 0.5 * contr_thr / (double)intvls;
 	double prelim_contr_thr = 0.5 * contr_thr;
 	struct feature* feat = NULL;
 	struct detection_data* ddata = NULL;
 	int o, i, r, c;
 	int Col = 0, Row = 0;
 	for (o = 0; o < octvs; o++)
 	{
 		Col = COL[o];
 		Row = ROW[o];
 		for (i = 1; i <= intvls; i++)
 		{
 			//開始跑圖
 			for (r = SIFT_IMG_BORDER; r < Row - SIFT_IMG_BORDER; r++)
 			{
 				for (c = SIFT_IMG_BORDER; c < Col - SIFT_IMG_BORDER; c++)
 				{
 					/* perform preliminary check on contrast */
 					if (fabs(dog_pyr[o][i][r * Col + c]) > prelim_contr_thr)
 					{
 						if (is_extremum(dog_pyr, COL, ROW, o, i, r, c)) // 找極值
 						{
 							feat = interp_extremum(dog_pyr, COL, ROW, o, i, r, c, intvls, contr_thr);
 							if (feat)
 							{
 								ddata = feat_detection_data(feat);
 								// 角點偵測
 								if (!is_too_edge_like(dog_pyr[ddata->octv][ddata->intvl], COL[o], ddata->r, ddata->c, curv_thr))
 								{
 									feat->next = features;
 									features = feat;
 									cnt++;
 								} else {
 									delete ddata;
 									delete feat;
 								}
 								ddata = NULL;
 								feat = NULL;
 							}
 						}
 					}
 				}
 			}
 		}
 	}
 	return features;
 }
 /*找極值*/
 static bool is_extremum(vector<vector<vector<float>>> &dog_pyr, vector<int> &COL, vector<int> &ROW, int octv, int intvl, int r, int c)
 {
 	int Col = COL[octv];
 	float val = dog_pyr[octv][intvl][r * Col + c];
 	if (val > 0.0)
 	{
 		for (int i = -1; i <= 1; ++i)
 			for (int j = -1; j <= 1; ++j)
 				for (int k = -1; k <= 1; ++k)
 					if (val < dog_pyr[octv][intvl + i][(r + j) * Col + (c + k)])
 						return false;
 	}
 	else
 	{
 		for (int i = -1; i <= 1; i++)
 			for (int j = -1; j <= 1; j++)
 				for (int k = -1; k <= 1; k++)
 					if (val > dog_pyr[octv][intvl + i][(r + j) * Col + (c + k)])
 						return false;
 	}
 	return true;
 }
 /*特徵點定位 DoG函數*/
 static struct feature* interp_extremum(vector<vector<vector<float>>> &dog_pyr, vector<int> &COL, vector<int> &ROW, int octv, int intvl, int r, int c, int intvls, double contr_thr)
 {
 	struct feature* feat = NULL;
 	struct detection_data* ddata = NULL;
 	float xi = 0.0, xr = 0.0, xc = 0.0, contr = 0.0;
 	int i = 0;

 	while (i < SIFT_MAX_INTERP_STEPS)
 	{
 		interp_step(dog_pyr, COL, ROW, octv, intvl, r, c, xi, xr, xc);
 		if (fabs(xi) < 0.5  &&  fabs(xr) < 0.5  &&  fabs(xc) < 0.5)
 			break;

 		c	  += (int)round(xc);
 		r	  += (int)round(xr);
 		intvl += (int)round(xi);

 		if (intvl < 1				|| intvl >  intvls						||
 			c	  < SIFT_IMG_BORDER || c	 >= COL[octv] - SIFT_IMG_BORDER ||
 			r	  < SIFT_IMG_BORDER || r	 >= ROW[octv] - SIFT_IMG_BORDER )
 		{
 			return NULL;
 		}

 		i++;
 	}

 	/* ensure convergence of interpolation */
 	if (i >= SIFT_MAX_INTERP_STEPS)
 		return NULL;

 	contr = interp_contr(dog_pyr, COL, ROW, octv, intvl, r, c, xi, xr, xc);
 	//if (fabs(contr) < (float)(contr_thr / intvls))
 	if (fabs(contr) < (float)(contr_thr))
 		return NULL;

 	feat = new_feature();
 	ddata = feat_detection_data(feat);
 	feat->img_pt.x = feat->x = ((float)c + xc) * (float)pow(2.0, octv);
 	feat->img_pt.y = feat->y = ((float)r + xr) * (float)pow(2.0, octv);
 	ddata->r = r;
 	ddata->c = c;
 	ddata->octv = octv;
 	ddata->intvl = intvl;
 	ddata->subintvl = xi;

 	return feat;
 }
 static void interp_step(vector<vector<vector<float>>> &dog_pyr, vector<int> &COL, vector<int> &ROW, int octv, int intvl, int r, int c, float &xi, float &xr, float &xc)
 {
 	vector<float> dD = deriv_3D(dog_pyr, COL, ROW, octv, intvl, r, c);
 	vector<float> H = hessian_3D(dog_pyr, COL, ROW, octv, intvl, r, c);

 	vector<float> H_inv = Invert3X3(H);

 	vector<float> x = GEMM_3X1(H_inv, dD, -1.0);

 	xi = x[2];
 	xr = x[1];
 	xc = x[0];
 }
 static float interp_contr(vector<vector<vector<float>>> &dog_pyr, vector<int> &COL, vector<int> &ROW, int octv, int intvl, int r, int c, float xi, float xr, float xc)
 {
 	int Col = COL[octv];
 	float t = 0.0;
 	vector<float> x(3, 0.0);
 	x[0] = xc;
 	x[1] = xr;
 	x[2] = xi;

 	vector<float> dD = deriv_3D(dog_pyr, COL, ROW, octv, intvl, r, c);
 	t = GEMM_1X1(dD, x);

 	return dog_pyr[octv][intvl][r * Col + c] + t * 0.5f;
 }
 /*建立新的點*/
 static struct feature* new_feature(void)
 {
 	struct feature* feat;
 	struct detection_data* ddata;

 	feat = new struct feature;
 	memset(feat, 0, sizeof(struct feature));
 	ddata = new struct detection_data;
 	memset(ddata, 0, sizeof(struct detection_data));
 	feat->feature_data = ddata;

 	return feat;
 }
 /*消除邊緣響應*/
 static bool is_too_edge_like(vector<float> &dog_img, int &COL, int r, int c, int curv_thr)
 {
 	float d, dxx, dyy, dxy, tr, det;
 	int Col = COL;

 	d   =  dog_img[(r    ) * Col + (c    )];
 	dxx =  dog_img[(r    ) * Col + (c + 1)] + dog_img[(r    ) * Col + (c - 1)] - (float)2.0 * d;
 	dyy =  dog_img[(r + 1) * Col + (c    )] + dog_img[(r - 1) * Col + (c    )] - (float)2.0 * d;
 	
 	dxy = (dog_img[(r + 1) * Col + (c + 1)] - dog_img[(r + 1) * Col + (c - 1)] -
 		   dog_img[(r - 1) * Col + (c + 1)] + dog_img[(r - 1) * Col + (c - 1)]) / (float)4.0;
 	tr  =  dxx + dyy;
 	det =  dxx * dyy - dxy * dxy;

 	if (det <= 0)
 		return true;

 	if (tr * tr / det < ((float)curv_thr + 1.0) * ((float)curv_thr + 1.0) / (float)curv_thr)
 		return false;

 	return true;
 }
 /*計算所在層數及特徵點方向計算所需數據*/
 static void calc_feature_scales(struct feature* features, double sigma, int intvls)
 {
 	struct detection_data* ddata;
 	float intvl;
 	for (struct feature* feat = features; feat != NULL; feat = feat->next)
 	{
 		ddata = feat_detection_data(feat);
 		intvl = (float)(ddata->intvl + ddata->subintvl);
 		feat->scl = (float)(sigma * pow(2.0, ddata->octv + intvl / intvls));
 		ddata->scl_octv = (float)(sigma * pow(2.0, intvl / intvls));
 	}
 }
 /*如果有將原圖放大須將座標調回*/
 static void adjust_for_img_dbl(struct feature *features)
 {
 	for (struct feature* feat = features; feat != NULL; feat = feat->next)
 	{
 		feat->x /= 2.f;
 		feat->y /= 2.f;
 		feat->scl /= 2.f;
 		feat->img_pt.x /= 2.f;
 		feat->img_pt.y /= 2.f;
 	}
 }

 /*****************************************************************************/
 /*              **********************************************               */
 /*              *              *                *            *               */
 /*              *             *                 *            *               */
 /*              *            *                  *            *               */
 /*              *           *                   **********   *               */
 /*              *                                            *               */
 /*              **********************************************               */
 /*****************************************************************************/
 //第四步驟 特徵點方向性

 /*計算特徵點角度*/
 static struct feature* calc_feature_oris(struct feature *features, vector < vector<vector<float>>>&gauss_pyr, vector<int>&COL, vector<int>&ROW, int &cnt)
 {
 	struct feature *features_t = NULL;
 	struct feature *features_reg = NULL;
 	struct feature *ptr = NULL;
 	struct detection_data* ddata;
 	vector<float> hist;
 	float omax;
 	int i, j, t;

 	for (struct feature* feat = features; feat != NULL; feat = feat->next)
 	{
 		ddata = feat_detection_data(feat);
 		hist = ori_hist(
 			gauss_pyr[ddata->octv][ddata->intvl], 
 			COL[ddata->octv], ROW[ddata->octv], 
 			ddata->r, ddata->c,
 			SIFT_ORI_HIST_BINS, 
 			(int)round(SIFT_ORI_RADIUS * ddata->scl_octv), 
 			(float)SIFT_ORI_SIG_FCTR * ddata->scl_octv
 		);

 		for (j = 0; j < SIFT_ORI_SMOOTH_PASSES; j++)
 		{
 			smooth_ori_hist(hist, SIFT_ORI_HIST_BINS);
 		}
 		omax = dominant_ori(hist, SIFT_ORI_HIST_BINS);
 		t = 0;
 		features_reg = add_good_ori_features(hist, SIFT_ORI_HIST_BINS, omax * (float)SIFT_ORI_PEAK_RATIO, feat, cnt, t);
 		if (features_reg)
 		{
 			ptr = features_reg;
 			for (i = 0; i < (t - 1); ++i)
 			{
 				ptr = ptr->next;
 			}
 			ptr->next = features_t;
 			features_t = features_reg;
 		}
 		features_reg = NULL;
 		ptr = NULL;
 	}
 	return features_t;
 }
 /*計算方向直方圖*/
 static vector<float> ori_hist(vector<float> &img, int &COL, int &ROW, int r, int c, int n, int rad, float sigma)
 {
 	vector<float> hist(n, 0.0);
 	float mag, ori, w, exp_denom, PI2 = M_PI * 2.f;
 	int bin, i, j;

 	exp_denom = 2.f * sigma * sigma;
 	for (i = -rad; i <= rad; i++)
 	{
 		for (j = -rad; j <= rad; j++)
 		{
 			if (calc_grad_mag_ori(img, COL, ROW, r + i, c + j, mag, ori))
 			{
 				w = exp(-(i*i + j*j) / exp_denom);
 				bin = (int)round((float)n * (ori + M_PI) / PI2);
 				bin = (bin < n) ? bin : 0;
 				hist[bin] += w * mag;
 			}
 		}
 	}
 	return hist;
 }
 /*計算特徵點梯度及方向角度*/
 static bool calc_grad_mag_ori(vector<float> &img, int &COL, int &ROW, int r, int c, float &mag, float &ori)
 {
 	float dx = 0.0, dy = 0.0;
 	int Col = COL;
 	if (r > 0 && r < ROW - 1 && c > 0 && c < COL - 1)
 	{
 		dx = img[(r    ) * Col + (c + 1)] - img[(r    ) * Col + (c - 1)];
 		dy = img[(r - 1) * Col + (c    )] - img[(r + 1) * Col + (c    )];
 		mag = sqrt(dx * dx + dy * dy);
 		//mag = dx > dy ? max(0.875f*dx+0.5f*dy, dx) : max(0.875f*dy + 0.5f*dx, dy);
 		ori = atan2(dy, dx);
 		return true;
 	}
 	else
 	{
 		return false;
 	}
 }
 /*對方向直方圖做濾波*/
 static void smooth_ori_hist(vector<float> &hist, int n)
 {
 	float prev, tmp = 0.0, h0 = hist[0];
 	int i;

 	prev = hist[n - 1];
 	for (i = 0; i < n; i++)
 	{
 		tmp = hist[i];
 		hist[i] = 0.25f * prev + 0.5f * hist[i] + 0.25f * ((i + 1 == n) ? h0 : hist[i + 1]);
 		prev = tmp;
 	}
 }
 /*找出最大角度*/
 static float dominant_ori(vector<float> &hist, int n)
 {
 	float omax;
 	int i;

 	omax = hist[0];
 	for (i = 1; i < n; i++)
 	{
 		if (hist[i] > omax)
 		{
 			omax = hist[i];
 		}
 	}
 	return omax;
 }
 /*新增副方向點*/
 static struct feature* add_good_ori_features(vector<float> &hist, int n, float mag_thr, struct feature *feat, int &cnt, int &t)
 {
 	struct feature* new_feat = NULL;
 	struct feature* features = NULL;
 	float bin, PI2 = M_PI * 2.f;
 	int l, r, i;
 	for (i = 0; i < n; i++)
 	{
 		l = (i == 0) ? n - 1 : i - 1;
 		r = (i + 1) % n;

 		if (hist[i] > hist[l] && hist[i] > hist[r] && hist[i] >= mag_thr)
 		{
 			bin = i + interp_hist_peak(hist[l], hist[i], hist[r]);
 			bin = (bin < 0) ? n + bin : (bin >= n) ? bin - n : bin;
 			new_feat = clone_feature(feat);
 			new_feat->ori = ((PI2 * bin) / n) - M_PI;
 			new_feat->next = features;
 			features = new_feat;
 			cnt++;
 			t++;
 		}
 	}
 	return features;
 }
 /*複製特徵點資訊 副方向用*/
 static struct feature* clone_feature(feature* feat)
 {
 	struct feature* new_feat;
 	struct detection_data* ddata;

 	new_feat = new_feature();
 	ddata = feat_detection_data(new_feat);
 	memcpy(new_feat, feat, sizeof(struct feature));
 	memcpy(ddata, feat_detection_data(feat), sizeof(struct detection_data));
 	new_feat->feature_data = ddata;

 	return new_feat;
 }
 /*****************************************************************************/
 /*              **********************************************               */
 /*                                   *                                       */
 /*                                   *                                       */
 /*                      *********************************                    */
 /*                                   *                  *                    */
 /*                                   *                  *                    */
 /*              **********************************************               */
 /*****************************************************************************/
 //第五步驟 特徵點維度統計

 //計算128維度值 4*4*8
 static void compute_descriptors(struct feature* features, vector < vector<vector<float>>>&gauss_pyr, vector<int>&COL, vector<int>&ROW, int d, int n)
 {
 	struct detection_data* ddata;
 	vector<vector<vector<float>>> hist;
 	struct feature* feat;

 	for (feat = features; feat != NULL; feat = feat->next)
 	{
 		ddata = feat_detection_data(feat);
 		hist = descr_hist(gauss_pyr[ddata->octv][ddata->intvl], COL[ddata->octv], ROW[ddata->octv], ddata->r,
 			ddata->c, feat->ori, ddata->scl_octv, d, n);
 		hist_to_descr(hist, d, n, feat);
 	}
 }
 static vector<vector<vector<float>>> descr_hist(vector<float> &img, int &COL, int &ROW, int r, int c, float ori, float scl, int d, int n)
 {
 	vector<vector<vector<float>>> hist;
 	float cos_t, sin_t, hist_width, exp_denom, r_rot, c_rot, grad_mag,
 		grad_ori, w, rbin, cbin, obin, bins_per_rad, PI2 = 2.f * M_PI;
 	int radius, i, j;

 	hist.resize(d);
 	for (i = 0; i < d; i++)
 	{
 		hist[i].resize(d);
 		for (j = 0; j < d; j++)
 		{
 			hist[i][j].resize(n);
 		}
 	}

 	cos_t = cos(ori);
 	sin_t = sin(ori);
 	bins_per_rad = n / PI2;
 	exp_denom = d * d * 0.5f;
 	hist_width = (float)SIFT_DESCR_SCL_FCTR * scl;
 	radius = (int)(hist_width * sqrt(2.f) * (d + 1.f) * 0.5f + 0.5f);

 	for (i = -radius; i <= radius; i++)
 	{
 		for (j = -radius; j <= radius; j++)
 		{
 			// 把位置縮放到 [0~4)
 			c_rot = (j * cos_t - i * sin_t) / hist_width;
 			r_rot = (j * sin_t + i * cos_t) / hist_width;
 			rbin = r_rot + d / 2.f - 0.5f;
 			cbin = c_rot + d / 2.f - 0.5f;
 			/*cout << hist_width << endl;
 			cout << radius << endl;
 			cout << c_rot << " " << r_rot << endl;
 			cout << cbin << " " << rbin << endl;
 			system("pause");*/

 			// 確保位置 [0~4) 刪除超出原圖指定半徑外的點
 			if (rbin > -1.f  &&  rbin < d  &&  cbin > -1.f  &&  cbin < d)
 			{
 				// 計算成功回傳1 並修改數值
 				if (calc_grad_mag_ori(img, COL, ROW, r + i, c + j, grad_mag, grad_ori))
 				{
 					grad_ori -= ori;
 					// 修正0~360
 					while (grad_ori < 0.f)
 					{
 						grad_ori += PI2;
 					}
 					while (grad_ori >= PI2)
 					{
 						grad_ori -= PI2;
 					}

 					obin = grad_ori * bins_per_rad;// 高斯加權分布
 					w = exp(-(c_rot * c_rot + r_rot * r_rot) / exp_denom); // 計算權值
 					interp_hist_entry(hist, rbin, cbin, obin, grad_mag * w, d, n); //插補
 				}
 			}
 		}
 	}
 	return hist;
 }
 static void interp_hist_entry(vector<vector<vector<float>>> &hist, float rbin, float cbin, float obin, float mag, int d, int n)
 {
 	float d_r, d_c, d_o, v_r, v_c, v_o;
 	vector<vector<float>>::iterator row;
 	vector<float>::iterator h;
 	int r0, c0, o0, rb, cb, ob, r, c, o;

 	r0 = (int)floor(rbin);
 	c0 = (int)floor(cbin);
 	o0 = (int)floor(obin);
 	d_r = rbin - r0;
 	d_c = cbin - c0;
 	d_o = obin - o0; 

 	for (r = 0; r <= 1; r++)
 	{
 		rb = r0 + r;
 		if (rb >= 0 && rb < d)
 		{
 			v_r = mag * ((r == 0) ? 1.f - d_r : d_r);
 			row = hist[rb].begin();
 			for (c = 0; c <= 1; c++)
 			{
 				cb = c0 + c;
 				if (cb >= 0 && cb < d)
 				{
 					v_c = v_r * ((c == 0) ? 1.f - d_c : d_c);
 					h = row[cb].begin();
 					for (o = 0; o <= 1; o++)
 					{
 						ob = (o0 + o) % n;
 						v_o = v_c * ((o == 0) ? 1.f - d_o : d_o);
 						h[ob] += v_o;
 					}
 				}
 			}
 		}
 	}
 }
 static void hist_to_descr(vector<vector<vector<float>>> &hist, int d, int n, feature* feat)
 {
 	int int_val, i, r, c, o, k = 0;

 	for (r = 0; r < d; r++)
 	{
 		for (c = 0; c < d; c++)
 		{
 			for (o = 0; o < n; o++)
 			{
 				feat->descr[k++] = hist[r][c][o];
 			}
 		}
 	}
 	feat->d = k;
 	normalize_descr(feat);
 	for (i = 0; i < k; i++)
 	{
 		if (feat->descr[i] > SIFT_DESCR_MAG_THR)
 		{
 			feat->descr[i] = (float)SIFT_DESCR_MAG_THR;
 		}
 	}
 	normalize_descr(feat);

 	/* convert floating-point descriptor to integer valued descriptor */
 	for (i = 0; i < k; i++)
 	{
 		int_val = (int)(SIFT_INT_DESCR_FCTR * feat->descr[i]);
 		feat->descr[i] = (float)min(255, int_val);
 	}
 }
 /*正規化*/
 static void normalize_descr(feature* feat)
 {
 	float cur, len_inv, len_sq = 0.f;
 	int i, d = feat->d;

 	for (i = 0; i < d; i++)
 	{
 		cur = feat->descr[i];
 		len_sq += cur*cur;
 	}
 	len_inv = 1.f / sqrt(len_sq);
 	for (i = 0; i < d; i++)
 		feat->descr[i] *= len_inv;
 }
 /*****************************************************************************/
 /*釋放金字塔記憶體*/
 static void release_pyr(vector<vector<vector<float>>> pyr, int octvs, int n)
 {
 	int i, j;
 	for (i = 0; i < octvs; i++)
 	{
 		for (j = 0; j < n; j++)
 		{
 			pyr[j].clear();
 		}
 		pyr[i].clear();
 	}
 	pyr.clear();
 }
 /*GPU*/
 static void release_pyr_G(vector<vector<float*>> &pyr, int octvs, int n)
 {
 	int i, j;
 	for (i = 0; i < octvs; i++)
 	{
 		for (j = 0; j < n; j++)
 		{
 			checkCudaErrors(cudaFree(pyr[i][j]));
 		}
 		pyr[i].clear();
 	}
 	pyr.clear();
 }
	class fpoint
	{
	public:
	fpoint()
	{
	x = 0.0;
	y = 0.0;
	};
	fpoint(float p_x, float p_y) : x(p_x), y(p_y) {}
	~fpoint() {};
	float x;
	float y;
	};

	/特徵點定位 DoG函數/
	static struct feature* interp_extremum(vector<vector<vector<float>>> &dog_pyr, vector<int> &COL, vector<int> &ROW, int octv, int intvl, int r, int c, int intvls, double contr_thr)
	{
	struct feature* feat = NULL;
	struct detection_data* ddata = NULL;
	float xi = 0.0, xr = 0.0, xc = 0.0, contr = 0.0;
	int i = 0;

	while (i < SIFT_MAX_INTERP_STEPS)
	{
	interp_step(dog_pyr, COL, ROW, octv, intvl, r, c, xi, xr, xc);
	if (fabs(xi) < 0.5 && fabs(xr) < 0.5 && fabs(xc) < 0.5)
	break;

	c += (int)round(xc);
	r += (int)round(xr);
	intvl += (int)round(xi);

	if (intvl < 1 \|\| intvl > intvls \|\|
	c < SIFT_IMG_BORDER \|\| c >= COL[octv] - SIFT_IMG_BORDER \|\|
	r < SIFT_IMG_BORDER \|\| r >= ROW[octv] - SIFT_IMG_BORDER )
	{
	return NULL;
	}

	i++;
	}

	/* ensure convergence of interpolation */
	if (i >= SIFT_MAX_INTERP_STEPS)
	return NULL;

	contr = interp_contr(dog_pyr, COL, ROW, octv, intvl, r, c, xi, xr, xc);
	//if (fabs(contr) < (float)(contr_thr / intvls))
	if (fabs(contr) < (float)(contr_thr))
	return NULL;

	feat = new_feature();
	ddata = feat_detection_data(feat);
	feat->img_pt.x = feat->x = ((float)c + xc) * (float)pow(2.0, octv);
	feat->img_pt.y = feat->y = ((float)r + xr) * (float)pow(2.0, octv);
	ddata->r = r;
	ddata->c = c;
	ddata->octv = octv;
	ddata->intvl = intvl;
	ddata->subintvl = xi;

	return feat;
	}