satoruhiga · April 3, 2016 14:43
diff --git a/stack_blur.h b/stack_blur.h
 #include <cassert>
 #include <cstdint>
 #include <stdexcept>
 #include <vector>
 #include "ofMain.h"

 namespace stack_blur {
 	
 	// Declares the two 255-entry lookup tables read by stack_blur().
 	// The tables do not depend on T; the arrays are defined further down as
 	// static members of this class template.
 	template <typename T>
 	struct stack_blur_tables {
 		static const uint16_t g_stack_blur8_mul[255];
 		static const uint8_t g_stack_blur8_shr[255];
 	};
 	
 	//------------------------------------------------------------------------
 	// Multiplier table with 255 entries. stack_blur() indexes it with the blur
 	// radius (clamped to 254), multiplies each running weighted sum by the
 	// entry and then shifts right by the matching g_stack_blur8_shr entry.
 	// Together the pair is a fixed-point reciprocal of (radius + 1)^2, the
 	// total weight of the sum: radius 1 gives 512 >> 11 = 1/4, radius 2 gives
 	// 456 >> 12 ~= 1/9.
 	// NOTE(review): presumably defined as a class-template member so the
 	// definition can live in a header without duplicate symbols - confirm.
 	template<class T>
 	uint16_t const stack_blur_tables<T>::g_stack_blur8_mul[255] =
 	{
 		512,512,456,512,328,456,335,512,405,328,271,456,388,335,292,512,
 		454,405,364,328,298,271,496,456,420,388,360,335,312,292,273,512,
 		482,454,428,405,383,364,345,328,312,298,284,271,259,496,475,456,
 		437,420,404,388,374,360,347,335,323,312,302,292,282,273,265,512,
 		497,482,468,454,441,428,417,405,394,383,373,364,354,345,337,328,
 		320,312,305,298,291,284,278,271,265,259,507,496,485,475,465,456,
 		446,437,428,420,412,404,396,388,381,374,367,360,354,347,341,335,
 		329,323,318,312,307,302,297,292,287,282,278,273,269,265,261,512,
 		505,497,489,482,475,468,461,454,447,441,435,428,422,417,411,405,
 		399,394,389,383,378,373,368,364,359,354,350,345,341,337,332,328,
 		324,320,316,312,309,305,301,298,294,291,287,284,281,278,274,271,
 		268,265,262,259,257,507,501,496,491,485,480,475,470,465,460,456,
 		451,446,442,437,433,428,424,420,416,412,408,404,400,396,392,388,
 		385,381,377,374,370,367,363,360,357,354,350,347,344,341,338,335,
 		332,329,326,323,320,318,315,312,310,307,304,302,299,297,294,292,
 		289,287,285,282,280,278,275,273,271,269,267,265,263,261,259
 	};
 	
 	//------------------------------------------------------------------------
 	// Right-shift table with 255 entries, indexed by the blur radius exactly
 	// like g_stack_blur8_mul. stack_blur() shifts the product
 	// (sum * g_stack_blur8_mul[radius]) right by this many bits to normalise
 	// the weighted sum back to a channel value.
 	template<class T>
 	uint8_t const stack_blur_tables<T>::g_stack_blur8_shr[255] =
 	{
 		9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17,
 		17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
 		19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
 		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
 		21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
 		21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22,
 		22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
 		22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23,
 		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
 		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
 		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
 		23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
 	};
 	
 	// Maps a stored channel type to the type used for the running sums.
 	// The primary template has no value_type, so only the channel types
 	// specialized below can be used with it.
 	template <typename Channel>
 	struct process_pixel_type {};
 	
 	// 8-bit channels are summed in 32-bit unsigned integers.
 	template <> struct process_pixel_type<uint8_t> { typedef uint32_t value_type; };
 	
 	// float channels are summed as float.
 	template <> struct process_pixel_type<float> { typedef float value_type; };
 	
 	// A pixel made of N channel values of type T, with the element-wise
 	// arithmetic that stack_blur() needs for its running sums.
 	template <typename T, int N>
 	struct pixel_type {
 		T data[N];
 		
 		// Leaves every channel uninitialized.
 		inline pixel_type() {}
 		
 		// Converts channel by channel from a pixel with another channel type.
 		// Values are converted with static_cast, so a wider integer source is
 		// truncated (uint32_t -> uint8_t keeps the low 8 bits).
 		template <typename V>
 		inline pixel_type(const pixel_type<V, N>& copy) {
 			for (int i = 0; i < N; i++)
 				data[i] = static_cast<T>(copy.data[i]);
 		}
 		
 		// Sets every channel to v.
 		template <typename V>
 		inline pixel_type(V v) {
 			for (int i = 0; i < N; i++)
 				data[i] = static_cast<T>(v);
 		}
 		
 		// Returns a copy with every channel multiplied by the scalar v.
 		template <typename V>
 		inline pixel_type operator*(const V v) const {
 			pixel_type p;
 			for (int i = 0; i < N; i++)
 				p.data[i] = data[i] * v;
 			return p;
 		}
 		
 		// Returns a copy with every channel shifted right by v bits.
 		// Only instantiable when T supports operator>> (integer channels).
 		template <typename V>
 		inline pixel_type operator>>(const V v) const {
 			pixel_type p;
 			for (int i = 0; i < N; i++)
 				p.data[i] = data[i] >> v;
 			return p;
 		}
 		
 		// Adds another pixel channel by channel.
 		template <typename V>
 		inline pixel_type& operator+=(const pixel_type<V, N>& v) {
 			for (int i = 0; i < N; i++)
 				data[i] += v.data[i];
 			return *this;
 		}
 		
 		// Subtracts another pixel channel by channel.
 		template <typename V>
 		inline pixel_type& operator-=(const pixel_type<V, N>& v) {
 			for (int i = 0; i < N; i++)
 				data[i] -= v.data[i];
 			return *this;
 		}
 	};
 	
 	//
 	
 	// Returns a pointer to pixel (x, y) of a mutable buffer that is laid out
 	// as rows of w pixels, each pixel being N consecutive T values.
 	// No bounds checking is done.
 	template <typename T, int N>
 	inline pixel_type<T, N>* get_pixel(T* data, int x, int y, int w)
 	{
 		const int pixel_index = x + (y * w);
 		T* const first_channel = data + pixel_index * N;
 		return reinterpret_cast<pixel_type<T, N>*>(first_channel);
 	}
 	
 	// Read-only overload: returns a pointer to pixel (x, y) of a const buffer
 	// that is laid out as rows of w pixels, each pixel being N consecutive
 	// T values. No bounds checking is done.
 	template <typename T, int N>
 	inline const pixel_type<T, N>* get_pixel(const T* data, int x, int y, int w)
 	{
 		const int pixel_index = x + (y * w);
 		const T* const first_channel = data + pixel_index * N;
 		return reinterpret_cast<const pixel_type<T, N>*>(first_channel);
 	}
 	
 	template <typename T, int N>
 	void stack_blur(const T* src, T* dst, int width, int height, int blur_width, int blur_height)
 	{
 		assert(src);
 		assert(dst);
 		
 		typedef typename process_pixel_type<T>::value_type value_type;
 		typedef pixel_type<T, N> pixel_ptr_type;
 		typedef pixel_type<value_type, N> pixel_type;
 		
 		const int w = width;
 		const int h = height;
 		const int wm = w - 1;
 		const int hm = h - 1;
 		int rx = blur_width;
 		int ry = blur_height;
 		
 		if(rx > 0)
 		{
 			if(rx > 254) rx = 254;
 			
 			const value_type div = rx * 2 + 1;
 			const value_type mul_sum = stack_blur_tables<value_type>::g_stack_blur8_mul[rx];
 			const value_type shr_sum = stack_blur_tables<value_type>::g_stack_blur8_shr[rx];
 			
 			vector<pixel_type> stack(div);
 			
 			for (int y = 0; y < h; y++)
 			{
 				pixel_type sum(0);
 				pixel_type sum_in(0);
 				pixel_type sum_out(0);
 				
 				const pixel_ptr_type* src_pix_ptr = get_pixel<T, N>(src, 0, y, w);
 				pixel_ptr_type* dst_pix_ptr = NULL;
 				
 				pixel_type pix = *src_pix_ptr;
 				
 				for (int i = 0; i <= rx; i++)
 					stack[i] = pix;
 				
 				for (int i = 0; i <= rx; i++)
 					sum += pix * (i + 1);
 				
 				for (int i = 0; i <= rx; i++)
 					sum_out += pix;
 				
 				for (int i = 1; i <= rx; i++)
 				{
 					if (i <= wm) src_pix_ptr++;
 					pix = *src_pix_ptr;
 					stack[i + rx] = pix;
 					sum += pix * (rx + 1 - i);
 					sum_in += pix;
 				}
 				
 				unsigned int stack_ptr = rx;
 				unsigned int xp = rx;
 				unsigned int stack_start;
 				
 				if (xp > wm) xp = wm;
 				
 				src_pix_ptr = get_pixel<T, N>(src, xp, y, w);
 				dst_pix_ptr = get_pixel<T, N>(dst, 0, y, w);
 				
 				for (int x = 0; x < w; x++)
 				{
 					*dst_pix_ptr = (sum * mul_sum) >> shr_sum;
 					dst_pix_ptr++;
 					
 					sum -= sum_out;
 					
 					stack_start = stack_ptr + div - rx;
 					
 					if (stack_start >= div)
 						stack_start -= div;
 					
 					sum_out -= stack[stack_start];
 					
 					if (xp < wm)
 					{
 						src_pix_ptr++;
 						pix = *src_pix_ptr;
 						++xp;
 					}
 					
 					stack[stack_start] = pix;
 					
 					sum_in += pix;
 					sum += sum_in;
 					
 					++stack_ptr;
 					
 					if (stack_ptr >= div) stack_ptr = 0;
 					
 					const pixel_type& stack_pix = stack[stack_ptr];
 					sum_out += stack_pix;
 					sum_in -= stack_pix;
 				}
 			}
 		}
 		
 		if(ry > 0)
 		{
 			if(ry > 254) ry = 254;
 			
 			const value_type div = ry * 2 + 1;
 			const value_type mul_sum = stack_blur_tables<value_type>::g_stack_blur8_mul[ry];
 			const value_type shr_sum = stack_blur_tables<value_type>::g_stack_blur8_shr[ry];
 			
 			vector<pixel_type> stack(div);
 			
 			int stride = w;
 			for(int x = 0; x < w; x++)
 			{
 				pixel_type sum(0);
 				pixel_type sum_in(0);
 				pixel_type sum_out(0);
 				
 				const pixel_ptr_type* src_pix_ptr = get_pixel<T, N>(dst, x, 0, w);
 				pixel_ptr_type* dst_pix_ptr = NULL;
 				pixel_type pix = *src_pix_ptr;
 				
 				for (int i = 0; i <= ry; i++)
 					stack[i] = pix;
 				
 				for (int i = 0; i <= ry; i++)
 					sum += pix * (i + 1);
 				
 				for (int i = 0; i <= ry; i++)
 					sum_out += pix;
 				
 				for (int i = 1; i <= ry; i++)
 				{
 					if (i <= hm) src_pix_ptr += stride;
 					pix = *src_pix_ptr;
 					stack[i + ry] = pix;
 					sum += pix * (ry + 1 - i);
 					sum_in += pix;
 				}
 				
 				unsigned int stack_ptr = ry;
 				unsigned int yp = ry;
 				unsigned int stack_start;
 				
 				if (yp > hm) yp = hm;
 				
 				src_pix_ptr = get_pixel<T, N>(dst, x, yp, w);
 				dst_pix_ptr = get_pixel<T, N>(dst, x, 0, w);
 				
 				for (int y = 0; y < h; y++)
 				{
 					*dst_pix_ptr = (sum * mul_sum) >> shr_sum;
 					dst_pix_ptr += stride;
 					
 					sum -= sum_out;
 					
 					stack_start = stack_ptr + div - ry;
 					if (stack_start >= div) stack_start -= div;
 					sum_out -= stack[stack_start];
 					
 					if (yp < hm)
 					{
 						src_pix_ptr += stride;
 						pix = *src_pix_ptr;
 						++yp;
 					}
 					
 					stack[stack_start] = pix;
 					
 					sum_in += pix;
 					sum += sum_in;
 					
 					++stack_ptr;
 					if (stack_ptr >= div) stack_ptr = 0;
 					
 					const pixel_type& stack_pix = stack[stack_ptr];
 					sum_out += stack_pix;
 					sum_in -= stack_pix;
 				}
 			}
 		}
 	}
 	
 	void blur(const ofPixels& in, ofPixels& out, int radius)
 	{
 		assert(in.getNumChannels() == out.getNumChannels());
 		
 		switch (in.getNumChannels()) {
 			case 1: {
 				stack_blur<uint8_t, 1>(in.getPixels(), out.getPixels(), in.getWidth(), in.getHeight(), radius, radius);
 				break;
 			}
 			case 3: {
 				stack_blur<uint8_t, 3>(in.getPixels(), out.getPixels(), in.getWidth(), in.getHeight(), radius, radius);
 				break;
 			}
 			case 4: {
 				stack_blur<uint8_t, 4>(in.getPixels(), out.getPixels(), in.getWidth(), in.getHeight(), radius, radius);
 				break;
 			}
 			default: throw;
 		}
 	}
 	
 }
	#include "ofMain.h"

	namespace stack_blur {

	// Declares the two 255-entry lookup tables read by stack_blur().
	// The tables do not depend on T; the arrays are defined further down as
	// static members of this class template.
	template <typename T>
	struct stack_blur_tables {
		static const uint16_t g_stack_blur8_mul[255];
		static const uint8_t g_stack_blur8_shr[255];
	};

	//------------------------------------------------------------------------
	// Multiplier table with 255 entries. stack_blur() indexes it with the blur
	// radius (clamped to 254), multiplies each running weighted sum by the
	// entry and then shifts right by the matching g_stack_blur8_shr entry.
	// Together the pair is a fixed-point reciprocal of (radius + 1)^2, the
	// total weight of the sum: radius 1 gives 512 >> 11 = 1/4, radius 2 gives
	// 456 >> 12 ~= 1/9.
	// NOTE(review): presumably defined as a class-template member so the
	// definition can live in a header without duplicate symbols - confirm.
	template<class T>
	uint16_t const stack_blur_tables<T>::g_stack_blur8_mul[255] =
	{
	512,512,456,512,328,456,335,512,405,328,271,456,388,335,292,512,
	454,405,364,328,298,271,496,456,420,388,360,335,312,292,273,512,
	482,454,428,405,383,364,345,328,312,298,284,271,259,496,475,456,
	437,420,404,388,374,360,347,335,323,312,302,292,282,273,265,512,
	497,482,468,454,441,428,417,405,394,383,373,364,354,345,337,328,
	320,312,305,298,291,284,278,271,265,259,507,496,485,475,465,456,
	446,437,428,420,412,404,396,388,381,374,367,360,354,347,341,335,
	329,323,318,312,307,302,297,292,287,282,278,273,269,265,261,512,
	505,497,489,482,475,468,461,454,447,441,435,428,422,417,411,405,
	399,394,389,383,378,373,368,364,359,354,350,345,341,337,332,328,
	324,320,316,312,309,305,301,298,294,291,287,284,281,278,274,271,
	268,265,262,259,257,507,501,496,491,485,480,475,470,465,460,456,
	451,446,442,437,433,428,424,420,416,412,408,404,400,396,392,388,
	385,381,377,374,370,367,363,360,357,354,350,347,344,341,338,335,
	332,329,326,323,320,318,315,312,310,307,304,302,299,297,294,292,
	289,287,285,282,280,278,275,273,271,269,267,265,263,261,259
	};

	//------------------------------------------------------------------------
	// Right-shift table with 255 entries, indexed by the blur radius exactly
	// like g_stack_blur8_mul. stack_blur() shifts the product
	// (sum * g_stack_blur8_mul[radius]) right by this many bits to normalise
	// the weighted sum back to a channel value.
	template<class T>
	uint8_t const stack_blur_tables<T>::g_stack_blur8_shr[255] =
	{
	9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17,
	17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
	19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
	21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
	21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22,
	22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
	22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23,
	23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
	23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
	23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
	23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
	};

	// Maps a stored channel type to the type used for the running sums.
	// The primary template has no value_type, so only the channel types
	// specialized below can be used with it.
	template <typename Channel>
	struct process_pixel_type {};

	// 8-bit channels are summed in 32-bit unsigned integers.
	template <> struct process_pixel_type<uint8_t> { typedef uint32_t value_type; };

	// float channels are summed as float.
	template <> struct process_pixel_type<float> { typedef float value_type; };

	// A pixel made of N channel values of type T, with the element-wise
	// arithmetic that stack_blur() needs for its running sums.
	template <typename T, int N>
	struct pixel_type {
		T data[N];

		// Leaves every channel uninitialized.
		inline pixel_type() {}

		// Converts channel by channel from a pixel with another channel type.
		// Values are converted with static_cast, so a wider integer source is
		// truncated (uint32_t -> uint8_t keeps the low 8 bits).
		template <typename V>
		inline pixel_type(const pixel_type<V, N>& copy) {
			for (int i = 0; i < N; i++)
				data[i] = static_cast<T>(copy.data[i]);
		}

		// Sets every channel to v.
		template <typename V>
		inline pixel_type(V v) {
			for (int i = 0; i < N; i++)
				data[i] = static_cast<T>(v);
		}

		// Returns a copy with every channel multiplied by the scalar v.
		template <typename V>
		inline pixel_type operator*(const V v) const {
			pixel_type p;
			for (int i = 0; i < N; i++)
				p.data[i] = data[i] * v;
			return p;
		}

		// Returns a copy with every channel shifted right by v bits.
		// Only instantiable when T supports operator>> (integer channels).
		template <typename V>
		inline pixel_type operator>>(const V v) const {
			pixel_type p;
			for (int i = 0; i < N; i++)
				p.data[i] = data[i] >> v;
			return p;
		}

		// Adds another pixel channel by channel.
		template <typename V>
		inline pixel_type& operator+=(const pixel_type<V, N>& v) {
			for (int i = 0; i < N; i++)
				data[i] += v.data[i];
			return *this;
		}

		// Subtracts another pixel channel by channel.
		template <typename V>
		inline pixel_type& operator-=(const pixel_type<V, N>& v) {
			for (int i = 0; i < N; i++)
				data[i] -= v.data[i];
			return *this;
		}
	};

	//

	// Returns a pointer to pixel (x, y) of a mutable buffer that is laid out
	// as rows of w pixels, each pixel being N consecutive T values.
	// No bounds checking is done.
	template <typename T, int N>
	inline pixel_type<T, N>* get_pixel(T* data, int x, int y, int w)
	{
		return reinterpret_cast<pixel_type<T, N>*>(data + (x + (y * w)) * N);
	}

	// Read-only overload: returns a pointer to pixel (x, y) of a const buffer
	// that is laid out as rows of w pixels, each pixel being N consecutive
	// T values. No bounds checking is done.
	template <typename T, int N>
	inline const pixel_type<T, N>* get_pixel(const T* data, int x, int y, int w)
	{
		return reinterpret_cast<const pixel_type<T, N>*>(data + (x + (y * w)) * N);
	}

	template <typename T, int N>
	void stack_blur(const T* src, T* dst, int width, int height, int blur_width, int blur_height)
	{
	assert(src);
	assert(dst);

	typedef typename process_pixel_type<T>::value_type value_type;
	typedef pixel_type<T, N> pixel_ptr_type;
	typedef pixel_type<value_type, N> pixel_type;

	const int w = width;
	const int h = height;
	const int wm = w - 1;
	const int hm = h - 1;
	int rx = blur_width;
	int ry = blur_height;

	if(rx > 0)
	{
	if(rx > 254) rx = 254;

	const value_type div = rx * 2 + 1;
	const value_type mul_sum = stack_blur_tables<value_type>::g_stack_blur8_mul[rx];
	const value_type shr_sum = stack_blur_tables<value_type>::g_stack_blur8_shr[rx];

	vector<pixel_type> stack(div);

	for (int y = 0; y < h; y++)
	{
	pixel_type sum(0);
	pixel_type sum_in(0);
	pixel_type sum_out(0);

	const pixel_ptr_type* src_pix_ptr = get_pixel<T, N>(src, 0, y, w);
	pixel_ptr_type* dst_pix_ptr = NULL;

	pixel_type pix = *src_pix_ptr;

	for (int i = 0; i <= rx; i++)
	stack[i] = pix;

	for (int i = 0; i <= rx; i++)
	sum += pix * (i + 1);

	for (int i = 0; i <= rx; i++)
	sum_out += pix;

	for (int i = 1; i <= rx; i++)
	{
	if (i <= wm) src_pix_ptr++;
	pix = *src_pix_ptr;
	stack[i + rx] = pix;
	sum += pix * (rx + 1 - i);
	sum_in += pix;
	}

	unsigned int stack_ptr = rx;
	unsigned int xp = rx;
	unsigned int stack_start;

	if (xp > wm) xp = wm;

	src_pix_ptr = get_pixel<T, N>(src, xp, y, w);
	dst_pix_ptr = get_pixel<T, N>(dst, 0, y, w);

	for (int x = 0; x < w; x++)
	{
	dst_pix_ptr = (sum mul_sum) >> shr_sum;
	dst_pix_ptr++;

	sum -= sum_out;

	stack_start = stack_ptr + div - rx;

	if (stack_start >= div)
	stack_start -= div;

	sum_out -= stack[stack_start];

	if (xp < wm)
	{
	src_pix_ptr++;
	pix = *src_pix_ptr;
	++xp;
	}

	stack[stack_start] = pix;

	sum_in += pix;
	sum += sum_in;

	++stack_ptr;

	if (stack_ptr >= div) stack_ptr = 0;

	const pixel_type& stack_pix = stack[stack_ptr];
	sum_out += stack_pix;
	sum_in -= stack_pix;
	}
	}
	}

	if(ry > 0)
	{
	if(ry > 254) ry = 254;

	const value_type div = ry * 2 + 1;
	const value_type mul_sum = stack_blur_tables<value_type>::g_stack_blur8_mul[ry];
	const value_type shr_sum = stack_blur_tables<value_type>::g_stack_blur8_shr[ry];

	vector<pixel_type> stack(div);

	int stride = w;
	for(int x = 0; x < w; x++)
	{
	pixel_type sum(0);
	pixel_type sum_in(0);
	pixel_type sum_out(0);

	const pixel_ptr_type* src_pix_ptr = get_pixel<T, N>(dst, x, 0, w);
	pixel_ptr_type* dst_pix_ptr = NULL;
	pixel_type pix = *src_pix_ptr;

	for (int i = 0; i <= ry; i++)
	stack[i] = pix;

	for (int i = 0; i <= ry; i++)
	sum += pix * (i + 1);

	for (int i = 0; i <= ry; i++)
	sum_out += pix;

	for (int i = 1; i <= ry; i++)
	{
	if (i <= hm) src_pix_ptr += stride;
	pix = *src_pix_ptr;
	stack[i + ry] = pix;
	sum += pix * (ry + 1 - i);
	sum_in += pix;
	}

	unsigned int stack_ptr = ry;
	unsigned int yp = ry;
	unsigned int stack_start;

	if (yp > hm) yp = hm;

	src_pix_ptr = get_pixel<T, N>(dst, x, yp, w);
	dst_pix_ptr = get_pixel<T, N>(dst, x, 0, w);

	for (int y = 0; y < h; y++)
	{
	dst_pix_ptr = (sum mul_sum) >> shr_sum;
	dst_pix_ptr += stride;

	sum -= sum_out;

	stack_start = stack_ptr + div - ry;
	if (stack_start >= div) stack_start -= div;
	sum_out -= stack[stack_start];

	if (yp < hm)
	{
	src_pix_ptr += stride;
	pix = *src_pix_ptr;
	++yp;
	}

	stack[stack_start] = pix;

	sum_in += pix;
	sum += sum_in;

	++stack_ptr;
	if (stack_ptr >= div) stack_ptr = 0;

	const pixel_type& stack_pix = stack[stack_ptr];
	sum_out += stack_pix;
	sum_in -= stack_pix;
	}
	}
	}
	}

	void blur(const ofPixels& in, ofPixels& out, int radius)
	{
	assert(in.getNumChannels() == out.getNumChannels());

	switch (in.getNumChannels()) {
	case 1: {
	stack_blur<uint8_t, 1>(in.getPixels(), out.getPixels(), in.getWidth(), in.getHeight(), radius, radius);
	break;
	}
	case 3: {
	stack_blur<uint8_t, 3>(in.getPixels(), out.getPixels(), in.getWidth(), in.getHeight(), radius, radius);
	break;
	}
	case 4: {
	stack_blur<uint8_t, 4>(in.getPixels(), out.getPixels(), in.getWidth(), in.getHeight(), radius, radius);
	break;
	}
	default: throw;
	}
	}

	}