Skip to content

Instantly share code, notes, and snippets.

@msg7086
Last active August 29, 2015 14:07
Show Gist options
  • Save msg7086/dd32e7337a66d719bec9 to your computer and use it in GitHub Desktop.
Save msg7086/dd32e7337a66d719bec9 to your computer and use it in GitHub Desktop.
; Disassembly listing (32-bit x86, VEX-encoded scalar double instructions) of
; Recursive_Gaussian2D_Horizontal, interleaved with fragments of its C source.
; Each row is filtered left-to-right from input into output, then right-to-left
; in place on output.
;
; Register / stack roles as used below:
;   ecx (on entry) -> ebx, [ebp-8] : output base pointer
;   edx (on entry) -> eax          : input base pointer
;   [ebp-4]                        : input - output, as a byte delta
;   [ebp-0Ch]                      : stride * 8, byte distance between rows
;   edi                            : &output[lower] for the current row
;   esi                            : lower + 1 (starts at 1, advanced by stride per row)
;   edx / [height]                 : rows remaining (the height argument slot is overwritten)
;   xmm1 = B (reloaded from the stack after every use), xmm7 = B1, xmm6 = B2, xmm5 = B3
;   xmm2 = P1, xmm3 = P2, xmm4 = P3, xmm0 = scratch / P0
;
; NOTE(review): the early-exit target +120h (5BD13870h) and the function
; epilogue are not part of this listing.
void Recursive_Gaussian2D_Horizontal(double * output, const double * input, int width, int height, int stride, const double B, const double B1, const double B2, const double B3)
{
5BD13750 push ebp
5BD13751 mov ebp,esp
5BD13753 sub esp,0Ch
5BD13756 push ebx
; The two pointer arguments are taken from registers: ecx = output, edx = input.
5BD13757 mov eax,edx
5BD13759 mov ebx,ecx
int i, j, lower, upper;
double P0, P1, P2, P3;
for (j = 0; j < height; j++)
5BD1375B mov edx,dword ptr [height]
5BD1375E mov dword ptr [ebp-8],ebx
5BD13761 push esi
5BD13762 push edi
; height <= 0: jump out without processing any row.
5BD13763 test edx,edx
5BD13765 jle Recursive_Gaussian2D_Horizontal+120h (5BD13870h)
5BD1376B mov ecx,dword ptr [stride]
5BD1376E mov esi,1
5BD13773 vmovsd xmm5,qword ptr [B3]
5BD13778 vmovsd xmm6,qword ptr [B2]
5BD1377D vmovsd xmm7,qword ptr [B1]
5BD13782 vmovsd xmm1,qword ptr [B]
; ecx = stride * 8 (row pitch in bytes); eax = input - output in bytes, so
; [output_ptr + eax] addresses the input element matching an output element.
5BD13787 shl ecx,3
5BD1378A mov edi,ebx
5BD1378C sub eax,ebx
5BD1378E mov dword ptr [ebp-0Ch],ecx
5BD13791 mov dword ptr [ebp-4],eax
upper = lower + width;
i = lower;
output[i] = P3 = P2 = P1 = input[i];
for (i++; i < upper; i++)
; Row loop head (+44h). Loads input[lower] into P1/P2/P3, stores it to
; output[lower], and sets ecx = width - 1 + esi = upper, eax = lower + 1 = i.
5BD13794 mov ecx,dword ptr [width]
5BD13797 vmovsd xmm2,qword ptr [edi+eax]
5BD1379C dec ecx
5BD1379D mov eax,esi
5BD1379F add ecx,esi
5BD137A1 vmovapd xmm3,xmm2
5BD137A5 vmovapd xmm4,xmm2
5BD137A9 vmovsd qword ptr [edi],xmm2
; Skip the forward pass when lower + 1 >= upper.
5BD137AD cmp esi,ecx
5BD137AF jge Recursive_Gaussian2D_Horizontal+0B0h (5BD13800h)
{
lower = stride*j;
; ebx temporarily holds the input - output delta for the forward pass.
5BD137B1 mov ebx,dword ptr [ebp-4]
{
lower = stride*j;
; edx = &output[lower + 1]; ecx = upper - (lower + 1) = forward iteration count.
5BD137B4 lea edx,[edi+8]
5BD137B7 sub ecx,esi
upper = lower + width;
i = lower;
output[i] = P3 = P2 = P1 = input[i];
for (i++; i < upper; i++)
; eax = upper, the value i has once the forward loop finishes.
; lea esp,[esp] leaves esp unchanged; it fills the bytes up to the loop head at 5BD137C0h.
5BD137B9 lea eax,[esi+ecx]
5BD137BC lea esp,[esp]
; Forward loop head (+70h): edx is advanced first, so the current element is at [edx-8].
5BD137C0 lea edx,[edx+8]
{
P0 = B*input[i] + B1*P1 + B2*P2 + B3*P3;
; xmm1 = B * input[i]; the product overwrites B, which is reloaded at 5BD137E1h.
5BD137C3 vmulsd xmm1,xmm1,mmword ptr [ebx+edx-8]
5BD137C9 vmulsd xmm0,xmm2,xmm7
5BD137CD vaddsd xmm1,xmm1,xmm0
5BD137D1 vmulsd xmm0,xmm3,xmm6
5BD137D5 vaddsd xmm1,xmm1,xmm0
5BD137D9 vmulsd xmm0,xmm4,xmm5
5BD137DD vaddsd xmm0,xmm1,xmm0
5BD137E1 vmovsd xmm1,qword ptr [B]
P3 = P2;
5BD137E6 vmovapd xmm4,xmm3
P2 = P1;
5BD137EA vmovapd xmm3,xmm2
P1 = P0;
output[i] = P0;
5BD137EE vmovsd qword ptr [edx-8],xmm0
5BD137F3 vmovapd xmm2,xmm0
5BD137F7 dec ecx
5BD137F8 jne Recursive_Gaussian2D_Horizontal+70h (5BD137C0h)
; Restore ebx = output base and edx = rows remaining after the forward pass.
5BD137FA mov ebx,dword ptr [ebp-8]
5BD137FD mov edx,dword ptr [height]
}
i--;
P3 = P2 = P1 = output[i];
; eax holds i after the forward pass, so [ebx+eax*8-8] is output[i - 1].
5BD13800 vmovsd xmm2,qword ptr [ebx+eax*8-8]
for (i--; i >= lower; i--)
; eax = i - 2 (both decrements folded together); ecx = esi - 1 = lower.
5BD13806 sub eax,2
5BD13809 lea ecx,[esi-1]
5BD1380C vmovapd xmm3,xmm2
5BD13810 vmovapd xmm4,xmm2
5BD13814 cmp eax,ecx
5BD13816 jl Recursive_Gaussian2D_Horizontal+108h (5BD13858h)
; The jmp skips the padding instruction at 5BD1381Ah and enters the loop at 5BD13820h.
5BD13818 jmp Recursive_Gaussian2D_Horizontal+0D0h (5BD13820h)
for (i--; i >= lower; i--)
5BD1381A lea ebx,[ebx]
{
P0 = B*output[i] + B1*P1 + B2*P2 + B3*P3;
; Backward loop head (+0D0h): reads and rewrites output[i] in place, indexed by eax.
5BD13820 vmulsd xmm1,xmm1,mmword ptr [ebx+eax*8]
5BD13825 vmulsd xmm0,xmm2,xmm7
5BD13829 vaddsd xmm1,xmm1,xmm0
5BD1382D vmulsd xmm0,xmm3,xmm6
5BD13831 vaddsd xmm1,xmm1,xmm0
5BD13835 vmulsd xmm0,xmm4,xmm5
5BD13839 vaddsd xmm0,xmm1,xmm0
5BD1383D vmovsd xmm1,qword ptr [B]
P3 = P2;
P2 = P1;
P1 = P0;
output[i] = P0;
5BD13842 vmovsd qword ptr [ebx+eax*8],xmm0
5BD13847 dec eax
5BD13848 vmovapd xmm4,xmm3
5BD1384C vmovapd xmm3,xmm2
5BD13850 vmovapd xmm2,xmm0
5BD13854 cmp eax,ecx
5BD13856 jge Recursive_Gaussian2D_Horizontal+0D0h (5BD13820h)
int i, j, lower, upper;
double P0, P1, P2, P3;
for (j = 0; j < height; j++)
; Row epilogue (+108h): advance edi by the row pitch and esi by stride, then
; count down the remaining rows. The jne tests the flags set by dec edx; the
; vmovsd/mov instructions in between do not modify flags.
5BD13858 add edi,dword ptr [ebp-0Ch]
5BD1385B add esi,dword ptr [stride]
5BD1385E dec edx
5BD1385F vmovsd xmm1,qword ptr [B]
5BD13864 mov eax,dword ptr [ebp-4]
5BD13867 mov dword ptr [height],edx
5BD1386A jne Recursive_Gaussian2D_Horizontal+44h (5BD13794h)
}
}
}
; Disassembly listing (32-bit x86, non-VEX SSE2 scalar double instructions) of
; Recursive_Gaussian2D_Horizontal, interleaved with fragments of its C source.
; Each row is filtered left-to-right from input into output, then right-to-left
; in place on output.
;
; Register / stack roles as used below:
;   ecx (on entry) -> ebx, [ebp-8] : output base pointer
;   edx (on entry) -> eax          : input base pointer
;   [ebp-4]                        : input - output, as a byte delta
;   [ebp-0Ch]                      : stride * 8, byte distance between rows
;   edi                            : &output[lower] for the current row
;   esi                            : lower + 1 (starts at 1, advanced by stride per row)
;   edx / [height]                 : rows remaining (the height argument slot is overwritten)
;   xmm0 = B (also used as scratch, then reloaded from the stack)
;   xmm7 = B1, xmm6 = B2, xmm5 = B3
;   xmm2 = P1, xmm4 = P2, xmm3 = P3, xmm1 = accumulator / P0
;
; NOTE(review): the early-exit target +12Eh (5BD0386Eh) and the function
; epilogue are not part of this listing.
void Recursive_Gaussian2D_Horizontal(double * output, const double * input, int width, int height, int stride, const double B, const double B1, const double B2, const double B3)
{
5BD03740 push ebp
5BD03741 mov ebp,esp
5BD03743 sub esp,0Ch
5BD03746 push ebx
; The two pointer arguments are taken from registers: ecx = output, edx = input.
5BD03747 mov eax,edx
5BD03749 mov ebx,ecx
int i, j, lower, upper;
double P0, P1, P2, P3;
for (j = 0; j < height; j++)
5BD0374B mov edx,dword ptr [height]
5BD0374E mov dword ptr [ebp-8],ebx
5BD03751 push esi
5BD03752 push edi
; height <= 0: jump out without processing any row.
5BD03753 test edx,edx
5BD03755 jle Recursive_Gaussian2D_Horizontal+12Eh (5BD0386Eh)
5BD0375B mov ecx,dword ptr [stride]
5BD0375E mov esi,1
5BD03763 movsd xmm5,mmword ptr [B3]
5BD03768 mov edi,ebx
5BD0376A movsd xmm6,mmword ptr [B2]
5BD0376F movsd xmm7,mmword ptr [B1]
5BD03774 movsd xmm0,mmword ptr [B]
; ecx = stride * 8 (row pitch in bytes); eax = input - output in bytes, so
; [output_ptr + eax] addresses the input element matching an output element.
5BD03779 shl ecx,3
5BD0377C sub eax,ebx
5BD0377E mov dword ptr [ebp-0Ch],ecx
5BD03781 mov dword ptr [ebp-4],eax
upper = lower + width;
i = lower;
output[i] = P3 = P2 = P1 = input[i];
for (i++; i < upper; i++)
; Row loop head (+44h). Loads input[lower] into P1/P2/P3, stores it to
; output[lower], and sets ecx = width - 1 + esi = upper, eax = lower + 1 = i.
5BD03784 mov ecx,dword ptr [width]
5BD03787 movsd xmm2,mmword ptr [edi+eax]
5BD0378C dec ecx
5BD0378D add ecx,esi
5BD0378F movsd mmword ptr [edi],xmm2
5BD03793 movapd xmm4,xmm2
5BD03797 movapd xmm3,xmm2
5BD0379B mov eax,esi
; Skip the forward pass when lower + 1 >= upper.
5BD0379D cmp esi,ecx
5BD0379F jge Recursive_Gaussian2D_Horizontal+0BAh (5BD037FAh)
{
lower = stride*j;
; ebx temporarily holds the input - output delta; edx = &output[lower + 1];
; ecx = upper - (lower + 1) = forward iteration count.
5BD037A1 mov ebx,dword ptr [ebp-4]
5BD037A4 lea edx,[edi+8]
5BD037A7 sub ecx,esi
upper = lower + width;
i = lower;
output[i] = P3 = P2 = P1 = input[i];
for (i++; i < upper; i++)
; eax = upper, the value i has once the forward loop finishes.
; lea esp,[esp] leaves esp unchanged; it fills the bytes up to the loop head at 5BD037B0h.
5BD037A9 lea eax,[esi+ecx]
5BD037AC lea esp,[esp]
{
P0 = B*input[i] + B1*P1 + B2*P2 + B3*P3;
; Forward loop head (+70h). xmm1 = input[i] * B; xmm0 is then reused as
; scratch for B1*P1 and B2*P2, and B is reloaded into xmm0 at 5BD037D5h.
; xmm3 (P3) is multiplied by B3 in place because its old value is dead afterwards.
5BD037B0 movsd xmm1,mmword ptr [ebx+edx]
5BD037B5 mulsd xmm1,xmm0
5BD037B9 movapd xmm0,xmm2
5BD037BD mulsd xmm0,xmm7
5BD037C1 mulsd xmm3,xmm5
5BD037C5 addsd xmm1,xmm0
5BD037C9 movapd xmm0,xmm4
5BD037CD mulsd xmm0,xmm6
5BD037D1 addsd xmm1,xmm0
5BD037D5 movsd xmm0,mmword ptr [B]
5BD037DA addsd xmm1,xmm3
P3 = P2;
5BD037DE movapd xmm3,xmm4
P2 = P1;
5BD037E2 movapd xmm4,xmm2
P1 = P0;
output[i] = P0;
5BD037E6 movsd mmword ptr [edx],xmm1
5BD037EA movapd xmm2,xmm1
5BD037EE add edx,8
5BD037F1 dec ecx
5BD037F2 jne Recursive_Gaussian2D_Horizontal+70h (5BD037B0h)
; Restore ebx = output base and edx = rows remaining after the forward pass.
5BD037F4 mov ebx,dword ptr [ebp-8]
5BD037F7 mov edx,dword ptr [height]
}
i--;
P3 = P2 = P1 = output[i];
; eax holds i after the forward pass, so [ebx+eax*8-8] is output[i - 1].
5BD037FA movsd xmm2,mmword ptr [ebx+eax*8-8]
for (i--; i >= lower; i--)
; ecx = esi - 1 = lower; eax = i - 2 (both decrements folded together).
5BD03800 lea ecx,[esi-1]
5BD03803 sub eax,2
5BD03806 movapd xmm4,xmm2
5BD0380A movapd xmm3,xmm2
5BD0380E cmp eax,ecx
5BD03810 jl Recursive_Gaussian2D_Horizontal+116h (5BD03856h)
{
P0 = B*output[i] + B1*P1 + B2*P2 + B3*P3;
; Backward loop head (+0D2h): reads and rewrites output[i] in place, indexed by eax.
5BD03812 movsd xmm1,mmword ptr [ebx+eax*8]
5BD03817 mulsd xmm1,xmm0
5BD0381B movapd xmm0,xmm2
5BD0381F mulsd xmm0,xmm7
5BD03823 mulsd xmm3,xmm5
5BD03827 addsd xmm1,xmm0
5BD0382B movapd xmm0,xmm4
5BD0382F mulsd xmm0,xmm6
5BD03833 addsd xmm1,xmm0
5BD03837 movsd xmm0,mmword ptr [B]
5BD0383C addsd xmm1,xmm3
P3 = P2;
5BD03840 movapd xmm3,xmm4
P2 = P1;
5BD03844 movapd xmm4,xmm2
P1 = P0;
output[i] = P0;
5BD03848 movsd mmword ptr [ebx+eax*8],xmm1
5BD0384D movapd xmm2,xmm1
5BD03851 dec eax
5BD03852 cmp eax,ecx
5BD03854 jge Recursive_Gaussian2D_Horizontal+0D2h (5BD03812h)
int i, j, lower, upper;
double P0, P1, P2, P3;
for (j = 0; j < height; j++)
; Row epilogue (+116h): advance edi by the row pitch and esi by stride, then
; count down the remaining rows. The jne tests the flags set by dec edx; the
; movsd/mov instructions in between do not modify flags.
5BD03856 add edi,dword ptr [ebp-0Ch]
5BD03859 add esi,dword ptr [stride]
5BD0385C dec edx
5BD0385D movsd xmm0,mmword ptr [B]
5BD03862 mov eax,dword ptr [ebp-4]
5BD03865 mov dword ptr [height],edx
5BD03868 jne Recursive_Gaussian2D_Horizontal+44h (5BD03784h)
}
}
}
; Disassembly listing (32-bit x86, VEX-encoded scalar double instructions) of
; Recursive_Gaussian2D_Vertical, interleaved with fragments of its C source.
; Each column is filtered top-to-bottom from input into output, then
; bottom-to-top in place on output, stepping by stride elements.
;
; Register / stack roles as used below:
;   ecx (on entry) -> ebx, [ebp-0Ch] : output base pointer
;   edx (on entry) -> [ebp-4]        : input base pointer, later replaced by
;                                      input - output as a byte delta
;   [height] slot                    : overwritten with pcount = stride * height
;   [stride] slot                    : overwritten with &output[j + stride]
;   [ebp-8]                          : stride * 8, byte step between rows
;   edi = stride, esi = j, eax = i (element index)
;   edx                              : &output[j] for the current column
;   ecx                              : &output[i] during the forward pass
;   xmm1 = B (reloaded from the stack after every use), xmm7 = B1, xmm6 = B2, xmm5 = B3
;   xmm2 = P1, xmm3 = P2, xmm4 = P3, xmm0 = scratch / P0
;
; NOTE(review): the early-exit target +134h (5B8F3744h) and the function
; epilogue are not part of this listing.
void Recursive_Gaussian2D_Vertical(double * output, const double * input, int width, int height, int stride, const double B, const double B1, const double B2, const double B3)
{
5B8F3610 push ebp
5B8F3611 mov ebp,esp
5B8F3613 sub esp,0Ch
int i, j, lower, upper;
double P0, P1, P2, P3;
int pcount = stride*height;
5B8F3616 mov eax,dword ptr [height]
5B8F3619 push ebx
5B8F361A push esi
5B8F361B push edi
5B8F361C mov edi,dword ptr [stride]
for (j = 0; j < width; j++)
; esi = j = 0; eax = height * stride = pcount, written back over the height slot.
5B8F361F xor esi,esi
5B8F3621 imul eax,edi
5B8F3624 mov ebx,ecx
5B8F3626 mov dword ptr [ebp-4],edx
5B8F3629 mov dword ptr [ebp-0Ch],ebx
5B8F362C mov dword ptr [height],eax
; width <= 0: jump out without processing any column.
5B8F362F cmp dword ptr [width],esi
5B8F3632 jle Recursive_Gaussian2D_Vertical+134h (5B8F3744h)
5B8F3638 vmovsd xmm5,qword ptr [B3]
5B8F363D vmovsd xmm6,qword ptr [B2]
5B8F3642 vmovsd xmm7,qword ptr [B1]
5B8F3647 vmovsd xmm1,qword ptr [B]
; eax = stride * 8 (row step in bytes); ecx = &output[stride], also saved in
; the stride slot; [ebp-4] becomes input - output in bytes.
5B8F364C lea eax,[edi*8]
5B8F3653 mov edx,ebx
5B8F3655 lea ecx,[eax+ebx]
5B8F3658 mov dword ptr [ebp-8],eax
5B8F365B mov eax,dword ptr [ebp-4]
5B8F365E sub eax,ebx
5B8F3660 mov dword ptr [stride],ecx
5B8F3663 mov dword ptr [ebp-4],eax
; The jmp skips the padding instructions below and enters the column loop at 5B8F3670h.
5B8F3666 jmp Recursive_Gaussian2D_Vertical+60h (5B8F3670h)
5B8F3668 lea esp,[esp]
5B8F366F nop
{
lower = j;
upper = pcount;
i = lower;
output[i] = P3 = P2 = P1 = input[i];
; Column loop head (+60h). Loads input[j] into P1/P2/P3, stores it to
; output[j], and sets eax = j + stride = i.
5B8F3670 vmovsd xmm2,qword ptr [edx+eax]
for (i += stride; i < upper; i += stride)
5B8F3675 lea eax,[esi+edi]
5B8F3678 vmovapd xmm3,xmm2
5B8F367C vmovapd xmm4,xmm2
5B8F3680 vmovsd qword ptr [edx],xmm2
; Skip the forward pass when j + stride >= pcount.
5B8F3684 cmp eax,dword ptr [height]
5B8F3687 jge Recursive_Gaussian2D_Vertical+0C2h (5B8F36D2h)
; ebx temporarily holds the input - output delta for the forward pass.
; lea esp,[esp] leaves esp unchanged; it fills the bytes up to the loop head at 5B8F3690h.
5B8F3689 mov ebx,dword ptr [ebp-4]
5B8F368C lea esp,[esp]
{
P0 = B*input[i] + B1*P1 + B2*P2 + B3*P3;
; Forward loop head (+80h). xmm1 = B * input[i]; the product overwrites B,
; which is reloaded at 5B8F36ADh.
5B8F3690 vmulsd xmm1,xmm1,mmword ptr [ebx+ecx]
5B8F3695 vmulsd xmm0,xmm2,xmm7
5B8F3699 vaddsd xmm1,xmm1,xmm0
5B8F369D vmulsd xmm0,xmm3,xmm6
5B8F36A1 vaddsd xmm1,xmm1,xmm0
5B8F36A5 vmulsd xmm0,xmm4,xmm5
5B8F36A9 vaddsd xmm0,xmm1,xmm0
5B8F36AD vmovsd xmm1,qword ptr [B]
P3 = P2;
P2 = P1;
P1 = P0;
output[i] = P0;
5B8F36B2 vmovsd qword ptr [ecx],xmm0
P3 = P2;
P2 = P1;
P1 = P0;
output[i] = P0;
; Step the output pointer by one row (stride * 8 bytes) and i by stride.
5B8F36B6 add ecx,dword ptr [ebp-8]
5B8F36B9 add eax,edi
5B8F36BB vmovapd xmm4,xmm3
5B8F36BF vmovapd xmm3,xmm2
5B8F36C3 vmovapd xmm2,xmm0
5B8F36C7 cmp eax,dword ptr [height]
5B8F36CA jl Recursive_Gaussian2D_Vertical+80h (5B8F3690h)
; Restore ebx = output base and ecx = &output[j + stride] from the stride slot.
5B8F36CC mov ebx,dword ptr [ebp-0Ch]
5B8F36CF mov ecx,dword ptr [stride]
}
i -= stride;
5B8F36D2 sub eax,edi
P3 = P2 = P1 = output[i];
5B8F36D4 vmovsd xmm2,qword ptr [ebx+eax*8]
for (i -= stride; i >= lower; i -= stride)
5B8F36D9 sub eax,edi
5B8F36DB vmovapd xmm3,xmm2
5B8F36DF vmovapd xmm4,xmm2
; Skip the backward pass when i < lower (esi = j = lower).
5B8F36E3 cmp eax,esi
5B8F36E5 jl Recursive_Gaussian2D_Vertical+119h (5B8F3729h)
; The jmp skips the padding instruction at 5B8F36E9h and enters the loop at 5B8F36F0h.
5B8F36E7 jmp Recursive_Gaussian2D_Vertical+0E0h (5B8F36F0h)
5B8F36E9 lea esp,[esp]
{
P0 = B*output[i] + B1*P1 + B2*P2 + B3*P3;
; Backward loop head (+0E0h): reads and rewrites output[i] in place, indexed by eax.
5B8F36F0 vmulsd xmm1,xmm1,mmword ptr [ebx+eax*8]
5B8F36F5 vmulsd xmm0,xmm2,xmm7
5B8F36F9 vaddsd xmm1,xmm1,xmm0
5B8F36FD vmulsd xmm0,xmm3,xmm6
5B8F3701 vaddsd xmm1,xmm1,xmm0
5B8F3705 vmulsd xmm0,xmm4,xmm5
5B8F3709 vaddsd xmm0,xmm1,xmm0
5B8F370D vmovsd xmm1,qword ptr [B]
P3 = P2;
P2 = P1;
P1 = P0;
output[i] = P0;
5B8F3712 vmovsd qword ptr [ebx+eax*8],xmm0
5B8F3717 sub eax,edi
5B8F3719 vmovapd xmm4,xmm3
5B8F371D vmovapd xmm3,xmm2
5B8F3721 vmovapd xmm2,xmm0
5B8F3725 cmp eax,esi
5B8F3727 jge Recursive_Gaussian2D_Vertical+0E0h (5B8F36F0h)
for (j = 0; j < width; j++)
; Column epilogue (+119h): j++, and both column pointers (edx = &output[j],
; ecx = &output[j + stride]) move one element (8 bytes) to the right.
5B8F3729 vmovsd xmm1,qword ptr [B]
5B8F372E mov eax,dword ptr [ebp-4]
5B8F3731 inc esi
5B8F3732 add ecx,8
5B8F3735 add edx,8
5B8F3738 mov dword ptr [stride],ecx
5B8F373B cmp esi,dword ptr [width]
5B8F373E jl Recursive_Gaussian2D_Vertical+60h (5B8F3670h)
}
}
}
; Disassembly listing (32-bit x86, non-VEX SSE2 scalar double instructions) of
; Recursive_Gaussian2D_Vertical, interleaved with fragments of its C source.
; Each column is filtered top-to-bottom from input into output, then
; bottom-to-top in place on output, stepping by stride elements.
;
; Register / stack roles as used below:
;   ecx (on entry) -> edx, [ebp-0Ch] : output base pointer
;   edx (on entry) -> [ebp-4]        : input base pointer, later replaced by
;                                      input - output as a byte delta
;   [height] slot                    : overwritten with pcount = stride * height
;   [stride] slot                    : overwritten with &output[j + stride]
;   [ebp-8]                          : stride * 8, byte step between rows
;   edi = stride, esi = j, eax = i (element index)
;   ebx                              : &output[j] for the current column
;   ecx                              : &output[i] during the forward pass
;   xmm0 = B (also used as scratch, then reloaded from the stack)
;   xmm7 = B1, xmm6 = B2, xmm5 = B3
;   xmm2 = P1, xmm4 = P2, xmm3 = P3, xmm1 = accumulator / P0
;
; NOTE(review): this listing starts at function offset +3 (the jump targets
; place +60h at 5BD03650h, so the function begins at 5BD035F0h). The first
; three bytes are not shown - presumably the ebp frame setup, since locals are
; addressed via ebp below. The early-exit target +143h (5BD03733h) and the
; epilogue are not shown either.
void Recursive_Gaussian2D_Vertical(double * output, const double * input, int width, int height, int stride, const double B, const double B1, const double B2, const double B3)
{
5BD035F3 sub esp,0Ch
int i, j, lower, upper;
double P0, P1, P2, P3;
int pcount = stride*height;
5BD035F6 mov eax,dword ptr [height]
5BD035F9 push ebx
5BD035FA push esi
5BD035FB push edi
5BD035FC mov edi,dword ptr [stride]
for (j = 0; j < width; j++)
; esi = j = 0; eax = height * stride = pcount, written back over the height slot.
5BD035FF xor esi,esi
5BD03601 imul eax,edi
5BD03604 mov dword ptr [ebp-4],edx
5BD03607 mov edx,ecx
5BD03609 mov dword ptr [ebp-0Ch],edx
5BD0360C mov dword ptr [height],eax
; width <= 0: jump out without processing any column.
5BD0360F cmp dword ptr [width],esi
5BD03612 jle Recursive_Gaussian2D_Vertical+143h (5BD03733h)
; eax = stride * 8 (row step in bytes); ecx = &output[stride], also saved in
; the stride slot; ebx = &output[0]; [ebp-4] becomes input - output in bytes.
5BD03618 movsd xmm5,mmword ptr [B3]
5BD0361D lea eax,[edi*8]
5BD03624 movsd xmm6,mmword ptr [B2]
5BD03629 lea ecx,[eax+edx]
5BD0362C movsd xmm7,mmword ptr [B1]
5BD03631 mov ebx,edx
5BD03633 movsd xmm0,mmword ptr [B]
5BD03638 mov dword ptr [ebp-8],eax
5BD0363B mov eax,dword ptr [ebp-4]
5BD0363E sub eax,edx
5BD03640 mov dword ptr [stride],ecx
5BD03643 mov dword ptr [ebp-4],eax
; The jmp skips the padding instructions below and enters the column loop at 5BD03650h.
5BD03646 jmp Recursive_Gaussian2D_Vertical+60h (5BD03650h)
5BD03648 lea esp,[esp]
5BD0364F nop
{
lower = j;
upper = pcount;
i = lower;
output[i] = P3 = P2 = P1 = input[i];
; Column loop head (+60h). Loads input[j] into P1/P2/P3, stores it to
; output[j], and sets eax = j + stride = i.
5BD03650 movsd xmm2,mmword ptr [ebx+eax]
for (i += stride; i < upper; i += stride)
5BD03655 lea eax,[esi+edi]
for (i += stride; i < upper; i += stride)
5BD03658 movapd xmm4,xmm2
5BD0365C movapd xmm3,xmm2
5BD03660 movsd mmword ptr [ebx],xmm2
; Skip the forward pass when j + stride >= pcount.
5BD03664 cmp eax,dword ptr [height]
5BD03667 jge Recursive_Gaussian2D_Vertical+0CEh (5BD036BEh)
; edx temporarily holds the input - output delta for the forward pass.
; lea esp,[esp] leaves esp unchanged; it fills the bytes up to the loop head at 5BD03670h.
5BD03669 mov edx,dword ptr [ebp-4]
5BD0366C lea esp,[esp]
{
P0 = B*input[i] + B1*P1 + B2*P2 + B3*P3;
; Forward loop head (+80h). xmm1 = input[i] * B; i is advanced early (add eax,edi).
; xmm0 is reused as scratch for B1*P1 and B2*P2, and B is reloaded at 5BD03697h.
; xmm3 (P3) is multiplied by B3 in place because its old value is dead afterwards.
5BD03670 movsd xmm1,mmword ptr [edx+ecx]
5BD03675 add eax,edi
5BD03677 mulsd xmm1,xmm0
5BD0367B movapd xmm0,xmm2
5BD0367F mulsd xmm0,xmm7
5BD03683 mulsd xmm3,xmm5
5BD03687 addsd xmm1,xmm0
5BD0368B movapd xmm0,xmm4
5BD0368F mulsd xmm0,xmm6
5BD03693 addsd xmm1,xmm0
5BD03697 movsd xmm0,mmword ptr [B]
5BD0369C addsd xmm1,xmm3
P3 = P2;
5BD036A0 movapd xmm3,xmm4
P2 = P1;
5BD036A4 movapd xmm4,xmm2
P1 = P0;
output[i] = P0;
5BD036A8 movsd mmword ptr [ecx],xmm1
5BD036AC movapd xmm2,xmm1
; Step the output pointer by one row (stride * 8 bytes).
5BD036B0 add ecx,dword ptr [ebp-8]
5BD036B3 cmp eax,dword ptr [height]
5BD036B6 jl Recursive_Gaussian2D_Vertical+80h (5BD03670h)
; Restore edx = output base and ecx = &output[j + stride] from the stride slot.
5BD036B8 mov edx,dword ptr [ebp-0Ch]
5BD036BB mov ecx,dword ptr [stride]
}
i -= stride;
5BD036BE sub eax,edi
P3 = P2 = P1 = output[i];
5BD036C0 movsd xmm2,mmword ptr [edx+eax*8]
for (i -= stride; i >= lower; i -= stride)
5BD036C5 sub eax,edi
5BD036C7 movapd xmm4,xmm2
5BD036CB movapd xmm3,xmm2
; Skip the backward pass when i < lower (esi = j = lower).
5BD036CF cmp eax,esi
5BD036D1 jl Recursive_Gaussian2D_Vertical+128h (5BD03718h)
{
P0 = B*output[i] + B1*P1 + B2*P2 + B3*P3;
; Backward loop head (+0E3h): reads and rewrites output[i] in place, indexed by eax.
5BD036D3 movsd xmm1,mmword ptr [edx+eax*8]
5BD036D8 mulsd xmm1,xmm0
5BD036DC movapd xmm0,xmm2
5BD036E0 mulsd xmm0,xmm7
5BD036E4 mulsd xmm3,xmm5
5BD036E8 addsd xmm1,xmm0
5BD036EC movapd xmm0,xmm4
5BD036F0 mulsd xmm0,xmm6
5BD036F4 addsd xmm1,xmm0
5BD036F8 movsd xmm0,mmword ptr [B]
5BD036FD addsd xmm1,xmm3
P3 = P2;
5BD03701 movapd xmm3,xmm4
P2 = P1;
5BD03705 movapd xmm4,xmm2
P1 = P0;
output[i] = P0;
5BD03709 movsd mmword ptr [edx+eax*8],xmm1
5BD0370E movapd xmm2,xmm1
5BD03712 sub eax,edi
5BD03714 cmp eax,esi
5BD03716 jge Recursive_Gaussian2D_Vertical+0E3h (5BD036D3h)
for (j = 0; j < width; j++)
; Column epilogue (+128h): j++, and both column pointers (ebx = &output[j],
; ecx = &output[j + stride]) move one element (8 bytes) to the right.
5BD03718 movsd xmm0,mmword ptr [B]
5BD0371D inc esi
5BD0371E mov eax,dword ptr [ebp-4]
5BD03721 add ecx,8
5BD03724 add ebx,8
5BD03727 mov dword ptr [stride],ecx
for (j = 0; j < width; j++)
5BD0372A cmp esi,dword ptr [width]
5BD0372D jl Recursive_Gaussian2D_Vertical+60h (5BD03650h)
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment