-
-
Save walkoncross/55c5f6c8642726bfcf6cc63670a84c6a to your computer and use it in GitHub Desktop.
Bit Count test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <time.h>
char table[256]; | |
int arr[100 << 20]; | |
// Population count by parallel (SWAR) reduction.
//
// Adjacent 1-, 2-, 4-, 8- and 16-bit fields are summed pairwise until the
// whole word holds the number of set bits. The arithmetic is done on an
// unsigned copy so that every right shift is a logical shift; shifting a
// negative signed int right is implementation-defined.
//
// x: value whose set bits are counted (treated as a 32-bit pattern, as the
//    masks below assume).
// Returns the number of set bits in x.
inline int bits1(int x) {
    unsigned v = static_cast<unsigned>(x);
    v = (v & 0x55555555u) + ((v >> 1)  & 0x55555555u);  // 2-bit sums
    v = (v & 0x33333333u) + ((v >> 2)  & 0x33333333u);  // 4-bit sums
    v = (v & 0x0f0f0f0fu) + ((v >> 4)  & 0x0f0f0f0fu);  // per-byte sums
    v = (v & 0x00ff00ffu) + ((v >> 8)  & 0x00ff00ffu);  // per-halfword sums
    v = (v & 0x0000ffffu) + ((v >> 16) & 0x0000ffffu);  // whole-word sum
    return static_cast<int>(v);
}
// Population count by table lookup: one lookup per byte of x.
//
// Relies on the file-level `table`, which main() fills so that table[b]
// holds the bit count of byte value b. Bytes are extracted from an unsigned
// copy of x so the shifts are logical; shifting a negative signed int right
// is implementation-defined.
//
// x: value whose set bits are counted (treated as four bytes, as the
//    original two-halves split assumes).
// Returns the sum of the four table entries.
inline int bits2(int x) {
    const unsigned v = static_cast<unsigned>(x);
    int ans = 0;
    ans += table[v & 0xffu] + table[(v >> 8) & 0xffu];           // low half
    ans += table[(v >> 16) & 0xffu] + table[(v >> 24) & 0xffu];  // high half
    return ans;
}
int main() | |
{ | |
for (int i = 0; i < 256; i++) { | |
table[i] = bits1(i); | |
} | |
srand (11); | |
for (int i = 0; i < sizeof(arr) / sizeof(int); i++) { | |
arr[i] = (0x341 + i) << (i % 13); | |
assert(bits1(arr[i]) == bits2(arr[i])); | |
} | |
printf("start to run.\n"); | |
int nonsense = 0; | |
const int N = sizeof(arr) / sizeof(int); | |
for (int epoch = 0; epoch < 10; epoch ++) { | |
clock_t start = clock(); | |
for (int i = 0; i < N; i++) | |
nonsense += bits1(arr[i]); | |
clock_t t1 = clock(); | |
for (int i = 0; i < N; i++) | |
nonsense += bits2(arr[i]); | |
clock_t t2 = clock(); | |
for (int i = 0; i < N; i++) | |
nonsense += __builtin_popcount(arr[i]); | |
clock_t t3 = clock(); | |
printf("bits1: %5lu, bits2: %5lu, builtin: %5lu\n", t1 - start, t2 - t1, t3 - t2); | |
} | |
printf("%d\n", nonsense); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.file "A.cpp" | |
# Format string for the per-epoch timing line. It is loaded by the
# constant-propagated printf clone and again in main just before that call.
.section .rdata,"dr" | |
.align 8 | |
.LC0: | |
.ascii "bits1: %5lu, bits2: %5lu, builtin: %5lu\12\0" | |
#-----------------------------------------------------------------------
# _Z6printfPKcz.constprop.0 -- printf clone with the format fixed to .LC0
# ABI:   Microsoft x64, AT&T syntax
# In:    %rdx, %r8, %r9 = the three variadic arguments
#        (%rcx on entry is ignored; it is overwritten with &.LC0)
# Out:   whatever __mingw_vprintf leaves in %rax
# Stack: 56 bytes; the return address sits at 56(%rsp), so the caller's
#        home slots for %rdx/%r8/%r9 are at 72/80/88(%rsp).
#-----------------------------------------------------------------------
        .text
        .p2align 4,,15
        .def    _Z6printfPKcz.constprop.0;      .scl    3;      .type   32;     .endef
        .seh_proc       _Z6printfPKcz.constprop.0
_Z6printfPKcz.constprop.0:
.LFB91:
        subq    $56, %rsp
        .seh_stackalloc 56
        .seh_endprologue

        # Spill the register varargs into the caller-provided home slots so
        # they form one contiguous argument area in memory.
        movq    %r9, 88(%rsp)
        movq    %r8, 80(%rsp)
        movq    %rdx, 72(%rsp)

        # The address of the first spilled vararg is the va_list value: keep
        # a copy in the local at 40(%rsp) and pass it as the second argument.
        leaq    72(%rsp), %rdx
        movq    %rdx, 40(%rsp)

        leaq    .LC0(%rip), %rcx        # first argument: the fixed format
        call    __mingw_vprintf

        addq    $56, %rsp
        ret
        .seh_endproc
#-----------------------------------------------------------------------
# _Z6printfPKcz -- printf(const char *, ...) forwarding to __mingw_vprintf
# ABI:   Microsoft x64, AT&T syntax; emitted as a link-once (discard) section
# In:    %rcx = format string, %rdx/%r8/%r9 = first three variadic arguments
# Out:   whatever __mingw_vprintf leaves in %rax
# Stack: 56 bytes; the return address sits at 56(%rsp), so the caller's
#        home slots for %rdx/%r8/%r9 are at 72/80/88(%rsp).
#-----------------------------------------------------------------------
        .section        .text$_Z6printfPKcz,"x"
        .linkonce discard
        .p2align 4,,15
        .globl  _Z6printfPKcz
        .def    _Z6printfPKcz;  .scl    2;      .type   32;     .endef
        .seh_proc       _Z6printfPKcz
_Z6printfPKcz:
.LFB8:
        subq    $56, %rsp
        .seh_stackalloc 56
        .seh_endprologue

        # Spill the register varargs into the caller-provided home slots so
        # they form one contiguous argument area in memory.
        movq    %r9, 88(%rsp)
        movq    %r8, 80(%rsp)
        movq    %rdx, 72(%rsp)

        # The address of the first spilled vararg is the va_list value: keep
        # a copy in the local at 40(%rsp) and pass it as the second argument.
        # %rcx still holds the caller's format string.
        leaq    72(%rsp), %rdx
        movq    %rdx, 40(%rsp)

        call    __mingw_vprintf

        addq    $56, %rsp
        ret
        .seh_endproc
.def __main; .scl 2; .type 32; .endef | |
# Read-only string constants referenced from main.
        .section .rdata,"dr"
# Source file name passed to _assert as its second argument. main loads it
# through the label .LC10, so the label must carry exactly that name.
.LC10:
        .ascii "A.cpp\0"
        .align 8
# Text of the failed expression, passed to _assert as its first argument.
.LC11:
        .ascii "bits1(arr[i]) == bits2(arr[i])\0"
# Banner printed once before the timed epochs.
.LC12:
        .ascii "start to run.\12\0"
# Format for the final checksum line.
.LC15:
        .ascii "%d\12\0"
#-----------------------------------------------------------------------
# main -- popcount benchmark driver (AT&T syntax, Microsoft x64 ABI, SEH
# unwind directives).
#
# Phases:
#   .L4   fill the 256-byte `table`, 16 entries per iteration (SSE2)
#   .L6   fill `arr`, and check a parallel-reduction popcount against a
#         table-lookup popcount for every element (calls _assert on mismatch)
#   .L10  10 timed epochs, each running three popcount loops over `arr`:
#           .L7  vectorised parallel reduction
#           .L8  scalar table lookup
#           .L9  popcnt instruction
#   .L11  sum the dwords of `aarr`, add to the checksum, print it
#
# Long-lived registers:
#   %r12  = &table
#   %rbx  = &arr
#   %r13d = running checksum
#   %esi  = 0x4EC4EC4F multiplier during .L6, then the epoch down-counter
#   %xmm6/%xmm7/%xmm8 = 0x55555555 / 0x33333333 / 0x0f0f0f0f lane masks,
#           loaded once before .L4 and reused in .L7 (callee-saved on Win64,
#           so they survive the intervening calls)
#
# NOTE(review): this listing passes line number 32 to _assert and reads an
# `aarr` array, neither of which matches the C++ source shown with it; it
# looks like it was generated from a different revision -- confirm.
#-----------------------------------------------------------------------
.section .text.startup,"x" | |
.p2align 4,,15 | |
.globl main | |
.def main; .scl 2; .type 32; .endef | |
.seh_proc main | |
main: | |
.LFB90: | |
# --- prologue: save every callee-saved GPR this function uses ---
pushq %r15 | |
.seh_pushreg %r15 | |
pushq %r14 | |
.seh_pushreg %r14 | |
pushq %r13 | |
.seh_pushreg %r13 | |
pushq %r12 | |
.seh_pushreg %r12 | |
pushq %rbp | |
.seh_pushreg %rbp | |
pushq %rdi | |
.seh_pushreg %rdi | |
pushq %rsi | |
.seh_pushreg %rsi | |
pushq %rbx | |
.seh_pushreg %rbx | |
# 200-byte frame: one 4-byte spill slot at 44(%rsp) (used around the last
# clock call) and the save area for xmm6-xmm14 at 48..191(%rsp).
subq $200, %rsp | |
.seh_stackalloc 200 | |
movaps %xmm6, 48(%rsp) | |
.seh_savexmm %xmm6, 48 | |
movaps %xmm7, 64(%rsp) | |
.seh_savexmm %xmm7, 64 | |
movaps %xmm8, 80(%rsp) | |
.seh_savexmm %xmm8, 80 | |
movaps %xmm9, 96(%rsp) | |
.seh_savexmm %xmm9, 96 | |
movaps %xmm10, 112(%rsp) | |
.seh_savexmm %xmm10, 112 | |
movaps %xmm11, 128(%rsp) | |
.seh_savexmm %xmm11, 128 | |
movaps %xmm12, 144(%rsp) | |
.seh_savexmm %xmm12, 144 | |
movaps %xmm13, 160(%rsp) | |
.seh_savexmm %xmm13, 160 | |
movaps %xmm14, 176(%rsp) | |
.seh_savexmm %xmm14, 176 | |
.seh_endprologue | |
# %r12 = &table for the rest of the function.
leaq table(%rip), %r12 | |
call __main | |
# --- table fill setup: %rax = write cursor, %rdx = one past the end ---
leaq 256+table(%rip), %rdx | |
movq %r12, %rax | |
# xmm2 = lane indices {0,1,2,3}; xmm10 = per-iteration step of 16;
# xmm9/xmm5/xmm4 = offsets 4/8/12 giving the other three index vectors;
# xmm6/xmm7/xmm8 = reduction masks; xmm3 = 0x00ff per 16-bit word.
movdqa .LC1(%rip), %xmm2 | |
movdqa .LC2(%rip), %xmm10 | |
movdqa .LC3(%rip), %xmm9 | |
movdqa .LC4(%rip), %xmm5 | |
movdqa .LC5(%rip), %xmm4 | |
movdqa .LC6(%rip), %xmm6 | |
movdqa .LC7(%rip), %xmm7 | |
movdqa .LC8(%rip), %xmm8 | |
movdqa .LC9(%rip), %xmm3 | |
# --- .L4: compute 16 table bytes per iteration ---
# Four index vectors (i..i+3, +4, +8, +12) each go through the shift-1,
# shift-2 and shift-4 mask-and-add steps; only those three steps appear
# here. The instruction streams for the four vectors are interleaved.
.L4: | |
movdqa %xmm2, %xmm12 | |
movdqa %xmm2, %xmm1 | |
movdqa %xmm2, %xmm0 | |
psrad $1, %xmm12 | |
paddd %xmm9, %xmm1 | |
paddd %xmm5, %xmm0 | |
pand %xmm6, %xmm12 | |
movdqa %xmm12, %xmm11 | |
movdqa %xmm2, %xmm12 | |
movdqa %xmm2, %xmm13 | |
pand %xmm6, %xmm12 | |
paddd %xmm11, %xmm12 | |
movdqa %xmm1, %xmm11 | |
pand %xmm6, %xmm1 | |
psrad $1, %xmm11 | |
paddd %xmm4, %xmm13 | |
movdqa %xmm12, %xmm14 | |
pand %xmm7, %xmm12 | |
pand %xmm6, %xmm11 | |
paddd %xmm1, %xmm11 | |
movdqa %xmm0, %xmm1 | |
pand %xmm6, %xmm0 | |
psrad $1, %xmm1 | |
# Advance the write cursor and the base index vector for the next pass;
# the store below therefore addresses -16(%rax).
addq $16, %rax | |
paddd %xmm10, %xmm2 | |
pand %xmm6, %xmm1 | |
paddd %xmm0, %xmm1 | |
movdqa %xmm13, %xmm0 | |
pand %xmm6, %xmm13 | |
psrad $1, %xmm0 | |
pand %xmm6, %xmm0 | |
paddd %xmm13, %xmm0 | |
movdqa %xmm11, %xmm13 | |
pand %xmm7, %xmm11 | |
psrad $2, %xmm13 | |
psrad $2, %xmm14 | |
pand %xmm7, %xmm13 | |
paddd %xmm11, %xmm13 | |
movdqa %xmm1, %xmm11 | |
pand %xmm7, %xmm14 | |
psrad $2, %xmm11 | |
paddd %xmm12, %xmm14 | |
movdqa %xmm0, %xmm12 | |
pand %xmm7, %xmm11 | |
pand %xmm7, %xmm1 | |
psrad $2, %xmm12 | |
pand %xmm7, %xmm0 | |
paddd %xmm11, %xmm1 | |
movdqa %xmm14, %xmm11 | |
pand %xmm7, %xmm12 | |
paddd %xmm0, %xmm12 | |
psrad $4, %xmm11 | |
movdqa %xmm13, %xmm0 | |
pand %xmm8, %xmm14 | |
pand %xmm8, %xmm13 | |
paddd %xmm14, %xmm11 | |
psrad $4, %xmm0 | |
paddd %xmm13, %xmm0 | |
# Interleave the first two dword result vectors down to eight 16-bit words.
movdqa %xmm11, %xmm13 | |
punpckhwd %xmm0, %xmm13 | |
punpcklwd %xmm0, %xmm11 | |
movdqa %xmm11, %xmm0 | |
punpcklwd %xmm13, %xmm11 | |
punpckhwd %xmm13, %xmm0 | |
punpcklwd %xmm0, %xmm11 | |
movdqa %xmm1, %xmm0 | |
pand %xmm8, %xmm1 | |
psrad $4, %xmm0 | |
paddd %xmm1, %xmm0 | |
movdqa %xmm12, %xmm1 | |
pand %xmm8, %xmm12 | |
psrad $4, %xmm1 | |
paddd %xmm12, %xmm1 | |
# Same word interleave for the last two result vectors.
movdqa %xmm0, %xmm12 | |
punpckhwd %xmm1, %xmm12 | |
punpcklwd %xmm1, %xmm0 | |
movdqa %xmm0, %xmm1 | |
punpcklwd %xmm12, %xmm0 | |
punpckhwd %xmm12, %xmm1 | |
punpcklwd %xmm1, %xmm0 | |
# Keep the low byte of each word, pack both halves to 16 bytes, store.
movdqa %xmm11, %xmm1 | |
pand %xmm3, %xmm0 | |
pand %xmm3, %xmm1 | |
movdqa %xmm1, %xmm14 | |
packuswb %xmm0, %xmm14 | |
movaps %xmm14, -16(%rax) | |
cmpq %rax, %rdx | |
jne .L4 | |
# --- srand(11), then set up the arr fill loop ---
# %ebp = 833 (0x341) + i, %rdi = i, %rbx = &arr,
# %esi = 1321528399 (0x4EC4EC4F), the multiplier used for i / 13.
movl $11, %ecx | |
movl $833, %ebp | |
xorl %edi, %edi | |
call srand | |
leaq arr(%rip), %rbx | |
movl $1321528399, %esi | |
.p2align 4,,10 | |
# --- .L6: arr[i] = (833 + i) << (i % 13), then verify both popcounts ---
.L6: | |
movl %edi, %eax | |
movl %edi, %ecx | |
movl %ebp, %r8d | |
# i % 13 without a divide: %edx = high half of i * multiplier, shifted and
# sign-corrected to the quotient q; then %ecx = i - 13*q.
imull %esi | |
movl %edi, %eax | |
sarl $31, %eax | |
sarl $2, %edx | |
subl %eax, %edx | |
leal (%rdx,%rdx,2), %eax | |
leal (%rdx,%rax,4), %eax | |
subl %eax, %ecx | |
# %r8d = (833 + i) << (i % 13); store it as arr[i].
sall %cl, %r8d | |
movl %r8d, %eax | |
movl %r8d, %edx | |
movl %r8d, (%rbx,%rdi,4) | |
# Parallel-reduction popcount of %r8d, result ends up in %eax.
sarl %eax | |
andl $1431655765, %edx | |
andl $1431655765, %eax | |
addl %edx, %eax | |
movl %eax, %ecx | |
sarl $2, %eax | |
andl $858993459, %eax | |
andl $858993459, %ecx | |
addl %eax, %ecx | |
movl %ecx, %edx | |
sarl $4, %ecx | |
andl $252645135, %ecx | |
andl $252645135, %edx | |
addl %ecx, %edx | |
# %ecx = copy of the value for the table lookups of the upper two bytes.
movl %r8d, %ecx | |
movl %edx, %eax | |
sarl $8, %edx | |
andl $16711935, %edx | |
andl $16711935, %eax | |
sarl $16, %ecx | |
addl %edx, %eax | |
# Table-lookup popcount, interleaved with the final 16-bit fold of the
# reduction: bytes 0 and 1 come from %r8, bytes 2 and 3 from %ecx.
# The table entries are loaded sign-extended (movsbl).
movq %r8, %rdx | |
movzbl %r8b, %r8d | |
movzbl %dh, %edx | |
movzwl %ax, %r9d | |
shrl $16, %eax | |
movsbl (%r12,%rdx), %r10d | |
addl %r9d, %eax | |
movsbl (%r12,%r8), %edx | |
leal (%r10,%rdx), %r8d | |
movzbl %ch, %edx | |
movzbl %cl, %ecx | |
movsbl (%r12,%rdx), %edx | |
movsbl (%r12,%rcx), %ecx | |
addl %ecx, %edx | |
addl %r8d, %edx | |
# Both counts must match; otherwise call _assert(.LC11, .LC10, 32).
cmpl %eax, %edx | |
je .L5 | |
leaq .LC10(%rip), %rdx | |
movl $32, %r8d | |
leaq .LC11(%rip), %rcx | |
call _assert | |
.L5: | |
# Next element; the loop covers 104857600 dwords.
addq $1, %rdi | |
addl $1, %ebp | |
cmpq $104857600, %rdi | |
jne .L6 | |
# --- print the banner; %esi = 10 epochs left, %r13d = checksum = 0 ---
leaq .LC12(%rip), %rcx | |
movl $10, %esi | |
xorl %r13d, %r13d | |
call _Z6printfPKcz | |
# xmm9/xmm10 are reloaded with the 0x00ff00ff and 0x0000ffff lane masks
# needed by the last two reduction steps in .L7.
movdqa .LC13(%rip), %xmm9 | |
movdqa .LC14(%rip), %xmm10 | |
.p2align 4,,10 | |
# --- .L10: one timed epoch ---
.L10: | |
# %ebp = clock() at the start of the epoch.
call clock | |
# %r14 = &arr (cursor for .L9), %r15 = one past the end of arr,
# %rax = cursor for .L7, xmm4 = four-lane accumulator cleared to zero.
leaq arr(%rip), %r14 | |
pxor %xmm4, %xmm4 | |
leaq 419430400(%rbx), %r15 | |
movl %eax, %ebp | |
movq %r14, %rbx | |
movq %r14, %rax | |
.p2align 4,,10 | |
# --- .L7: vectorised parallel-reduction popcount, 4 dwords per pass ---
.L7: | |
movdqa (%rax), %xmm0 | |
addq $16, %rax | |
# The flags from this compare are consumed by the jne at the bottom of the
# loop; no instruction in between writes the flags.
cmpq %rax, %r15 | |
movdqa %xmm0, %xmm3 | |
pand %xmm6, %xmm0 | |
psrad $1, %xmm3 | |
pand %xmm6, %xmm3 | |
paddd %xmm0, %xmm3 | |
movdqa %xmm3, %xmm2 | |
pand %xmm7, %xmm3 | |
psrad $2, %xmm2 | |
pand %xmm7, %xmm2 | |
paddd %xmm3, %xmm2 | |
movdqa %xmm2, %xmm1 | |
pand %xmm8, %xmm2 | |
psrad $4, %xmm1 | |
pand %xmm8, %xmm1 | |
paddd %xmm2, %xmm1 | |
movdqa %xmm1, %xmm0 | |
pand %xmm9, %xmm1 | |
psrad $8, %xmm0 | |
pand %xmm9, %xmm0 | |
paddd %xmm1, %xmm0 | |
movdqa %xmm0, %xmm1 | |
pand %xmm10, %xmm0 | |
psrld $16, %xmm1 | |
paddd %xmm1, %xmm0 | |
# Accumulate the four per-lane counts.
paddd %xmm0, %xmm4 | |
jne .L7 | |
# Horizontal add of the four lanes of xmm4, added into the checksum.
movdqa %xmm4, %xmm0 | |
psrldq $8, %xmm0 | |
paddd %xmm0, %xmm4 | |
movdqa %xmm4, %xmm0 | |
psrldq $4, %xmm0 | |
paddd %xmm0, %xmm4 | |
movd %xmm4, %eax | |
addl %eax, %r13d | |
# %edi = clock() after the first loop.
call clock | |
leaq arr(%rip), %r8 | |
movl %eax, %edi | |
.p2align 4,,10 | |
# --- .L8: scalar table-lookup popcount, one dword per pass ---
# Four sign-extended byte loads from table (bytes 0/1 via %dl/%dh, bytes
# 2/3 via %cl/%ch after the shift by 16) are summed into the checksum.
.L8: | |
movl (%r8), %edx | |
addq $4, %r8 | |
movzbl %dh, %eax | |
movl %edx, %ecx | |
movzbl %dl, %edx | |
movsbl (%r12,%rax), %r9d | |
sarl $16, %ecx | |
movsbl (%r12,%rdx), %eax | |
addl %eax, %r9d | |
movzbl %ch, %eax | |
movzbl %cl, %ecx | |
movsbl (%r12,%rax), %eax | |
movsbl (%r12,%rcx), %edx | |
addl %edx, %eax | |
addl %r9d, %eax | |
addl %eax, %r13d | |
cmpq %r8, %r15 | |
jne .L8 | |
# %r8d = clock() after the second loop.
call clock | |
movl %eax, %r8d | |
.p2align 4,,10 | |
# --- .L9: hardware popcnt, one dword per pass ---
.L9: | |
popcntl (%r14), %eax | |
addq $4, %r14 | |
addl %eax, %r13d | |
cmpq %r14, %r15 | |
jne .L9 | |
# %r8 is volatile across calls, so the second timestamp is parked in the
# 44(%rsp) slot around the final clock() call.
movl %r8d, 44(%rsp) | |
call clock | |
movl 44(%rsp), %r8d | |
# Print arguments: %edx = loop-1 time, %r8d = loop-2 time,
# %r9d = loop-3 time (each a difference of consecutive clock() values).
movl %edi, %edx | |
leaq .LC0(%rip), %rcx | |
subl %ebp, %edx | |
subl %r8d, %eax | |
subl %edi, %r8d | |
movl %eax, %r9d | |
call _Z6printfPKcz.constprop.0 | |
# Next epoch until the counter in %esi reaches zero.
subl $1, %esi | |
jne .L10 | |
# --- .L11: add up every dword of aarr (4 lanes at a time) ---
# NOTE(review): the accompanying C++ source has no `aarr`; confirm which
# source revision produced this loop.
leaq aarr(%rip), %rax | |
pxor %xmm0, %xmm0 | |
leaq 419430400(%rax), %rdx | |
.p2align 4,,10 | |
.L11: | |
paddd (%rax), %xmm0 | |
addq $16, %rax | |
cmpq %rax, %rdx | |
jne .L11 | |
# Horizontal add of xmm0, plus the checksum in %r13d, printed with "%d\n".
leaq .LC15(%rip), %rcx | |
movdqa %xmm0, %xmm1 | |
psrldq $8, %xmm1 | |
paddd %xmm1, %xmm0 | |
movdqa %xmm0, %xmm1 | |
psrldq $4, %xmm1 | |
paddd %xmm1, %xmm0 | |
movd %xmm0, %edx | |
addl %r13d, %edx | |
call _Z6printfPKcz | |
nop | |
# --- epilogue: restore xmm6-xmm14, set the return value to 0, unwind ---
movaps 48(%rsp), %xmm6 | |
xorl %eax, %eax | |
movaps 64(%rsp), %xmm7 | |
movaps 80(%rsp), %xmm8 | |
movaps 96(%rsp), %xmm9 | |
movaps 112(%rsp), %xmm10 | |
movaps 128(%rsp), %xmm11 | |
movaps 144(%rsp), %xmm12 | |
movaps 160(%rsp), %xmm13 | |
movaps 176(%rsp), %xmm14 | |
addq $200, %rsp | |
popq %rbx | |
popq %rsi | |
popq %rdi | |
popq %rbp | |
popq %r12 | |
popq %r13 | |
popq %r14 | |
popq %r15 | |
ret | |
.seh_endproc | |
# Zero-initialised global objects (.bss), each 32-byte aligned.
# aarr: 419430400 bytes; main sums its dwords into the final checksum.
# NOTE(review): not declared in the accompanying C++ source -- confirm origin.
.globl aarr | |
.bss | |
.align 32 | |
aarr: | |
.space 419430400 | |
# arr: 419430400 bytes = 104857600 dwords, the benchmark input array.
.globl arr | |
.align 32 | |
arr: | |
.space 419430400 | |
# table: 256 one-byte entries, indexed by byte value in the lookup popcount.
.globl table | |
.align 32 | |
table: | |
.space 256 | |
# 16-byte-aligned SSE constants loaded by main with movdqa.
.section .rdata,"dr" | |
.align 16 | |
# .LC1: starting lane indices {0, 1, 2, 3} for the table-fill loop.
.LC1: | |
.long 0 | |
.long 1 | |
.long 2 | |
.long 3 | |
.align 16 | |
# .LC2: per-iteration index step (16 table entries per pass).
.LC2: | |
.long 16 | |
.long 16 | |
.long 16 | |
.long 16 | |
.align 16 | |
# .LC3: offset giving the second index vector (i + 4).
.LC3: | |
.long 4 | |
.long 4 | |
.long 4 | |
.long 4 | |
.align 16 | |
# .LC4: offset giving the third index vector (i + 8).
.LC4: | |
.long 8 | |
.long 8 | |
.long 8 | |
.long 8 | |
.align 16 | |
# .LC5: offset giving the fourth index vector (i + 12).
.LC5: | |
.long 12 | |
.long 12 | |
.long 12 | |
.long 12 | |
.align 16 | |
# .LC6: 0x55555555 in every lane (shift-by-1 reduction mask).
.LC6: | |
.long 1431655765 | |
.long 1431655765 | |
.long 1431655765 | |
.long 1431655765 | |
.align 16 | |
# .LC7: 0x33333333 in every lane (shift-by-2 reduction mask).
.LC7: | |
.long 858993459 | |
.long 858993459 | |
.long 858993459 | |
.long 858993459 | |
.align 16 | |
# .LC8: 0x0f0f0f0f in every lane (shift-by-4 reduction mask).
.LC8: | |
.long 252645135 | |
.long 252645135 | |
.long 252645135 | |
.long 252645135 | |
.align 16 | |
# .LC9: 0x00ff in every 16-bit word; applied before packuswb in the
# table-fill loop.
.LC9: | |
.word 255 | |
.word 255 | |
.word 255 | |
.word 255 | |
.word 255 | |
.word 255 | |
.word 255 | |
.word 255 | |
.align 16 | |
# .LC13: 0x00ff00ff in every lane (shift-by-8 reduction mask).
.LC13: | |
.long 16711935 | |
.long 16711935 | |
.long 16711935 | |
.long 16711935 | |
.align 16 | |
# .LC14: 0x0000ffff in every lane (mask for the final shift-by-16 step).
.LC14: | |
.long 65535 | |
.long 65535 | |
.long 65535 | |
.long 65535 | |
.ident "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 5.3.0" | |
.def __mingw_vprintf; .scl 2; .type 32; .endef | |
.def srand; .scl 2; .type 32; .endef | |
.def _assert; .scl 2; .type 32; .endef | |
.def clock; .scl 2; .type 32; .endef #asd |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment