Created
February 12, 2015 15:38
-
-
Save ArchRobison/0acab7f723b1357fa2fc to your computer and use it in GitHub Desktop.
A slight variation of Miles Lubin's gist: https://gist.github.com/mlubin/4994c65c7a2fa90a3c7e
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <math.h> | |
| #include <sys/time.h> | |
| // gcc -O2 -std=c99 -march=native -mno-fma -o newton.gcc newton.c | |
| // clang -O2 -march=native -o newton.clang newton.c | |
// Current wall-clock time in seconds, as reported by gettimeofday().
// (Original author's note: copied from julia.)
double clock_now()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);

    // Seconds and microseconds are converted separately, then summed.
    const double whole_seconds = (double)tv.tv_sec;
    const double fraction = (double)tv.tv_usec / 1.0e6;
    return whole_seconds + fraction;
}
// Approximate the square root of x with Newton's iteration, starting from
// the guess x itself and stopping once |guess*guess - x| <= 1e-13.
//
// NOTE(review): the stopping test is an absolute tolerance, so termination
// is not obviously guaranteed for negative or very large x -- confirm that
// callers only pass inputs like the benchmark's 10000.0.
double squareroot(double x)
{
    double guess = x;
    for (;;) {
        const double residual = guess*guess - x;
        // Written as a negated '>' so a NaN residual leaves the loop,
        // exactly as a failed while-condition would.
        if (!(fabs(residual) > 1e-13)) {
            break;
        }
        guess = guess - residual/(2*guess);
    }
    return guess;
}
// Benchmark driver: call squareroot(10000.0) ten million times and print
// the mean wall-clock cost per call in nanoseconds.
int main()
{
    const int repetitions = 10000000;
    const double started = clock_now();

    // The accumulator is volatile, presumably so the compiler cannot drop
    // the additions (and with them the calls being timed).
    volatile double total = 0;
    int done = 0;
    while (done < repetitions) {
        total += squareroot(10000.0);
        done++;
    }

    const double elapsed = clock_now() - started;
    printf("%f nsec\n", elapsed/repetitions*1E9);
    return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Approximate sqrt(x) with Newton's iteration, starting from the guess x
# and stopping once abs(guess*guess - x) <= 1e-13.
function squareroot(x)
    guess = x
    while true
        residual = guess*guess - x
        # Negated '>' so that a NaN residual ends the loop, as in a plain while test.
        if !(abs(residual) > 1e-13)
            break
        end
        guess = guess - residual/(2*guess)
    end
    return guess
end
# Call squareroot(x) num_iter times, print the mean wall-clock cost per call
# in nanoseconds, and return the sum of all results.
function time_sqrt(x)
    # Plain local variable: `const` on a local is rejected by Julia 1.x, and
    # since num_iter is never reassigned the plain assignment is equivalent.
    num_iter = 10000000
    # Start the accumulator at a zero of the same type as x.
    q = zero(x)
    t = time()
    for i in 1:num_iter
        q += squareroot(x)
    end
    t = time() - t
    @printf("%f nsec\n",t/num_iter*1e9)
    return q
end
# Run the benchmark on 10000.0 and print the accumulated sum.
total = time_sqrt(10000.0)
@printf("q=%.18f\n", total)
# Show the machine code generated for squareroot on a Float64 argument.
@code_native squareroot(10000.0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ clang -v | |
| clang version 3.7.0 (trunk 227196) | |
| Target: x86_64-unknown-linux-gnu | |
| Thread model: posix | |
| Found candidate GCC installation: /usr/lib/gcc/i686-redhat-linux/4.8.2 | |
| Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/4.8.2 | |
| Selected GCC installation: /usr/lib/gcc/x86_64-redhat-linux/4.8.2 | |
| Candidate multilib: .;@m64 | |
| Candidate multilib: 32;@m32 | |
| Selected multilib: .;@m64 | |
| $ clang -O2 -march=native -o newton.clang newton.c | |
| $ newton.clang | |
| 46.274900 nsec | |
| $ gcc -v | |
| Using built-in specs. | |
| COLLECT_GCC=gcc | |
| COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/4.8.2/lto-wrapper | |
| Target: x86_64-redhat-linux | |
| Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto --enable-plugin --enable-initfini-array --disable-libgcj --with-isl=/builddir/build/BUILD/gcc-4.8.2-20140120/obj-x86_64-redhat-linux/isl-install --with-cloog=/builddir/build/BUILD/gcc-4.8.2-20140120/obj-x86_64-redhat-linux/cloog-install --enable-gnu-indirect-function --with-tune=generic --with-arch_32=x86-64 --build=x86_64-redhat-linux | |
| Thread model: posix | |
| gcc version 4.8.2 20140120 (Red Hat 4.8.2-16) (GCC) | |
| $ gcc -O2 -std=c99 -march=native -mno-fma -o newton.gcc newton.c | |
| $ newton | |
| newton.clang newton.gcc newton.gcc.nofma newton.icc newton.icc.nofma newton.icc.strict | |
| $ newton.gcc | |
| 45.589709 nsec | |
| $ pwd | |
| /localdisk/adrobiso/julia-0.3.5/julia | |
| $ julia newton.jl | |
| 70.634222 nsec | |
| q=1000000000.000000000000000000 | |
| .text | |
| Filename: /localdisk/adrobiso/julia-0.3.5/julia/newton.jl | |
| Source line: 3 | |
| push RBP | |
| mov RBP, RSP | |
| Source line: 3 | |
| vmulsd XMM1, XMM0, XMM0 | |
| vsubsd XMM1, XMM1, XMM0 | |
| vmovq RCX, XMM1 | |
| movabs RAX, 9223372036854775807 | |
| and RCX, RAX | |
| vmovq XMM1, RCX | |
| movabs RCX, 139772758918544 | |
| vucomisd XMM1, QWORD PTR [RCX] | |
| ja L64 | |
| vmovaps XMM1, XMM0 | |
| jmpq L133 | |
| L64: movabs RDX, 139772758918552 | |
| vmovsd XMM2, QWORD PTR [RDX] | |
| vmovaps XMM1, XMM0 | |
| Source line: 4 | |
| L82: vmulsd XMM3, XMM1, XMM2 | |
| vmulsd XMM4, XMM1, XMM1 | |
| vsubsd XMM4, XMM4, XMM0 | |
| vdivsd XMM3, XMM4, XMM3 | |
| vaddsd XMM1, XMM1, XMM3 | |
| vmulsd XMM3, XMM1, XMM1 | |
| vsubsd XMM3, XMM3, XMM0 | |
| vmovq RDX, XMM3 | |
| and RDX, RAX | |
| vmovq XMM3, RDX | |
| vucomisd XMM3, QWORD PTR [RCX] | |
| ja L82 | |
| Source line: 6 | |
| L133: vmovaps XMM0, XMM1 | |
| pop RBP | |
| ret | |
| $ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment