samson-wang · February 19, 2019 09:56
diff --git a/result.log b/result.log
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 4.0306925774e-01, HEX: 0x3ece5f18
 A: 3.7674255964e-06, HEX: 0x367cd3e1
 A: 3.8132049561e+01, HEX: 0x42188738

 real	0m0.057s
 user	0m0.056s
 sys	0m0.000s
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 4.0306921002e-39, HEX: 0x2be3ee
 A: 3.7835058537e-44, HEX: 0x1b
 A: 4.0263963917e-37, HEX: 0x30902dc

 real	0m0.622s
 user	0m0.620s
 sys	0m0.000s
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 4.0306925774e-01, HEX: 0x3ece5f18
 A: 3.7674255964e-06, HEX: 0x367cd3e1
 A: 3.8132049561e+01, HEX: 0x42188738

 real	0m0.025s
 user	0m0.024s
 sys	0m0.000s
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 4.0306921002e-39, HEX: 0x2be3ee
 A: 3.7835058537e-44, HEX: 0x1b
 A: 4.0263963917e-37, HEX: 0x30902dc
diff --git a/result_icc.log b/result_icc.log
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 4.0306925774e-01, HEX: 0x3ece5f18
 A: 3.7674255964e-06, HEX: 0x367cd3e1
 A: 3.8132049561e+01, HEX: 0x42188738

 real	0m0.015s
 user	0m0.012s
 sys	0m0.000s
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 4.0306921002e-39, HEX: 0x2be3ee
 A: 3.7835058537e-44, HEX: 0x1b
 A: 4.0263963917e-37, HEX: 0x30902dc

 real	0m0.540s
 user	0m0.540s
 sys	0m0.000s
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 4.0306925774e-01, HEX: 0x3ece5f18
 A: 3.7674255964e-06, HEX: 0x367cd3e1
 A: 3.8132049561e+01, HEX: 0x42188738

 real	0m0.018s
 user	0m0.016s
 sys	0m0.000s
 Minimum float positive value: 1.175494E-38
 A: 0.403069, HEX: 0x3ece5f18
 A: 0.0000000000e+00, HEX: 0x0
 A: 0.0000000000e+00, HEX: 0x0
 A: 0.0000000000e+00, HEX: 0x0

 real	0m0.016s
 user	0m0.012s
 sys	0m0.004s
diff --git a/run.sh b/run.sh
 #!/bin/bash
 # Build test_float_mul.c with gcc twice (no optimization flag, then -O1).
 # After each build, time the binary with the default operand and again with
 # the extra "small" argument, and dump the assembly listing for that level.

 # -f: a missing binary from a previous run is not an error.
 rm -f ./a.out
 gcc  test_float_mul.c
 time ./a.out 10000000
 time ./a.out 10000000 small

 gcc test_float_mul.c -S -o test_float_mul_O0.s

 rm -f ./a.out
 gcc  -O1 test_float_mul.c
 time ./a.out 10000000
 time ./a.out 10000000 small

 # The listing must be produced with -O1 as well; without the flag this file
 # would simply duplicate test_float_mul_O0.s.
 gcc -O1 test_float_mul.c -S -o test_float_mul_O1.s
diff --git a/run_icc.sh b/run_icc.sh
 #!/bin/bash
 # Build test_float_mul.c with icc at -O0 and at -O1. After each build, time
 # the resulting a_icc.out with the default operand and again with the extra
 # "small" argument, and dump the assembly listing for that level.

 # -f: a missing binary from a previous run is not an error.
 rm -f ./a_icc.out
 icc  -o a_icc.out -O0 test_float_mul.c
 # Time the icc-built binary in both runs (not ./a.out, which is whatever
 # run.sh left behind), so the two timings are comparable.
 time ./a_icc.out 10000000
 time ./a_icc.out 10000000 small

 icc test_float_mul.c -S -O0 -o icc_test_float_mul_O0.s

 rm -f ./a_icc.out
 icc  -o a_icc.out -O1 test_float_mul.c
 time ./a_icc.out 10000000
 time ./a_icc.out 10000000 small

 icc test_float_mul.c -S -O1 -o icc_test_float_mul_O1.s
diff --git a/test_float_mul.c b/test_float_mul.c
 #include <stdio.h>
 #include <stdint.h>
 #include <inttypes.h>
 #include <math.h>
 #include <stdlib.h>
 //#include <xmmintrin.h>
 //#define _MM_DENORMALS_ZERO_MASK   0x0040
 //#define _MM_DENORMALS_ZERO_ON     0x0040
 //#define _MM_DENORMALS_ZERO_OFF    0x0000
 //
 //#define _MM_SET_DENORMALS_ZERO_MODE(mode)                                   \
 //            _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
 //#define _MM_GET_DENORMALS_ZERO_MODE()                                       \
 //            (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
 //_mm_setcsr( _mm_getcsr() | 0x8040 );
 //_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
 //#include <fenv.h>
 //fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
 #include <float.h>

 /*
  * Add the product a * b to a float accumulator `count` times and return the
  * accumulated sum. Returns 0 when count is zero or negative.
  *
  * NOTE(review): the multiply is presumably kept inside the loop on purpose so
  * that every iteration performs the operation being timed -- confirm before
  * hoisting it.
  */
 float foo(float a, float b, int count) {
    float acc = 0.0f;
    for (int left = count; left > 0; --left) {
        acc += a * b;
    }
    return acc;
 }

 /*
  * Print FLT_MIN, a fixed float `a` with its raw 32-bit representation, the
  * product a * b, and the result of foo(a, b, count).
  *
  * argv[1] (optional): iteration count passed to foo(); defaults to 100000.
  *                     Parsed with atoi(), so the input is not validated.
  * argv[2] (optional): any value; its presence multiplies `a` by 1e-38 before
  *                     the product and the accumulation are computed.
  */
 int main(int argc, char** argv) {

    const int count = (argc > 1) ? atoi(argv[1]) : 100000;

    printf("Minimum float positive value: %E\n", FLT_MIN );

    float a = 0.40306925773620605;

    /* Views the same storage as a float and as a 32-bit unsigned integer. */
    union {
        float f;
        uint32_t u;
    } raw = { .f = a };

    printf("A: %lf, HEX: 0x%" PRIx32 "\n", a, raw.u);

    /* A second command-line argument selects the scaled-down operand. */
    if (argc > 2) {
        a *= 1e-38;
        raw.f = a;
    }
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a, raw.u);

    float b = 0.9346844673156738 * 1e-5;

    raw.f = a * b;
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a * b, raw.u);

    float total = foo(a, b, count);
    raw.f = total;
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", total, raw.u);

 }
diff --git a/test_prelu_pytorch.py b/test_prelu_pytorch.py
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import time

 # Times F.prelu on a fixed random input, first with a random alpha and then
 # with the same alpha scaled by 1e-40, printing the mean seconds per call.
 # Uses print(...) and range so the script runs on Python 2 and Python 3.

 # Seed the generator before drawing alpha and x.
 torch.random.manual_seed(3)
 alpha = torch.rand(1)

 x = torch.randn((32, 256, 64, 64))

 # Only referenced by the commented-out `x = m(x)` alternative in the loops.
 m = nn.Conv2d(256, 256, 3, 1, padding=1)
 print(x[0][1])
 NUM = 10

 # Baseline: mean wall-clock seconds per F.prelu call with the original alpha.
 st = time.time()
 for _ in range(NUM):
     F.prelu(x, alpha)
 #    x = m(x)


 print((time.time() - st) / NUM)

 # Same measurement with alpha scaled by 1e-40.
 # NOTE(review): presumably this pushes alpha into the float32 subnormal
 # range to expose the slowdown -- confirm the tensor dtype is float32.
 alpha = alpha * 1e-40
 st = time.time()
 for _ in range(NUM):
     F.prelu(x, alpha)
 #    x = m(x)


 print((time.time() - st) / NUM)
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306925774e-01, HEX: 0x3ece5f18
	A: 3.7674255964e-06, HEX: 0x367cd3e1
	A: 3.8132049561e+01, HEX: 0x42188738

	real 0m0.057s
	user 0m0.056s
	sys 0m0.000s
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306921002e-39, HEX: 0x2be3ee
	A: 3.7835058537e-44, HEX: 0x1b
	A: 4.0263963917e-37, HEX: 0x30902dc

	real 0m0.622s
	user 0m0.620s
	sys 0m0.000s
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306925774e-01, HEX: 0x3ece5f18
	A: 3.7674255964e-06, HEX: 0x367cd3e1
	A: 3.8132049561e+01, HEX: 0x42188738

	real 0m0.025s
	user 0m0.024s
	sys 0m0.000s
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306921002e-39, HEX: 0x2be3ee
	A: 3.7835058537e-44, HEX: 0x1b
	A: 4.0263963917e-37, HEX: 0x30902dc
	#!/bin/bash
	# Build test_float_mul.c with gcc twice (no optimization flag, then -O1).
	# After each build, time the binary with the default operand and again with
	# the extra "small" argument, and dump the assembly listing for that level.

	# -f: a missing binary from a previous run is not an error.
	rm -f ./a.out
	gcc test_float_mul.c
	time ./a.out 10000000
	time ./a.out 10000000 small

	gcc test_float_mul.c -S -o test_float_mul_O0.s

	rm -f ./a.out
	gcc -O1 test_float_mul.c
	time ./a.out 10000000
	time ./a.out 10000000 small

	# The listing must be produced with -O1 as well; without the flag this file
	# would simply duplicate test_float_mul_O0.s.
	gcc -O1 test_float_mul.c -S -o test_float_mul_O1.s
	#!/bin/bash
	# Build test_float_mul.c with icc at -O0 and at -O1. After each build, time
	# the resulting a_icc.out with the default operand and again with the extra
	# "small" argument, and dump the assembly listing for that level.

	# -f: a missing binary from a previous run is not an error.
	rm -f ./a_icc.out
	icc -o a_icc.out -O0 test_float_mul.c
	# Time the icc-built binary in both runs (not ./a.out, which is whatever
	# run.sh left behind), so the two timings are comparable.
	time ./a_icc.out 10000000
	time ./a_icc.out 10000000 small

	icc test_float_mul.c -S -O0 -o icc_test_float_mul_O0.s

	rm -f ./a_icc.out
	icc -o a_icc.out -O1 test_float_mul.c
	time ./a_icc.out 10000000
	time ./a_icc.out 10000000 small

	icc test_float_mul.c -S -O1 -o icc_test_float_mul_O1.s
	#include <stdio.h>
	#include <stdint.h>
	#include <inttypes.h>
	#include <math.h>
	#include <stdlib.h>
	//#include <xmmintrin.h>
	//#define _MM_DENORMALS_ZERO_MASK 0x0040
	//#define _MM_DENORMALS_ZERO_ON 0x0040
	//#define _MM_DENORMALS_ZERO_OFF 0x0000
	//
	//#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
	// _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
	//#define _MM_GET_DENORMALS_ZERO_MODE() \
	// (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
	//_mm_setcsr( _mm_getcsr() | 0x8040 );
	//_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
	//#include <fenv.h>
	//fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
	#include <float.h>

	/*
	 * Add the product a * b to a float accumulator `count` times and return the
	 * accumulated sum. Returns 0 when count is zero or negative.
	 *
	 * NOTE(review): the multiply is presumably kept inside the loop on purpose so
	 * that every iteration performs the operation being timed -- confirm before
	 * hoisting it.
	 */
	float foo(float a, float b, int count) {
	    float acc = 0.0f;
	    for (int left = count; left > 0; --left) {
	        acc += a * b;
	    }
	    return acc;
	}

	/*
	 * Print FLT_MIN, a fixed float `a` with its raw 32-bit representation, the
	 * product a * b, and the result of foo(a, b, count).
	 *
	 * argv[1] (optional): iteration count passed to foo(); defaults to 100000.
	 *                     Parsed with atoi(), so the input is not validated.
	 * argv[2] (optional): any value; its presence multiplies `a` by 1e-38 before
	 *                     the product and the accumulation are computed.
	 */
	int main(int argc, char** argv) {

	    const int count = (argc > 1) ? atoi(argv[1]) : 100000;

	    printf("Minimum float positive value: %E\n", FLT_MIN );

	    float a = 0.40306925773620605;

	    /* Views the same storage as a float and as a 32-bit unsigned integer. */
	    union {
	        float f;
	        uint32_t u;
	    } raw = { .f = a };

	    printf("A: %lf, HEX: 0x%" PRIx32 "\n", a, raw.u);

	    /* A second command-line argument selects the scaled-down operand. */
	    if (argc > 2) {
	        a *= 1e-38;
	        raw.f = a;
	    }
	    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a, raw.u);

	    float b = 0.9346844673156738 * 1e-5;

	    raw.f = a * b;
	    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a * b, raw.u);

	    float total = foo(a, b, count);
	    raw.f = total;
	    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", total, raw.u);

	}
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import time

	# Times F.prelu on a fixed random input, first with a random alpha and then
	# with the same alpha scaled by 1e-40, printing the mean seconds per call.
	# Loop bodies are indented again, and print(...) / range are used so the
	# script runs on Python 2 and Python 3.

	# Seed the generator before drawing alpha and x.
	torch.random.manual_seed(3)
	alpha = torch.rand(1)

	x = torch.randn((32, 256, 64, 64))

	# Only referenced by the commented-out `x = m(x)` alternative in the loops.
	m = nn.Conv2d(256, 256, 3, 1, padding=1)
	print(x[0][1])
	NUM = 10

	# Baseline: mean wall-clock seconds per F.prelu call with the original alpha.
	st = time.time()
	for _ in range(NUM):
	    F.prelu(x, alpha)
	#    x = m(x)


	print((time.time() - st) / NUM)

	# Same measurement with alpha scaled by 1e-40.
	# NOTE(review): presumably this pushes alpha into the float32 subnormal
	# range to expose the slowdown -- confirm the tensor dtype is float32.
	alpha = alpha * 1e-40
	st = time.time()
	for _ in range(NUM):
	    F.prelu(x, alpha)
	#    x = m(x)


	print((time.time() - st) / NUM)