@Hermann-SW
Created July 7, 2025 19:23
Compare single thread pure integer performance of CPUs and [AMD] GPUs
/*
Compare single thread pure integer performance of CPUs and [AMD] GPUs
hipcc hip-magic.hip
cpplint --filter=-legal/copyright hip-magic.hip.cpp
cppcheck --enable=all --suppress=missingIncludeSystem hip-magic.hip.cpp --check-config
*/
#include <sys/time.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <hip/hip_runtime.h>
#define HIP_CHECK(condition) assert(condition == hipSuccess);
#define Prime(i) ((B[(i)>>5] & (0x80000000UL >> ((i)%32))) != 0)
#define forall_odd_primes_less_than(p, m, block) \
    for ((p) = 3; (p) < (m); (p) += 2) \
        if (Prime((p))) \
            block
// Each thread repeatedly (L times) searches for a 3x3 magic square of
// distinct odd primes below 128 with center prime p; the square found is
// printed only on the last iteration.
__global__ void gpuHelloWorld(int N, int L) {
    int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if (gid < N) {
        uint32_t B[] = {0x35145105, 0x4510414, 0x11411040, 0x45144001};
        uint8_t p, a, b, c, d;
        for (int i = 1; i <= L; ++i) {
            forall_odd_primes_less_than(p, 64,
                forall_odd_primes_less_than(a, p,
                    if Prime(2*p-a) {
                        forall_odd_primes_less_than(b, p,
                            if ((b != a) && Prime(2*p-b)) {
                                c = 3*p - (a+b);
                                if ((c < 2*p) && (2*p-c != a) && (2*p-c != b)
                                    && Prime(c) && Prime(2*p-c)) {
                                    if (2*a+b > 2*p) {
                                        d = 2*a + b - 2*p;  // 3*p - (3*p-(a+b)) - (2*p-a)
                                        if ((d != a) && (d != b) && (d != 2*p-c)
                                            && Prime(d) && Prime(2*p-d)) {
                                            if (i == L)
                                                printf("%3u|%3u|%3u|\n"
                                                       "%3u|%3u|%3u|\n"
                                                       "%3u|%3u|%3u|\n",
                                                       a, b, c, 2*p-d, p, d,
                                                       2*p-c, 2*p-b, 2*p-a);
                                            goto fin;
                                        }
                                    }
                                }
                            }
                        )
                    }
                )
            )
            fin: if (i == L) i = i;  // no-op; the label just needs a statement
        }
    }
}
int main(int argc, char *argv[]) {
    int gridSize = 1;              // #blocks
    int blockSize = 1;             // Threads per block
    int N = gridSize * blockSize;  // total #threads
    int L = argc == 1 ? 10000 : atoi(argv[1]);

    // GPU run: single-thread kernel, timed with HIP events
    hipEvent_t start, stop;
    HIP_CHECK(hipEventCreate(&start));
    HIP_CHECK(hipEventCreate(&stop));
    float elapsed_ms{};
    HIP_CHECK(hipEventRecord(start, hipStreamDefault));
    gpuHelloWorld<<<gridSize, blockSize>>>(N, L);  // Launch kernel
    HIP_CHECK(hipEventRecord(stop, hipStreamDefault));
    HIP_CHECK(hipEventSynchronize(stop));
    HIP_CHECK(hipEventElapsedTime(&elapsed_ms, start, stop));
    printf("%.1f ms\n", elapsed_ms);
    HIP_CHECK(hipEventDestroy(stop));
    HIP_CHECK(hipEventDestroy(start));

    // CPU run: identical single-thread search, timed with gettimeofday()
    struct timeval tv0, tv1;
    gettimeofday(&tv1, NULL);  // wait for usec change
    do gettimeofday(&tv0, NULL); while (tv0.tv_usec == tv1.tv_usec);
    uint32_t B[] = {0x35145105, 0x4510414, 0x11411040, 0x45144001};
    uint8_t p, a, b, c, d;
    for (int i = 1; i <= L; ++i) {
        forall_odd_primes_less_than(p, 64,
            forall_odd_primes_less_than(a, p,
                if Prime(2*p-a) {
                    forall_odd_primes_less_than(b, p,
                        if ((b != a) && Prime(2*p-b)) {
                            c = 3*p - (a+b);
                            if ((c < 2*p) && (2*p-c != a) && (2*p-c != b)
                                && Prime(c) && Prime(2*p-c)) {
                                if (2*a+b > 2*p) {
                                    d = 2*a + b - 2*p;  // 3*p - (3*p-(a+b)) - (2*p-a)
                                    if ((d != a) && (d != b) && (d != 2*p-c)
                                        && Prime(d) && Prime(2*p-d)) {
                                        if (i == L)
                                            printf("%3u|%3u|%3u|\n"
                                                   "%3u|%3u|%3u|\n"
                                                   "%3u|%3u|%3u|\n",
                                                   a, b, c, 2*p-d, p, d,
                                                   2*p-c, 2*p-b, 2*p-a);
                                        goto fin;
                                    }
                                }
                            }
                        }
                    )
                }
            )
        )
        fin: if (i == L) i = i;  // no-op; the label just needs a statement
    }
    gettimeofday(&tv1, NULL);
    printf("%ldus\n",
           1000000*(tv1.tv_sec-tv0.tv_sec)+tv1.tv_usec-tv0.tv_usec);
    return 0;
}
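
For reference: the hardcoded array B[] indexed by the Prime(i) macro is a bit mask of the primes below 128, stored MSB-first within each 32-bit word, and the printed 3x3 grid is a magic square of distinct primes whose rows, columns and diagonals all sum to 3*p (177 for the p = 59 square in the runs below). A minimal standalone sketch (an assumption, not part of the gist) that regenerates the four mask words for cross-checking:

/*
Sketch (assumption, not part of the gist): regenerate the prime bit mask B[]
used by the Prime(i) macro; bit (31 - i%32) of B[i/32] is set iff i is prime.
g++ -o primemask primemask.cpp
*/
#include <stdint.h>
#include <stdio.h>

int main() {
    bool is_prime[128];
    for (int i = 0; i < 128; ++i) is_prime[i] = (i >= 2);
    for (int i = 2; i < 128; ++i)  // simple sieve of Eratosthenes
        if (is_prime[i])
            for (int j = 2*i; j < 128; j += i) is_prime[j] = false;

    uint32_t B[4] = {0, 0, 0, 0};
    for (int i = 0; i < 128; ++i)
        if (is_prime[i]) B[i >> 5] |= 0x80000000UL >> (i % 32);

    // expected: 0x35145105 0x04510414 0x11411040 0x45144001
    printf("0x%08X 0x%08X 0x%08X 0x%08X\n", B[0], B[1], B[2], B[3]);
    return 0;
}

Its output should match the constants hardcoded in hip-magic.hip.cpp.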
@Hermann-SW (Author)

The new HIP code
https://stamm-wilbrandt.de/forum/hip-magic-f21.hip

is the previous one, but utilizing only 21 CUs instead of the 60 CUs the Radeon VII GPU has:

hermann@Radeon-vii:~/rocm-examples$ diff hip-magic-f60.hip hip-magic-f21.hip 
57c57
<     int gridSize = 60;  // #blocks
---
>     int gridSize = 21;  // #blocks
hermann@Radeon-vii:~/rocm-examples$ 

This will be used to see that serialization happens when overloading the GPU.
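
As a side note, the CU count that gridSize is tuned against can be queried at run time with hipGetDeviceProperties(); a minimal sketch (an assumption, not part of the gist), which should report 60 CUs for the Radeon VII:

/*
Sketch (assumption, not part of the gist): print the number of CUs of device 0.
hipcc -o cucount cucount.hip
*/
#include <stdio.h>
#include <hip/hip_runtime.h>

int main() {
    hipDeviceProp_t prop;
    if (hipGetDeviceProperties(&prop, 0) != hipSuccess) return 1;
    printf("%s: %d CUs\n", prop.name, prop.multiProcessorCount);
    return 0;
}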

As a baseline, a single execution using 21 CUs/blocks at a time, with 17465.2 ms on the GPU:

$ time ./a.out 100000
 47| 29|101|
113| 59|  5|
 17| 89| 71|
17465.2 ms
 47| 29|101|
113| 59|  5|
 17| 89| 71|
7143001us

real	0m26.161s
user	0m25.258s
sys	0m0.370s
$ 

I request 100,000/100,001/100,002 loops for the 1st/2nd/3rd instance.
Next, two runs in parallel, both with the "same" runtime on the GPU as before.
No surprise, since 60 CUs are available and 2×21 = 42 CUs are requested.
Nice that not even a small penalty has to be paid in GPU runtimes:

$ time ./a.out 100000		    $ time ./a.out 100001
 47| 29|101|			     47| 29|101|
113| 59|  5|			    113| 59|  5|
 17| 89| 71|			     17| 89| 71|
17465.2 ms			    17464.2 ms
 47| 29|101|			     47| 29|101|
113| 59|  5|			    113| 59|  5|
 17| 89| 71|			     17| 89| 71|
7143001us			    7090378us
				    
real	0m26.161s		    real    0m27.484s
user	0m25.258s		    user    0m25.205s
sys	0m0.370s		    sys	    0m0.824s
$				    $

Last but not least, running three instances in parallel exceeds the 60 CUs available on the Radeon VII GPU.
3×21 = 63 CUs is too much, and here we see serialization happening for the 1st instance, with more than 35 s of GPU time:

$ time ./a.out 100000	$ time ./a.out 100001	$ time ./a.out 100002
 47| 29|101|		 47| 29|101|		 47| 29|101|
113| 59|  5|		113| 59|  5|		113| 59|  5|
 17| 89| 71|		 17| 89| 71|		 17| 89| 71|
38433.2 ms		17704.3 ms		22241.4 ms
 47| 29|101|		 47| 29|101|		 47| 29|101|
113| 59|  5|		113| 59|  5|		113| 59|  5|
 17| 89| 71|		 17| 89| 71|		 17| 89| 71|
7069767us		11209558us		7203511us
						
real	0m46.338s	real	0m29.913s	real	0m32.030s
user	0m46.472s	user	0m29.575s	user	0m30.125s
sys	0m0.226s	sys	0m0.245s	sys	0m0.678s
$			$			$
