Skip to content

Instantly share code, notes, and snippets.

@BalintCsala
Created June 9, 2020 20:40
Show Gist options
  • Save BalintCsala/afd663f3e91957b6eca04e1768fdcaa4 to your computer and use it in GitHub Desktop.
Save BalintCsala/afd663f3e91957b6eca04e1768fdcaa4 to your computer and use it in GitHub Desktop.
#include <cstdint>
#include <memory.h>
#include <cstdio>
#include <ctime>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <string>
#define RANDOM_MULTIPLIER 0x5DEECE66DULL
#define RANDOM_ADDEND 0xBULL
#define RANDOM_MASK ((1ULL << 48ULL) - 1ULL)
#ifndef FLOOR_LEVEL
#define FLOOR_LEVEL 63LL
#endif
#ifndef WANTED_CACTUS_HEIGHT
#define WANTED_CACTUS_HEIGHT 10LL
#endif
#ifndef WORK_UNIT_SIZE
#define WORK_UNIT_SIZE (1ULL << 23ULL)
#endif
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 256ULL
#endif
#ifndef GPU_COUNT
#define GPU_COUNT 1ULL
#endif
#ifndef OFFSET
#define OFFSET 0
#endif
#ifndef END
#define END (1ULL << 48ULL)
#endif
namespace java_random {
// Random::next(bits)
__device__ inline uint32_t next(uint64_t *random, int32_t bits) {
*random = (*random * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
return (uint32_t) (*random >> (48ULL - bits));
}
__device__ inline int32_t next_int_unknown(uint64_t *seed, int16_t bound) {
if ((bound & -bound) == bound) {
*seed = (*seed * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
return (int32_t) ((bound * (*seed >> 17ULL)) >> 31ULL);
}
int32_t bits, value;
do {
*seed = (*seed * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
bits = *seed >> 17ULL;
value = bits % bound;
} while (bits - value + (bound - 1) < 0);
return value;
}
// Random::nextInt(bound)
__device__ inline uint32_t next_int(uint64_t *random) {
return java_random::next(random, 31) % 3;
}
}
__global__ __launch_bounds__(256, 2) void crack(uint64_t seed_offset, int32_t *num_seeds, uint64_t *seeds) {
uint64_t originalSeed = blockIdx.x * blockDim.x + threadIdx.x + seed_offset;
uint64_t seed = originalSeed;
int8_t heightMap[1024];
#pragma unroll
for (int i = 0; i < 1024; i++) {
heightMap[i] = FLOOR_LEVEL;
}
int8_t *currentHighestPos = heightMap, *posMap;
int16_t initialPosX, initialPosY, initialPosZ;
int16_t posX, posY, posZ;
int16_t i, a, j;
for (i = -90; i < 0; i += 9) {
// Keep, most threads finish early this way
if (*currentHighestPos - WANTED_CACTUS_HEIGHT - FLOOR_LEVEL < i)
return;
initialPosX = java_random::next(&seed, 4) + 8;
initialPosZ = java_random::next(&seed, 4) + 8;
initialPosY = java_random::next_int_unknown(&seed, (heightMap[initialPosX + initialPosZ * 32] + 1) * 2);
for (a = 0; a < 10; a++) {
posX = initialPosX + java_random::next(&seed, 3) - java_random::next(&seed, 3);
posY = initialPosY + java_random::next(&seed, 2) - java_random::next(&seed, 2);
posZ = initialPosZ + java_random::next(&seed, 3) - java_random::next(&seed, 3);
posMap = heightMap + posX + posZ * 32;
// Keep
if (posY <= *posMap)
continue;
for (j = 0; j < 1 + java_random::next_int_unknown(&seed, java_random::next_int(&seed) + 1); j++) {
if ((posY + j - 1) > *posMap || posY < 0) continue;
if ((posY + j) <= heightMap[(posX + 1) + posZ * 32]) continue;
if ((posY + j) <= heightMap[posX + (posZ - 1) * 32]) continue;
if ((posY + j) <= heightMap[(posX - 1) + posZ * 32]) continue;
if ((posY + j) <= heightMap[posX + (posZ + 1) * 32]) continue;
heightMap[posX + posZ * 32]++;
if (*currentHighestPos < *posMap) {
currentHighestPos = posMap;
}
}
}
if (*currentHighestPos - FLOOR_LEVEL >= WANTED_CACTUS_HEIGHT) {
seeds[atomicAdd(num_seeds, 1)] = originalSeed;
return;
}
}
}
struct GPU_Node {
int* num_seeds;
uint64_t* seeds;
};
void setup_gpu_node(GPU_Node* node, int32_t gpu) {
cudaSetDevice(gpu);
cudaMallocManaged(&node->num_seeds, sizeof(*node->num_seeds));
cudaMallocManaged(&node->seeds, 1ULL << 10ULL); // approx 1kb
}
GPU_Node nodes[GPU_COUNT];
uint64_t offset = OFFSET;
uint64_t count = 0;
std::mutex info_lock;
void gpu_manager(int32_t gpu_index) {
std::string fileName = "kaktoos_seeds" + std::to_string(gpu_index) + ".txt";
FILE *out_file = fopen(fileName.c_str(), "a");
cudaSetDevice(gpu_index);
while (offset < END) {
*nodes[gpu_index].num_seeds = 0;
crack<<<WORK_UNIT_SIZE / BLOCK_SIZE, BLOCK_SIZE, 0>>> (offset, nodes[gpu_index].num_seeds, nodes[gpu_index].seeds);
info_lock.lock();
offset += WORK_UNIT_SIZE;
info_lock.unlock();
cudaDeviceSynchronize();
for (int32_t i = 0, e = *nodes[gpu_index].num_seeds; i < e; i++) {
fprintf(out_file, "%lld\n", (long long int)nodes[gpu_index].seeds[i]);
printf("Found seed: %lld\n", (long long int)nodes[gpu_index].seeds[i]);
}
fflush(out_file);
info_lock.lock();
count += *nodes[gpu_index].num_seeds;
info_lock.unlock();
}
fclose(out_file);
}
int main() {
printf("Searching %ld total seeds...\n", END - OFFSET);
std::thread threads[GPU_COUNT];
time_t startTime = time(nullptr), currentTime;
for(int32_t i = 0; i < GPU_COUNT; i++) {
setup_gpu_node(&nodes[i], i);
threads[i] = std::thread(gpu_manager, i);
}
using namespace std::chrono_literals;
while (offset < END) {
time(&currentTime);
int timeElapsed = (int)(currentTime - startTime);
double speed = (double)(offset - OFFSET) / (double)timeElapsed / 1000000.0;
printf("Searched %lld seeds, offset: %lld found %lld matches. Time elapsed: %ds. Speed: %.2fm seeds/s. %f%%\n",
(long long int)(offset - OFFSET),
(long long int)offset,
(long long int)count,
timeElapsed,
speed,
(double)(offset - OFFSET) / (END - OFFSET) * 100);
std::this_thread::sleep_for(0.5s);
}
for (auto &thread : threads) {
thread.join();
}
printf("Done!\n");
printf("But, verily, it be the nature of dreams to end.\n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment