Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active May 8, 2025 11:54
Show Gist options
  • Save vurtun/90af0b93e1fe51b2c187f43a2a13814c to your computer and use it in GitHub Desktop.
Save vurtun/90af0b93e1fe51b2c187f43a2a13814c to your computer and use it in GitHub Desktop.
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <arm_neon.h>
#define cast(t,p) ((t)p)
#define casts(p) cast(short,p)
#define castus(p) cast(unsigned short,p)
#define casti(p) cast(int,p)
#define castu(p) cast(unsigned,p)
#define GRID_SIZE 64
#define MAP_BITS (GRID_SIZE * GRID_SIZE)
#define MAP_WORDS (MAP_BITS / 64)
#define HEAP_CAPACITY MAP_BITS
struct as_ctx {
unsigned char was_found;
unsigned short heap_cnt;
unsigned short heap[HEAP_CAPACITY]; // [8 kbyte]
unsigned short g_scores[MAP_BITS]; // [8 kbyte]
unsigned short f_scores[MAP_BITS]; // [8 kbyte]
unsigned char parents[MAP_BITS]; // [4 kbyte]
unsigned long long closed[MAP_WORDS]; // [512 bytes]
} __attribute__((aligned(64)));
static void
as_heap_push(struct as_ctx *ctx, unsigned short node) {
ctx->heap[ctx->heap_cnt] = node;
int idx = ctx->heap_cnt;
// heap sift up
while (idx > 0) {
int parent = (idx - 1) >> 1;
if (ctx->f_scores[ctx->heap[idx]] >= ctx->f_scores[ctx->heap[parent]]) {
break;
}
int tmp = ctx->heap[idx];
ctx->heap[idx] = ctx->heap[parent];
ctx->heap[parent] = tmp;
idx = parent;
}
ctx->heap_cnt++;
}
static int
as_heap_pop(struct as_ctx *ctx) {
int res = ctx->heap[0];
ctx->heap_cnt--;
if (ctx->heap_cnt > 0) {
ctx->heap[0] = ctx->heap[ctx->heap_cnt];
int idx = 0;
// heap sift down
while (1) {
int lo = idx;
int lhs = 2 * idx + 1;
int rhs = 2 * idx + 2;
lo = (lhs < ctx->heap_cnt && ctx->f_scores[ctx->heap[lhs]] < ctx->f_scores[ctx->heap[lo]]) ? lhs : lo;
lo = (rhs < ctx->heap_cnt && ctx->f_scores[ctx->heap[rhs]] < ctx->f_scores[ctx->heap[lo]]) ? rhs : lo;
if (lo == idx) {
break;
}
int tmp = ctx->heap[idx];
ctx->heap[idx] = ctx->heap[lo];
ctx->heap[lo] = tmp;
idx = lo;
}
}
return res;
}
static inline int
as_heuristic(int x1, int y1, int x2, int y2) {
int dx = abs(x1 - x2);
int dy = abs(y1 - y2);
return (100 * (dx + dy) + (dx < dy ? dx : dy) * 141);
}
extern void
as_solve(struct as_ctx *ctx, const unsigned long long *map,
int start_x, int start_y, int goal_x, int goal_y) {
// Directions: {dx, dy, cost * 100}
static const unsigned char dir[8][3] = {
{ 0, -1, 100}, { 0, 1, 100}, {-1, 0, 100}, { 1, 0, 100},
{-1, -1, 141}, {-1, 1, 141}, { 1, -1, 141}, { 1, 1, 141}
};
int src = start_y * GRID_SIZE + start_x;
int dst = goal_y * GRID_SIZE + goal_x;
// Reset context with NEON
uint64x2_t zero = vdupq_n_u64(0);
for (int i = 0; i < MAP_WORDS; i += 2) {
vst1q_u64(&ctx->closed[i], zero);
}
uint16x8_t max = vdupq_n_u16(0xFFFF);
for (int i = 0; i < MAP_BITS; i += 8) {
vst1q_u16(&ctx->g_scores[i], max);
vst1q_u16(&ctx->f_scores[i], max);
}
ctx->heap_cnt = 0;
ctx->was_found = 0;
// Initialize start node
ctx->g_scores[src] = 0;
ctx->f_scores[src] = castus(as_heuristic(start_x, start_y, goal_x, goal_y));
as_heap_push(ctx, src);
while (ctx->heap_cnt > 0) {
int cur = as_heap_pop(ctx);
int cx = cur & 63;
int cy = cur >> 6;
ctx->closed[cy] |= (1ULL << (cx));
if (cur == dst) {
ctx->was_found = 1;
ctx->parents[MAP_BITS - 1] = cur;
return;
}
for (int i = 0; i < 8; i++) {
unsigned nx = castu(cx + dir[i][0]);
unsigned ny = castu(cy + dir[i][1]);
if (nx >= GRID_SIZE || ny >= GRID_SIZE) {
continue;
}
int n = ny * GRID_SIZE + nx;
int word = n >> 6;
int bit = n & 63;
if ((map[word] & (1ULL << bit)) | (ctx->closed[word] & (1ULL << bit))) {
continue;
}
int new_g = ctx->g_scores[cur] + dir[i][2];
if (new_g < ctx->g_scores[n]) {
ctx->parents[n] = i; // Store direction
ctx->g_scores[n] = new_g;
ctx->f_scores[n] = new_g + as_heuristic(nx, ny, goal_x, goal_y);
as_heap_push(ctx, n);
}
}
}
}
extern int
as_path(struct as_ctx *ctx, unsigned short *path) {
assert(ctx->was_found);
// Directions: {dx, dy}
static const char dir[8][2] = {
{ 0, -1}, { 0, 1}, {-1, 0}, { 1, 0},
{-1, -1}, {-1, 1}, { 1, -1}, { 1, 1}
};
// Reconstruct path from goal to start
int path_len = 0;
int node = ctx->parents[MAP_BITS - 1];
int x = (MAP_BITS - 1) & 63, y = (MAP_BITS - 1) >> 6;
while (ctx->g_scores[node] != 0) {
int didx = ctx->parents[node];
path[path_len++] = node;
x -= dir[didx][0];
y -= dir[didx][1];
node = y * GRID_SIZE + x;
}
path[path_len++] = node;
// Reverse path
for (int i = 0; i < path_len / 2; i++) {
int tmp = path[i];
path[i] = path[path_len - 1 - i];
path[path_len - 1 - i] = castus(tmp);
}
return path_len;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment