To overwrite every LDR qN, [xN, offset] instruction with MOVI vN.16b, 0, run:
hack_arm_binary some_aarch64_binary_file neon-load-to-movi-zero
| #include <fcntl.h> | |
| #include <sys/mman.h> | |
| #include <sys/stat.h> | |
| #include <unistd.h> | |
| #include <algorithm> | |
| #include <cstdint> | |
| #include <cstdio> | |
| #include <cstdlib> | |
| #include <cstring> | |
| #include <functional> | |
namespace {

// Encoding facts for the one instruction flavor this tool recognizes:
// "LDR qN" with an unsigned immediate offset. The top 10 bits of the 32-bit
// instruction word identify it, bits 10--21 hold the 12-bit immediate offset
// and the low 5 bits hold the destination register.
constexpr std::uint32_t kLdrQImmOpcodeMask = 0xffc00000u;
constexpr std::uint32_t kLdrQImmOpcodeBits = 0x3dc00000u;
constexpr std::uint32_t kImm12Mask = 0x3ffc00u;
constexpr unsigned kImm12Shift = 10;
constexpr std::uint32_t kImm12Max = 0xfffu;
constexpr std::uint32_t kDestRegisterMask = 0x1fu;
// MOVI writing 0 to a 16b vector register, with destination register bits 0.
constexpr std::uint32_t kMoviZero16b = 0x4f00e400u;

// Returns true if `w` is a LDR qN with immediate unsigned offset.
bool IsLdrQUnsignedImm(std::uint32_t w) {
  return (w & kLdrQImmOpcodeMask) == kLdrQImmOpcodeBits;
}

// Extracts the 12-bit immediate offset field of instruction word `w`.
std::uint32_t GetImm12(std::uint32_t w) {
  return (w & kImm12Mask) >> kImm12Shift;
}

// Returns `w` with its 12-bit immediate offset field replaced by `imm12`.
// `imm12` is masked to 12 bits so that it can never leak into opcode bits.
std::uint32_t WithImm12(std::uint32_t w, std::uint32_t imm12) {
  return (w & ~kImm12Mask) | ((imm12 & kImm12Max) << kImm12Shift);
}

// Parses `text` as a base-10 integer. Prints an error mentioning `op_name` and
// exits if `text` is empty or has trailing non-numeric characters. Values that
// overflow `long` saturate (strtol behavior); callers clamp to their own range.
long ParseIntegerArgOrDie(const char* op_name, const char* text) {
  char* end = nullptr;
  const long value = strtol(text, &end, 10);
  if (end == text || *end != '\0') {
    fprintf(stderr, "%s: invalid integer argument: %s\n", op_name, text);
    exit(EXIT_FAILURE);
  }
  return value;
}

}  // namespace

// Entry point. Usage: argv[1] is the file to patch in place, argv[2] names the
// operation and argv[3] is the optional operation argument.
//
// The file is memory-mapped read/write and scanned as a flat array of 32-bit
// words (any trailing 1-3 bytes are ignored). Every word that the selected
// operation changes is overwritten; the first 100 replacements are logged to
// stderr, followed by the total count. Returns EXIT_SUCCESS on success and
// exits with EXIT_FAILURE on bad arguments or I/O errors.
int main(int argc, char* argv[]) {
  if (argc != 3 && argc != 4) {
    fprintf(stderr, R"TXT(
usage: %s filename operation [operation_argument]
applies the given operation in-place on the given file.
Available operations:
neon-load-to-movi-zero
Turns certain flavors of NEON load instructions (not exhaustive, could be
extended) into MOVI filling the destination register with immediate 0.
neon-load-shift-offset N
Right-shifts the immediate offsets in certain flavors of NEON load
instructions (not exhaustive, could be extended) by N bits.
neon-load-offset-offset N
Offsets the immediate offsets in certain flavors of NEON load
instructions (not exhaustive, could be extended) by N. Note: this is
in typically not bytes. The effective byte amount is specific to each
encoding.
)TXT",
            argv[0]);
    exit(EXIT_FAILURE);
  }
  const char* filename = argv[1];
  const char* op_name = argv[2];
  const char* op_arg = argc > 3 ? argv[3] : nullptr;

  // Maps an instruction word to its replacement; returns the word unchanged
  // when the operation does not apply to it.
  std::function<std::uint32_t(std::uint32_t)> op_func;
  if (!strcmp(op_name, "neon-load-to-movi-zero")) {
    op_func = [](std::uint32_t w) -> std::uint32_t {
      if (!IsLdrQUnsignedImm(w)) {
        return w;
      }
      // Keep the low 5 bits indicating the destination register,
      // change the remaining bits to make this a MOVI writing 0.
      return (w & kDestRegisterMask) | kMoviZero16b;
    };
  } else if (!strcmp(op_name, "neon-load-shift-offset")) {
    if (!op_arg) {
      fprintf(
          stderr,
          "neon-load-shift-offset requires an argument (right shift amount)\n");
      exit(EXIT_FAILURE);
    }
    const long requested_shift = ParseIntegerArgOrDie(op_name, op_arg);
    if (requested_shift < 0) {
      fprintf(stderr,
              "neon-load-shift-offset requires a non-negative shift amount, "
              "got %ld\n",
              requested_shift);
      exit(EXIT_FAILURE);
    }
    // Shifting a 32-bit value by 32 or more is undefined behavior. Any shift
    // of 12 or more already turns the 12-bit offset into 0, so capping at 31
    // keeps the result identical while staying well-defined.
    const unsigned right_shift_amount =
        static_cast<unsigned>(std::min(requested_shift, 31L));
    op_func = [right_shift_amount](std::uint32_t w) -> std::uint32_t {
      if (!IsLdrQUnsignedImm(w)) {
        return w;
      }
      return WithImm12(w, GetImm12(w) >> right_shift_amount);
    };
  } else if (!strcmp(op_name, "neon-load-offset-offset")) {
    if (!op_arg) {
      fprintf(stderr,
              "neon-load-offset-offset requires an argument (offset amount)\n");
      exit(EXIT_FAILURE);
    }
    const long requested_offset = ParseIntegerArgOrDie(op_name, op_arg);
    // The field only holds 0..4095, so a delta outside [-4095, 4095] cannot
    // produce a different clamped result; limiting it here also rules out
    // signed overflow in the addition below.
    const long max_delta = static_cast<long>(kImm12Max);
    const std::int32_t offset_amount = static_cast<std::int32_t>(
        std::max(-max_delta, std::min(max_delta, requested_offset)));
    op_func = [offset_amount](std::uint32_t w) -> std::uint32_t {
      if (!IsLdrQUnsignedImm(w)) {
        return w;
      }
      // Signed arithmetic so that a negative delta can go below zero before
      // being clamped.
      const std::int32_t old_offset = static_cast<std::int32_t>(GetImm12(w));
      const std::int32_t unclamped = old_offset + offset_amount;
      // Clamp to the representable 12-bit range on BOTH ends: a value above
      // 4095 would otherwise overflow into the opcode bits.
      const std::int32_t clamped = std::max<std::int32_t>(
          0, std::min<std::int32_t>(static_cast<std::int32_t>(kImm12Max),
                                    unclamped));
      return WithImm12(w, static_cast<std::uint32_t>(clamped));
    };
  } else {
    fprintf(stderr, "unrecognized operation argument: %s\n", op_name);
    exit(EXIT_FAILURE);
  }

  const int fd = open(filename, O_RDWR);
  if (fd == -1) {
    fprintf(stderr, "failed to open %s for read/write\n", filename);
    exit(EXIT_FAILURE);
  }
  struct stat s;
  if (fstat(fd, &s) == -1) {
    fprintf(stderr, "failed to stat %s\n", filename);
    close(fd);
    exit(EXIT_FAILURE);
  }
  const std::size_t size = static_cast<std::size_t>(s.st_size);
  const std::size_t word_count = size / sizeof(std::uint32_t);
  std::size_t occurrences = 0;
  if (word_count == 0) {
    // Nothing to scan. Skip mmap entirely: a zero-length mapping is an error,
    // which would otherwise be misreported as a failure to map the file.
    fprintf(stderr, "replaced %zu occurences\n", occurrences);
    close(fd);
    return EXIT_SUCCESS;
  }

  void* mapping =
      mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (mapping == MAP_FAILED) {
    fprintf(stderr, "failed to map %s\n", filename);
    close(fd);
    exit(EXIT_FAILURE);
  }

  std::uint32_t* data_uint32 = static_cast<std::uint32_t*>(mapping);
  for (std::size_t i = 0; i < word_count; i++) {
    std::uint32_t& w = data_uint32[i];
    const std::uint32_t new_w = op_func(w);
    if (new_w == w) {
      continue;
    }
    // Only the first 100 replacements are logged individually.
    if (occurrences < 100) {
      fprintf(stderr, "replaced %x -> %x\n", static_cast<unsigned>(w),
              static_cast<unsigned>(new_w));
    }
    if (occurrences == 100) {
      fprintf(stderr, "(more occurrences not reported)\n");
    }
    w = new_w;
    occurrences++;
  }
  fprintf(stderr, "replaced %zu occurences\n", occurrences);

  // Flush the modified pages to the file and report a failure instead of
  // silently claiming success.
  const bool synced = msync(mapping, size, MS_SYNC) == 0;
  if (!synced) {
    fprintf(stderr, "failed to sync changes to %s\n", filename);
  }
  munmap(mapping, size);
  close(fd);
  return synced ? EXIT_SUCCESS : EXIT_FAILURE;
}