Created
September 27, 2023 23:44
-
-
Save lbguilherme/b19bde8f6db233fad9e8195086281873 to your computer and use it in GitHub Desktop.
Sum all numbers of a file, separated by "\n"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
// Size in bytes of the file most recently mapped by mmap_file();
// 0 if nothing has been mapped or the last attempt failed.
off_t total_size = 0;

// Maps the file `name` read-only into memory and records its size in
// `total_size`.
//
// Returns a pointer to the first byte of the mapping, or NULL when the file
// cannot be opened, its size cannot be determined, it is empty (a zero-length
// mapping is not possible), or the mapping itself fails. On failure
// `total_size` is left at 0.
//
// The mapping is private and read-only; the caller owns it and may release it
// with munmap(ptr, total_size).
char* mmap_file(const char* name) {
    total_size = 0;

    int fd = open(name, O_RDONLY);
    if (fd < 0) {
        return NULL;
    }

    struct stat st;
    if (fstat(fd, &st) != 0 || st.st_size <= 0) {
        close(fd);
        return NULL;
    }

    void* mapping = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

    // The mapping stays valid after the descriptor is closed, so release the
    // descriptor on every path instead of leaking it.
    close(fd);

    if (mapping == MAP_FAILED) {
        return NULL;
    }

    total_size = st.st_size;
    return (char*)mapping;
}
// Adapted from
// https://github.com/KholdStare/qnd-integer-parsing-experiments/blob/master/parsing.hh#L93
//
// Converts the eight bytes starting at `string` into the number they spell
// as decimal digits, without a per-character loop. The eight bytes are loaded
// into one 64-bit word and neighbouring lanes are merged three times
// (1 -> 2 -> 4 -> 8 digits).
//
// The byte at `string[0]` is taken from the low-order end of the loaded word,
// so the result is only meaningful on little-endian hosts. Only the low
// nibble of each byte is used; the input is not validated, and exactly eight
// bytes are always read.
inline uint64_t parse_8_chars(const char* string) noexcept {
    uint64_t word = 0;
    memcpy(&word, string, sizeof(word));

    // Stage 1: eight single digits -> four two-digit values (16-bit lanes).
    const uint64_t digit_mask = 0x000f000f000f000f;
    const uint64_t pairs = ((word >> 8) & digit_mask) + (word & digit_mask) * 10;

    // Stage 2: four two-digit values -> two four-digit values (32-bit lanes).
    const uint64_t pair_mask = 0x000000ff000000ff;
    const uint64_t quads = ((pairs >> 16) & pair_mask) + (pairs & pair_mask) * 100;

    // Stage 3: two four-digit values -> the final eight-digit value.
    const uint64_t quad_mask = 0x000000000000ffff;
    return ((quads >> 32) & quad_mask) + (quads & quad_mask) * 10000;
}
int main() { | |
char* data = mmap_file("file"); | |
uint64_t sum = 0; | |
uint64_t i = 0; | |
while (data[i]) { | |
uint64_t value = 0; | |
int g = 0; | |
// https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord | |
uint64_t first8 = *(uint64_t*)(data + i) ^ 0x0A0A0A0A0A0A0A0AULL; | |
if (!((first8 - 0x0101010101010101ULL) & (~first8) & 0x8080808080808080ULL)) { | |
value = parse_8_chars(data + i); | |
i += 8; | |
g = 8; | |
} | |
while (data[i] != '\n') { | |
value = value * 10 + data[i] - '0'; | |
i++; | |
} | |
i++; | |
sum += value; | |
} | |
printf("%lld\n", sum); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment