Last active
July 30, 2024 00:01
-
-
Save Silva97/3d9731a7170012ca646991f6d6608e46 to your computer and use it in GitHub Desktop.
Just an exercise in counting lines in text files using SSE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build: gcc countlines.c -O2 -o countlines
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <unistd.h> | |
#include <fcntl.h> | |
#include <emmintrin.h> | |
/* Number of bytes requested from read() on each iteration (8 KiB).
 * Parenthesized so the macro behaves as a single value inside larger
 * expressions such as `x / BLOCK_SIZE`. */
#define BLOCK_SIZE (4096 * 2)
/**
 * Sum 16 consecutive signed bytes.
 *
 * Every element is promoted to int before being added, so the result lies in
 * [-2048, 2032] and cannot overflow.
 *
 * @param array  Pointer to at least 16 readable int8_t values; not modified.
 * @return       array[0] + array[1] + ... + array[15].
 */
static inline int array_sum16(const int8_t *array)
{
    int sum = 0;

    for (int i = 0; i < 16; i++)
    {
        sum += array[i];
    }
    return sum;
}
/**
 * Count the '\n' bytes in the first `size` bytes of `block`.
 *
 * Whole 16-byte vectors are compared against a vector of newlines with SSE2;
 * the 0-15 bytes left over are examined one at a time, so no byte at or
 * beyond offset `size` is ever read.
 *
 * @param block  Start of the buffer to scan; it is read with unaligned loads.
 * @param size   Number of valid bytes in the buffer. Values <= 0 yield 0.
 * @return       Number of newline bytes found.
 */
unsigned long long int count_block_lines(__m128i *block, ssize_t size)
{
    if (size <= 0)
    {
        return 0;
    }

    const __m128i vector_lines = _mm_set1_epi8('\n');
    const size_t vector_count = (size_t) size / sizeof (__m128i);
    const size_t tail_size = (size_t) size % sizeof (__m128i);
    unsigned long long int nlines = 0;

    for (size_t i = 0; i < vector_count; i++)
    {
        __m128i chunk = _mm_loadu_si128(block + i);
        __m128i matches = _mm_cmpeq_epi8(chunk, vector_lines);

        /* movemask packs the top bit of each byte into a 16-bit mask:
         * one set bit per byte that compared equal to '\n'. */
        unsigned int mask = (unsigned int) _mm_movemask_epi8(matches);
        nlines += (unsigned int) __builtin_popcount(mask);
    }

    /* Scan the partial trailing vector bytewise instead of loading 16 bytes. */
    const unsigned char *tail = (const unsigned char *) (block + vector_count);
    for (size_t i = 0; i < tail_size; i++)
    {
        nlines += (tail[i] == '\n');
    }
    return nlines;
}
unsigned long long int count_file_lines(int fd) | |
{ | |
static __m128i block[BLOCK_SIZE]; | |
ssize_t buffer_size; | |
unsigned long long int nlines = 0; | |
while ((buffer_size = read(fd, block, BLOCK_SIZE)) > 0) | |
{ | |
nlines += count_block_lines(block, buffer_size); | |
} | |
return nlines; | |
} | |
/**
 * Entry point: print the number of newline bytes in the file named by argv[1].
 *
 * @return EXIT_SUCCESS after printing the count; EXIT_FAILURE when no path
 *         was given or the file could not be opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        fputs("Usage: ./countlines <filepath>\n", stderr);
        /* A missing argument is a usage error, so report failure to the caller. */
        return EXIT_FAILURE;
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd < 0)
    {
        perror("Error on open file");
        return EXIT_FAILURE;
    }

    printf("%llu\n", count_file_lines(fd));
    close(fd);
    return EXIT_SUCCESS;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment