Skip to content

Instantly share code, notes, and snippets.

@Silva97
Last active July 30, 2024 00:01
Show Gist options
  • Save Silva97/3d9731a7170012ca646991f6d6608e46 to your computer and use it in GitHub Desktop.
Save Silva97/3d9731a7170012ca646991f6d6608e46 to your computer and use it in GitHub Desktop.
Just an exercise in counting lines in text files using SSE
// gcc countlines.c -O2 -o countlines
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <emmintrin.h>
/* Number of bytes requested from read() per iteration. The expansion is
   parenthesized so it behaves as a single value inside larger expressions. */
#define BLOCK_SIZE (4096 * 2)
/**
 * Sum the 16 signed bytes starting at `array`.
 *
 * Each element is promoted to int before it is added, so the total cannot
 * wrap around the way an 8-bit accumulator would.
 *
 * @param array  Pointer to at least 16 readable int8_t values.
 * @return       The sum of array[0] through array[15].
 */
static inline __attribute__((always_inline))
int array_sum16(const int8_t *array)
{
    int total = 0;

    for (int i = 0; i < 16; i++)
    {
        total += array[i];
    }
    return total;
}
/**
 * Count the newline ('\n') bytes in the first `size` bytes of `block`.
 *
 * Whole 16-byte groups are compared against a vector of '\n' with SSE2. The
 * trailing `size % 16` bytes are checked one at a time, so no byte at or past
 * offset `size` is read. The function keeps no static state.
 *
 * @param block  Buffer to scan.
 * @param size   Number of valid bytes in `block`; values <= 0 yield 0.
 * @return       Number of '\n' bytes found.
 */
unsigned long long int count_block_lines(__m128i *block, ssize_t size)
{
    if (size <= 0)
    {
        return 0;
    }

    const size_t total = (size_t) size;
    const size_t vector_count = total / sizeof (__m128i);
    const __m128i newlines = _mm_set1_epi8('\n');
    unsigned long long int nlines = 0;

    for (size_t i = 0; i < vector_count; i++)
    {
        /* Unaligned load: the result does not depend on how `block` is aligned. */
        __m128i chunk = _mm_loadu_si128(&block[i]);
        __m128i matches = _mm_cmpeq_epi8(newlines, chunk);

        /* movemask yields one bit per byte lane, set where the byte is '\n'. */
        unsigned int mask = (unsigned int) _mm_movemask_epi8(matches);
        nlines += (unsigned int) __builtin_popcount(mask);
    }

    /* Scalar pass over the bytes that do not fill a whole vector. */
    const char *bytes = (const char *) block;
    for (size_t i = vector_count * sizeof (__m128i); i < total; i++)
    {
        nlines += (bytes[i] == '\n');
    }
    return nlines;
}
/**
 * Count the '\n' bytes read from `fd` until read() reports end of file or fails.
 *
 * @param fd  Open file descriptor to read from.
 * @return    Number of newlines seen. If read() fails, the error is reported on
 *            stderr via perror() and the count accumulated so far is returned.
 */
unsigned long long int count_file_lines(int fd)
{
    /* BLOCK_SIZE is a byte count, so the vector array is sized in bytes too
       rather than holding BLOCK_SIZE whole vectors. */
    static __m128i block[(BLOCK_SIZE) / sizeof (__m128i)];
    unsigned long long int nlines = 0;
    ssize_t buffer_size;

    while ((buffer_size = read(fd, block, sizeof block)) > 0)
    {
        nlines += count_block_lines(block, buffer_size);
    }

    if (buffer_size < 0)
    {
        /* Do not let an I/O error pass for a normal end of file. */
        perror("Error on read file");
    }
    return nlines;
}
/**
 * Entry point: print the number of newlines in the file named by argv[1].
 *
 * @return EXIT_SUCCESS after printing the count; EXIT_FAILURE when no path is
 *         given or the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        fputs("Usage: ./countlines <filepath>\n", stderr);
        /* A missing argument is a usage error, so signal failure to the caller. */
        return EXIT_FAILURE;
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd < 0)
    {
        perror("Error on open file");
        return EXIT_FAILURE;
    }

    printf("%llu\n", count_file_lines(fd));
    close(fd);
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment