Skip to content

Instantly share code, notes, and snippets.

@scivision
Created June 28, 2017 17:51
Show Gist options
  • Save scivision/a28e8fd9df0ff3fe497211814fb4d980 to your computer and use it in GitHub Desktop.
Save scivision/a28e8fd9df0ff3fe497211814fb4d980 to your computer and use it in GitHub Desktop.
Josh Katz's version of wc http://blog.gravypod.com/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// A "chunk" of characters that can be pulled out of a buffer and handled
// at one time. Overlaying an int on a small char array lets code walk a
// byte buffer one chunk at a time while still being able to reach each
// individual character through `letters`.
typedef union {
// The chunk viewed as a single int.
int value;
// The chunk viewed as individual characters.
// NOTE(review): the hard-coded 4 presumably mirrors sizeof(int) -- confirm
// for targets where int is not 4 bytes wide.
char letters[4];
} chunk_t;
// Running totals (lines, words, chars) accumulated over every file that
// has been processed so far. One declaration per counter.
static int total_line_count = 0;
static int total_word_count = 0;
static int total_char_count = 0;
/*
 * Print the usage line for this program to standard output and terminate
 * the process with exit status 1. This function does not return.
 *
 * cmd: name to show for the program in the usage line.
 */
void help(const char *cmd)
{
    fprintf(stdout, "Usage: %s <--help|file...>\n", cmd);
    exit(1);
}
/*
 * Feed one character of the input stream to the word counter.
 *
 * A word is a maximal run of non-whitespace characters. The counter is
 * incremented when a word ENDS, i.e. when a whitespace character arrives
 * while the scanner is inside a word. The "inside a word" flag lives in a
 * function-local static, so it persists from one call to the next; a word
 * that is never followed by whitespace is therefore not counted until some
 * later call supplies a whitespace character.
 *
 * next:       the next character of the input.
 * word_count: counter that is incremented each time a word is completed.
 */
static inline void count_word(const char next, int * const word_count)
{
    static bool is_in_word = false;

    // The <ctype.h> classifiers require a value representable as
    // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
    // would otherwise be passed as negative values, which is undefined.
    const bool is_next_space = isspace((unsigned char) next) != 0;

    if (is_next_space) {
        // Whitespace closes the current word, if there is one.
        if (is_in_word) {
            *word_count += 1;
            is_in_word = false;
        }
    } else {
        // Any non-whitespace character starts or continues a word.
        is_in_word = true;
    }
}
/*
 * Increment *line_count by one when `next` is a newline character;
 * leave it unchanged for every other character.
 */
static inline void count_newl(const char next, int * const line_count)
{
    if (next == '\n') {
        ++*line_count;
    }
}
/*
 * Run the newline counter and the word counter over every character of
 * one chunk, in order.
 *
 * chunk:      the chunk whose characters are examined.
 * line_count: counter passed on to count_newl().
 * word_count: counter passed on to count_word().
 */
static inline void count_chunk(const chunk_t * const chunk, int * const line_count, int * const word_count)
{
    const char * const bytes = chunk->letters;
    const size_t byte_total = sizeof chunk->letters;

    // First pass: newlines in this chunk.
    for (size_t k = 0; k < byte_total; k++) {
        count_newl(bytes[k], line_count);
    }

    // Second pass: words. The characters are fed strictly in order because
    // the word counter is driven by the sequence of characters it sees.
    for (size_t k = 0; k < byte_total; k++) {
        count_word(bytes[k], word_count);
    }
}
/*
 * Count the lines, words and characters (bytes) of one file, print them as
 * "<lines> <words> <chars> <file_name>", and add them to the running totals.
 *
 * If the file cannot be opened or a read error occurs, a diagnostic is
 * written to stderr and neither the per-file line nor the totals are
 * updated.
 *
 * file_name: path of the file to read.
 */
static inline void print_stats(const char* file_name)
{
#define BUFFER_SIZE (1024 * 16)
    // Read buffer of roughly BUFFER_SIZE bytes. It is declared as an array
    // of chunks rather than as a char array cast to chunk_t*, so it is
    // guaranteed to be suitably aligned for chunk-at-a-time access. It is
    // static so that this much storage does not live on the stack.
    static chunk_t gbuffer[BUFFER_SIZE / sizeof(chunk_t)];
    // Byte-wise view of the same storage, used for fread() and for the
    // tail bytes that do not fill a whole chunk.
    char * const cbuffer = (char *) gbuffer;
    const size_t capacity = sizeof gbuffer;

    // Per-file counters.
    int line_count = 0, word_count = 0, char_count = 0;

    // Binary mode: every byte must be counted as-is, without any text-mode
    // translation of line endings.
    FILE *file = fopen(file_name, "rb");
    if (!file) {
        // Report the actual reason (missing file, permissions, ...).
        perror(file_name);
        return;
    }

    // Read until no more data is returned.
    size_t read_size;
    while ((read_size = fread(cbuffer, sizeof(char), capacity, file)) > 0) {
        // NOTE: the counters are int, so very large files can exceed
        // their range.
        char_count += (int) read_size;

        // Bulk part: whole chunks.
        const size_t whole_chunks = read_size / sizeof(chunk_t);
        for (size_t i = 0; i < whole_chunks; i++) {
            count_chunk(&gbuffer[i], &line_count, &word_count);
        }

        // Tail part: the bytes left over when the amount read is not a
        // multiple of the chunk size.
        for (size_t i = whole_chunks * sizeof(chunk_t); i < read_size; i++) {
            count_newl(cbuffer[i], &line_count);
            count_word(cbuffer[i], &word_count);
        }
    }

    // fread() returns 0 both at end-of-file and on error; tell them apart
    // before the stream is closed.
    const int read_failed = ferror(file);
    fclose(file);

    // Feed one whitespace character to the word counter. This counts a
    // final word that is not followed by whitespace and resets the
    // counter's in-word state so it cannot leak into the next file.
    count_word(' ', &word_count);

    if (read_failed) {
        fprintf(stderr, "%s: read error\n", file_name);
        return;
    }

    printf("%d %d %d %s\n", line_count, word_count, char_count, file_name);

    // Add this file's counts to the totals.
    total_line_count += line_count;
    total_word_count += word_count;
    total_char_count += char_count;
}
/*
 * Entry point. With no arguments, or with "--help" as the first argument,
 * prints the usage text (help() exits the process). Otherwise prints the
 * statistics of every file named on the command line, followed by a
 * "total" line when more than one file was given.
 */
int main(const int argc, const char *argv[])
{
    // argc < 2 (rather than argc == 1) also covers argc == 0, where
    // argv[1] does not exist and must not be read.
    if (argc < 2 || strcmp(argv[1], "--help") == 0) {
        help(argc > 0 ? argv[0] : "wc");
    }

    for (int i = 1; i < argc; i++) {
        print_stats(argv[i]);
    }

    // If more than one file was given, print the totals.
    if (argc - 1 > 1) {
        printf("%d %d %d total\n", total_line_count, total_word_count, total_char_count);
    }

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment