Created
June 28, 2017 17:51
-
-
Save scivision/a28e8fd9df0ff3fe497211814fb4d980 to your computer and use it in GitHub Desktop.
Josh Katz's version of wc http://blog.gravypod.com/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// A "chunk" is a group of four consecutive characters handled as one
// unit. The union lets the same bytes be viewed either as a single int
// (`value`) or as the individual characters (`letters`).
// NOTE(review): the overlay assumes sizeof(int) == 4 -- confirm for the
// target platform.
typedef union {
    int value;
    char letters[4];
} chunk_t;
// Running totals of lines, words, and characters over all files processed.
// These are plain ints, so they overflow once a total exceeds INT_MAX.
static int total_line_count = 0;
static int total_word_count = 0;
static int total_char_count = 0;
// Print the usage line for the program invoked as `cmd` on stdout, then
// terminate the process with exit status 1. This function does not return.
void help(const char *cmd)
{
    printf("Usage: %s <--help|file...>\n", cmd);
    exit(1);
}
// Feed one character of the input stream to the word counter.
//
// A word is a maximal run of non-whitespace characters. *word_count is
// incremented when a word ENDS, i.e. when the first whitespace character
// after a run of non-whitespace is seen. The "currently inside a word"
// flag is a function-local static, so it persists between calls; a word
// that is not yet followed by whitespace has not been counted.
//
//   next       - the next character of the stream
//   word_count - counter to increment when a word is completed
static inline void count_word(const char next, int * const word_count)
{
    static bool is_in_word = false;
    // <ctype.h> classifiers require a value representable as unsigned char
    // (or EOF). Plain char may be signed, so bytes >= 0x80 would otherwise
    // be passed as negative values, which is undefined behavior.
    const bool is_next_space = isspace((unsigned char) next) != 0;
    if (is_in_word && is_next_space)
    {
        *word_count += 1;
        is_in_word = false;
    }
    else if (!is_in_word && !is_next_space)
    {
        is_in_word = true;
    }
}
// Increment *line_count when `next` is a newline character ('\n');
// leave it unchanged for any other character.
static inline void count_newl(const char next, int * const line_count)
{
    if (next == '\n')
    {
        *line_count += 1;
    }
}
static inline void count_chunk(const chunk_t * const chunk, int * const line_count, int * const word_count) | |
{ | |
// Count all new lines. | |
count_newl(chunk->letters[0], line_count); | |
count_newl(chunk->letters[1], line_count); | |
count_newl(chunk->letters[2], line_count); | |
count_newl(chunk->letters[3], line_count); | |
// For each chunk, count the letters. | |
count_word(chunk->letters[0], word_count); | |
count_word(chunk->letters[1], word_count); | |
count_word(chunk->letters[2], word_count); | |
count_word(chunk->letters[3], word_count); | |
} | |
static inline void print_stats(const char* file_name) | |
{ | |
#define BUFFER_SIZE (1024 * 16) | |
// Make a character buffer for reading the chunks of the file into. | |
// Most of the time pages are 4k or 8k aligned so the buffer should | |
// be one of those sizes. This will make sure you don't ask for more | |
// data then the kernel is likely to have buffered for you. | |
static char cbuffer[BUFFER_SIZE]; | |
// Make a new way of looking at the character buffer. This lets you | |
// loop throuh and look at 4 characters at a time. This cuts down on | |
// the number of loops you are running and will let you eventually | |
// pipeline instructions for counting. | |
static chunk_t * const gbuffer = (chunk_t*) cbuffer; | |
// Keep track of all of the values we want to print. | |
int line_count = 0, word_count = 0, char_count = 0; | |
// File handling. We only want to read. | |
FILE* file = fopen(file_name, "r"); | |
if (!file) | |
{ | |
printf("No such file %s\n", file_name); | |
return; | |
} | |
// Read until we don't get any more data. | |
size_t read_size; | |
while ((read_size = fread(cbuffer, sizeof(char), BUFFER_SIZE, file))) | |
{ | |
// Count characters | |
char_count += read_size; | |
// Handle bulk chunks | |
for (int i = 0; i < (read_size / sizeof(chunk_t)); i++) | |
count_chunk(&gbuffer[i], &line_count, &word_count); | |
// Handle where N % 4 != 0. This is left over characters at the | |
// end of the buffer that exist if the file length wasn't divisable | |
// by sizeof(int) | |
for (int i = (read_size - (read_size % 4)); i < read_size; i++) | |
{ | |
count_newl(cbuffer[i], &line_count); | |
count_word(cbuffer[i], &word_count); | |
} | |
} | |
fclose(file); | |
printf("%d %d %d %s\n", line_count, word_count, char_count, file_name); | |
// Add to the total line count. | |
total_line_count += line_count; | |
total_word_count += word_count; | |
total_char_count += char_count; | |
} | |
int main(const int argc, const char *argv[]) | |
{ | |
if (argc == 1 || strcmp(argv[1], "--help") == 0) | |
help(argv[0]); | |
for (int i = 1; i < argc; i++) | |
print_stats(argv[i]); | |
// If more then one file print totals. | |
if (argc - 1 > 1) | |
printf("%d %d %d total\n", total_line_count, total_word_count, total_char_count); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment