Micrified · April 23, 2019 18:10
diff --git a/wordcount.c b/wordcount.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
 #include <string.h>
 #include <ctype.h>


 /* 
 * Author: <elided>
 * Student Number: <elided>
 * Date Written: 23/04/2019
 * Course: <elided> (Embedded Systems Lab)
 *
 * Complexity: ~ nlog(n)
 */


 /*
 *******************************************************************************
 *                             Symbolic Constants                              *
 *******************************************************************************
 */


 // Maximum number of bytes buffered at a time
 #define READ_BUF_SIZE		4096

 // Default starting (expected) word size
 #define DEF_WORD_SIZE		8

 // The range (root bucket count) of the hash table (nodes dynamically allocated)
 #define HASH_TAB_SIZE		1024

 // Panic abort sequence
 #define PANIC	{ fprintf(stderr, "%s:%d Panic: %s\n", \
 				__FILE__, __LINE__, strerror(errno)); exit(-1); }


 /*
 *******************************************************************************
 *                              Type Definitions                               *
 *******************************************************************************
 */


 // Describes a node used in the hash-table linked-list.
 typedef struct node {
 	const char *word;
 	unsigned int count;
 	struct node *next;
 } node;


 /*
 *******************************************************************************
 *                              Global Variables                               *
 *******************************************************************************
 */


 // The temporary resizeable word buffer
 unsigned char *temp_word_buf;

 // The temporary resizable word buffer length and capacity
 ssize_t temp_word_buf_len, temp_word_buf_cap;

 // The hash table (fixed number of pointers to linked-lists of nodes)
 node *hash_table[HASH_TAB_SIZE];


 /*
 *******************************************************************************
 *                       Temporary Word Buffer Functions                       *
 *******************************************************************************
 */


 // Stores a single word used when buffering (dynamically allocated)
 void reset_word_buffer (void) {
 	if (temp_word_buf == NULL) {
 		temp_word_buf = malloc(DEF_WORD_SIZE * sizeof(unsigned char));
 		if (temp_word_buf == NULL) {
 			PANIC
 		}
 		temp_word_buf_cap = DEF_WORD_SIZE;
 	}
 	temp_word_buf_len = 0;
 }

 // Appends a character to the dynamically resizable word buffer
 void append_word_buffer (unsigned char c) {
 	if (temp_word_buf_len >= temp_word_buf_cap) {
 		temp_word_buf_cap *= 2;
 		temp_word_buf = realloc(temp_word_buf, temp_word_buf_cap * sizeof(unsigned char));
 		if (temp_word_buf == NULL) {
 			PANIC
 		}
 	}
 	temp_word_buf[temp_word_buf_len++] = c;
 }

 // Frees the word buffer. Must only be called once at the end
 void free_word_buffer (void) {
 	free(temp_word_buf);
 }


 /*
 *******************************************************************************
 *                          Buffered Reader Functions                          *
 *******************************************************************************
 */


 // Reads max READ_BUF_SIZE bytes. Returns bytes read
 ssize_t bufferedread (int fd, unsigned char **bp) {
 	static unsigned char b[READ_BUF_SIZE];

 	// Read bytes straight from stdin
 	ssize_t s = read(fd, (void *)b, READ_BUF_SIZE);

 	// Abort on error
 	if (s == -1 || bp == NULL) {
 		PANIC
 	}

 	// Update pointer parameter, return size read
 	*bp = b;
 	return s;
 }

 // Reads the next alphanumeric word on input. Returns pointer to dynamically allocated word.
 // If you want to keep this word between calls - it must be copied.
 const char *readword () {
 	static unsigned char *b;
 	static ssize_t i, count;

 	// Initialize if necessary
 	if (b == NULL) {
 		count = bufferedread(STDIN_FILENO, &b);
 		i = 0;
 	}

 	// If there is nothing to read, abort
 	if (count == 0) {
 		return NULL;
 	}

 	// Otherwise drop non-alphanumeric until next alphanumeric
 	do {
 		for ( ; i < count && !isalnum(b[i]); ++i);
 		if (i >= count) {
 			count = bufferedread(STDIN_FILENO, &b);
 			i = 0;
 		} else {
 			break;
 		}
 	} while (count > 0);

 	// If there is nothing to read, abort
 	if (count == 0) {
 		return NULL;
 	}

 	// Now we are certain of at least one alphanumeric character on input.
 	// (i < count) for sure, and isalnum(b[i]) for sure. So we store that
 	reset_word_buffer();
 	append_word_buffer(b[i]);
 	i += 1;
 	do {
 		for ( ; i < count && isalnum(b[i]); ++i) {
 			append_word_buffer(b[i]);
 		}
 		if (i >= count) {
 			count = bufferedread(STDIN_FILENO, &b);
 			i = 0;
 		} else {
 			break;
 		}
 	} while (count > 0);

 	// Append the null-byte
 	append_word_buffer('\0');

 	// Return the temporary word buffer
 	return (const char *)temp_word_buf;
 }


 /*
 *******************************************************************************
 *                            Hash Table Functions                             *
 *******************************************************************************
 */


 // The DJB2 hashing algorithm. Source: http://www.cse.yorku.ca/~oz/hash.html
 unsigned long djb2 (const char *s) {
 	unsigned long h = 5381;
 	int c;
 	while ((c = *(s++)) != '\0') h = ((h << 5) + h) + c;
 	return h;
 }


 // Allocates a new node, making a copy of the word in the process. Returns ptr
 node *make_node (const char *word) {

 	// Allocate the node
 	node *n = malloc(sizeof(node));
 	if (n == NULL) {
 		PANIC
 	}

 	// Allocate copy of the word (remember + 1 for NULL byte!)
 	size_t copy_size = ((strlen(word) + 1) * sizeof(char));
 	const char *copy = malloc(copy_size);
 	if (copy == NULL) {
 		PANIC
 	} else {
 		memcpy((void *)copy, (void *)word, copy_size);
 	}

 	// Set all fields.
 	*n = (node){.word = copy, .count = 1, .next = NULL};

 	// Return the pointer
 	return n;
 }

 // Frees a single node
 void free_node (node *n) {
 	if (n == NULL) {
 		return;
 	}
 	free((char *)(n->word));
 	free(n);
 }

 // Inserts a word alphabetically into a linked-list. Returns list head
 node *link_word_alpha (const char *word, node *n) {
 	int ord;
 	node *new;

 	// Case: Must make a new node since none is present
 	if (n == NULL) {
 		return make_node(word);
 	}
 	
 	// Set the lexicographical ordering value
 	ord = strcmp(word, n->word);

 	// Case: Word goes after that of the current node
 	if (ord > 0) {
 		n->next = link_word_alpha(word, n->next);
 		return n;
 	}

 	// Case: Word goes before that of the current node
 	if (ord < 0) {
 		new = make_node(word);
 		new->next = n;
 		return new;
 	}

 	// Case: Word is the same as the current node
 	n->count++;
 	return n;
 }

 // Inserts a given word in the hash table. Lists are alphabetically ordered
 void insert_word (const char *word) {
 	unsigned long index = djb2(word) % HASH_TAB_SIZE;
 	hash_table[index] = link_word_alpha(word, hash_table[index]);
 }

 // Outputs all words in lexicographical order from the hash table
 void dump_words_lexicographically (void) {
 	unsigned long min_index, i;

 	while (1) {
 	
 		// Find first valid node
 		for (min_index = 0; 
 			 min_index < HASH_TAB_SIZE && hash_table[min_index] == NULL; 
 			 min_index++);

 		// If none were found: break the loop
 		if (min_index >= HASH_TAB_SIZE) {
 			break;
 		}

 		// Otherwise we search for any words with smaller lexicographic value
 		for (i = min_index + 1; i < HASH_TAB_SIZE; ++i) {
 			if (hash_table[i] != NULL && strcmp(hash_table[min_index]->word, hash_table[i]->word) > 0) {
 				min_index = i;
 			}
 		}

 		// Output that word
 		fprintf(stdout, "%s: %u\n", hash_table[min_index]->word, 
 			hash_table[min_index]->count);

 		// Remove the word from the linked list, leaving its successor in place
 		node *next = hash_table[min_index]->next;
 		free_node(hash_table[min_index]);
 		hash_table[min_index] = next;
 	}
 }


 /*
 *******************************************************************************
 *                                    Main                                     *
 *******************************************************************************
 */


 int main (int argc, const char *argv[]) {
 	const char *word;

 	// While we read value words, insert them into the hash table
 	while ((word = readword()) != NULL) {
 		insert_word(word);
 	}

 	// Dump words lexicographically
 	dump_words_lexicographically();

 	// Free the temporary word buffer
 	free_word_buffer();

 	return EXIT_SUCCESS;
 }
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <errno.h>
	#include <string.h>
	#include <ctype.h>


	/*
	* Author: <elided>
	* Student Number: <elided>
	* Date Written: 23/04/2019
	* Course: <elided> (Embedded Systems Lab)
	*
	* Complexity: ~ nlog(n)
	*/


	/*
	*******************************************************************************
	* Symbolic Constants *
	*******************************************************************************
	*/


	// Maximum number of bytes buffered at a time
	#define READ_BUF_SIZE 4096

	// Default starting (expected) word size
	#define DEF_WORD_SIZE 8

	// The range (root bucket count) of the hash table (nodes dynamically allocated)
	#define HASH_TAB_SIZE 1024

	// Panic abort sequence
	#define PANIC { fprintf(stderr, "%s:%d Panic: %s\n", \
	__FILE__, __LINE__, strerror(errno)); exit(-1); }


	/*
	*******************************************************************************
	* Type Definitions *
	*******************************************************************************
	*/


	// Describes a node used in the hash-table linked-list.
	typedef struct node {
	const char *word;
	unsigned int count;
	struct node *next;
	} node;


	/*
	*******************************************************************************
	* Global Variables *
	*******************************************************************************
	*/


	// The temporary resizeable word buffer
	unsigned char *temp_word_buf;

	// The temporary resizable word buffer length and capacity
	ssize_t temp_word_buf_len, temp_word_buf_cap;

	// The hash table (fixed number of pointers to linked-lists of nodes)
	node *hash_table[HASH_TAB_SIZE];


	/*
	*******************************************************************************
	* Temporary Word Buffer Functions *
	*******************************************************************************
	*/


	// Stores a single word used when buffering (dynamically allocated)
	void reset_word_buffer (void) {
	if (temp_word_buf == NULL) {
	temp_word_buf = malloc(DEF_WORD_SIZE * sizeof(unsigned char));
	if (temp_word_buf == NULL) {
	PANIC
	}
	temp_word_buf_cap = DEF_WORD_SIZE;
	}
	temp_word_buf_len = 0;
	}

	// Appends a character to the dynamically resizable word buffer
	void append_word_buffer (unsigned char c) {
	if (temp_word_buf_len >= temp_word_buf_cap) {
	temp_word_buf_cap *= 2;
	temp_word_buf = realloc(temp_word_buf, temp_word_buf_cap * sizeof(unsigned char));
	if (temp_word_buf == NULL) {
	PANIC
	}
	}
	temp_word_buf[temp_word_buf_len++] = c;
	}

	// Frees the word buffer. Must only be called once at the end
	void free_word_buffer (void) {
	free(temp_word_buf);
	}


	/*
	*******************************************************************************
	* Buffered Reader Functions *
	*******************************************************************************
	*/


	// Reads max READ_BUF_SIZE bytes. Returns bytes read
	ssize_t bufferedread (int fd, unsigned char **bp) {
	static unsigned char b[READ_BUF_SIZE];

	// Read bytes straight from stdin
	ssize_t s = read(fd, (void *)b, READ_BUF_SIZE);

	// Abort on error
	if (s == -1 \|\| bp == NULL) {
	PANIC
	}

	// Update pointer parameter, return size read
	*bp = b;
	return s;
	}

	// Reads the next alphanumeric word on input. Returns pointer to dynamically allocated word.
	// If you want to keep this word between calls - it must be copied.
	const char *readword () {
	static unsigned char *b;
	static ssize_t i, count;

	// Initialize if necessary
	if (b == NULL) {
	count = bufferedread(STDIN_FILENO, &b);
	i = 0;
	}

	// If there is nothing to read, abort
	if (count == 0) {
	return NULL;
	}

	// Otherwise drop non-alphanumeric until next alphanumeric
	do {
	for ( ; i < count && !isalnum(b[i]); ++i);
	if (i >= count) {
	count = bufferedread(STDIN_FILENO, &b);
	i = 0;
	} else {
	break;
	}
	} while (count > 0);

	// If there is nothing to read, abort
	if (count == 0) {
	return NULL;
	}

	// Now we are certain of at least one alphanumeric character on input.
	// (i < count) for sure, and isalnum(b[i]) for sure. So we store that
	reset_word_buffer();
	append_word_buffer(b[i]);
	i += 1;
	do {
	for ( ; i < count && isalnum(b[i]); ++i) {
	append_word_buffer(b[i]);
	}
	if (i >= count) {
	count = bufferedread(STDIN_FILENO, &b);
	i = 0;
	} else {
	break;
	}
	} while (count > 0);

	// Append the null-byte
	append_word_buffer('\0');

	// Return the temporary word buffer
	return (const char *)temp_word_buf;
	}


	/*
	*******************************************************************************
	* Hash Table Functions *
	*******************************************************************************
	*/


	// The DJB2 hashing algorithm. Source: http://www.cse.yorku.ca/~oz/hash.html
	unsigned long djb2 (const char *s) {
	unsigned long h = 5381;
	int c;
	while ((c = *(s++)) != '\0') h = ((h << 5) + h) + c;
	return h;
	}


	// Allocates a new node, making a copy of the word in the process. Returns ptr
	node make_node (const char word) {

	// Allocate the node
	node *n = malloc(sizeof(node));
	if (n == NULL) {
	PANIC
	}

	// Allocate copy of the word (remember + 1 for NULL byte!)
	size_t copy_size = ((strlen(word) + 1) * sizeof(char));
	const char *copy = malloc(copy_size);
	if (copy == NULL) {
	PANIC
	} else {
	memcpy((void )copy, (void )word, copy_size);
	}

	// Set all fields.
	*n = (node){.word = copy, .count = 1, .next = NULL};

	// Return the pointer
	return n;
	}

	// Frees a single node
	void free_node (node *n) {
	if (n == NULL) {
	return;
	}
	free((char *)(n->word));
	free(n);
	}

	// Inserts a word alphabetically into a linked-list. Returns list head
	node link_word_alpha (const char word, node *n) {
	int ord;
	node *new;

	// Case: Must make a new node since none is present
	if (n == NULL) {
	return make_node(word);
	}

	// Set the lexicographical ordering value
	ord = strcmp(word, n->word);

	// Case: Word goes after that of the current node
	if (ord > 0) {
	n->next = link_word_alpha(word, n->next);
	return n;
	}

	// Case: Word goes before that of the current node
	if (ord < 0) {
	new = make_node(word);
	new->next = n;
	return new;
	}

	// Case: Word is the same as the current node
	n->count++;
	return n;
	}

	// Inserts a given word in the hash table. Lists are alphabetically ordered
	void insert_word (const char *word) {
	unsigned long index = djb2(word) % HASH_TAB_SIZE;
	hash_table[index] = link_word_alpha(word, hash_table[index]);
	}

	// Outputs all words in lexicographical order from the hash table
	void dump_words_lexicographically (void) {
	unsigned long min_index, i;

	while (1) {

	// Find first valid node
	for (min_index = 0;
	min_index < HASH_TAB_SIZE && hash_table[min_index] == NULL;
	min_index++);

	// If none were found: break the loop
	if (min_index >= HASH_TAB_SIZE) {
	break;
	}

	// Otherwise we search for any words with smaller lexicographic value
	for (i = min_index + 1; i < HASH_TAB_SIZE; ++i) {
	if (hash_table[i] != NULL && strcmp(hash_table[min_index]->word, hash_table[i]->word) > 0) {
	min_index = i;
	}
	}

	// Output that word
	fprintf(stdout, "%s: %u\n", hash_table[min_index]->word,
	hash_table[min_index]->count);

	// Remove the word from the linked list, leaving its successor in place
	node *next = hash_table[min_index]->next;
	free_node(hash_table[min_index]);
	hash_table[min_index] = next;
	}
	}


	/*
	*******************************************************************************
	* Main *
	*******************************************************************************
	*/


	int main (int argc, const char *argv[]) {
	const char *word;

	// While we read value words, insert them into the hash table
	while ((word = readword()) != NULL) {
	insert_word(word);
	}

	// Dump words lexicographically
	dump_words_lexicographically();

	// Free the temporary word buffer
	free_word_buffer();

	return EXIT_SUCCESS;
	}