Created
April 23, 2019 18:10
-
-
Save Micrified/e77bbe757560d4563e968f7c78b6c392 to your computer and use it in GitHub Desktop.
Word Sorting Program (Assignment)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <errno.h> | |
#include <string.h> | |
#include <ctype.h> | |
/* | |
* Author: <elided> | |
* Student Number: <elided> | |
* Date Written: 23/04/2019 | |
* Course: <elided> (Embedded Systems Lab) | |
* | |
* Complexity: ~ nlog(n) | |
*/ | |
/* | |
******************************************************************************* | |
* Symbolic Constants * | |
******************************************************************************* | |
*/ | |
// Maximum number of bytes buffered at a time | |
#define READ_BUF_SIZE 4096 | |
// Default starting (expected) word size | |
#define DEF_WORD_SIZE 8 | |
// The range (root bucket count) of the hash table (nodes dynamically allocated) | |
#define HASH_TAB_SIZE 1024 | |
// Panic abort sequence | |
#define PANIC { fprintf(stderr, "%s:%d Panic: %s\n", \ | |
__FILE__, __LINE__, strerror(errno)); exit(-1); } | |
/* | |
******************************************************************************* | |
* Type Definitions * | |
******************************************************************************* | |
*/ | |
// Describes a node used in the hash-table linked-list. | |
typedef struct node { | |
const char *word; | |
unsigned int count; | |
struct node *next; | |
} node; | |
/* | |
******************************************************************************* | |
* Global Variables * | |
******************************************************************************* | |
*/ | |
// The temporary resizeable word buffer | |
unsigned char *temp_word_buf; | |
// The temporary resizable word buffer length and capacity | |
ssize_t temp_word_buf_len, temp_word_buf_cap; | |
// The hash table (fixed number of pointers to linked-lists of nodes) | |
node *hash_table[HASH_TAB_SIZE]; | |
/* | |
******************************************************************************* | |
* Temporary Word Buffer Functions * | |
******************************************************************************* | |
*/ | |
// Stores a single word used when buffering (dynamically allocated) | |
void reset_word_buffer (void) { | |
if (temp_word_buf == NULL) { | |
temp_word_buf = malloc(DEF_WORD_SIZE * sizeof(unsigned char)); | |
if (temp_word_buf == NULL) { | |
PANIC | |
} | |
temp_word_buf_cap = DEF_WORD_SIZE; | |
} | |
temp_word_buf_len = 0; | |
} | |
// Appends a character to the dynamically resizable word buffer | |
void append_word_buffer (unsigned char c) { | |
if (temp_word_buf_len >= temp_word_buf_cap) { | |
temp_word_buf_cap *= 2; | |
temp_word_buf = realloc(temp_word_buf, temp_word_buf_cap * sizeof(unsigned char)); | |
if (temp_word_buf == NULL) { | |
PANIC | |
} | |
} | |
temp_word_buf[temp_word_buf_len++] = c; | |
} | |
// Frees the word buffer. Must only be called once at the end | |
void free_word_buffer (void) { | |
free(temp_word_buf); | |
} | |
/* | |
******************************************************************************* | |
* Buffered Reader Functions * | |
******************************************************************************* | |
*/ | |
// Reads max READ_BUF_SIZE bytes. Returns bytes read | |
ssize_t bufferedread (int fd, unsigned char **bp) { | |
static unsigned char b[READ_BUF_SIZE]; | |
// Read bytes straight from stdin | |
ssize_t s = read(fd, (void *)b, READ_BUF_SIZE); | |
// Abort on error | |
if (s == -1 || bp == NULL) { | |
PANIC | |
} | |
// Update pointer parameter, return size read | |
*bp = b; | |
return s; | |
} | |
// Reads the next alphanumeric word on input. Returns pointer to dynamically allocated word. | |
// If you want to keep this word between calls - it must be copied. | |
const char *readword () { | |
static unsigned char *b; | |
static ssize_t i, count; | |
// Initialize if necessary | |
if (b == NULL) { | |
count = bufferedread(STDIN_FILENO, &b); | |
i = 0; | |
} | |
// If there is nothing to read, abort | |
if (count == 0) { | |
return NULL; | |
} | |
// Otherwise drop non-alphanumeric until next alphanumeric | |
do { | |
for ( ; i < count && !isalnum(b[i]); ++i); | |
if (i >= count) { | |
count = bufferedread(STDIN_FILENO, &b); | |
i = 0; | |
} else { | |
break; | |
} | |
} while (count > 0); | |
// If there is nothing to read, abort | |
if (count == 0) { | |
return NULL; | |
} | |
// Now we are certain of at least one alphanumeric character on input. | |
// (i < count) for sure, and isalnum(b[i]) for sure. So we store that | |
reset_word_buffer(); | |
append_word_buffer(b[i]); | |
i += 1; | |
do { | |
for ( ; i < count && isalnum(b[i]); ++i) { | |
append_word_buffer(b[i]); | |
} | |
if (i >= count) { | |
count = bufferedread(STDIN_FILENO, &b); | |
i = 0; | |
} else { | |
break; | |
} | |
} while (count > 0); | |
// Append the null-byte | |
append_word_buffer('\0'); | |
// Return the temporary word buffer | |
return (const char *)temp_word_buf; | |
} | |
/* | |
******************************************************************************* | |
* Hash Table Functions * | |
******************************************************************************* | |
*/ | |
// The DJB2 hashing algorithm. Source: http://www.cse.yorku.ca/~oz/hash.html | |
unsigned long djb2 (const char *s) { | |
unsigned long h = 5381; | |
int c; | |
while ((c = *(s++)) != '\0') h = ((h << 5) + h) + c; | |
return h; | |
} | |
// Allocates a new node, making a copy of the word in the process. Returns ptr | |
node *make_node (const char *word) { | |
// Allocate the node | |
node *n = malloc(sizeof(node)); | |
if (n == NULL) { | |
PANIC | |
} | |
// Allocate copy of the word (remember + 1 for NULL byte!) | |
size_t copy_size = ((strlen(word) + 1) * sizeof(char)); | |
const char *copy = malloc(copy_size); | |
if (copy == NULL) { | |
PANIC | |
} else { | |
memcpy((void *)copy, (void *)word, copy_size); | |
} | |
// Set all fields. | |
*n = (node){.word = copy, .count = 1, .next = NULL}; | |
// Return the pointer | |
return n; | |
} | |
// Frees a single node | |
void free_node (node *n) { | |
if (n == NULL) { | |
return; | |
} | |
free((char *)(n->word)); | |
free(n); | |
} | |
// Inserts a word alphabetically into a linked-list. Returns list head | |
node *link_word_alpha (const char *word, node *n) { | |
int ord; | |
node *new; | |
// Case: Must make a new node since none is present | |
if (n == NULL) { | |
return make_node(word); | |
} | |
// Set the lexicographical ordering value | |
ord = strcmp(word, n->word); | |
// Case: Word goes after that of the current node | |
if (ord > 0) { | |
n->next = link_word_alpha(word, n->next); | |
return n; | |
} | |
// Case: Word goes before that of the current node | |
if (ord < 0) { | |
new = make_node(word); | |
new->next = n; | |
return new; | |
} | |
// Case: Word is the same as the current node | |
n->count++; | |
return n; | |
} | |
// Inserts a given word in the hash table. Lists are alphabetically ordered | |
void insert_word (const char *word) { | |
unsigned long index = djb2(word) % HASH_TAB_SIZE; | |
hash_table[index] = link_word_alpha(word, hash_table[index]); | |
} | |
// Outputs all words in lexicographical order from the hash table | |
void dump_words_lexicographically (void) { | |
unsigned long min_index, i; | |
while (1) { | |
// Find first valid node | |
for (min_index = 0; | |
min_index < HASH_TAB_SIZE && hash_table[min_index] == NULL; | |
min_index++); | |
// If none were found: break the loop | |
if (min_index >= HASH_TAB_SIZE) { | |
break; | |
} | |
// Otherwise we search for any words with smaller lexicographic value | |
for (i = min_index + 1; i < HASH_TAB_SIZE; ++i) { | |
if (hash_table[i] != NULL && strcmp(hash_table[min_index]->word, hash_table[i]->word) > 0) { | |
min_index = i; | |
} | |
} | |
// Output that word | |
fprintf(stdout, "%s: %u\n", hash_table[min_index]->word, | |
hash_table[min_index]->count); | |
// Remove the word from the linked list, leaving its successor in place | |
node *next = hash_table[min_index]->next; | |
free_node(hash_table[min_index]); | |
hash_table[min_index] = next; | |
} | |
} | |
/* | |
******************************************************************************* | |
* Main * | |
******************************************************************************* | |
*/ | |
int main (int argc, const char *argv[]) { | |
const char *word; | |
// While we read value words, insert them into the hash table | |
while ((word = readword()) != NULL) { | |
insert_word(word); | |
} | |
// Dump words lexicographically | |
dump_words_lexicographically(); | |
// Free the temporary word buffer | |
free_word_buffer(); | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment