Skip to content

Instantly share code, notes, and snippets.

@Micrified
Created April 23, 2019 18:10
Show Gist options
  • Save Micrified/e77bbe757560d4563e968f7c78b6c392 to your computer and use it in GitHub Desktop.
Save Micrified/e77bbe757560d4563e968f7c78b6c392 to your computer and use it in GitHub Desktop.
Word Sorting Program (Assignment)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
/*
* Author: <elided>
* Student Number: <elided>
* Date Written: 23/04/2019
* Course: <elided> (Embedded Systems Lab)
*
* Complexity: ~ nlog(n)
*/
/*
*******************************************************************************
* Symbolic Constants *
*******************************************************************************
*/
// Maximum number of bytes buffered at a time
#define READ_BUF_SIZE 4096
// Default starting (expected) word size
#define DEF_WORD_SIZE 8
// The range (root bucket count) of the hash table (nodes dynamically allocated)
#define HASH_TAB_SIZE 1024
// Panic abort sequence
#define PANIC { fprintf(stderr, "%s:%d Panic: %s\n", \
__FILE__, __LINE__, strerror(errno)); exit(-1); }
/*
*******************************************************************************
* Type Definitions *
*******************************************************************************
*/
// Describes a node used in the hash-table linked-list.
typedef struct node {
const char *word;
unsigned int count;
struct node *next;
} node;
/*
*******************************************************************************
* Global Variables *
*******************************************************************************
*/
// The temporary resizeable word buffer
unsigned char *temp_word_buf;
// The temporary resizable word buffer length and capacity
ssize_t temp_word_buf_len, temp_word_buf_cap;
// The hash table (fixed number of pointers to linked-lists of nodes)
node *hash_table[HASH_TAB_SIZE];
/*
*******************************************************************************
* Temporary Word Buffer Functions *
*******************************************************************************
*/
// Stores a single word used when buffering (dynamically allocated)
void reset_word_buffer (void) {
if (temp_word_buf == NULL) {
temp_word_buf = malloc(DEF_WORD_SIZE * sizeof(unsigned char));
if (temp_word_buf == NULL) {
PANIC
}
temp_word_buf_cap = DEF_WORD_SIZE;
}
temp_word_buf_len = 0;
}
// Appends a character to the dynamically resizable word buffer
void append_word_buffer (unsigned char c) {
if (temp_word_buf_len >= temp_word_buf_cap) {
temp_word_buf_cap *= 2;
temp_word_buf = realloc(temp_word_buf, temp_word_buf_cap * sizeof(unsigned char));
if (temp_word_buf == NULL) {
PANIC
}
}
temp_word_buf[temp_word_buf_len++] = c;
}
// Frees the word buffer. Must only be called once at the end
void free_word_buffer (void) {
free(temp_word_buf);
}
/*
*******************************************************************************
* Buffered Reader Functions *
*******************************************************************************
*/
// Reads max READ_BUF_SIZE bytes. Returns bytes read
ssize_t bufferedread (int fd, unsigned char **bp) {
static unsigned char b[READ_BUF_SIZE];
// Read bytes straight from stdin
ssize_t s = read(fd, (void *)b, READ_BUF_SIZE);
// Abort on error
if (s == -1 || bp == NULL) {
PANIC
}
// Update pointer parameter, return size read
*bp = b;
return s;
}
// Reads the next alphanumeric word on input. Returns pointer to dynamically allocated word.
// If you want to keep this word between calls - it must be copied.
const char *readword () {
static unsigned char *b;
static ssize_t i, count;
// Initialize if necessary
if (b == NULL) {
count = bufferedread(STDIN_FILENO, &b);
i = 0;
}
// If there is nothing to read, abort
if (count == 0) {
return NULL;
}
// Otherwise drop non-alphanumeric until next alphanumeric
do {
for ( ; i < count && !isalnum(b[i]); ++i);
if (i >= count) {
count = bufferedread(STDIN_FILENO, &b);
i = 0;
} else {
break;
}
} while (count > 0);
// If there is nothing to read, abort
if (count == 0) {
return NULL;
}
// Now we are certain of at least one alphanumeric character on input.
// (i < count) for sure, and isalnum(b[i]) for sure. So we store that
reset_word_buffer();
append_word_buffer(b[i]);
i += 1;
do {
for ( ; i < count && isalnum(b[i]); ++i) {
append_word_buffer(b[i]);
}
if (i >= count) {
count = bufferedread(STDIN_FILENO, &b);
i = 0;
} else {
break;
}
} while (count > 0);
// Append the null-byte
append_word_buffer('\0');
// Return the temporary word buffer
return (const char *)temp_word_buf;
}
/*
*******************************************************************************
* Hash Table Functions *
*******************************************************************************
*/
// The DJB2 hashing algorithm. Source: http://www.cse.yorku.ca/~oz/hash.html
unsigned long djb2 (const char *s) {
unsigned long h = 5381;
int c;
while ((c = *(s++)) != '\0') h = ((h << 5) + h) + c;
return h;
}
// Allocates a new node, making a copy of the word in the process. Returns ptr
node *make_node (const char *word) {
// Allocate the node
node *n = malloc(sizeof(node));
if (n == NULL) {
PANIC
}
// Allocate copy of the word (remember + 1 for NULL byte!)
size_t copy_size = ((strlen(word) + 1) * sizeof(char));
const char *copy = malloc(copy_size);
if (copy == NULL) {
PANIC
} else {
memcpy((void *)copy, (void *)word, copy_size);
}
// Set all fields.
*n = (node){.word = copy, .count = 1, .next = NULL};
// Return the pointer
return n;
}
// Frees a single node
void free_node (node *n) {
if (n == NULL) {
return;
}
free((char *)(n->word));
free(n);
}
// Inserts a word alphabetically into a linked-list. Returns list head
node *link_word_alpha (const char *word, node *n) {
int ord;
node *new;
// Case: Must make a new node since none is present
if (n == NULL) {
return make_node(word);
}
// Set the lexicographical ordering value
ord = strcmp(word, n->word);
// Case: Word goes after that of the current node
if (ord > 0) {
n->next = link_word_alpha(word, n->next);
return n;
}
// Case: Word goes before that of the current node
if (ord < 0) {
new = make_node(word);
new->next = n;
return new;
}
// Case: Word is the same as the current node
n->count++;
return n;
}
// Inserts a given word in the hash table. Lists are alphabetically ordered
void insert_word (const char *word) {
unsigned long index = djb2(word) % HASH_TAB_SIZE;
hash_table[index] = link_word_alpha(word, hash_table[index]);
}
// Outputs all words in lexicographical order from the hash table
void dump_words_lexicographically (void) {
unsigned long min_index, i;
while (1) {
// Find first valid node
for (min_index = 0;
min_index < HASH_TAB_SIZE && hash_table[min_index] == NULL;
min_index++);
// If none were found: break the loop
if (min_index >= HASH_TAB_SIZE) {
break;
}
// Otherwise we search for any words with smaller lexicographic value
for (i = min_index + 1; i < HASH_TAB_SIZE; ++i) {
if (hash_table[i] != NULL && strcmp(hash_table[min_index]->word, hash_table[i]->word) > 0) {
min_index = i;
}
}
// Output that word
fprintf(stdout, "%s: %u\n", hash_table[min_index]->word,
hash_table[min_index]->count);
// Remove the word from the linked list, leaving its successor in place
node *next = hash_table[min_index]->next;
free_node(hash_table[min_index]);
hash_table[min_index] = next;
}
}
/*
*******************************************************************************
* Main *
*******************************************************************************
*/
int main (int argc, const char *argv[]) {
const char *word;
// While we read value words, insert them into the hash table
while ((word = readword()) != NULL) {
insert_word(word);
}
// Dump words lexicographically
dump_words_lexicographically();
// Free the temporary word buffer
free_word_buffer();
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment