Last active
January 3, 2016 01:29
-
-
Save silentbicycle/8389129 to your computer and use it in GitHub Desktop.
simple linear-time sorting algorithm, using a flattened linked-list index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (c) 2014 Scott Vokes <[email protected]> | |
* | |
* Permission to use, copy, modify, and/or distribute this software for any | |
* purpose with or without fee is hereby granted, provided that the above | |
* copyright notice and this permission notice appear in all copies. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
*/ | |
/* Example code for a linear-time sorting algorithm, a variation of | |
* counting sort. Note that, rather than sorting its input in place, | |
* it returns an array of offsets that can be used to slice the | |
* input in ascending order. | |
* | |
* This could be used on larger values than just uint8_ts, though | |
* next[256] below will need to be changed to either next[max-min+1] | |
* or a sparse data structure (e.g. hash table). */ | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdbool.h> | |
#include <string.h> | |
#include <assert.h> | |
/* For a byte BUF of SZ bytes, generate an array of indices that | |
* can be used to slice BUF to get its content in ascending order. | |
* Linear time and space. */ | |
static uint32_t *flattened_index_sort(uint8_t *buf, size_t sz) { | |
uint32_t *ao = malloc(sz * sizeof(uint32_t)); // ascending offsets | |
uint32_t *idx = malloc(sz * sizeof(uint32_t)); // index | |
if (ao == NULL || idx == NULL) { goto cleanup; } | |
uint32_t next[256]; | |
memset(next, 0xFF, sizeof(next)); | |
const uint32_t none = (uint32_t)-1; | |
/* First pass: build flattened linked-list index. | |
* idx[i] will have the offset for the next instance of the byte | |
* at buf[i], or -1 for end-of-list. The offset of the first instance | |
* of byte c will be stored in next[c]. */ | |
for (int i = sz - 1; i >= 0; i--) { | |
uint8_t c = buf[i]; | |
idx[i] = next[c]; | |
next[c] = i; | |
} | |
/* Second pass: fill the array with ascending offsets while | |
* walking the linked lists of offsets in the index, starting | |
* from next[byte], which points at the first. */ | |
size_t offset = 0; | |
for (uint32_t i = 0; i < 256; i++) { | |
uint32_t link = next[i]; | |
while (link != none) { | |
ao[offset++] = link; | |
link = idx[link]; | |
} | |
} | |
free(idx); | |
assert(offset == sz); | |
return ao; | |
cleanup: | |
if (ao) free(ao); | |
if (idx) free(idx); | |
return NULL; | |
} | |
int main(int argc, char **argv) { | |
#define BUF_SZ 128 | |
char inbuf[BUF_SZ]; | |
char sorted[BUF_SZ + 1]; | |
for (;;) { | |
printf("? "); | |
char *s = fgets(inbuf, BUF_SZ, stdin); | |
if (s == NULL) { break; } | |
size_t len = strlen(s) - 1; /* drop '\0' from end */ | |
uint32_t *ao = flattened_index_sort((uint8_t *)s, len); | |
if (ao == NULL) { break; } | |
for (uint32_t i = 0; i < len; i++) { | |
sorted[i] = inbuf[ao[i]]; | |
} | |
sorted[len] = '\0'; | |
printf("offsets to content, in ascending order\n "); | |
for (uint32_t i = 0; i < len; i++) { | |
if ((i & 0x0F) == 0 && i > 0) { printf("\n "); } | |
printf("%d ", ao[i]); | |
} | |
free(ao); | |
printf("\n\nsorted:\n %s\n", sorted); | |
} | |
(void)argc; (void)argv; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment