Created
March 30, 2012 12:29
-
-
Save agrif/2251175 to your computer and use it in GitHub Desktop.
encode/decode a byte array using an arbitrary alphabet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <assert.h> | |
#include <math.h> | |
#include <string.h> | |
#include <stdlib.h> | |
typedef unsigned char u8; | |
typedef unsigned short u16; | |
typedef unsigned int u32; | |
/* helper to divide numerator in-place, and return the remainder */ | |
u8 long_divide(u8* numerator, u32 numerator_length, u8 denominator) | |
{ | |
assert(numerator_length > 0); | |
u32 remainder = 0; | |
u16 i; | |
for (i = 0; i < numerator_length; i++) | |
{ | |
remainder <<= 8; | |
remainder += numerator[i] % denominator; | |
numerator[i] /= denominator; | |
if (remainder > denominator) | |
{ | |
assert(numerator[i] + (remainder / denominator) < 256); | |
numerator[i] += remainder / denominator; | |
remainder %= denominator; | |
} | |
} | |
assert(remainder < denominator); | |
return remainder; | |
} | |
/* helper to multiply a number in-place. return carry-over */ | |
u8 long_multiply(u8* a, u32 a_length, u8 b) | |
{ | |
assert(a_length > 0); | |
u32 i; | |
u32 carry = 0; | |
for (i = 0; i < a_length; i++) | |
{ | |
carry += a[a_length - 1 - i] * b; | |
a[a_length - 1 - i] = (carry & 0xff); | |
carry >>= 8; | |
assert(carry < 256); | |
} | |
return carry; | |
} | |
/* helper to add to a number in-place. return carry-over */ | |
u8 long_add(u8* a, u32 a_length, u8 b) | |
{ | |
assert(a_length > 0); | |
u32 i; | |
u32 carry = b; | |
for (i = 0; i < a_length; i++) | |
{ | |
carry += a[a_length - 1 - i]; | |
a[a_length - 1 - i] = (carry & 0xff); | |
carry >>= 8; | |
assert(carry < 256); | |
} | |
return carry; | |
} | |
/* Encode the given byte array with the given alphabet. | |
* Both input and output are big-endian. | |
*/ | |
void encode(const u8* bytes, u32 bytes_len, u8** dest, u32* dest_len, const char* alphabet) | |
{ | |
/* figure out how big it should be */ | |
u32 alphabet_len = strlen(alphabet); | |
assert(alphabet_len < 256); | |
double exact_dest_len = bytes_len * log(256) / log(alphabet_len); | |
*dest_len = exact_dest_len; | |
if (exact_dest_len > *dest_len) | |
(*dest_len)++; | |
/* allocate the work string */ | |
u8* work = malloc(bytes_len); | |
if (!work) | |
{ | |
*dest_len = 0; | |
*dest = NULL; | |
return; | |
} | |
/* allocate the destination string */ | |
*dest = calloc(*dest_len, 1); | |
if (*dest == NULL) | |
{ | |
*dest_len = 0; | |
free(work); | |
return; | |
} | |
/* fill up the work string */ | |
memcpy(work, bytes, bytes_len); | |
/* the encoding loop */ | |
u32 i; | |
for (i = 0; i < *dest_len; i++) | |
{ | |
/* remainder = work % alphabet_len; | |
* work /= alphabet_len; | |
*/ | |
u8 remainder = long_divide(work, bytes_len, alphabet_len); | |
(*dest)[*dest_len - 1 - i] = alphabet[remainder]; | |
} | |
/* free the work string */ | |
free(work); | |
} | |
void decode(const u8* data, u32 data_len, u8** dest, u32* dest_len, const char* alphabet) | |
{ | |
/* figure out how big it should be */ | |
u32 alphabet_len = strlen(alphabet); | |
assert(alphabet_len < 256); | |
double exact_dest_len = data_len * log(alphabet_len) / log(256); | |
*dest_len = exact_dest_len; | |
if (exact_dest_len > *dest_len) | |
(*dest_len)++; | |
/* allocate the dest buffer */ | |
*dest = calloc(*dest_len, 1); | |
if (*dest == NULL) | |
{ | |
*dest_len = 0; | |
return; | |
} | |
/* the decoding loop */ | |
u32 i; | |
for (i = 0; i < data_len; i++) | |
{ | |
/* lookup the letter value */ | |
u8 letter = data[i]; | |
u8 value; | |
for (value = 0; value < alphabet_len; value++) | |
{ | |
if (alphabet[value] == letter) | |
break; | |
} | |
if (value >= alphabet_len) | |
{ | |
/* encountered unknown letter */ | |
free(*dest); | |
*dest_len = 0; | |
return; | |
} | |
/* dest = dest * alphabet_len + value; */ | |
u32 carry = long_multiply(*dest, *dest_len, alphabet_len); | |
carry += long_add(*dest, *dest_len, value); | |
assert(carry == 0); | |
} | |
} | |
int main(int argc, char** argv) | |
{ | |
char input[256]; | |
printf("input> "); | |
gets(input); | |
char alphabet[256]; | |
printf("alphabet> "); | |
gets(alphabet); | |
printf("encoding: %s\n", input); | |
printf("with alphabet: %s\n", alphabet); | |
u8* dest = NULL; | |
u32 dest_len = 0; | |
encode(input, strlen(input), &dest, &dest_len, alphabet); | |
if (dest == NULL) | |
{ | |
printf("an error occurred during encoding\n"); | |
} else { | |
printf("encoded: "); | |
fwrite(dest, 1, dest_len, stdout); | |
printf("\n"); | |
u8* decoded = NULL; | |
u32 decoded_len = 0; | |
decode(dest, dest_len, &decoded, &decoded_len, alphabet); | |
if (decoded == NULL) | |
{ | |
printf("an error occurred during decoding\n"); | |
} else { | |
printf("decoded: "); | |
fwrite(decoded, 1, decoded_len, stdout); | |
printf("\n"); | |
free(decoded); | |
} | |
free(dest); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.PHONY : all clean | |
all : universal-encoder | |
clean : | |
rm universal-encoder main.o | |
universal-encoder : main.o | |
gcc main.o -o universal-encoder -lm -g | |
main.o : main.c | |
gcc -c main.c -o main.o -g |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment