Skip to content

Instantly share code, notes, and snippets.

@agrif
Created March 30, 2012 12:29
Show Gist options
  • Save agrif/2251175 to your computer and use it in GitHub Desktop.
Save agrif/2251175 to your computer and use it in GitHub Desktop.
encode/decode a byte array using an arbitrary alphabet
#include <stdio.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
/* helper to divide numerator in-place, and return the remainder */
u8 long_divide(u8* numerator, u32 numerator_length, u8 denominator)
{
assert(numerator_length > 0);
u32 remainder = 0;
u16 i;
for (i = 0; i < numerator_length; i++)
{
remainder <<= 8;
remainder += numerator[i] % denominator;
numerator[i] /= denominator;
if (remainder > denominator)
{
assert(numerator[i] + (remainder / denominator) < 256);
numerator[i] += remainder / denominator;
remainder %= denominator;
}
}
assert(remainder < denominator);
return remainder;
}
/* helper to multiply a number in-place. return carry-over */
u8 long_multiply(u8* a, u32 a_length, u8 b)
{
assert(a_length > 0);
u32 i;
u32 carry = 0;
for (i = 0; i < a_length; i++)
{
carry += a[a_length - 1 - i] * b;
a[a_length - 1 - i] = (carry & 0xff);
carry >>= 8;
assert(carry < 256);
}
return carry;
}
/* helper to add to a number in-place. return carry-over */
u8 long_add(u8* a, u32 a_length, u8 b)
{
assert(a_length > 0);
u32 i;
u32 carry = b;
for (i = 0; i < a_length; i++)
{
carry += a[a_length - 1 - i];
a[a_length - 1 - i] = (carry & 0xff);
carry >>= 8;
assert(carry < 256);
}
return carry;
}
/* Encode the given byte array with the given alphabet.
* Both input and output are big-endian.
*/
void encode(const u8* bytes, u32 bytes_len, u8** dest, u32* dest_len, const char* alphabet)
{
/* figure out how big it should be */
u32 alphabet_len = strlen(alphabet);
assert(alphabet_len < 256);
double exact_dest_len = bytes_len * log(256) / log(alphabet_len);
*dest_len = exact_dest_len;
if (exact_dest_len > *dest_len)
(*dest_len)++;
/* allocate the work string */
u8* work = malloc(bytes_len);
if (!work)
{
*dest_len = 0;
*dest = NULL;
return;
}
/* allocate the destination string */
*dest = calloc(*dest_len, 1);
if (*dest == NULL)
{
*dest_len = 0;
free(work);
return;
}
/* fill up the work string */
memcpy(work, bytes, bytes_len);
/* the encoding loop */
u32 i;
for (i = 0; i < *dest_len; i++)
{
/* remainder = work % alphabet_len;
* work /= alphabet_len;
*/
u8 remainder = long_divide(work, bytes_len, alphabet_len);
(*dest)[*dest_len - 1 - i] = alphabet[remainder];
}
/* free the work string */
free(work);
}
void decode(const u8* data, u32 data_len, u8** dest, u32* dest_len, const char* alphabet)
{
/* figure out how big it should be */
u32 alphabet_len = strlen(alphabet);
assert(alphabet_len < 256);
double exact_dest_len = data_len * log(alphabet_len) / log(256);
*dest_len = exact_dest_len;
if (exact_dest_len > *dest_len)
(*dest_len)++;
/* allocate the dest buffer */
*dest = calloc(*dest_len, 1);
if (*dest == NULL)
{
*dest_len = 0;
return;
}
/* the decoding loop */
u32 i;
for (i = 0; i < data_len; i++)
{
/* lookup the letter value */
u8 letter = data[i];
u8 value;
for (value = 0; value < alphabet_len; value++)
{
if (alphabet[value] == letter)
break;
}
if (value >= alphabet_len)
{
/* encountered unknown letter */
free(*dest);
*dest_len = 0;
return;
}
/* dest = dest * alphabet_len + value; */
u32 carry = long_multiply(*dest, *dest_len, alphabet_len);
carry += long_add(*dest, *dest_len, value);
assert(carry == 0);
}
}
int main(int argc, char** argv)
{
char input[256];
printf("input> ");
gets(input);
char alphabet[256];
printf("alphabet> ");
gets(alphabet);
printf("encoding: %s\n", input);
printf("with alphabet: %s\n", alphabet);
u8* dest = NULL;
u32 dest_len = 0;
encode(input, strlen(input), &dest, &dest_len, alphabet);
if (dest == NULL)
{
printf("an error occurred during encoding\n");
} else {
printf("encoded: ");
fwrite(dest, 1, dest_len, stdout);
printf("\n");
u8* decoded = NULL;
u32 decoded_len = 0;
decode(dest, dest_len, &decoded, &decoded_len, alphabet);
if (decoded == NULL)
{
printf("an error occurred during decoding\n");
} else {
printf("decoded: ");
fwrite(decoded, 1, decoded_len, stdout);
printf("\n");
free(decoded);
}
free(dest);
}
return 0;
}
.PHONY : all clean
all : universal-encoder
clean :
rm universal-encoder main.o
universal-encoder : main.o
gcc main.o -o universal-encoder -lm -g
main.o : main.c
gcc -c main.c -o main.o -g
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment