-
-
Save aqrit/a2ccea48d7cac7e9d4d99f19d4759666 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <intrin.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#include "smmintrin.h"
/*
 * Scalar model of the per-byte hash that the SIMD decoder computes with
 * psrld + pshufb + pavgb.
 *
 * lut: 16-entry association table; only read, indexed by the low nibble of c.
 * c:   the input byte value.
 *
 * Returns the rounded-up average of (0xE0 | c >> 3) and the selected table
 * entry. Callers keep only the low nibble of the result as an index into a
 * 16-entry value table.
 */
size_t hash(const unsigned char *lut, size_t c){
    // psrld
    // note: the 0xE0 simulates the shifting in of three bits from the next byte...
    size_t shifted = 0xE0 | (c >> 3);

    // pshufb: the low nibble selects the entry; a set high bit yields zero
    size_t assoc = (c & 0x80) ? 0x00 : lut[c & 0x0F];

    // pavg: average, rounded up
    return (shifted + assoc + 1) >> 1;
}
// Signed saturating 8-bit add.
// Rather than re-implementing saturation in scalar code, this runs the real
// instruction (_mm_adds_epi8) on the low byte of two vector registers and
// returns the low byte of the result.
unsigned char adds8(unsigned char a, unsigned char b){
    __m128i x = _mm_cvtsi32_si128(a);
    __m128i y = _mm_cvtsi32_si128(b);
    __m128i r = _mm_adds_epi8(x, y);
    // Only the low byte carries the sum; make the narrowing explicit.
    return (unsigned char)(_mm_cvtsi128_si32(r) & 0xFF);
}
// The 64 characters of the Base64 alphabet in ascending byte order:
// '+', '/', '0'-'9', 'A'-'Z', 'a'-'z'.
unsigned char valid_chars[64] = {
    0x2B, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35,
    0x36, 0x37, 0x38, 0x39, 0x41, 0x42, 0x43, 0x44,
    0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C,
    0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54,
    0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62,
    0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A,
    0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72,
    0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A
};
// Expected 6-bit value for the character at the same index in valid_chars
// ('+' -> 62, '/' -> 63, '0'-'9' -> 52..61, 'A'-'Z' -> 0..25, 'a'-'z' -> 26..51).
unsigned char decoded_chars[64] = {
    62, 63, 52, 53, 54, 55, 56, 57,
    58, 59, 60, 61,  0,  1,  2,  3,
     4,  5,  6,  7,  8,  9, 10, 11,
    12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27,
    28, 29, 30, 31, 32, 33, 34, 35,
    36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 47, 48, 49, 50, 51
};
// The 64 byte values below 0x80 that are not in the Base64 alphabet
// (this includes '=' at 0x3D).
// all signed chars (0x80..0xFF) are also invalid; they are not listed here
unsigned char invalid_chars[64] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2C, 0x2D, 0x2E, 0x3A, 0x3B,
    0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x5B, 0x5C, 0x5D,
    0x5E, 0x5F, 0x60, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
// Association table for the decode hash: indexed by the low nibble of the
// input character, its entry is averaged with the shifted character to pick
// an entry of delta_values.
unsigned char delta_asso[16] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F
};
// Offsets selected by the low nibble of the decode hash. The chosen entry is
// added to the input character with signed saturation to give its 6-bit value
// (e.g. 0xBF is -65, which maps 'A' (0x41) to 0).
unsigned char delta_values[16] = {
    0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
    0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9
};
// Association table for the validation hash: indexed by the low nibble of the
// input character, its entry is averaged with the shifted character to pick
// an entry of check_values.
unsigned char check_asso[16] = {
    0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F
};
// Offsets selected by the low nibble of the validation hash. The chosen entry
// is added to the input character with signed saturation; a result with the
// high bit set marks the character as invalid. Every entry has its own high
// bit set (i.e. is negative as a signed byte).
unsigned char check_values[16] = {
    0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6,
    0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80
};
void check_decode(){ | |
for( size_t i = 0; i < 64; i++ ){ | |
unsigned char c = valid_chars[i]; | |
unsigned char h = hash(delta_asso, c); | |
unsigned char v = adds8(delta_values[h & 0x0F],c); | |
if( decoded_chars[i] != v ){ | |
printf("FAIL: %02X decoded to %02X \n",c,v); | |
} | |
} | |
// set equal sign (0x3D) to zero in decode, because why not? | |
unsigned char x = adds8(delta_values[hash(delta_asso, 0x3D) & 0x0F], 0x3D); | |
if( x != 0 ) printf("FAIL: %02X decoded to %02X \n",0x3D,x); | |
} | |
void check_invalid_char_detection() | |
{ | |
for( size_t i = 0; i < 64; i++ ){ | |
unsigned char c = valid_chars[i]; | |
unsigned char h = hash(check_asso, c); | |
unsigned char v = adds8(check_values[h & 0x0F],c); | |
if(v >= 0x80){ | |
printf("FAIL: valid char 0x%02X not detected\n",c); | |
} | |
} | |
for( size_t i = 0; i < 64; i++ ){ | |
unsigned char c = invalid_chars[i]; | |
unsigned char h = hash(check_asso, c); | |
unsigned char v = adds8(check_values[h & 0x0F], c); | |
if(v < 0x80){ | |
printf("FAIL: invalid char 0x%02X not detected\n",c); | |
printf( "%02X %02X %02X\n", h, check_values[h & 0x0F], v ); | |
} | |
} | |
// note: | |
// all check_values are signed... | |
// because using sat_adds8: signed + signed = signed | |
// with signed input it is possible hash overflows | |
// in which case, 0 + signed = signed | |
for( int i = 0; i < 16; i++ ){ | |
unsigned char c = check_values[i]; | |
if((c & 0x80) == 0){ | |
printf("FAIL: check_value %02X is unsigned\n", c); | |
} | |
} | |
for( size_t i = 128; i < 256; i++ ){ | |
unsigned char c = i; | |
unsigned char h = hash(check_asso, c); | |
unsigned char v = adds8(check_values[h & 0x0F], c); | |
if(v < 0x80){ | |
printf("FAIL: invalid char 0x%02X not detected\n",c); | |
} | |
} | |
} | |
// hash must produce unsigned results for unsigned input | |
void check_unsigned_hash(){ | |
for( size_t i = 0; i < 128; i++ ){ | |
unsigned char h_c = hash(check_asso, i); | |
if(h_c >= 0x80){ | |
printf("FAIL: %02X hashes to %02X\n",i,h_c); | |
} | |
unsigned char h_d = hash(delta_asso, i); | |
if(h_d >= 0x80){ | |
printf("FAIL: %02X hashes to %02X\n",i,h_d); | |
} | |
} | |
} | |
// Prints the low nibble of hash(lut, c) for every c in 0..127 as a grid of
// hex digits, 16 per row (a newline precedes each row and ends the grid).
void print_hash( unsigned char* lut ){
    for( size_t i = 0; i < 128; i++ ){
        unsigned char h = hash(lut, (unsigned char)i);
        if( (i & 0x0F) == 0 ){
            printf("\n");
        }
        printf( "%01X ", h & 0x0F);
    }
    printf("\n");
}
// Base64 sample text used by main as decoder input; the decoded text begins
// "Call me Ishmael. Some years ago". The literal ends with "==" padding.
static const char moby_dick_base64[] =
    "Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZ"
    "yBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIG"
    "FuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGh"
    "vdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5"
    "IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZ"
    "mYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldm"
    "VyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV"
    "2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5l"
    "dmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2Zma"
    "W4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bm"
    "VyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3V"
    "jaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1v"
    "cmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwa"
    "W5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbG"
    "UncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8"
    "gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlz"
    "dG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRoc"
    "m93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2"
    "hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV"
    "0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l"
    "IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd"
    "2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==";
// Lets GCC/Clang compile this one function for SSSE3 even when the
// translation unit is built without -mssse3; expands to nothing elsewhere.
#if defined(__GNUC__) || defined(__clang__)
#define BASE64_TARGET_SSSE3 __attribute__((target("ssse3")))
#else
#define BASE64_TARGET_SSSE3
#endif

/*
 * Decodes Base64 text with SSSE3, 16 input characters (12 output bytes) per
 * iteration.
 *
 * dst_void: output buffer. Each iteration stores a full 16 bytes (the last
 *           four are zero) and then advances by 12, so the buffer must have
 *           room for 12 * (length / 16) + 4 bytes.
 * src_void: input characters; only read.
 * length:   number of input characters. Only the whole 16-character blocks
 *           are processed; the trailing length % 16 characters are ignored.
 *
 * Returns true when every whole block was decoded. Returns false as soon as
 * a block contains a character outside the Base64 alphabet (this includes
 * '=' padding); blocks before it have already been written to dst, the
 * offending block and everything after it are not decoded.
 */
BASE64_TARGET_SSSE3
bool base64_decode_ssse3( void* dst_void, const void* src_void, size_t length )
{
    const unsigned char* src = (const unsigned char*)src_void;
    unsigned char* dst = (unsigned char*)dst_void;

    // Vector copies of the lookup tables (named *_v so they do not shadow
    // the file-scope arrays that hold the same values).
    const __m128i delta_asso_v = _mm_setr_epi8(
        0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F
    );
    const __m128i delta_values_v = _mm_setr_epi8(
        0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
        0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9
    );
    const __m128i check_asso_v = _mm_setr_epi8(
        0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
        0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F
    );
    const __m128i check_values_v = _mm_setr_epi8(
        0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6,
        0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80
    );
    // Reverses the three payload bytes of each 32-bit lane and packs the
    // four triples together; index -1 writes a zero byte.
    const __m128i pack_shuffle = _mm_setr_epi8(
        2, 1, 0, 6, 5, 4, 10, 9,
        8, 14, 13, 12, -1, -1, -1, -1
    );

    const unsigned char* const end = src + (length & ~(size_t)15);
    for( ; src != end; src += 16 ){
        const __m128i asrc = _mm_loadu_si128((const __m128i *)src);
        const __m128i shifted = _mm_srli_epi32(asrc, 3);

        // Per-byte hashes: average of the low-nibble table entry and the
        // shifted input; the low nibble of each hash indexes a value table.
        const __m128i delta_hash = _mm_avg_epu8(_mm_shuffle_epi8(delta_asso_v, asrc), shifted);
        const __m128i check_hash = _mm_avg_epu8(_mm_shuffle_epi8(check_asso_v, asrc), shifted);

        // Saturating add of the selected offsets: `out` holds the 6-bit
        // values, `chk` has the high bit set for every invalid character.
        __m128i out = _mm_adds_epi8(_mm_shuffle_epi8(delta_values_v, delta_hash), asrc);
        const __m128i chk = _mm_adds_epi8(_mm_shuffle_epi8(check_values_v, check_hash), asrc);

        if( _mm_movemask_epi8(chk) != 0 ){
            return false;
        }

        // Merge four 6-bit values into 24 bits per 32-bit lane:
        // first pairs (a * 64 + b), then pairs of pairs (ab * 4096 + cd).
        out = _mm_maddubs_epi16(out, _mm_set1_epi32(0x01400140));
        out = _mm_madd_epi16(out, _mm_set1_epi32(0x00011000));
        out = _mm_shuffle_epi8(out, pack_shuffle);

        _mm_storeu_si128((__m128i *)dst, out);
        dst += 12;
    }
    return true;
}
int main () | |
{ | |
print_hash( check_asso ); | |
print_hash( delta_asso ); | |
check_unsigned_hash(); | |
check_decode(); | |
check_invalid_char_detection(); | |
static unsigned char out[0x4000]; | |
memset(out,0,sizeof(out)); | |
base64_decode_ssse3(out, (void*)moby_dick_base64, -1); | |
printf( "\n\n%s", out); | |
printf("\npress enter to exit\n"); | |
getchar(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment