Skip to content

Instantly share code, notes, and snippets.

@acapola
Created August 31, 2015 14:42
Show Gist options
  • Save acapola/d5b940da024080dfaf5f to your computer and use it in GitHub Desktop.
Save acapola/d5b940da024080dfaf5f to your computer and use it in GitHub Desktop.
AES128 how-to using GCC and Intel AES-NI
#include <stdint.h> //for int8_t
#include <string.h> //for memcmp
#include <wmmintrin.h> //for intrinsics for AES-NI
//compile with gcc using the following arguments: -g -O0 -Wall -msse2 -msse -march=native -maes
//internal stuff
//macros
/*
 * DO_ENC_BLOCK(m, k): run the AES-128 encryption round sequence on one block.
 *
 *   m - lvalue holding the block; it is overwritten in place with the result.
 *       It is evaluated several times, so pass a plain variable, never an
 *       expression with side effects.
 *   k - expression yielding the round-key array; elements 0..10 are read.
 *
 * Sequence: XOR with k[0], nine _mm_aesenc_si128 rounds using k[1]..k[9],
 * then one _mm_aesenclast_si128 round using k[10].
 *
 * Both arguments are parenthesized in the expansion so that a pointer
 * expression such as `keys + offset` is indexed as a whole.
 */
#define DO_ENC_BLOCK(m,k) \
    do{\
        (m) = _mm_xor_si128       ((m), (k)[ 0]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 1]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 2]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 3]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 4]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 5]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 6]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 7]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 8]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 9]); \
        (m) = _mm_aesenclast_si128((m), (k)[10]); \
    }while(0)
/*
 * DO_DEC_BLOCK(m, k): run the AES-128 decryption round sequence on one block.
 *
 *   m - lvalue holding the block; it is overwritten in place with the result.
 *       It is evaluated several times, so pass a plain variable, never an
 *       expression with side effects.
 *   k - expression yielding the round-key array; elements 10..19 and
 *       element 0 are read.
 *
 * Sequence: XOR with k[10], nine _mm_aesdec_si128 rounds using k[11]..k[19],
 * then one _mm_aesdeclast_si128 round using k[0].
 *
 * Both arguments are parenthesized in the expansion so that a pointer
 * expression such as `keys + offset` is indexed as a whole.
 */
#define DO_DEC_BLOCK(m,k) \
    do{\
        (m) = _mm_xor_si128       ((m), (k)[10 + 0]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 1]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 2]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 3]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 4]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 5]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 6]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 7]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 8]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10 + 9]); \
        (m) = _mm_aesdeclast_si128((m), (k)[0]); \
    }while(0)
/*
 * AES_128_key_exp(prev_key, round_const): produce the next round key by
 * handing the previous key, together with the output of
 * _mm_aeskeygenassist_si128 for that key and round constant, to
 * aes_128_key_expansion(). prev_key appears twice in the expansion.
 */
#define AES_128_key_exp(prev_key, round_const) \
    aes_128_key_expansion((prev_key), \
                          _mm_aeskeygenassist_si128((prev_key), (round_const)))

/*
 * The expanded key, filled in by aes128_load_key():
 *   [0..10]  round keys consumed by DO_ENC_BLOCK ([0] and [10] are also
 *            used by DO_DEC_BLOCK),
 *   [11..19] _mm_aesimc_si128-transformed keys consumed by DO_DEC_BLOCK.
 */
static __m128i key_schedule[11 + 9];
/*
 * Combine a round key with a key-generation assist value to form the next
 * round key.
 *
 *   key      - the previous round key.
 *   keygened - assist value; only its highest 32-bit lane is used.
 *
 * Returns the new round key: each 32-bit lane i holds the XOR of input
 * lanes 0..i of `key`, XORed with the highest lane of `keygened`.
 */
static __m128i aes_128_key_expansion(__m128i key, __m128i keygened){
    /* Replicate lane 3 of the assist value into all four lanes. */
    const __m128i assist = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3,3,3,3));
    __m128i folded = key;

    /* Three "shift left by 4 bytes, then XOR" passes build the running XOR
       of the 32-bit lanes. */
    for (int pass = 0; pass < 3; ++pass) {
        folded = _mm_xor_si128(folded, _mm_slli_si128(folded, 4));
    }

    return _mm_xor_si128(folded, assist);
}
//public API
/*
 * Expand a 16-byte AES-128 key into the file-scope key_schedule array.
 *
 *   enc_key - pointer to the 16 key bytes; read with an unaligned load and
 *             not written to.
 *
 * After the call key_schedule[0..10] hold the encryption round keys and
 * key_schedule[11..19] hold the keys for the middle decryption rounds.
 */
void aes128_load_key(int8_t *enc_key){
    const __m128i *user_key = (const __m128i *) enc_key;
    int i;

/* Derive round key `idx` from round key `idx - 1`. Each step is spelled out
   with a literal round constant rather than looped over. */
#define NEXT_ROUND_KEY(idx, rcon) \
    key_schedule[(idx)] = AES_128_key_exp(key_schedule[(idx) - 1], (rcon))

    key_schedule[0] = _mm_loadu_si128(user_key);
    NEXT_ROUND_KEY( 1, 0x01);
    NEXT_ROUND_KEY( 2, 0x02);
    NEXT_ROUND_KEY( 3, 0x04);
    NEXT_ROUND_KEY( 4, 0x08);
    NEXT_ROUND_KEY( 5, 0x10);
    NEXT_ROUND_KEY( 6, 0x20);
    NEXT_ROUND_KEY( 7, 0x40);
    NEXT_ROUND_KEY( 8, 0x80);
    NEXT_ROUND_KEY( 9, 0x1B);
    NEXT_ROUND_KEY(10, 0x36);

#undef NEXT_ROUND_KEY

    /*
     * Decryption keys, stored in reverse round order.
     *  - key_schedule[10] serves both the last encryption round and the
     *    first decryption step.
     *  - key_schedule[0] (the original user key) serves both the first
     *    encryption step and the last decryption round.
     *  - The nine keys in between are NOT simply the encryption keys
     *    reversed: each one is passed through _mm_aesimc_si128 first, so
     *    slot 10 + i is derived from slot 10 - i.
     */
    for (i = 1; i <= 9; ++i) {
        key_schedule[10 + i] = _mm_aesimc_si128(key_schedule[10 - i]);
    }
}
/*
 * Encrypt a single 16-byte block with the key currently held in
 * key_schedule (filled by aes128_load_key).
 *
 *   plainText  - 16 input bytes, read with an unaligned load.
 *   cipherText - receives the 16 output bytes via an unaligned store.
 */
void aes128_enc(int8_t *plainText,int8_t *cipherText){
    const __m128i *src = (const __m128i *) plainText;
    __m128i *dst = (__m128i *) cipherText;
    __m128i state = _mm_loadu_si128(src);

    DO_ENC_BLOCK(state, key_schedule);
    _mm_storeu_si128(dst, state);
}
/*
 * Decrypt a single 16-byte block with the key currently held in
 * key_schedule (filled by aes128_load_key).
 *
 *   cipherText - 16 input bytes, read with an unaligned load.
 *   plainText  - receives the 16 output bytes via an unaligned store.
 */
void aes128_dec(int8_t *cipherText,int8_t *plainText){
    const __m128i *src = (const __m128i *) cipherText;
    __m128i *dst = (__m128i *) plainText;
    __m128i state = _mm_loadu_si128(src);

    DO_DEC_BLOCK(state, key_schedule);
    _mm_storeu_si128(dst, state);
}
/*
 * Known-answer self test: loads a fixed key, encrypts a fixed plaintext and
 * decrypts the matching fixed ciphertext, then compares both results with
 * the expected values.
 * NOTE(review): the vector looks like the FIPS-197 Appendix B example;
 * confirm against the standard.
 *
 * Side effect: overwrites the file-scope key schedule with the test key.
 *
 * Returns a bit mask:
 *   0 if no error
 *   1 if encryption failed
 *   2 if decryption failed
 *   3 if both failed
 */
int aes128_self_test(void){
    /* Stored as uint8_t: several bytes exceed 0x7f and do not fit in int8_t.
       The pointers are cast to the int8_t * the API expects at each call. */
    uint8_t plain[] = {0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x07, 0x34};
    uint8_t enc_key[] = {0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c};
    uint8_t cipher[] = {0x39, 0x25, 0x84, 0x1d, 0x02, 0xdc, 0x09, 0xfb, 0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a, 0x0b, 0x32};
    uint8_t computed_cipher[16];
    uint8_t computed_plain[16];
    int out = 0;

    aes128_load_key((int8_t *) enc_key);
    aes128_enc((int8_t *) plain, (int8_t *) computed_cipher);
    /* Decrypt the reference ciphertext, not computed_cipher, so the two
       directions are checked independently. */
    aes128_dec((int8_t *) cipher, (int8_t *) computed_plain);

    if (memcmp(cipher, computed_cipher, sizeof(cipher)) != 0) {
        out |= 1;
    }
    if (memcmp(plain, computed_plain, sizeof(plain)) != 0) {
        out |= 2;
    }
    return out;
}
@k3it
Copy link

k3it commented Dec 3, 2019

Great clean example, tested with NIST recommended vectors and produces correct output. thank you

@VaasKahnGrim
Copy link

Is this using any specific mode, or are modes irrelevant when using AES-NI? I can't really tell based on this usage example.

@VaasKahnGrim
Copy link

Also, as a side note: would changing this to AES-256 be as simple as just changing 128 in the function names to 256?

@acapola
Copy link
Author

acapola commented Mar 12, 2020

Is this using any specific mode, or are modes irrelevant when using AES-NI? I can't really tell based on this usage example.

this is just ECB mode, so any other mode can be built on top.

Also, as a side note: would changing this to AES-256 be as simple as just changing 128 in the function names to 256?

No, pretty much all functions need to be modified

By the way the maintained version is here

@n4sm
Copy link

n4sm commented Aug 18, 2020

Thanks a lot it's quite helpful !!

@Nielsbishere
Copy link

@VaasKahnGrim My understanding is that you need to do a few extra rounds (4 more), and you probably have to collapse the key in a different way.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment