Created
August 6, 2024 11:56
-
-
Save randombit/234af030daebab50a96f632eb897bfd2 to your computer and use it in GitHub Desktop.
Camellia using GFNI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <botan/internal/simd_avx2.h> | |
#include <botan/internal/simd_avx2_gfni.h> | |
#include <immintrin.h> | |
#include <botan/hex.h> | |
#include <botan/system_rng.h> | |
namespace Botan { | |
/**
* Byte-wise GFNI formulation intended to match the SBOX1 table in this file.
*
* Two stages are applied to every byte of @p x:
*  1. gf2p8affine with the input matrix/constant below
*  2. gf2p8affineinv with the output matrix/constant below
*
* NOTE(review): main() only exercises camellia_s1234, so this standalone
* variant is not checked against SBOX1 by the visible test harness.
*
* Matrix rows are kept flush-left so the raw-string bytes handed to
* gfni_matrix stay exactly as they were.
*
* @param x 32 input bytes
* @return the transformed 32 bytes
*/
SIMD_8x32 camellia_s1(SIMD_8x32 x) {
   // Input-side affine step (same matrix/constant as camellia_s2 and camellia_s3)
   constexpr uint8_t in_const = 0x45;  // 0b01000101
   constexpr uint64_t in_matrix = gfni_matrix(R"(
1 1 1 0 1 1 0 1
0 0 1 1 0 0 1 0
1 1 0 1 0 0 0 0
1 0 1 1 0 0 1 1
0 0 0 0 1 1 0 0
1 0 1 0 0 1 0 0
0 0 1 0 1 1 0 0
1 0 0 0 0 1 1 0)");

   // Output-side step, passed to the "affine + inverse" helper
   constexpr uint8_t out_const = 0x6E;  // 0b01101110
   constexpr uint64_t out_matrix = gfni_matrix(R"(
0 0 0 0 0 0 0 1
0 1 1 0 0 1 1 0
1 0 1 1 1 1 1 0
0 0 0 1 1 0 1 1
1 0 0 0 1 1 1 0
0 1 0 1 1 1 1 0
0 1 1 1 1 1 1 1
0 0 0 1 1 1 0 0
)");

   return gf2p8affineinv<out_matrix, out_const>(gf2p8affine<in_matrix, in_const>(x));
}
/**
* Byte-wise GFNI formulation intended to match the SBOX2 table in this file.
*
* The input stage is identical to camellia_s1. The output matrix is
* camellia_s1's with its last row moved to the top, and the output constant
* 0xDC is 0x6E rotated left by one bit - consistent with the file's note
* that SBOX2[x] = rotl<1>(SBOX1[x]).
*
* NOTE(review): main() only exercises camellia_s1234, so this standalone
* variant is not checked against SBOX2 by the visible test harness.
*
* Matrix rows are kept flush-left so the raw-string bytes handed to
* gfni_matrix stay exactly as they were.
*
* @param x 32 input bytes
* @return the transformed 32 bytes
*/
SIMD_8x32 camellia_s2(SIMD_8x32 x) {
   // Input-side affine step (shared with camellia_s1 / camellia_s3)
   constexpr uint8_t in_const = 0x45;  // 0b01000101
   constexpr uint64_t in_matrix = gfni_matrix(R"(
1 1 1 0 1 1 0 1
0 0 1 1 0 0 1 0
1 1 0 1 0 0 0 0
1 0 1 1 0 0 1 1
0 0 0 0 1 1 0 0
1 0 1 0 0 1 0 0
0 0 1 0 1 1 0 0
1 0 0 0 0 1 1 0)");

   // Output-side step with rows rotated relative to camellia_s1
   constexpr uint8_t out_const = 0xDC;  // 0b11011100
   constexpr uint64_t out_matrix = gfni_matrix(R"(
0 0 0 1 1 1 0 0
0 0 0 0 0 0 0 1
0 1 1 0 0 1 1 0
1 0 1 1 1 1 1 0
0 0 0 1 1 0 1 1
1 0 0 0 1 1 1 0
0 1 0 1 1 1 1 0
0 1 1 1 1 1 1 1
)");

   return gf2p8affineinv<out_matrix, out_const>(gf2p8affine<in_matrix, in_const>(x));
}
/**
* Byte-wise GFNI formulation intended to match the SBOX3 table in this file.
*
* The input stage is identical to camellia_s1. The output matrix is
* camellia_s1's with its first row moved to the bottom, and the output
* constant 0x37 is 0x6E rotated left by seven bits - consistent with the
* file's note that SBOX3[x] = rotl<7>(SBOX1[x]).
*
* NOTE(review): main() only exercises camellia_s1234, so this standalone
* variant is not checked against SBOX3 by the visible test harness.
*
* Matrix rows are kept flush-left so the raw-string bytes handed to
* gfni_matrix stay exactly as they were.
*
* @param x 32 input bytes
* @return the transformed 32 bytes
*/
SIMD_8x32 camellia_s3(SIMD_8x32 x) {
   // Input-side affine step (shared with camellia_s1 / camellia_s2)
   constexpr uint8_t in_const = 0x45;  // 0b01000101
   constexpr uint64_t in_matrix = gfni_matrix(R"(
1 1 1 0 1 1 0 1
0 0 1 1 0 0 1 0
1 1 0 1 0 0 0 0
1 0 1 1 0 0 1 1
0 0 0 0 1 1 0 0
1 0 1 0 0 1 0 0
0 0 1 0 1 1 0 0
1 0 0 0 0 1 1 0)");

   // Output-side step with rows rotated relative to camellia_s1
   constexpr uint8_t out_const = 0x37;  // 0b00110111
   constexpr uint64_t out_matrix = gfni_matrix(R"(
0 1 1 0 0 1 1 0
1 0 1 1 1 1 1 0
0 0 0 1 1 0 1 1
1 0 0 0 1 1 1 0
0 1 0 1 1 1 1 0
0 1 1 1 1 1 1 1
0 0 0 1 1 1 0 0
0 0 0 0 0 0 0 1
)");

   return gf2p8affineinv<out_matrix, out_const>(gf2p8affine<in_matrix, in_const>(x));
}
/**
* Byte-wise GFNI formulation intended to match the SBOX4 table in this file.
*
* The output stage is identical to camellia_s1. The input matrix is
* camellia_s1's with every row rotated left by one column (the input
* constant is unchanged) - consistent with the file's note that
* SBOX4[x] = SBOX1[rotl<1>(x)].
*
* NOTE(review): main() only exercises camellia_s1234, so this standalone
* variant is not checked against SBOX4 by the visible test harness.
*
* Matrix rows are kept flush-left so the raw-string bytes handed to
* gfni_matrix stay exactly as they were.
*
* @param x 32 input bytes
* @return the transformed 32 bytes
*/
SIMD_8x32 camellia_s4(SIMD_8x32 x) {
   // Input-side affine step with columns rotated relative to camellia_s1
   constexpr uint8_t in_const = 0x45;  // 0b01000101
   constexpr uint64_t in_matrix = gfni_matrix(R"(
1 1 0 1 1 0 1 1
0 1 1 0 0 1 0 0
1 0 1 0 0 0 0 1
0 1 1 0 0 1 1 1
0 0 0 1 1 0 0 0
0 1 0 0 1 0 0 1
0 1 0 1 1 0 0 0
0 0 0 0 1 1 0 1)");

   // Output-side step (same matrix/constant as camellia_s1)
   constexpr uint8_t out_const = 0x6E;  // 0b01101110
   constexpr uint64_t out_matrix = gfni_matrix(R"(
0 0 0 0 0 0 0 1
0 1 1 0 0 1 1 0
1 0 1 1 1 1 1 0
0 0 0 1 1 0 1 1
1 0 0 0 1 1 1 0
0 1 0 1 1 1 1 0
0 1 1 1 1 1 1 1
0 0 0 1 1 1 0 0
)");

   return gf2p8affineinv<out_matrix, out_const>(gf2p8affine<in_matrix, in_const>(x));
}
/**
* Applies four different S-box formulations to one 256-bit vector, one per
* 64-bit lane, using the raw GFNI intrinsics with a per-lane matrix.
*
* Lane assignment (as checked by main() in this file after load_le):
*   bytes  0..7  -> SBOX1
*   bytes  8..15 -> SBOX2
*   bytes 16..23 -> SBOX3
*   bytes 24..31 -> SBOX4
*
* Matrix rows are kept flush-left so the raw-string bytes handed to
* gfni_matrix stay exactly as they were.
*
* @param x 32 input bytes
* @return the 32 substituted bytes
*/
SIMD_8x32 camellia_s1234(SIMD_8x32 x) {
   // ---- input-side matrices: S1/S2/S3 share one, S4 has its own ----
   constexpr uint8_t in_const = 0b01000101;

   constexpr uint64_t in_s123 = gfni_matrix(R"(
1 1 1 0 1 1 0 1
0 0 1 1 0 0 1 0
1 1 0 1 0 0 0 0
1 0 1 1 0 0 1 1
0 0 0 0 1 1 0 0
1 0 1 0 0 1 0 0
0 0 1 0 1 1 0 0
1 0 0 0 0 1 1 0)");

   constexpr uint64_t in_s4 = gfni_matrix(R"(
1 1 0 1 1 0 1 1
0 1 1 0 0 1 0 0
1 0 1 0 0 0 0 1
0 1 1 0 0 1 1 1
0 0 0 1 1 0 0 0
0 1 0 0 1 0 0 1
0 1 0 1 1 0 0 0
0 0 0 0 1 1 0 1)");

   // ---- output-side matrices: S1 and S4 share one ----
   constexpr uint64_t out_s14 = gfni_matrix(R"(
0 0 0 0 0 0 0 1
0 1 1 0 0 1 1 0
1 0 1 1 1 1 1 0
0 0 0 1 1 0 1 1
1 0 0 0 1 1 1 0
0 1 0 1 1 1 1 0
0 1 1 1 1 1 1 1
0 0 0 1 1 1 0 0
)");

   constexpr uint64_t out_s2 = gfni_matrix(R"(
0 0 0 1 1 1 0 0
0 0 0 0 0 0 0 1
0 1 1 0 0 1 1 0
1 0 1 1 1 1 1 0
0 0 0 1 1 0 1 1
1 0 0 0 1 1 1 0
0 1 0 1 1 1 1 0
0 1 1 1 1 1 1 1
)");

   constexpr uint64_t out_s3 = gfni_matrix(R"(
0 1 1 0 0 1 1 0
1 0 1 1 1 1 1 0
0 0 0 1 1 0 1 1
1 0 0 0 1 1 1 0
0 1 0 1 1 1 1 0
0 1 1 1 1 1 1 1
0 0 0 1 1 1 0 0
0 0 0 0 0 0 0 1
)");

   // _mm256_set_epi64x takes its arguments from the highest lane down to the
   // lowest, so the S4 entry comes first and the S1 entry last.
   const auto in_lanes = _mm256_set_epi64x(in_s4, in_s123, in_s123, in_s123);
   const auto out_lanes = _mm256_set_epi64x(out_s14, out_s3, out_s2, out_s14);

   // Per-lane output constants, each byte value repeated across its lane:
   // 0x6E for S1/S4, 0xDC for S2, 0x37 for S3 (the same values used as post_c
   // by camellia_s1..camellia_s4).
   const auto out_consts = _mm256_set_epi64x(
      0x6E6E6E6E6E6E6E6E, 0x3737373737373737, 0xDCDCDCDCDCDCDCDC, 0x6E6E6E6E6E6E6E6E);

   const auto affine = _mm256_gf2p8affine_epi64_epi8(x.raw(), in_lanes, in_const);

   // The intrinsic's constant is a single immediate byte, while the constants
   // here differ per lane; so 0 is passed and the constants are XORed in after.
   const auto inverted = _mm256_gf2p8affineinv_epi64_epi8(affine, out_lanes, 0);

   return SIMD_8x32(_mm256_xor_si256(out_consts, inverted));
}
// Reference lookup table for the first S-box. main() uses it as the expected
// output for bytes 0..7 of each 32-byte test block.
// Laid out 16 entries per row: row index = high nibble, column = low nibble.
alignas(256) const uint8_t SBOX1[256] = {
   0x70, 0x82, 0x2C, 0xEC, 0xB3, 0x27, 0xC0, 0xE5, 0xE4, 0x85, 0x57, 0x35, 0xEA, 0x0C, 0xAE, 0x41,
   0x23, 0xEF, 0x6B, 0x93, 0x45, 0x19, 0xA5, 0x21, 0xED, 0x0E, 0x4F, 0x4E, 0x1D, 0x65, 0x92, 0xBD,
   0x86, 0xB8, 0xAF, 0x8F, 0x7C, 0xEB, 0x1F, 0xCE, 0x3E, 0x30, 0xDC, 0x5F, 0x5E, 0xC5, 0x0B, 0x1A,
   0xA6, 0xE1, 0x39, 0xCA, 0xD5, 0x47, 0x5D, 0x3D, 0xD9, 0x01, 0x5A, 0xD6, 0x51, 0x56, 0x6C, 0x4D,
   0x8B, 0x0D, 0x9A, 0x66, 0xFB, 0xCC, 0xB0, 0x2D, 0x74, 0x12, 0x2B, 0x20, 0xF0, 0xB1, 0x84, 0x99,
   0xDF, 0x4C, 0xCB, 0xC2, 0x34, 0x7E, 0x76, 0x05, 0x6D, 0xB7, 0xA9, 0x31, 0xD1, 0x17, 0x04, 0xD7,
   0x14, 0x58, 0x3A, 0x61, 0xDE, 0x1B, 0x11, 0x1C, 0x32, 0x0F, 0x9C, 0x16, 0x53, 0x18, 0xF2, 0x22,
   0xFE, 0x44, 0xCF, 0xB2, 0xC3, 0xB5, 0x7A, 0x91, 0x24, 0x08, 0xE8, 0xA8, 0x60, 0xFC, 0x69, 0x50,
   0xAA, 0xD0, 0xA0, 0x7D, 0xA1, 0x89, 0x62, 0x97, 0x54, 0x5B, 0x1E, 0x95, 0xE0, 0xFF, 0x64, 0xD2,
   0x10, 0xC4, 0x00, 0x48, 0xA3, 0xF7, 0x75, 0xDB, 0x8A, 0x03, 0xE6, 0xDA, 0x09, 0x3F, 0xDD, 0x94,
   0x87, 0x5C, 0x83, 0x02, 0xCD, 0x4A, 0x90, 0x33, 0x73, 0x67, 0xF6, 0xF3, 0x9D, 0x7F, 0xBF, 0xE2,
   0x52, 0x9B, 0xD8, 0x26, 0xC8, 0x37, 0xC6, 0x3B, 0x81, 0x96, 0x6F, 0x4B, 0x13, 0xBE, 0x63, 0x2E,
   0xE9, 0x79, 0xA7, 0x8C, 0x9F, 0x6E, 0xBC, 0x8E, 0x29, 0xF5, 0xF9, 0xB6, 0x2F, 0xFD, 0xB4, 0x59,
   0x78, 0x98, 0x06, 0x6A, 0xE7, 0x46, 0x71, 0xBA, 0xD4, 0x25, 0xAB, 0x42, 0x88, 0xA2, 0x8D, 0xFA,
   0x72, 0x07, 0xB9, 0x55, 0xF8, 0xEE, 0xAC, 0x0A, 0x36, 0x49, 0x2A, 0x68, 0x3C, 0x38, 0xF1, 0xA4,
   0x40, 0x28, 0xD3, 0x7B, 0xBB, 0xC9, 0x43, 0xC1, 0x15, 0xE3, 0xAD, 0xF4, 0x77, 0xC7, 0x80, 0x9E,
};
// Reference lookup table for the second S-box: SBOX2[x] = rotl<1>(SBOX1[x]).
// main() uses it as the expected output for bytes 8..15 of each test block.
// Laid out 16 entries per row: row index = high nibble, column = low nibble.
alignas(256) const uint8_t SBOX2[256] = {
   0xE0, 0x05, 0x58, 0xD9, 0x67, 0x4E, 0x81, 0xCB, 0xC9, 0x0B, 0xAE, 0x6A, 0xD5, 0x18, 0x5D, 0x82,
   0x46, 0xDF, 0xD6, 0x27, 0x8A, 0x32, 0x4B, 0x42, 0xDB, 0x1C, 0x9E, 0x9C, 0x3A, 0xCA, 0x25, 0x7B,
   0x0D, 0x71, 0x5F, 0x1F, 0xF8, 0xD7, 0x3E, 0x9D, 0x7C, 0x60, 0xB9, 0xBE, 0xBC, 0x8B, 0x16, 0x34,
   0x4D, 0xC3, 0x72, 0x95, 0xAB, 0x8E, 0xBA, 0x7A, 0xB3, 0x02, 0xB4, 0xAD, 0xA2, 0xAC, 0xD8, 0x9A,
   0x17, 0x1A, 0x35, 0xCC, 0xF7, 0x99, 0x61, 0x5A, 0xE8, 0x24, 0x56, 0x40, 0xE1, 0x63, 0x09, 0x33,
   0xBF, 0x98, 0x97, 0x85, 0x68, 0xFC, 0xEC, 0x0A, 0xDA, 0x6F, 0x53, 0x62, 0xA3, 0x2E, 0x08, 0xAF,
   0x28, 0xB0, 0x74, 0xC2, 0xBD, 0x36, 0x22, 0x38, 0x64, 0x1E, 0x39, 0x2C, 0xA6, 0x30, 0xE5, 0x44,
   0xFD, 0x88, 0x9F, 0x65, 0x87, 0x6B, 0xF4, 0x23, 0x48, 0x10, 0xD1, 0x51, 0xC0, 0xF9, 0xD2, 0xA0,
   0x55, 0xA1, 0x41, 0xFA, 0x43, 0x13, 0xC4, 0x2F, 0xA8, 0xB6, 0x3C, 0x2B, 0xC1, 0xFF, 0xC8, 0xA5,
   0x20, 0x89, 0x00, 0x90, 0x47, 0xEF, 0xEA, 0xB7, 0x15, 0x06, 0xCD, 0xB5, 0x12, 0x7E, 0xBB, 0x29,
   0x0F, 0xB8, 0x07, 0x04, 0x9B, 0x94, 0x21, 0x66, 0xE6, 0xCE, 0xED, 0xE7, 0x3B, 0xFE, 0x7F, 0xC5,
   0xA4, 0x37, 0xB1, 0x4C, 0x91, 0x6E, 0x8D, 0x76, 0x03, 0x2D, 0xDE, 0x96, 0x26, 0x7D, 0xC6, 0x5C,
   0xD3, 0xF2, 0x4F, 0x19, 0x3F, 0xDC, 0x79, 0x1D, 0x52, 0xEB, 0xF3, 0x6D, 0x5E, 0xFB, 0x69, 0xB2,
   0xF0, 0x31, 0x0C, 0xD4, 0xCF, 0x8C, 0xE2, 0x75, 0xA9, 0x4A, 0x57, 0x84, 0x11, 0x45, 0x1B, 0xF5,
   0xE4, 0x0E, 0x73, 0xAA, 0xF1, 0xDD, 0x59, 0x14, 0x6C, 0x92, 0x54, 0xD0, 0x78, 0x70, 0xE3, 0x49,
   0x80, 0x50, 0xA7, 0xF6, 0x77, 0x93, 0x86, 0x83, 0x2A, 0xC7, 0x5B, 0xE9, 0xEE, 0x8F, 0x01, 0x3D,
};
// Reference lookup table for the third S-box: SBOX3[x] = rotl<7>(SBOX1[x]).
// main() uses it as the expected output for bytes 16..23 of each test block.
// Laid out 16 entries per row: row index = high nibble, column = low nibble.
alignas(256) const uint8_t SBOX3[256] = {
   0x38, 0x41, 0x16, 0x76, 0xD9, 0x93, 0x60, 0xF2, 0x72, 0xC2, 0xAB, 0x9A, 0x75, 0x06, 0x57, 0xA0,
   0x91, 0xF7, 0xB5, 0xC9, 0xA2, 0x8C, 0xD2, 0x90, 0xF6, 0x07, 0xA7, 0x27, 0x8E, 0xB2, 0x49, 0xDE,
   0x43, 0x5C, 0xD7, 0xC7, 0x3E, 0xF5, 0x8F, 0x67, 0x1F, 0x18, 0x6E, 0xAF, 0x2F, 0xE2, 0x85, 0x0D,
   0x53, 0xF0, 0x9C, 0x65, 0xEA, 0xA3, 0xAE, 0x9E, 0xEC, 0x80, 0x2D, 0x6B, 0xA8, 0x2B, 0x36, 0xA6,
   0xC5, 0x86, 0x4D, 0x33, 0xFD, 0x66, 0x58, 0x96, 0x3A, 0x09, 0x95, 0x10, 0x78, 0xD8, 0x42, 0xCC,
   0xEF, 0x26, 0xE5, 0x61, 0x1A, 0x3F, 0x3B, 0x82, 0xB6, 0xDB, 0xD4, 0x98, 0xE8, 0x8B, 0x02, 0xEB,
   0x0A, 0x2C, 0x1D, 0xB0, 0x6F, 0x8D, 0x88, 0x0E, 0x19, 0x87, 0x4E, 0x0B, 0xA9, 0x0C, 0x79, 0x11,
   0x7F, 0x22, 0xE7, 0x59, 0xE1, 0xDA, 0x3D, 0xC8, 0x12, 0x04, 0x74, 0x54, 0x30, 0x7E, 0xB4, 0x28,
   0x55, 0x68, 0x50, 0xBE, 0xD0, 0xC4, 0x31, 0xCB, 0x2A, 0xAD, 0x0F, 0xCA, 0x70, 0xFF, 0x32, 0x69,
   0x08, 0x62, 0x00, 0x24, 0xD1, 0xFB, 0xBA, 0xED, 0x45, 0x81, 0x73, 0x6D, 0x84, 0x9F, 0xEE, 0x4A,
   0xC3, 0x2E, 0xC1, 0x01, 0xE6, 0x25, 0x48, 0x99, 0xB9, 0xB3, 0x7B, 0xF9, 0xCE, 0xBF, 0xDF, 0x71,
   0x29, 0xCD, 0x6C, 0x13, 0x64, 0x9B, 0x63, 0x9D, 0xC0, 0x4B, 0xB7, 0xA5, 0x89, 0x5F, 0xB1, 0x17,
   0xF4, 0xBC, 0xD3, 0x46, 0xCF, 0x37, 0x5E, 0x47, 0x94, 0xFA, 0xFC, 0x5B, 0x97, 0xFE, 0x5A, 0xAC,
   0x3C, 0x4C, 0x03, 0x35, 0xF3, 0x23, 0xB8, 0x5D, 0x6A, 0x92, 0xD5, 0x21, 0x44, 0x51, 0xC6, 0x7D,
   0x39, 0x83, 0xDC, 0xAA, 0x7C, 0x77, 0x56, 0x05, 0x1B, 0xA4, 0x15, 0x34, 0x1E, 0x1C, 0xF8, 0x52,
   0x20, 0x14, 0xE9, 0xBD, 0xDD, 0xE4, 0xA1, 0xE0, 0x8A, 0xF1, 0xD6, 0x7A, 0xBB, 0xE3, 0x40, 0x4F,
};
// Reference lookup table for the fourth S-box: SBOX4[x] = SBOX1[rotl<1>(x)].
// main() uses it as the expected output for bytes 24..31 of each test block.
// Laid out 16 entries per row: row index = high nibble, column = low nibble.
alignas(256) const uint8_t SBOX4[256] = {
   0x70, 0x2C, 0xB3, 0xC0, 0xE4, 0x57, 0xEA, 0xAE, 0x23, 0x6B, 0x45, 0xA5, 0xED, 0x4F, 0x1D, 0x92,
   0x86, 0xAF, 0x7C, 0x1F, 0x3E, 0xDC, 0x5E, 0x0B, 0xA6, 0x39, 0xD5, 0x5D, 0xD9, 0x5A, 0x51, 0x6C,
   0x8B, 0x9A, 0xFB, 0xB0, 0x74, 0x2B, 0xF0, 0x84, 0xDF, 0xCB, 0x34, 0x76, 0x6D, 0xA9, 0xD1, 0x04,
   0x14, 0x3A, 0xDE, 0x11, 0x32, 0x9C, 0x53, 0xF2, 0xFE, 0xCF, 0xC3, 0x7A, 0x24, 0xE8, 0x60, 0x69,
   0xAA, 0xA0, 0xA1, 0x62, 0x54, 0x1E, 0xE0, 0x64, 0x10, 0x00, 0xA3, 0x75, 0x8A, 0xE6, 0x09, 0xDD,
   0x87, 0x83, 0xCD, 0x90, 0x73, 0xF6, 0x9D, 0xBF, 0x52, 0xD8, 0xC8, 0xC6, 0x81, 0x6F, 0x13, 0x63,
   0xE9, 0xA7, 0x9F, 0xBC, 0x29, 0xF9, 0x2F, 0xB4, 0x78, 0x06, 0xE7, 0x71, 0xD4, 0xAB, 0x88, 0x8D,
   0x72, 0xB9, 0xF8, 0xAC, 0x36, 0x2A, 0x3C, 0xF1, 0x40, 0xD3, 0xBB, 0x43, 0x15, 0xAD, 0x77, 0x80,
   0x82, 0xEC, 0x27, 0xE5, 0x85, 0x35, 0x0C, 0x41, 0xEF, 0x93, 0x19, 0x21, 0x0E, 0x4E, 0x65, 0xBD,
   0xB8, 0x8F, 0xEB, 0xCE, 0x30, 0x5F, 0xC5, 0x1A, 0xE1, 0xCA, 0x47, 0x3D, 0x01, 0xD6, 0x56, 0x4D,
   0x0D, 0x66, 0xCC, 0x2D, 0x12, 0x20, 0xB1, 0x99, 0x4C, 0xC2, 0x7E, 0x05, 0xB7, 0x31, 0x17, 0xD7,
   0x58, 0x61, 0x1B, 0x1C, 0x0F, 0x16, 0x18, 0x22, 0x44, 0xB2, 0xB5, 0x91, 0x08, 0xA8, 0xFC, 0x50,
   0xD0, 0x7D, 0x89, 0x97, 0x5B, 0x95, 0xFF, 0xD2, 0xC4, 0x48, 0xF7, 0xDB, 0x03, 0xDA, 0x3F, 0x94,
   0x5C, 0x02, 0x4A, 0x33, 0x67, 0xF3, 0x7F, 0xE2, 0x9B, 0x26, 0x37, 0x3B, 0x96, 0x4B, 0xBE, 0x2E,
   0x79, 0x8C, 0x6E, 0x8E, 0xF5, 0xB6, 0xFD, 0x59, 0x98, 0x6A, 0x46, 0xBA, 0x25, 0x42, 0xA2, 0xFA,
   0x07, 0x55, 0xEE, 0x0A, 0x49, 0x68, 0x38, 0xA4, 0x28, 0x7B, 0xC9, 0xC1, 0xE3, 0xF4, 0xC7, 0x9E,
};
} | |
int main() { | |
using namespace Botan; | |
for(size_t i = 0; i != 8; ++i) { | |
uint8_t input[32]; | |
uint8_t exp[32]; | |
for(size_t j = 0; j != 32; ++j) { | |
input[j] = 32*i + j; | |
if(j < 8) { | |
exp[j] = SBOX1[32*i + j]; | |
} else if(j < 16) { | |
exp[j] = SBOX2[32*i + j]; | |
} else if(j < 24) { | |
exp[j] = SBOX3[32*i + j]; | |
} else { | |
exp[j] = SBOX4[32*i + j]; | |
} | |
} | |
SIMD_8x32 x = SIMD_8x32::load_le(input); | |
x = camellia_s1234(x); | |
uint8_t gfni[32]; | |
x.store_le(gfni); | |
//printf("%s\n", hex_encode(input, 32).c_str()); | |
printf("%s\n", hex_encode(exp, 32).c_str()); | |
printf("%s\n", hex_encode(gfni, 32).c_str()); | |
bool bad = false; | |
for(size_t j = 0; j != 32; ++j) { | |
printf("%02X", gfni[j] ^ exp[j]); | |
if(gfni[j] != exp[j]) { | |
printf("bad at %d\n", j); | |
bad = true; | |
} | |
} | |
printf("\n"); | |
if(bad) { | |
return 1; | |
} | |
} | |
printf("ok\n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment