Created
October 10, 2016 04:31
-
-
Save userid/b3baba535298fffcc34392df938d8542 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| From 5f22ddce5c2f301e9e6b2d38315e44388cefce5d Mon Sep 17 00:00:00 2001 | |
| From: Thomas Deutschmann <whissi@whissi.de> | |
| Date: Thu, 22 Sep 2016 19:01:48 +0200 | |
| Subject: [PATCH] Implementation of draft and RFC versions of CHACHA20-POLY1305 | |
| ciphers | |
| Backport of https://github.com/cloudflare/sslconfig/commit/3afa6467ad3b15db2f678f8b5a1f817e56874602 | |
| --- | |
| Configure | 56 +- | |
| Makefile.org | 4 +- | |
| apps/speed.c | 30 +- | |
| crypto/chacha20poly1305/Makefile | 97 +++ | |
| crypto/chacha20poly1305/asm/chacha20_avx.pl | 408 +++++++++++ | |
| crypto/chacha20poly1305/asm/chacha20_avx2.pl | 443 ++++++++++++ | |
| crypto/chacha20poly1305/asm/poly1305_avx.pl | 732 ++++++++++++++++++++ | |
| crypto/chacha20poly1305/asm/poly1305_avx2.pl | 984 +++++++++++++++++++++++++++ | |
| crypto/chacha20poly1305/asm/poly1305_x64.pl | 281 ++++++++ | |
| crypto/chacha20poly1305/chacha20.c | 162 +++++ | |
| crypto/chacha20poly1305/chacha20poly1305.h | 79 +++ | |
| crypto/chacha20poly1305/chapolytest.c | 470 +++++++++++++ | |
| crypto/chacha20poly1305/poly1305.c | 287 ++++++++ | |
| crypto/cryptlib.c | 10 - | |
| crypto/evp/Makefile | 7 +- | |
| crypto/evp/e_chacha20poly1305.c | 435 ++++++++++++ | |
| crypto/evp/evp.h | 4 + | |
| ssl/s3_lib.c | 119 ++++ | |
| ssl/ssl.h | 2 + | |
| ssl/ssl_ciph.c | 60 +- | |
| ssl/ssl_locl.h | 2 + | |
| ssl/tls1.h | 28 + | |
| 22 files changed, 4646 insertions(+), 54 deletions(-) | |
| create mode 100644 crypto/chacha20poly1305/Makefile | |
| create mode 100644 crypto/chacha20poly1305/asm/chacha20_avx.pl | |
| create mode 100644 crypto/chacha20poly1305/asm/chacha20_avx2.pl | |
| create mode 100644 crypto/chacha20poly1305/asm/poly1305_avx.pl | |
| create mode 100644 crypto/chacha20poly1305/asm/poly1305_avx2.pl | |
| create mode 100644 crypto/chacha20poly1305/asm/poly1305_x64.pl | |
| create mode 100644 crypto/chacha20poly1305/chacha20.c | |
| create mode 100644 crypto/chacha20poly1305/chacha20poly1305.h | |
| create mode 100644 crypto/chacha20poly1305/chapolytest.c | |
| create mode 100644 crypto/chacha20poly1305/poly1305.c | |
| create mode 100644 crypto/evp/e_chacha20poly1305.c | |
| diff --git a/Configure b/Configure | |
| index c39f71a..b139783 100755 | |
| --- a/Configure | |
| +++ b/Configure | |
| @@ -150,25 +150,25 @@ my $tlib="-lnsl -lsocket"; | |
| my $bits1="THIRTY_TWO_BIT "; | |
| my $bits2="SIXTY_FOUR_BIT "; | |
| -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o::des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:"; | |
| +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o::des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o::"; | |
| my $x86_elf_asm="$x86_asm:elf"; | |
| -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o rsaz_exp.o rsaz-x86_64.o rsaz-avx2.o:ecp_nistz256.o ecp_nistz256-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o aesni-sha256-x86_64.o aesni-mb-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o sha1-mb-x86_64.o sha256-mb-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o:"; | |
| -my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; | |
| -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o::des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void"; | |
| -my $sparcv8_asm=":sparcv8.o::des_enc-sparc.o fcrypt_b.o:::::::::::::void"; | |
| -my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::sha1-alpha.o:::::::ghash-alpha.o::void"; | |
| -my $mips64_asm=":bn-mips.o mips-mont.o:::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; | |
| +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o rsaz_exp.o rsaz-x86_64.o rsaz-avx2.o:ecp_nistz256.o ecp_nistz256-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o aesni-sha256-x86_64.o aesni-mb-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o sha1-mb-x86_64.o sha256-mb-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o::chacha20_avx.o chacha20_avx2.o poly1305_x64.o poly1305_avx2.o"; | |
| +my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o:::void"; | |
| +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o::des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o:::void"; | |
| +my $sparcv8_asm=":sparcv8.o::des_enc-sparc.o fcrypt_b.o::::::::::::::void"; | |
| +my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::sha1-alpha.o:::::::ghash-alpha.o:::void"; | |
| +my $mips64_asm=":bn-mips.o mips-mont.o:::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::::"; | |
| my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//; | |
| -my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o:::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; | |
| -my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o:::aes_cbc.o aes-armv4.o bsaes-armv7.o aesv8-armx.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o ghashv8-armx.o::void"; | |
| -my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o::::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:"; | |
| -my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; | |
| -my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; | |
| -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o:::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; | |
| +my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o:::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o::"; | |
| +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o:::aes_cbc.o aes-armv4.o bsaes-armv7.o aesv8-armx.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o ghashv8-armx.o:::void"; | |
| +my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o::::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o::"; | |
| +my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:::32"; | |
| +my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:::64"; | |
| +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o:::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o::"; | |
| my $ppc32_asm=$ppc64_asm; | |
| -my $no_asm="::::::::::::::::void"; | |
| +my $no_asm=":::::::::::::::::void"; | |
| # As for $BSDthreads. Idea is to maintain "collective" set of flags, | |
| # which would cover all BSD flavors. -pthread applies to them all, | |
| @@ -220,7 +220,7 @@ my %table=( | |
| "debug-linux-ppro","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -mcpu=pentiumpro -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn", | |
| "debug-linux-elf","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -march=i486 -Wall::-D_REENTRANT::-lefence -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", | |
| "debug-linux-elf-noefence","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -march=i486 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", | |
| -"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o::des-586.o crypt586.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", | |
| +"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o::des-586.o crypt586.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o:::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", | |
| "debug-linux-generic32","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -g -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", | |
| "debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", | |
| "debug-linux-x86_64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -m64 -DL_ENDIAN -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", | |
| @@ -327,7 +327,7 @@ my %table=( | |
| "hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", | |
| "hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa1.1", | |
| "hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1:".eval{my $asm=$parisc20_asm;$asm=~s/2W\./2\./;$asm=~s/:64/:32/;$asm}.":dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_32", | |
| -"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o:::::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64", | |
| +"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o::::::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64", | |
| # More attempts at unified 10.X and 11.X targets for HP C compiler. | |
| # | |
| @@ -584,9 +584,9 @@ my %table=( | |
| # Visual C targets | |
| # | |
| # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64 | |
| -"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32", | |
| +"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::::::::ghash-ia64.o::ias:win32", | |
| "VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32", | |
| -"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32", | |
| +"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::::::::ghash-ia64.o::ias:win32", | |
| "debug-VC-WIN64A","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32", | |
| # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement | |
| # 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE' | |
| @@ -714,6 +714,7 @@ my $idx_wp_obj = $idx++; | |
| my $idx_cmll_obj = $idx++; | |
| my $idx_modes_obj = $idx++; | |
| my $idx_engines_obj = $idx++; | |
| +my $idx_chapoly_obj = $idx++; | |
| my $idx_perlasm_scheme = $idx++; | |
| my $idx_dso_scheme = $idx++; | |
| my $idx_shared_target = $idx++; | |
| @@ -756,6 +757,7 @@ my $bf ="crypto/bf/bf_locl.h"; | |
| my $bn_asm ="bn_asm.o"; | |
| my $des_enc="des_enc.o fcrypt_b.o"; | |
| my $aes_enc="aes_core.o aes_cbc.o"; | |
| +my $chapoly_enc=""; | |
| my $bf_enc ="bf_enc.o"; | |
| my $cast_enc="c_enc.o"; | |
| my $rc4_enc="rc4_enc.o rc4_skey.o"; | |
| @@ -1211,7 +1213,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~ /(^\/|^[a-zA-Z]:[\\\/] | |
| print "IsMK1MF=$IsMK1MF\n"; | |
| -my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
| +my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
| my $cc = $fields[$idx_cc]; | |
| # Allow environment CC to override compiler... | |
| if($ENV{CC}) { | |
| @@ -1240,6 +1242,7 @@ my $wp_obj = $fields[$idx_wp_obj]; | |
| my $cmll_obj = $fields[$idx_cmll_obj]; | |
| my $modes_obj = $fields[$idx_modes_obj]; | |
| my $engines_obj = $fields[$idx_engines_obj]; | |
| +my $chapoly_obj = $fields[$idx_chapoly_obj]; | |
| my $perlasm_scheme = $fields[$idx_perlasm_scheme]; | |
| my $dso_scheme = $fields[$idx_dso_scheme]; | |
| my $shared_target = $fields[$idx_shared_target]; | |
| @@ -1407,7 +1410,7 @@ if ($no_asm) | |
| { | |
| $cpuid_obj=$bn_obj=$ec_obj= | |
| $des_obj=$aes_obj=$bf_obj=$cast_obj=$rc4_obj=$rc5_obj=$cmll_obj= | |
| - $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj=$engines_obj=""; | |
| + $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj=$engines_obj=$chapoly_obj=""; | |
| } | |
| if (!$no_shared) | |
| @@ -1567,6 +1570,14 @@ else | |
| { | |
| $rc4_obj=$rc4_enc; | |
| } | |
| +if ($chapoly_obj =~ /\.o$/) | |
| + { | |
| + $cflags.=" -DCHAPOLY_x86_64_ASM"; | |
| + } | |
| +else | |
| + { | |
| + $chapoly_obj=$chapoly_enc; | |
| + } | |
| if ($sha1_obj =~ /\.o$/) | |
| { | |
| # $sha1_obj=$sha1_enc; | |
| @@ -1751,6 +1762,7 @@ while (<IN>) | |
| s/^WP_ASM_OBJ=.*$/WP_ASM_OBJ= $wp_obj/; | |
| s/^CMLL_ENC=.*$/CMLL_ENC= $cmll_obj/; | |
| s/^MODES_ASM_OBJ.=*$/MODES_ASM_OBJ= $modes_obj/; | |
| + s/^CHAPOLY_ENC=.*$/CHAPOLY_ENC= $chapoly_obj/; | |
| s/^ENGINES_ASM_OBJ.=*$/ENGINES_ASM_OBJ= $engines_obj/; | |
| s/^PERLASM_SCHEME=.*$/PERLASM_SCHEME= $perlasm_scheme/; | |
| s/^PROCESSOR=.*/PROCESSOR= $processor/; | |
| @@ -1813,6 +1825,7 @@ print "RMD160_OBJ_ASM=$rmd160_obj\n"; | |
| print "CMLL_ENC =$cmll_obj\n"; | |
| print "MODES_OBJ =$modes_obj\n"; | |
| print "ENGINES_OBJ =$engines_obj\n"; | |
| +print "CHAPOLY_ENC =$chapoly_obj\n"; | |
| print "PROCESSOR =$processor\n"; | |
| print "RANLIB =$ranlib\n"; | |
| print "ARFLAGS =$arflags\n"; | |
| @@ -2211,7 +2224,7 @@ sub print_table_entry | |
| my ($cc, $cflags, $unistd, $thread_cflag, $sys_id, $lflags, | |
| $bn_ops, $cpuid_obj, $bn_obj, $ec_obj, $des_obj, $aes_obj, $bf_obj, | |
| $md5_obj, $sha1_obj, $cast_obj, $rc4_obj, $rmd160_obj, | |
| - $rc5_obj, $wp_obj, $cmll_obj, $modes_obj, $engines_obj, | |
| + $rc5_obj, $wp_obj, $cmll_obj, $modes_obj, $engines_obj, $chapoly_obj, | |
| $perlasm_scheme, $dso_scheme, $shared_target, $shared_cflag, | |
| $shared_ldflag, $shared_extension, $ranlib, $arflags, $multilib)= | |
| split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
| @@ -2242,6 +2255,7 @@ sub print_table_entry | |
| \$cmll_obj = $cmll_obj | |
| \$modes_obj = $modes_obj | |
| \$engines_obj = $engines_obj | |
| +\$chapoly_obj = $chapoly_obj | |
| \$perlasm_scheme = $perlasm_scheme | |
| \$dso_scheme = $dso_scheme | |
| \$shared_target= $shared_target | |
| diff --git a/Makefile.org b/Makefile.org | |
| index 2377f50..33f4cd1 100644 | |
| --- a/Makefile.org | |
| +++ b/Makefile.org | |
| @@ -92,6 +92,7 @@ BN_ASM= bn_asm.o | |
| EC_ASM= | |
| DES_ENC= des_enc.o fcrypt_b.o | |
| AES_ENC= aes_core.o aes_cbc.o | |
| +CHAPOLY_ENC= | |
| BF_ENC= bf_enc.o | |
| CAST_ENC= c_enc.o | |
| RC4_ENC= rc4_enc.o | |
| @@ -149,7 +150,7 @@ SDIRS= \ | |
| bn ec rsa dsa ecdsa dh ecdh dso engine \ | |
| buffer bio stack lhash rand err \ | |
| evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ | |
| - cms pqueue ts jpake srp store cmac | |
| + cms pqueue ts jpake srp store cmac chacha20poly1305 | |
| # keep in mind that the above list is adjusted by ./Configure | |
| # according to no-xxx arguments... | |
| @@ -236,6 +237,7 @@ BUILDENV= LC_ALL=C PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'\ | |
| WP_ASM_OBJ='$(WP_ASM_OBJ)' \ | |
| MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \ | |
| ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \ | |
| + CHAPOLY_ENC='$(CHAPOLY_ENC)' \ | |
| PERLASM_SCHEME='$(PERLASM_SCHEME)' \ | |
| FIPSLIBDIR='${FIPSLIBDIR}' \ | |
| FIPSDIR='${FIPSDIR}' \ | |
| diff --git a/apps/speed.c b/apps/speed.c | |
| index b862868..28a5589 100644 | |
| --- a/apps/speed.c | |
| +++ b/apps/speed.c | |
| @@ -1,4 +1,4 @@ | |
| -/* apps/speed.c */ | |
| +/* apps/speed.c -*- mode:C; c-file-style: "eay" -*- */ | |
| /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | |
| * All rights reserved. | |
| * | |
| @@ -226,7 +226,7 @@ | |
| # endif | |
| # undef BUFSIZE | |
| -# define BUFSIZE ((long)1024*8+1) | |
| +# define BUFSIZE ((long)1024*8+16) | |
| static volatile int run = 0; | |
| static int mr = 0; | |
| @@ -241,7 +241,7 @@ static void print_result(int alg, int run_no, int count, double time_used); | |
| static int do_multi(int multi); | |
| # endif | |
| -# define ALGOR_NUM 30 | |
| +# define ALGOR_NUM 31 | |
| # define SIZE_NUM 5 | |
| # define RSA_NUM 4 | |
| # define DSA_NUM 3 | |
| @@ -256,7 +256,7 @@ static const char *names[ALGOR_NUM] = { | |
| "aes-128 cbc", "aes-192 cbc", "aes-256 cbc", | |
| "camellia-128 cbc", "camellia-192 cbc", "camellia-256 cbc", | |
| "evp", "sha256", "sha512", "whirlpool", | |
| - "aes-128 ige", "aes-192 ige", "aes-256 ige", "ghash" | |
| + "aes-128 ige", "aes-192 ige", "aes-256 ige", "ghash", "chacha20-poly1305" | |
| }; | |
| static double results[ALGOR_NUM][SIZE_NUM]; | |
| @@ -516,6 +516,7 @@ int MAIN(int argc, char **argv) | |
| # define D_IGE_192_AES 27 | |
| # define D_IGE_256_AES 28 | |
| # define D_GHASH 29 | |
| +# define D_CHAPOLY 30 | |
| double d = 0.0; | |
| long c[ALGOR_NUM][SIZE_NUM]; | |
| # define R_DSA_512 0 | |
| @@ -972,6 +973,9 @@ int MAIN(int argc, char **argv) | |
| doit[D_CBC_256_CML] = 1; | |
| } else | |
| # endif | |
| + if (strcmp(*argv, "chacha20-poly1305") == 0) { | |
| + doit[D_CHAPOLY] = 1; | |
| + } else | |
| # ifndef OPENSSL_NO_RSA | |
| if (strcmp(*argv, "rsa") == 0) { | |
| rsa_doit[R_RSA_512] = 1; | |
| @@ -1139,6 +1143,7 @@ int MAIN(int argc, char **argv) | |
| BIO_printf(bio_err, "rc4"); | |
| # endif | |
| BIO_printf(bio_err, "\n"); | |
| + BIO_printf(bio_err, "chacha20-poly1305\n"); | |
| # ifndef OPENSSL_NO_RSA | |
| BIO_printf(bio_err, "rsa512 rsa1024 rsa2048 rsa4096\n"); | |
| @@ -1370,6 +1375,7 @@ int MAIN(int argc, char **argv) | |
| c[D_IGE_192_AES][0] = count; | |
| c[D_IGE_256_AES][0] = count; | |
| c[D_GHASH][0] = count; | |
| + c[D_CHAPOLY][0] = count; | |
| for (i = 1; i < SIZE_NUM; i++) { | |
| c[D_MD2][i] = c[D_MD2][0] * 4 * lengths[0] / lengths[i]; | |
| @@ -1821,6 +1827,22 @@ int MAIN(int argc, char **argv) | |
| CRYPTO_gcm128_release(ctx); | |
| } | |
| # endif | |
| + if (doit[D_CHAPOLY]) { | |
| + EVP_CIPHER_CTX ctx; | |
| + EVP_CIPHER_CTX_init(&ctx); | |
| + EVP_CipherInit_ex(&ctx, EVP_chacha20_poly1305(), NULL, key32, iv, 1); | |
| + | |
| + for (j = 0; j < SIZE_NUM; j++) { | |
| + print_message(names[D_CHAPOLY], c[D_CHAPOLY][j], lengths[j]); | |
| + Time_F(START); | |
| + for (count = 0, run = 1; COND(c[D_CHAPOLY][j]); count++) { | |
| + EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_AEAD_TLS1_AAD, 13, buf); | |
| + EVP_Cipher(&ctx, buf, buf, (unsigned long)lengths[j] + 16); | |
| + } | |
| + d = Time_F(STOP); | |
| + print_result(D_CHAPOLY, j, count, d); | |
| + } | |
| + } | |
| # ifndef OPENSSL_NO_CAMELLIA | |
| if (doit[D_CBC_128_CML]) { | |
| for (j = 0; j < SIZE_NUM; j++) { | |
| diff --git a/crypto/chacha20poly1305/Makefile b/crypto/chacha20poly1305/Makefile | |
| new file mode 100644 | |
| index 0000000..446eb27 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/Makefile | |
| @@ -0,0 +1,97 @@ | |
| +# | |
| +# crypto/chacha20poly1305/Makefile | |
| +# | |
| + | |
| +DIR= chacha20poly1305 | |
| +TOP= ../.. | |
| +CC= cc | |
| +CPP= $(CC) -E | |
| +INCLUDES= | |
| +CFLAG=-g | |
| +MAKEFILE= Makefile | |
| +AR= ar r | |
| + | |
| +CHAPOLY_ENC= | |
| + | |
| +CFLAGS= $(INCLUDES) $(CFLAG) | |
| +ASFLAGS= $(INCLUDES) $(ASFLAG) | |
| +AFLAGS= $(ASFLAGS) | |
| + | |
| +GENERAL=Makefile | |
| +TEST=chapolytest.c | |
| +APPS= | |
| + | |
| +LIB=$(TOP)/libcrypto.a | |
| +LIBSRC=chacha20.c poly1305.c | |
| +LIBOBJ=chacha20.o poly1305.o $(CHAPOLY_ENC) | |
| + | |
| +SRC= $(LIBSRC) | |
| + | |
| +EXHEADER=chacha20poly1305.h | |
| +HEADER= $(EXHEADER) | |
| + | |
| +ALL= $(GENERAL) $(SRC) $(HEADER) | |
| + | |
| +top: | |
| + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) | |
| + | |
| +all: lib | |
| + | |
| +lib: $(LIBOBJ) | |
| + $(AR) $(LIB) $(LIBOBJ) | |
| + $(RANLIB) $(LIB) || echo Never mind. | |
| + @touch lib | |
| + | |
| +poly1305_x64.s:asm/poly1305_x64.pl | |
| + $(PERL) asm/poly1305_x64.pl $(PERLASM_SCHEME) > $@ | |
| +chacha20_avx.s:asm/chacha20_avx.pl | |
| + $(PERL) asm/chacha20_avx.pl $(PERLASM_SCHEME) > $@ | |
| +poly1305_avx.s:asm/poly1305_avx.pl | |
| + $(PERL) asm/poly1305_avx.pl $(PERLASM_SCHEME) > $@ | |
| +chacha20_avx2.s:asm/chacha20_avx2.pl | |
| + $(PERL) asm/chacha20_avx2.pl $(PERLASM_SCHEME) > $@ | |
| +poly1305_avx2.s:asm/poly1305_avx2.pl | |
| + $(PERL) asm/poly1305_avx2.pl $(PERLASM_SCHEME) > $@ | |
| + | |
| +files: | |
| + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO | |
| + | |
| +links: | |
| + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) | |
| + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) | |
| + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) | |
| + | |
| +install: | |
| + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... | |
| + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ | |
| + do \ | |
| + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ | |
| + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ | |
| + done; | |
| + | |
| +tags: | |
| + ctags $(SRC) | |
| + | |
| +tests: | |
| + | |
| +chapolytest: top chapolytest.c $(LIB) | |
| + $(CC) $(CFLAGS) -Wall -Werror -g -o chapolytest cahpolytest.c $(LIB) | |
| + | |
| +lint: | |
| + lint -DLINT $(INCLUDES) $(SRC)>fluff | |
| + | |
| +depend: | |
| + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... | |
| + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) | |
| + | |
| +dclean: | |
| + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new | |
| + mv -f Makefile.new $(MAKEFILE) | |
| + | |
| +clean: | |
| + rm -f *.s *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff | |
| + | |
| +# DO NOT DELETE THIS LINE -- make depend depends on it. | |
| + | |
| +chacha20.o: ../../include/openssl/chacha20poly1305.h chacha20.c | |
| +poly1305.o: ../../include/openssl/chacha20poly1305.h poly1305.c | |
| diff --git a/crypto/chacha20poly1305/asm/chacha20_avx.pl b/crypto/chacha20poly1305/asm/chacha20_avx.pl | |
| new file mode 100644 | |
| index 0000000..bf3e3f0 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/asm/chacha20_avx.pl | |
| @@ -0,0 +1,408 @@ | |
| +#!/usr/bin/env perl | |
| + | |
| +############################################################################## | |
| +# # | |
| +# Copyright 2014 Intel Corporation # | |
| +# # | |
| +# Licensed under the Apache License, Version 2.0 (the "License"); # | |
| +# you may not use this file except in compliance with the License. # | |
| +# You may obtain a copy of the License at # | |
| +# # | |
| +# http://www.apache.org/licenses/LICENSE-2.0 # | |
| +# # | |
| +# Unless required by applicable law or agreed to in writing, software # | |
| +# distributed under the License is distributed on an "AS IS" BASIS, # | |
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | |
| +# See the License for the specific language governing permissions and # | |
| +# limitations under the License. # | |
| +# # | |
| +############################################################################## | |
| +# # | |
| +# Developers and authors: # | |
| +# Shay Gueron (1, 2), and Vlad Krasnov (1) # | |
| +# (1) Intel Corporation, Israel Development Center # | |
| +# (2) University of Haifa # | |
| +# # | |
| +# Related work: # | |
| +# M. Goll, S. Gueron, "Vectorization on ChaCha Stream Cipher", IEEE # | |
| +# Proceedings of 11th International Conference on Information # | |
| +# Technology: New Generations (ITNG 2014), 612-615 (2014). # | |
| +# M. Goll, S. Gueron, "Vectorization on Poly1305 Message Authentication Code"# | |
| +# to be published. # | |
| +# A. Langley, chacha20poly1305 for the AEAD head # | |
| +# https://git.openssl.org/gitweb/?p=openssl.git;a=commit;h=9a8646510b3d0a48e950748f7a2aaa12ed40d5e0 # | |
| +############################################################################## | |
| + | |
| + | |
| +$flavour = shift; | |
| +$output = shift; | |
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |
| + | |
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | |
| + | |
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |
| +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | |
| +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | |
| +die "can't locate x86_64-xlate.pl"; | |
| + | |
| +open OUT,"| \"$^X\" $xlate $flavour $output"; | |
| +*STDOUT=*OUT; | |
| + | |
| +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` | |
| + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.19) + ($1>=2.22); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && | |
| + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.09) + ($1>=2.10); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && | |
| + `ml64 2>&1` =~ /Version ([0-9]+)\./) { | |
| + $avx = ($1>=10) + ($1>=11); | |
| +} | |
| + | |
| +if (`$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { | |
| + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 | |
| + $avx = ($ver>=3.0) + ($ver>=3.01); | |
| +} | |
| + | |
| +if ($avx>=1) {{ | |
| + | |
| +my ($rol8, $rol16, $state_cdef, $tmp, | |
| + $v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7, | |
| + $v8, $v9, $v10, $v11)=map("%xmm$_",(0..15)); | |
| + | |
| +sub chacha_qr { | |
| + | |
| +my ($a,$b,$c,$d)=@_; | |
| +$code.=<<___; | |
| + | |
| + vpaddd $b, $a, $a # a += b | |
| + vpxor $a, $d, $d # d ^= a | |
| + vpshufb $rol16, $d, $d # d <<<= 16 | |
| + | |
| + vpaddd $d, $c, $c # c += d | |
| + vpxor $c, $b, $b # b ^= c | |
| + vpslld \$12, $b, $tmp | |
| + vpsrld \$20, $b, $b | |
| + vpxor $tmp, $b, $b # b <<<= 12 | |
| + | |
| + vpaddd $b, $a, $a # a += b | |
| + vpxor $a, $d, $d # d ^= a | |
| + vpshufb $rol8, $d, $d # d <<<= 8 | |
| + | |
| + vpaddd $d, $c, $c # c += d | |
| + vpxor $c, $b, $b # b ^= c | |
| + | |
| + vpslld \$7, $b, $tmp | |
| + vpsrld \$25, $b, $b | |
| + vpxor $tmp, $b, $b # b <<<= 7 | |
| +___ | |
| + | |
| +} | |
| + | |
| +$code.=<<___; | |
| + | |
| +.text | |
| +.align 16 | |
| +chacha20_consts: | |
| +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' | |
| +.rol8: | |
| +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 | |
| +.rol16: | |
| +.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 | |
| +.avxInc: | |
| +.quad 1,0 | |
| + | |
| +___ | |
| + | |
| +{ | |
| + | |
| + | |
| +my ($out, $in, $in_len, $key_ptr, $nr) | |
| + =("%rdi", "%rsi", "%rdx", "%rcx", "%r8"); | |
| + | |
| +$code.=<<___; | |
| +.globl chacha_20_core_avx | |
| +.type chacha_20_core_avx ,\@function,2 | |
| +.align 64 | |
| +chacha_20_core_avx: | |
| + vzeroupper | |
| + | |
| + # Init state | |
| + vmovdqa .rol8(%rip), $rol8 | |
| + vmovdqa .rol16(%rip), $rol16 | |
| + vmovdqu 16*2($key_ptr), $state_cdef | |
| + | |
| +2: | |
| + cmp \$64*3, $in_len | |
| + jb 2f | |
| + | |
| + vmovdqa chacha20_consts(%rip), $v0 | |
| + vmovdqu 16*0($key_ptr), $v1 | |
| + vmovdqu 16*1($key_ptr), $v2 | |
| + vmovdqa $state_cdef, $v3 | |
| + | |
| + vmovdqa $v0, $v4 | |
| + vmovdqa $v0, $v8 | |
| + | |
| + vmovdqa $v1, $v5 | |
| + vmovdqa $v1, $v9 | |
| + | |
| + vmovdqa $v2, $v6 | |
| + vmovdqa $v2, $v10 | |
| + | |
| + vpaddq .avxInc(%rip), $v3, $v7 | |
| + vpaddq .avxInc(%rip), $v7, $v11 | |
| + | |
| + mov \$10, $nr | |
| + | |
| + 1: | |
| +___ | |
| + | |
| + &chacha_qr( $v0, $v1, $v2, $v3); | |
| + &chacha_qr( $v4, $v5, $v6, $v7); | |
| + &chacha_qr( $v8, $v9,$v10,$v11); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$4, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$12, $v3, $v3, $v3 | |
| + vpalignr \$4, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$12, $v7, $v7, $v7 | |
| + vpalignr \$4, $v9, $v9, $v9 | |
| + vpalignr \$8, $v10, $v10, $v10 | |
| + vpalignr \$12, $v11, $v11, $v11 | |
| +___ | |
| + | |
| + &chacha_qr( $v0, $v1, $v2, $v3); | |
| + &chacha_qr( $v4, $v5, $v6, $v7); | |
| + &chacha_qr( $v8, $v9,$v10,$v11); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$12, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$4, $v3, $v3, $v3 | |
| + vpalignr \$12, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$4, $v7, $v7, $v7 | |
| + vpalignr \$12, $v9, $v9, $v9 | |
| + vpalignr \$8, $v10, $v10, $v10 | |
| + vpalignr \$4, $v11, $v11, $v11 | |
| + | |
| + dec $nr | |
| + | |
| + jnz 1b | |
| + | |
| + vpaddd chacha20_consts(%rip), $v0, $v0 | |
| + vpaddd chacha20_consts(%rip), $v4, $v4 | |
| + vpaddd chacha20_consts(%rip), $v8, $v8 | |
| + | |
| + vpaddd 16*0($key_ptr), $v1, $v1 | |
| + vpaddd 16*0($key_ptr), $v5, $v5 | |
| + vpaddd 16*0($key_ptr), $v9, $v9 | |
| + | |
| + vpaddd 16*1($key_ptr), $v2, $v2 | |
| + vpaddd 16*1($key_ptr), $v6, $v6 | |
| + vpaddd 16*1($key_ptr), $v10, $v10 | |
| + | |
| + vpaddd $state_cdef, $v3, $v3 | |
| + vpaddq .avxInc(%rip), $state_cdef, $state_cdef | |
| + vpaddd $state_cdef, $v7, $v7 | |
| + vpaddq .avxInc(%rip), $state_cdef, $state_cdef | |
| + vpaddd $state_cdef, $v11, $v11 | |
| + vpaddq .avxInc(%rip), $state_cdef, $state_cdef | |
| + | |
| + vpxor 16*0($in), $v0, $v0 | |
| + vpxor 16*1($in), $v1, $v1 | |
| + vpxor 16*2($in), $v2, $v2 | |
| + vpxor 16*3($in), $v3, $v3 | |
| + | |
| + vmovdqu $v0, 16*0($out) | |
| + vmovdqu $v1, 16*1($out) | |
| + vmovdqu $v2, 16*2($out) | |
| + vmovdqu $v3, 16*3($out) | |
| + | |
| + vpxor 16*4($in), $v4, $v4 | |
| + vpxor 16*5($in), $v5, $v5 | |
| + vpxor 16*6($in), $v6, $v6 | |
| + vpxor 16*7($in), $v7, $v7 | |
| + | |
| + vmovdqu $v4, 16*4($out) | |
| + vmovdqu $v5, 16*5($out) | |
| + vmovdqu $v6, 16*6($out) | |
| + vmovdqu $v7, 16*7($out) | |
| + | |
| + vpxor 16*8($in), $v8, $v8 | |
| + vpxor 16*9($in), $v9, $v9 | |
| + vpxor 16*10($in), $v10, $v10 | |
| + vpxor 16*11($in), $v11, $v11 | |
| + | |
| + vmovdqu $v8, 16*8($out) | |
| + vmovdqu $v9, 16*9($out) | |
| + vmovdqu $v10, 16*10($out) | |
| + vmovdqu $v11, 16*11($out) | |
| + | |
| + lea 16*12($in), $in | |
| + lea 16*12($out), $out | |
| + sub \$16*12, $in_len | |
| + | |
| + jmp 2b | |
| + | |
| +2: | |
| + cmp \$64*2, $in_len | |
| + jb 2f | |
| + | |
| + vmovdqa chacha20_consts(%rip), $v0 | |
| + vmovdqa chacha20_consts(%rip), $v4 | |
| + vmovdqu 16*0($key_ptr), $v1 | |
| + vmovdqu 16*0($key_ptr), $v5 | |
| + vmovdqu 16*1($key_ptr), $v2 | |
| + vmovdqu 16*1($key_ptr), $v6 | |
| + vmovdqu 16*1($key_ptr), $v10 | |
| + vmovdqa $state_cdef, $v3 | |
| + vpaddq .avxInc(%rip), $v3, $v7 | |
| + | |
| + mov \$10, $nr | |
| + | |
| + 1: | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| + &chacha_qr($v4,$v5,$v6,$v7); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$4, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$12, $v3, $v3, $v3 | |
| + vpalignr \$4, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$12, $v7, $v7, $v7 | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| + &chacha_qr($v4,$v5,$v6,$v7); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$12, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$4, $v3, $v3, $v3 | |
| + vpalignr \$12, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$4, $v7, $v7, $v7 | |
| + | |
| + dec $nr | |
| + | |
| + jnz 1b | |
| + | |
| + vpaddd chacha20_consts(%rip), $v0, $v0 | |
| + vpaddd chacha20_consts(%rip), $v4, $v4 | |
| + | |
| + vpaddd 16*0($key_ptr), $v1, $v1 | |
| + vpaddd 16*0($key_ptr), $v5, $v5 | |
| + | |
| + vpaddd 16*1($key_ptr), $v2, $v2 | |
| + vpaddd 16*1($key_ptr), $v6, $v6 | |
| + | |
| + vpaddd $state_cdef, $v3, $v3 | |
| + vpaddq .avxInc(%rip), $state_cdef, $state_cdef | |
| + vpaddd $state_cdef, $v7, $v7 | |
| + vpaddq .avxInc(%rip), $state_cdef, $state_cdef | |
| + | |
| + vpxor 16*0($in), $v0, $v0 | |
| + vpxor 16*1($in), $v1, $v1 | |
| + vpxor 16*2($in), $v2, $v2 | |
| + vpxor 16*3($in), $v3, $v3 | |
| + | |
| + vmovdqu $v0, 16*0($out) | |
| + vmovdqu $v1, 16*1($out) | |
| + vmovdqu $v2, 16*2($out) | |
| + vmovdqu $v3, 16*3($out) | |
| + | |
| + vpxor 16*4($in), $v4, $v4 | |
| + vpxor 16*5($in), $v5, $v5 | |
| + vpxor 16*6($in), $v6, $v6 | |
| + vpxor 16*7($in), $v7, $v7 | |
| + | |
| + vmovdqu $v4, 16*4($out) | |
| + vmovdqu $v5, 16*5($out) | |
| + vmovdqu $v6, 16*6($out) | |
| + vmovdqu $v7, 16*7($out) | |
| + | |
| + lea 16*8($in), $in | |
| + lea 16*8($out), $out | |
| + sub \$16*8, $in_len | |
| + | |
| + jmp 2b | |
| +2: | |
| + cmp \$64, $in_len | |
| + jb 2f | |
| + | |
| + vmovdqa chacha20_consts(%rip), $v0 | |
| + vmovdqu 16*0($key_ptr), $v1 | |
| + vmovdqu 16*1($key_ptr), $v2 | |
| + vmovdqa $state_cdef, $v3 | |
| + | |
| + mov \$10, $nr | |
| + | |
| + 1: | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$4, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$12, $v3, $v3, $v3 | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| +$code.=<<___; | |
| + vpalignr \$12, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$4, $v3, $v3, $v3 | |
| + | |
| + dec $nr | |
| + jnz 1b | |
| + | |
| + vpaddd chacha20_consts(%rip), $v0, $v0 | |
| + vpaddd 16*0($key_ptr), $v1, $v1 | |
| + vpaddd 16*1($key_ptr), $v2, $v2 | |
| + vpaddd $state_cdef, $v3, $v3 | |
| + vpaddq .avxInc(%rip), $state_cdef, $state_cdef | |
| + | |
| + vpxor 16*0($in), $v0, $v0 | |
| + vpxor 16*1($in), $v1, $v1 | |
| + vpxor 16*2($in), $v2, $v2 | |
| + vpxor 16*3($in), $v3, $v3 | |
| + | |
| + vmovdqu $v0, 16*0($out) | |
| + vmovdqu $v1, 16*1($out) | |
| + vmovdqu $v2, 16*2($out) | |
| + vmovdqu $v3, 16*3($out) | |
| + | |
| + lea 16*4($in), $in | |
| + lea 16*4($out), $out | |
| + sub \$16*4, $in_len | |
| + jmp 2b | |
| + | |
| +2: | |
| + vmovdqu $state_cdef, 16*2($key_ptr) | |
| + | |
| + vzeroupper | |
| + ret | |
| +.size chacha_20_core_avx,.-chacha_20_core_avx | |
| +___ | |
| +} | |
| +}} | |
| + | |
| + | |
| +$code =~ s/\`([^\`]*)\`/eval($1)/gem; | |
| + | |
| +print $code; | |
| + | |
| +close STDOUT; | |
| + | |
| diff --git a/crypto/chacha20poly1305/asm/chacha20_avx2.pl b/crypto/chacha20poly1305/asm/chacha20_avx2.pl | |
| new file mode 100644 | |
| index 0000000..9f31f86 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/asm/chacha20_avx2.pl | |
| @@ -0,0 +1,443 @@ | |
| +#!/usr/bin/env perl | |
| + | |
| +############################################################################## | |
| +# # | |
| +# Copyright 2014 Intel Corporation # | |
| +# # | |
| +# Licensed under the Apache License, Version 2.0 (the "License"); # | |
| +# you may not use this file except in compliance with the License. # | |
| +# You may obtain a copy of the License at # | |
| +# # | |
| +# http://www.apache.org/licenses/LICENSE-2.0 # | |
| +# # | |
| +# Unless required by applicable law or agreed to in writing, software # | |
| +# distributed under the License is distributed on an "AS IS" BASIS, # | |
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | |
| +# See the License for the specific language governing permissions and # | |
| +# limitations under the License. # | |
| +# # | |
| +############################################################################## | |
| +# # | |
| +# Developers and authors: # | |
| +# Shay Gueron (1, 2), and Vlad Krasnov (1) # | |
| +# (1) Intel Corporation, Israel Development Center # | |
| +# (2) University of Haifa # | |
| +# # | |
| +# Related work: # | |
| +# M. Goll, S. Gueron, "Vectorization on ChaCha Stream Cipher", IEEE # | |
| +# Proceedings of 11th International Conference on Information # | |
| +# Technology: New Generations (ITNG 2014), 612-615 (2014). # | |
| +# M. Goll, S. Gueron, "Vectorization on Poly1305 Message Authentication Code"# | |
| +# to be published. # | |
| +# A. Langley, chacha20poly1305 for the AEAD head # | |
| +# https://git.openssl.org/gitweb/?p=openssl.git;a=commit;h=9a8646510b3d0a48e950748f7a2aaa12ed40d5e0 # | |
| +############################################################################## | |
| + | |
| +$flavour = shift; | |
| +$output = shift; | |
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |
| + | |
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | |
| + | |
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |
| +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | |
| +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | |
| +die "can't locate x86_64-xlate.pl"; | |
| + | |
| +open OUT,"| \"$^X\" $xlate $flavour $output"; | |
| +*STDOUT=*OUT; | |
| + | |
| +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` | |
| + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.19) + ($1>=2.22); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && | |
| + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.09) + ($1>=2.10); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && | |
| + `ml64 2>&1` =~ /Version ([0-9]+)\./) { | |
| + $avx = ($1>=10) + ($1>=11); | |
| +} | |
| + | |
| +if (`$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { | |
| + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 | |
| + $avx = ($ver>=3.0) + ($ver>=3.01); | |
| +} | |
| + | |
| +if ($avx>=2) {{ | |
| + | |
| +my ($state_4567, $state_89ab, $state_cdef, $tmp, | |
| + $v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7, | |
| + $v8, $v9, $v10, $v11)=map("%ymm$_",(0..15)); | |
| + | |
| +sub chacha_qr { | |
| + | |
| +my ($a,$b,$c,$d)=@_; | |
| + | |
| +$code.=<<___; | |
| + | |
| + vpaddd $b, $a, $a # a += b | |
| + vpxor $a, $d, $d # d ^= a | |
| + vpshufb .rol16(%rip), $d, $d # d <<<= 16 | |
| + | |
| + vpaddd $d, $c, $c # c += d | |
| + vpxor $c, $b, $b # b ^= c | |
| + vpslld \$12, $b, $tmp | |
| + vpsrld \$20, $b, $b | |
| + vpxor $tmp, $b, $b # b <<<= 12 | |
| + | |
| + vpaddd $b, $a, $a # a += b | |
| + vpxor $a, $d, $d # d ^= a | |
| + vpshufb .rol8(%rip), $d, $d # d <<<= 8 | |
| + | |
| + vpaddd $d, $c, $c # c += d | |
| + vpxor $c, $b, $b # b ^= c | |
| + | |
| + vpslld \$7, $b, $tmp | |
| + vpsrld \$25, $b, $b | |
| + vpxor $tmp, $b, $b # b <<<= 7 | |
| +___ | |
| +} | |
| + | |
| + | |
| +$code.=<<___; | |
| +.text | |
| +.align 32 | |
| +chacha20_consts: | |
| +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' | |
| +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' | |
| +.rol8: | |
| +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 | |
| +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 | |
| +.rol16: | |
| +.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 | |
| +.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 | |
| +.avx2Init: | |
| +.quad 0,0,1,0 | |
| +.avx2Inc: | |
| +.quad 2,0,2,0 | |
| +___ | |
| + | |
| +{ | |
| + | |
| +my $state_cdef_xmm=$state_cdef; | |
| + | |
| +substr($state_cdef_xmm, 1, 1, "x"); | |
| + | |
| +my ($out, $in, $in_len, $key_ptr, $nr) | |
| + =("%rdi", "%rsi", "%rdx", "%rcx", "%r8"); | |
| + | |
| +$code.=<<___; | |
| +.globl chacha_20_core_avx2 | |
| +.type chacha_20_core_avx2 ,\@function,2 | |
| +.align 64 | |
| +chacha_20_core_avx2: | |
| + | |
| + vzeroupper | |
| + | |
| + # Init state | |
| + vbroadcasti128 16*0($key_ptr), $state_4567 | |
| + vbroadcasti128 16*1($key_ptr), $state_89ab | |
| + vbroadcasti128 16*2($key_ptr), $state_cdef | |
| + vpaddq .avx2Init(%rip), $state_cdef, $state_cdef | |
| + | |
| +2: | |
| + cmp \$6*64, $in_len | |
| + jb 2f | |
| + | |
| + vmovdqa chacha20_consts(%rip), $v0 | |
| + vmovdqa chacha20_consts(%rip), $v4 | |
| + vmovdqa chacha20_consts(%rip), $v8 | |
| + | |
| + vmovdqa $state_4567, $v1 | |
| + vmovdqa $state_4567, $v5 | |
| + vmovdqa $state_4567, $v9 | |
| + | |
| + vmovdqa $state_89ab, $v2 | |
| + vmovdqa $state_89ab, $v6 | |
| + vmovdqa $state_89ab, $v10 | |
| + | |
| + vmovdqa $state_cdef, $v3 | |
| + vpaddq .avx2Inc(%rip), $v3, $v7 | |
| + vpaddq .avx2Inc(%rip), $v7, $v11 | |
| + | |
| + mov \$10, $nr | |
| + | |
| + 1: | |
| +___ | |
| + | |
| + &chacha_qr( $v0, $v1, $v2, $v3); | |
| + &chacha_qr( $v4, $v5, $v6, $v7); | |
| + &chacha_qr( $v8, $v9,$v10,$v11); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$4, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$12, $v3, $v3, $v3 | |
| + vpalignr \$4, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$12, $v7, $v7, $v7 | |
| + vpalignr \$4, $v9, $v9, $v9 | |
| + vpalignr \$8, $v10, $v10, $v10 | |
| + vpalignr \$12, $v11, $v11, $v11 | |
| +___ | |
| + | |
| + &chacha_qr( $v0, $v1, $v2, $v3); | |
| + &chacha_qr( $v4, $v5, $v6, $v7); | |
| + &chacha_qr( $v8, $v9,$v10,$v11); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$12, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$4, $v3, $v3, $v3 | |
| + vpalignr \$12, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$4, $v7, $v7, $v7 | |
| + vpalignr \$12, $v9, $v9, $v9 | |
| + vpalignr \$8, $v10, $v10, $v10 | |
| + vpalignr \$4, $v11, $v11, $v11 | |
| + | |
| + dec $nr | |
| + | |
| + jnz 1b | |
| + | |
| + vpaddd chacha20_consts(%rip), $v0, $v0 | |
| + vpaddd chacha20_consts(%rip), $v4, $v4 | |
| + vpaddd chacha20_consts(%rip), $v8, $v8 | |
| + | |
| + vpaddd $state_4567, $v1, $v1 | |
| + vpaddd $state_4567, $v5, $v5 | |
| + vpaddd $state_4567, $v9, $v9 | |
| + | |
| + vpaddd $state_89ab, $v2, $v2 | |
| + vpaddd $state_89ab, $v6, $v6 | |
| + vpaddd $state_89ab, $v10, $v10 | |
| + | |
| + vpaddd $state_cdef, $v3, $v3 | |
| + vpaddq .avx2Inc(%rip), $state_cdef, $state_cdef | |
| + vpaddd $state_cdef, $v7, $v7 | |
| + vpaddq .avx2Inc(%rip), $state_cdef, $state_cdef | |
| + vpaddd $state_cdef, $v11, $v11 | |
| + vpaddq .avx2Inc(%rip), $state_cdef, $state_cdef | |
| + | |
| + vperm2i128 \$0x02, $v0, $v1, $tmp | |
| + vpxor 32*0($in), $tmp, $tmp | |
| + vmovdqu $tmp, 32*0($out) | |
| + vperm2i128 \$0x02, $v2, $v3, $tmp | |
| + vpxor 32*1($in), $tmp, $tmp | |
| + vmovdqu $tmp, 32*1($out) | |
| + vperm2i128 \$0x13, $v0, $v1, $tmp | |
| + vpxor 32*2($in), $tmp, $tmp | |
| + vmovdqu $tmp, 32*2($out) | |
| + vperm2i128 \$0x13, $v2, $v3, $tmp | |
| + vpxor 32*3($in), $tmp, $tmp | |
| + vmovdqu $tmp, 32*3($out) | |
| + | |
| + vperm2i128 \$0x02, $v4, $v5, $v0 | |
| + vperm2i128 \$0x02, $v6, $v7, $v1 | |
| + vperm2i128 \$0x13, $v4, $v5, $v2 | |
| + vperm2i128 \$0x13, $v6, $v7, $v3 | |
| + | |
| + vpxor 32*4($in), $v0, $v0 | |
| + vpxor 32*5($in), $v1, $v1 | |
| + vpxor 32*6($in), $v2, $v2 | |
| + vpxor 32*7($in), $v3, $v3 | |
| + | |
| + vmovdqu $v0, 32*4($out) | |
| + vmovdqu $v1, 32*5($out) | |
| + vmovdqu $v2, 32*6($out) | |
| + vmovdqu $v3, 32*7($out) | |
| + | |
| + vperm2i128 \$0x02, $v8, $v9, $v0 | |
| + vperm2i128 \$0x02, $v10, $v11, $v1 | |
| + vperm2i128 \$0x13, $v8, $v9, $v2 | |
| + vperm2i128 \$0x13, $v10, $v11, $v3 | |
| + | |
| + vpxor 32*8($in), $v0, $v0 | |
| + vpxor 32*9($in), $v1, $v1 | |
| + vpxor 32*10($in), $v2, $v2 | |
| + vpxor 32*11($in), $v3, $v3 | |
| + | |
| + vmovdqu $v0, 32*8($out) | |
| + vmovdqu $v1, 32*9($out) | |
| + vmovdqu $v2, 32*10($out) | |
| + vmovdqu $v3, 32*11($out) | |
| + | |
| + lea 64*6($in), $in | |
| + lea 64*6($out), $out | |
| + sub \$64*6, $in_len | |
| + | |
| + jmp 2b | |
| + | |
| +2: | |
| + cmp \$4*64, $in_len | |
| + jb 2f | |
| + | |
| + vmovdqa chacha20_consts(%rip), $v0 | |
| + vmovdqa chacha20_consts(%rip), $v4 | |
| + vmovdqa $state_4567, $v1 | |
| + vmovdqa $state_4567, $v5 | |
| + vmovdqa $state_89ab, $v2 | |
| + vmovdqa $state_89ab, $v6 | |
| + vmovdqa $state_89ab, $v10 | |
| + vmovdqa $state_cdef, $v3 | |
| + vpaddq .avx2Inc(%rip), $v3, $v7 | |
| + | |
| + mov \$10, $nr | |
| + | |
| + 1: | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| + &chacha_qr($v4,$v5,$v6,$v7); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$4, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$12, $v3, $v3, $v3 | |
| + vpalignr \$4, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$12, $v7, $v7, $v7 | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| + &chacha_qr($v4,$v5,$v6,$v7); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$12, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$4, $v3, $v3, $v3 | |
| + vpalignr \$12, $v5, $v5, $v5 | |
| + vpalignr \$8, $v6, $v6, $v6 | |
| + vpalignr \$4, $v7, $v7, $v7 | |
| + | |
| + dec $nr | |
| + | |
| + jnz 1b | |
| + | |
| + vpaddd chacha20_consts(%rip), $v0, $v0 | |
| + vpaddd chacha20_consts(%rip), $v4, $v4 | |
| + | |
| + vpaddd $state_4567, $v1, $v1 | |
| + vpaddd $state_4567, $v5, $v5 | |
| + | |
| + vpaddd $state_89ab, $v2, $v2 | |
| + vpaddd $state_89ab, $v6, $v6 | |
| + | |
| + vpaddd $state_cdef, $v3, $v3 | |
| + vpaddq .avx2Inc(%rip), $state_cdef, $state_cdef | |
| + vpaddd $state_cdef, $v7, $v7 | |
| + vpaddq .avx2Inc(%rip), $state_cdef, $state_cdef | |
| + | |
| + vperm2i128 \$0x02, $v0, $v1, $v8 | |
| + vperm2i128 \$0x02, $v2, $v3, $v9 | |
| + vperm2i128 \$0x13, $v0, $v1, $v10 | |
| + vperm2i128 \$0x13, $v2, $v3, $v11 | |
| + | |
| + vpxor 32*0($in), $v8, $v8 | |
| + vpxor 32*1($in), $v9, $v9 | |
| + vpxor 32*2($in), $v10, $v10 | |
| + vpxor 32*3($in), $v11, $v11 | |
| + | |
| + vmovdqu $v8, 32*0($out) | |
| + vmovdqu $v9, 32*1($out) | |
| + vmovdqu $v10, 32*2($out) | |
| + vmovdqu $v11, 32*3($out) | |
| + | |
| + vperm2i128 \$0x02, $v4, $v5, $v0 | |
| + vperm2i128 \$0x02, $v6, $v7, $v1 | |
| + vperm2i128 \$0x13, $v4, $v5, $v2 | |
| + vperm2i128 \$0x13, $v6, $v7, $v3 | |
| + | |
| + vpxor 32*4($in), $v0, $v0 | |
| + vpxor 32*5($in), $v1, $v1 | |
| + vpxor 32*6($in), $v2, $v2 | |
| + vpxor 32*7($in), $v3, $v3 | |
| + | |
| + vmovdqu $v0, 32*4($out) | |
| + vmovdqu $v1, 32*5($out) | |
| + vmovdqu $v2, 32*6($out) | |
| + vmovdqu $v3, 32*7($out) | |
| + | |
| + lea 64*4($in), $in | |
| + lea 64*4($out), $out | |
| + sub \$64*4, $in_len | |
| + | |
| + jmp 2b | |
| +2: | |
| + cmp \$128, $in_len | |
| + jb 2f | |
| + | |
| + vmovdqa chacha20_consts(%rip), $v0 | |
| + vmovdqa $state_4567, $v1 | |
| + vmovdqa $state_89ab, $v2 | |
| + vmovdqa $state_cdef, $v3 | |
| + | |
| + mov \$10, $nr | |
| + | |
| + 1: | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| + | |
| +$code.=<<___; | |
| + vpalignr \$4, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$12, $v3, $v3, $v3 | |
| +___ | |
| + | |
| + &chacha_qr($v0,$v1,$v2,$v3); | |
| +$code.=<<___; | |
| + vpalignr \$12, $v1, $v1, $v1 | |
| + vpalignr \$8, $v2, $v2, $v2 | |
| + vpalignr \$4, $v3, $v3, $v3 | |
| + | |
| + dec $nr | |
| + jnz 1b | |
| + | |
| + vpaddd chacha20_consts(%rip), $v0, $v0 | |
| + vpaddd $state_4567, $v1, $v1 | |
| + vpaddd $state_89ab, $v2, $v2 | |
| + vpaddd $state_cdef, $v3, $v3 | |
| + vpaddq .avx2Inc(%rip), $state_cdef, $state_cdef | |
| + | |
| + vperm2i128 \$0x02, $v0, $v1, $v8 | |
| + vperm2i128 \$0x02, $v2, $v3, $v9 | |
| + vperm2i128 \$0x13, $v0, $v1, $v10 | |
| + vperm2i128 \$0x13, $v2, $v3, $v11 | |
| + | |
| + vpxor 32*0($in), $v8, $v8 | |
| + vpxor 32*1($in), $v9, $v9 | |
| + vpxor 32*2($in), $v10, $v10 | |
| + vpxor 32*3($in), $v11, $v11 | |
| + | |
| + vmovdqu $v8, 32*0($out) | |
| + vmovdqu $v9, 32*1($out) | |
| + vmovdqu $v10, 32*2($out) | |
| + vmovdqu $v11, 32*3($out) | |
| + | |
| + lea 64*2($in), $in | |
| + lea 64*2($out), $out | |
| + sub \$64*2, $in_len | |
| + jmp 2b | |
| + | |
| +2: | |
| + vmovdqu $state_cdef_xmm, 16*2($key_ptr) | |
| + | |
| + vzeroupper | |
| + ret | |
| +.size chacha_20_core_avx2,.-chacha_20_core_avx2 | |
| +___ | |
| +} | |
| +}} | |
| + | |
| + | |
| +$code =~ s/\`([^\`]*)\`/eval($1)/gem; | |
| + | |
| +print $code; | |
| + | |
| +close STDOUT; | |
| + | |
| diff --git a/crypto/chacha20poly1305/asm/poly1305_avx.pl b/crypto/chacha20poly1305/asm/poly1305_avx.pl | |
| new file mode 100644 | |
| index 0000000..cedeca1 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/asm/poly1305_avx.pl | |
| @@ -0,0 +1,732 @@ | |
| +############################################################################## | |
| +# # | |
| +# Copyright 2014 Intel Corporation # | |
| +# # | |
| +# Licensed under the Apache License, Version 2.0 (the "License"); # | |
| +# you may not use this file except in compliance with the License. # | |
| +# You may obtain a copy of the License at # | |
| +# # | |
| +# http://www.apache.org/licenses/LICENSE-2.0 # | |
| +# # | |
| +# Unless required by applicable law or agreed to in writing, software # | |
| +# distributed under the License is distributed on an "AS IS" BASIS, # | |
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | |
| +# See the License for the specific language governing permissions and # | |
| +# limitations under the License. # | |
| +# # | |
| +############################################################################## | |
| +# # | |
| +# Developers and authors: # | |
| +# Shay Gueron (1, 2), and Vlad Krasnov (1) # | |
| +# (1) Intel Corporation, Israel Development Center # | |
| +# (2) University of Haifa # | |
| +# # | |
| +############################################################################## | |
| +# state: | |
| +# 0: r[0] || r^2[0] | |
| +# 16: r[1] || r^2[1] | |
| +# 32: r[2] || r^2[2] | |
| +# 48: r[3] || r^2[3] | |
| +# 64: r[4] || r^2[4] | |
| +# 80: r[1]*5 || r^2[1]*5 | |
| +# 96: r[2]*5 || r^2[2]*5 | |
| +#112: r[3]*5 || r^2[3]*5 | |
| +#128: r[4]*5 || r^2[4]*5 | |
| +#144: k | |
| +#160: A0 | |
| +#164: A1 | |
| +#168: A2 | |
| +#172: A3 | |
| +#176: A4 | |
| +#180: END | |
| + | |
| +$flavour = shift; | |
| +$output = shift; | |
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |
| + | |
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | |
| + | |
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |
| +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | |
| +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | |
| +die "can't locate x86_64-xlate.pl"; | |
| + | |
| +open OUT,"| \"$^X\" $xlate $flavour $output"; | |
| +*STDOUT=*OUT; | |
| + | |
| +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` | |
| + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.19) + ($1>=2.22); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && | |
| + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.09) + ($1>=2.10); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && | |
| + `ml64 2>&1` =~ /Version ([0-9]+)\./) { | |
| + $avx = ($1>=10) + ($1>=11); | |
| +} | |
| + | |
| +if (`$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { | |
| + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 | |
| + $avx = ($ver>=3.0) + ($ver>=3.01); | |
| +} | |
| + | |
| +if ($avx>=1) {{ | |
| + | |
| +my ($_r0_, $_r1_, $_r2_, $_r3_, $_r4_, | |
| + $_r1_x5, $_r2_x5, $_r3_x5, $_r4_x5, | |
| + $_k_, | |
| + $_A0_, $_A1_, $_A2_, $_A3_, $_A4_) | |
| + = (0, 16, 32, 48, 64, | |
| + 80, 96, 112, 128, | |
| + 144, | |
| + 160, 164, 168, 172, 176); | |
| + | |
| +$code.=<<___; | |
| +.text | |
| +.align 32 | |
| +.LandMask: | |
| +.quad 0x3FFFFFF, 0x3FFFFFF | |
| +.LsetBit: | |
| +.quad 0x1000000, 0x1000000 | |
| +.LrSet: | |
| +.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFF | |
| +.quad 0x0FFFFFFC0FFFFFFC, 0x0FFFFFFC0FFFFFFC | |
| +.Lone: | |
| +.quad 1,0 | |
| +.Lpoly: | |
| +.quad 0xfffffffffffffffb, 0xffffffffffffffff | |
| +___ | |
| + | |
| + | |
| +{ | |
| +my ($A0, $A1, $A2, $A3, $A4, | |
| + $r0, $r1, $r2, $r3, $r4, | |
| + $T0, $T1, $A5, $A6, $A7, $A8)=map("%xmm$_",(0..15)); | |
| +my ($state, $key) | |
| + =("%rdi", "%rsi"); | |
| + | |
| +$code.=<<___; | |
| +################################################################################ | |
| +# void poly1305_init_avx(void *state, uint8_t key[32]) | |
| + | |
| +.globl poly1305_init_avx | |
| +.type poly1305_init_avx, \@function, 2 | |
| +.align 64 | |
| +poly1305_init_avx: | |
| + vzeroupper | |
| + # load and convert r | |
| + vmovq 8*0($key), $r0 | |
| + vmovq 8*1($key), $T0 | |
| + vpand .LrSet(%rip), $r0, $r0 | |
| + vpand .LrSet+16(%rip), $T0, $T0 | |
| + | |
| + vpsrlq \$26, $r0, $r1 | |
| + vpand .LandMask(%rip), $r0, $r0 | |
| + vpsrlq \$26, $r1, $r2 | |
| + vpand .LandMask(%rip), $r1, $r1 | |
| + vpsllq \$12, $T0, $T1 | |
| + vpxor $T1, $r2, $r2 | |
| + vpsrlq \$26, $r2, $r3 | |
| + vpsrlq \$40, $T0, $r4 | |
| + vpand .LandMask(%rip), $r2, $r2 | |
| + vpand .LandMask(%rip), $r3, $r3 | |
| + | |
| + # SQR R | |
| + vpmuludq $r0, $r0, $A0 | |
| + vpmuludq $r1, $r0, $A1 | |
| + vpmuludq $r2, $r0, $A2 | |
| + vpmuludq $r3, $r0, $A3 | |
| + vpmuludq $r4, $r0, $A4 | |
| + | |
| + vpsllq \$1, $A1, $A1 | |
| + vpsllq \$1, $A2, $A2 | |
| + vpmuludq $r1, $r1, $T0 | |
| + vpaddq $T0, $A2, $A2 | |
| + vpmuludq $r2, $r1, $T0 | |
| + vpaddq $T0, $A3, $A3 | |
| + vpmuludq $r3, $r1, $T0 | |
| + vpaddq $T0, $A4, $A4 | |
| + vpmuludq $r4, $r1, $A5 | |
| + | |
| + vpsllq \$1, $A3, $A3 | |
| + vpsllq \$1, $A4, $A4 | |
| + vpmuludq $r2, $r2, $T0 | |
| + vpaddq $T0, $A4, $A4 | |
| + vpmuludq $r3, $r2, $T0 | |
| + vpaddq $T0, $A5, $A5 | |
| + vpmuludq $r4, $r2, $A6 | |
| + | |
| + vpsllq \$1, $A5, $A5 | |
| + vpsllq \$1, $A6, $A6 | |
| + vpmuludq $r3, $r3, $T0 | |
| + vpaddq $T0, $A6, $A6 | |
| + vpmuludq $r4, $r3, $A7 | |
| + | |
| + vpsllq \$1, $A7, $A7 | |
| + vpmuludq $r4, $r4, $A8 | |
| + | |
| + # Reduce | |
| + vpsrlq \$26, $A4, $T0 | |
| + vpand .LandMask(%rip), $A4, $A4 | |
| + vpaddq $T0, $A5, $A5 | |
| + | |
| + vpsllq \$2, $A5, $T0 | |
| + vpaddq $T0, $A5, $A5 | |
| + vpsllq \$2, $A6, $T0 | |
| + vpaddq $T0, $A6, $A6 | |
| + vpsllq \$2, $A7, $T0 | |
| + vpaddq $T0, $A7, $A7 | |
| + vpsllq \$2, $A8, $T0 | |
| + vpaddq $T0, $A8, $A8 | |
| + | |
| + vpaddq $A5, $A0, $A0 | |
| + vpaddq $A6, $A1, $A1 | |
| + vpaddq $A7, $A2, $A2 | |
| + vpaddq $A8, $A3, $A3 | |
| + | |
| + vpsrlq \$26, $A0, $T0 | |
| + vpand .LandMask(%rip), $A0, $A0 | |
| + vpaddq $T0, $A1, $A1 | |
| + vpsrlq \$26, $A1, $T0 | |
| + vpand .LandMask(%rip), $A1, $A1 | |
| + vpaddq $T0, $A2, $A2 | |
| + vpsrlq \$26, $A2, $T0 | |
| + vpand .LandMask(%rip), $A2, $A2 | |
| + vpaddq $T0, $A3, $A3 | |
| + vpsrlq \$26, $A3, $T0 | |
| + vpand .LandMask(%rip), $A3, $A3 | |
| + vpaddq $T0, $A4, $A4 | |
| + | |
| + vpunpcklqdq $r0, $A0, $r0 | |
| + vpunpcklqdq $r1, $A1, $r1 | |
| + vpunpcklqdq $r2, $A2, $r2 | |
| + vpunpcklqdq $r3, $A3, $r3 | |
| + vpunpcklqdq $r4, $A4, $r4 | |
| + | |
| + vmovdqu $r0, $_r0_($state) | |
| + vmovdqu $r1, $_r1_($state) | |
| + vmovdqu $r2, $_r2_($state) | |
| + vmovdqu $r3, $_r3_($state) | |
| + vmovdqu $r4, $_r4_($state) | |
| + | |
| + vpsllq \$2, $r1, $A1 | |
| + vpsllq \$2, $r2, $A2 | |
| + vpsllq \$2, $r3, $A3 | |
| + vpsllq \$2, $r4, $A4 | |
| + | |
| + vpaddq $A1, $r1, $A1 | |
| + vpaddq $A2, $r2, $A2 | |
| + vpaddq $A3, $r3, $A3 | |
| + vpaddq $A4, $r4, $A4 | |
| + | |
| + vmovdqu $A1, $_r1_x5($state) | |
| + vmovdqu $A2, $_r2_x5($state) | |
| + vmovdqu $A3, $_r3_x5($state) | |
| + vmovdqu $A4, $_r4_x5($state) | |
| + # Store k | |
| + vmovdqu 16*1($key), $T0 | |
| + vmovdqu $T0, $_k_($state) | |
| + # Init the MAC value | |
| + vpxor $T0, $T0, $T0 | |
| + vmovdqu $T0, $_A0_($state) | |
| + vmovd $T0, $_A4_($state) | |
| + vzeroupper | |
| + ret | |
| +.size poly1305_init_avx,.-poly1305_init_avx | |
| +___ | |
| +} | |
| + | |
| +{ | |
| + | |
| +my ($A0, $A1, $A2, $A3, $A4, | |
| + $T0, $T1, $R0, $R1, $R2, | |
| + $R3, $R4, $AND_MASK) | |
| + = map("%xmm$_",(0..12)); | |
| + | |
| +my ($state, $in, $in_len) | |
| + =("%rdi", "%rsi", "%rdx"); | |
| + | |
| +$code.=<<___; | |
| + | |
| +############################################################################### | |
| +# void* poly1305_update_avx(void* state, void* in, uint64_t in_len) | |
| +.globl poly1305_update_avx | |
| +.type poly1305_update_avx, \@function, 2 | |
| +.align 64 | |
| +poly1305_update_avx: | |
| + | |
| + vzeroupper | |
| + vmovd $_A0_($state), $A0 | |
| + vmovd $_A1_($state), $A1 | |
| + vmovd $_A2_($state), $A2 | |
| + vmovd $_A3_($state), $A3 | |
| + vmovd $_A4_($state), $A4 | |
| + vmovdqa .LandMask(%rip), $AND_MASK | |
| + # Skip to single block case | |
| + cmp \$32, $in_len | |
| + jb 3f | |
| +1: | |
| + cmp \$16*4, $in_len | |
| + jb 1f | |
| + sub \$16*2, $in_len | |
| + # load the next two blocks | |
| + vmovdqu 16*0($in), $R2 | |
| + vmovdqu 16*1($in), $R3 | |
| + add \$16*2, $in | |
| + | |
| + vpunpcklqdq $R3, $R2, $R0 | |
| + vpunpckhqdq $R3, $R2, $R1 | |
| + | |
| + vpsrlq \$26, $R0, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A0, $A0 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpand $AND_MASK, $R2, $R2 | |
| + vpaddq $R2, $A1, $A1 | |
| + | |
| + vpsllq \$12, $R1, $R2 | |
| + vpxor $R2, $R0, $R0 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A2, $A2 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpsrlq \$40, $R1, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpxor .LsetBit(%rip), $R2, $R2 | |
| + vpaddq $R0, $A3, $A3 | |
| + vpaddq $R2, $A4, $A4 | |
| + | |
| + # Multiply input by R[0] | |
| + vbroadcastss $_r0_($state), $T0 | |
| + vpmuludq $T0, $A0, $R0 | |
| + vpmuludq $T0, $A1, $R1 | |
| + vpmuludq $T0, $A2, $R2 | |
| + vpmuludq $T0, $A3, $R3 | |
| + vpmuludq $T0, $A4, $R4 | |
| + # Multiply input by R[1] (and R[1]*5) | |
| + vbroadcastss $_r1_x5($state), $T0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vbroadcastss $_r1_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Etc | |
| + vbroadcastss $_r2_x5($state), $T0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vbroadcastss $_r2_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vbroadcastss $_r3_x5($state), $T0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vbroadcastss $_r3_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vbroadcastss $_r4_x5($state), $T0 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vbroadcastss $_r4_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Reduce | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpaddq $T0, $R4, $R4 | |
| + vpand $AND_MASK, $R3, $R3 | |
| + | |
| + vpsrlq \$26, $R4, $T0 | |
| + vpsllq \$2, $T0, $T1 | |
| + vpaddq $T1, $T0, $T0 | |
| + vpaddq $T0, $R0, $R0 | |
| + vpand $AND_MASK, $R4, $R4 | |
| + | |
| + vpsrlq \$26, $R0, $T0 | |
| + vpand $AND_MASK, $R0, $A0 | |
| + vpaddq $T0, $R1, $R1 | |
| + vpsrlq \$26, $R1, $T0 | |
| + vpand $AND_MASK, $R1, $A1 | |
| + vpaddq $T0, $R2, $R2 | |
| + vpsrlq \$26, $R2, $T0 | |
| + vpand $AND_MASK, $R2, $A2 | |
| + vpaddq $T0, $R3, $R3 | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpand $AND_MASK, $R3, $A3 | |
| + vpaddq $T0, $R4, $A4 | |
| + jmp 1b | |
| +1: | |
| + cmp \$16*2, $in_len | |
| + jb 1f | |
| + sub \$16*2, $in_len | |
| + # load the next two blocks | |
| + vmovdqu 16*0($in), $R2 | |
| + vmovdqu 16*1($in), $R3 | |
| + add \$16*2, $in | |
| + | |
| + vpunpcklqdq $R3, $R2, $R0 | |
| + vpunpckhqdq $R3, $R2, $R1 | |
| + | |
| + vpsrlq \$26, $R0, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A0, $A0 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpand $AND_MASK, $R2, $R2 | |
| + vpaddq $R2, $A1, $A1 | |
| + | |
| + vpsllq \$12, $R1, $R2 | |
| + vpxor $R2, $R0, $R0 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A2, $A2 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpsrlq \$40, $R1, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpxor .LsetBit(%rip), $R2, $R2 | |
| + vpaddq $R0, $A3, $A3 | |
| + vpaddq $R2, $A4, $A4 | |
| + | |
| + # Multiply input by R[0] | |
| + vmovdqu $_r0_($state), $T0 | |
| + vpmuludq $T0, $A0, $R0 | |
| + vpmuludq $T0, $A1, $R1 | |
| + vpmuludq $T0, $A2, $R2 | |
| + vpmuludq $T0, $A3, $R3 | |
| + vpmuludq $T0, $A4, $R4 | |
| + # Multiply input by R[1] (and R[1]*5) | |
| + vmovdqu $_r1_x5($state), $T0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vmovdqu $_r1_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Etc | |
| + vmovdqu $_r2_x5($state), $T0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vmovdqu $_r2_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovdqu $_r3_x5($state), $T0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vmovdqu $_r3_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovdqu $_r4_x5($state), $T0 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vmovdqu $_r4_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| +1: | |
| + vpsrldq \$8, $R0, $A0 | |
| + vpsrldq \$8, $R1, $A1 | |
| + vpsrldq \$8, $R2, $A2 | |
| + vpsrldq \$8, $R3, $A3 | |
| + vpsrldq \$8, $R4, $A4 | |
| + | |
| + vpaddq $R0, $A0, $A0 | |
| + vpaddq $R1, $A1, $A1 | |
| + vpaddq $R2, $A2, $A2 | |
| + vpaddq $R3, $A3, $A3 | |
| + vpaddq $R4, $A4, $A4 | |
| + # Reduce | |
| + vpsrlq \$26, $A3, $T0 | |
| + vpaddq $T0, $A4, $A4 | |
| + vpand $AND_MASK, $A3, $A3 | |
| + vpsrlq \$26, $A4, $T0 | |
| + vpsllq \$2, $T0, $T1 | |
| + vpaddq $T1, $T0, $T0 | |
| + vpaddq $T0, $A0, $A0 | |
| + vpand $AND_MASK, $A4, $A4 | |
| + vpsrlq \$26, $A0, $T0 | |
| + vpand $AND_MASK, $A0, $A0 | |
| + vpaddq $T0, $A1, $A1 | |
| + vpsrlq \$26, $A1, $T0 | |
| + vpand $AND_MASK, $A1, $A1 | |
| + vpaddq $T0, $A2, $A2 | |
| + vpsrlq \$26, $A2, $T0 | |
| + vpand $AND_MASK, $A2, $A2 | |
| + vpaddq $T0, $A3, $A3 | |
| + vpsrlq \$26, $A3, $T0 | |
| + vpand $AND_MASK, $A3, $A3 | |
| + vpaddq $T0, $A4, $A4 | |
| +3: | |
| + cmp \$16, $in_len | |
| + jb 1f | |
| + | |
| + # load the next block | |
| + vmovq 8*0($in), $R0 | |
| + vmovq 8*1($in), $R1 | |
| + add \$16, $in | |
| + sub \$16, $in_len | |
| + | |
| + vpsrlq \$26, $R0, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A0, $A0 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpand $AND_MASK, $R2, $R2 | |
| + vpaddq $R2, $A1, $A1 | |
| + | |
| + vpsllq \$12, $R1, $R2 | |
| + vpxor $R2, $R0, $R0 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A2, $A2 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpsrlq \$40, $R1, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpxor .LsetBit(%rip), $R2, $R2 | |
| + vpaddq $R0, $A3, $A3 | |
| + vpaddq $R2, $A4, $A4 | |
| +2: | |
| + # Multiply input by R[0] | |
| + vmovq $_r0_+8($state), $T0 | |
| + vpmuludq $T0, $A0, $R0 | |
| + vpmuludq $T0, $A1, $R1 | |
| + vpmuludq $T0, $A2, $R2 | |
| + vpmuludq $T0, $A3, $R3 | |
| + vpmuludq $T0, $A4, $R4 | |
| + # Multiply input by R[1] (and R[1]*5) | |
| + vmovq $_r1_x5+8($state), $T0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vmovq $_r1_+8($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Etc | |
| + vmovq $_r2_x5+8($state), $T0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vmovq $_r2_+8($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovq $_r3_x5+8($state), $T0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vmovq $_r3_+8($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovq $_r4_x5+8($state), $T0 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vmovq $_r4_+8($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Reduce | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpaddq $T0, $R4, $R4 | |
| + vpand $AND_MASK, $R3, $R3 | |
| + vpsrlq \$26, $R4, $T0 | |
| + vpsllq \$2, $T0, $T1 | |
| + vpaddq $T1, $T0, $T0 | |
| + vpaddq $T0, $R0, $R0 | |
| + vpand $AND_MASK, $R4, $R4 | |
| + vpsrlq \$26, $R0, $T0 | |
| + vpand $AND_MASK, $R0, $A0 | |
| + vpaddq $T0, $R1, $R1 | |
| + vpsrlq \$26, $R1, $T0 | |
| + vpand $AND_MASK, $R1, $A1 | |
| + vpaddq $T0, $R2, $R2 | |
| + vpsrlq \$26, $R2, $T0 | |
| + vpand $AND_MASK, $R2, $A2 | |
| + vpaddq $T0, $R3, $R3 | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpand $AND_MASK, $R3, $A3 | |
| + vpaddq $T0, $R4, $A4 | |
| + | |
| +1: | |
| + test $in_len, $in_len | |
| + jz 1f | |
| + | |
| + vmovdqa .Lone(%rip), $R0 | |
| +3: | |
| + dec $in_len | |
| + vpslldq \$1, $R0, $R0 | |
| + vpinsrb \$0, ($in, $in_len), $R0, $R0 | |
| + test $in_len, $in_len | |
| + jnz 3b | |
| + | |
| + vpsrldq \$8, $R0, $R1 | |
| + vpsrlq \$26, $R0, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A0, $A0 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpand $AND_MASK, $R2, $R2 | |
| + vpaddq $R2, $A1, $A1 | |
| + | |
| + vpsllq \$12, $R1, $R2 | |
| + vpxor $R2, $R0, $R0 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A2, $A2 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpsrlq \$40, $R1, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A3, $A3 | |
| + vpaddq $R2, $A4, $A4 | |
| + xor $in_len, $in_len | |
| + jmp 2b | |
| +1: | |
| + | |
| + vmovd $A0, $_A0_($state) | |
| + vmovd $A1, $_A1_($state) | |
| + vmovd $A2, $_A2_($state) | |
| + vmovd $A3, $_A3_($state) | |
| + vmovd $A4, $_A4_($state) | |
| + | |
| + mov $in, %rax | |
| + vzeroupper | |
| + ret | |
| +.size poly1305_update_avx,.-poly1305_update_avx | |
| +############################################################################### | |
| +# void poly1305_finish_avx(void* $state, uint64_t mac[2]); | |
| +.type poly1305_finish_avx,\@function, 2 | |
| +.globl poly1305_finish_avx | |
| +poly1305_finish_avx: | |
| +___ | |
| +my $mac="%rsi"; | |
| +$code.=<<___; | |
| + mov $_A0_($state), %r8d | |
| + mov $_A1_($state), %eax | |
| + mov $_A2_($state), %r9d | |
| + mov $_A3_($state), %ecx | |
| + mov $_A4_($state), %r10d | |
| + | |
| + shl \$26, %rax | |
| + add %rax, %r8 | |
| + | |
| + mov %r9, %rax | |
| + shl \$52, %rax | |
| + shr \$12, %r9 | |
| + add %rax, %r8 | |
| + adc \$0, %r9 | |
| + | |
| + mov %r10, %rax | |
| + shl \$14, %rcx | |
| + shl \$40, %rax | |
| + shr \$24, %r10 | |
| + | |
| + add %rcx, %r9 | |
| + add %rax, %r9 | |
| + adc \$0, %r10 | |
| + | |
| + mov %r10, %rax | |
| + shr \$2, %rax | |
| + and \$3, %r10 | |
| + | |
| + mov %rax, %rcx | |
| + shl \$2, %rax | |
| + add %rcx, %rax | |
| + | |
| + add %rax, %r8 | |
| + adc \$0, %r9 | |
| + adc \$0, %r10 | |
| + | |
| + mov %r8, %rax | |
| + mov %r9, %rcx | |
| + sub \$-5, %rax | |
| + sbb \$-1, %rcx | |
| + sbb \$3, %r10 | |
| + | |
| + cmovc %r8, %rax | |
| + cmovc %r9, %rcx | |
| + add $_k_($state), %rax | |
| + adc $_k_+8($state), %rcx | |
| + mov %rax, ($mac) | |
| + mov %rcx, 8($mac) | |
| + ret | |
| +.size poly1305_finish_avx,.-poly1305_finish_avx | |
| +___ | |
| +} | |
| +}} | |
| + | |
| +$code =~ s/\`([^\`]*)\`/eval($1)/gem; | |
| +print $code; | |
| +close STDOUT; | |
| + | |
| diff --git a/crypto/chacha20poly1305/asm/poly1305_avx2.pl b/crypto/chacha20poly1305/asm/poly1305_avx2.pl | |
| new file mode 100644 | |
| index 0000000..d2dd51f | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/asm/poly1305_avx2.pl | |
| @@ -0,0 +1,984 @@ | |
| +############################################################################## | |
| +# # | |
| +# Copyright 2014 Intel Corporation # | |
| +# # | |
| +# Licensed under the Apache License, Version 2.0 (the "License"); # | |
| +# you may not use this file except in compliance with the License. # | |
| +# You may obtain a copy of the License at # | |
| +# # | |
| +# http://www.apache.org/licenses/LICENSE-2.0 # | |
| +# # | |
| +# Unless required by applicable law or agreed to in writing, software # | |
| +# distributed under the License is distributed on an "AS IS" BASIS, # | |
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | |
| +# See the License for the specific language governing permissions and # | |
| +# limitations under the License. # | |
| +# # | |
| +############################################################################## | |
| +# # | |
| +# Developers and authors: # | |
| +# Shay Gueron (1, 2), and Vlad Krasnov (1) # | |
| +# (1) Intel Corporation, Israel Development Center # | |
| +# (2) University of Haifa # | |
| +# # | |
| +############################################################################## | |
| + | |
| +$flavour = shift; | |
| +$output = shift; | |
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |
| + | |
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | |
| + | |
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |
| +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | |
| +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | |
| +die "can't locate x86_64-xlate.pl"; | |
| + | |
| +open OUT,"| \"$^X\" $xlate $flavour $output"; | |
| +*STDOUT=*OUT; | |
| + | |
| +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` | |
| + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.19) + ($1>=2.22); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && | |
| + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.09) + ($1>=2.10); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && | |
| + `ml64 2>&1` =~ /Version ([0-9]+)\./) { | |
| + $avx = ($1>=10) + ($1>=11); | |
| +} | |
| + | |
| +if (`$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { | |
| + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 | |
| + $avx = ($ver>=3.0) + ($ver>=3.01); | |
| +} | |
| + | |
| +if ($avx>=1) {{ | |
| + | |
| +my ($_r0_, $_r1_, $_r2_, $_r3_, $_r4_, | |
| + $_r1_x5, $_r2_x5, $_r3_x5, $_r4_x5, | |
| + $_k_, | |
| + $_A0_, $_A1_, $_A2_, $_A3_, $_A4_) | |
| + = (64, 96, 128, 160, 192, | |
| + 224, 256, 288, 320, | |
| + 40, | |
| + 352, 356, 360, 364, 368); | |
| + | |
| +$code.=<<___; | |
| +.text | |
| +.align 32 | |
| +.LandMask: | |
| +.quad 0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF | |
| +.LsetBit: | |
| +.quad 0x1000000, 0x1000000, 0x1000000, 0x1000000 | |
| +.LrSet: | |
| +.quad 0xFFFFFFC0FFFFFFF, 0xFFFFFFC0FFFFFFF, 0xFFFFFFC0FFFFFFF, 0xFFFFFFC0FFFFFFF | |
| +.quad 0xFFFFFFC0FFFFFFC, 0xFFFFFFC0FFFFFFC, 0xFFFFFFC0FFFFFFC, 0xFFFFFFC0FFFFFFC | |
| + | |
| +.LpermFix: | |
| +.long 6,7,6,7,6,7,6,7 | |
| +.long 4,5,6,7,6,7,6,7 | |
| +.long 2,3,6,7,4,5,6,7 | |
| +.long 0,1,4,5,2,3,6,7 | |
| +___ | |
| + | |
| + | |
| +{ | |
| +my ($A0, $A1, $A2, $A3, $A4, | |
| + $r0, $r1, $r2, $r3, $r4, | |
| + $T0, $T1, $A5, $A6, $A7, $A8)=map("%xmm$_",(0..15)); | |
| + | |
| +my ($A0_y, $A1_y, $A2_y, $A3_y, $A4_y, | |
| + $r0_y, $r1_y, $r2_y, $r3_y, $r4_y)=map("%ymm$_",(0..9)); | |
| + | |
| +my ($state, $key) | |
| + =("%rdi", "%rsi"); | |
| + | |
| +$code.=<<___; | |
| +################################################################################ | |
| +# void poly1305_init_avx2(void *state) | |
| + | |
| +.type poly1305_init_avx2, \@function, 2 | |
| +.align 64 | |
| +poly1305_init_avx2: | |
| + vzeroupper | |
| + | |
| + # Init the MAC value | |
| + mov 8*0($state), %r8 | |
| + mov 8*1($state), %r9 | |
| + mov 8*2($state), %r10 | |
| + | |
| + mov %r8, %rax | |
| + mov %r9, %rcx | |
| + | |
| + sub \$-5, %r8 | |
| + sbb \$-1, %r9 | |
| + sbb \$3, %r10 | |
| + | |
| + cmovc %rax, %r8 | |
| + cmovc %rcx, %r9 | |
| + cmovc 8*2($state), %r10 | |
| + | |
| + mov %r8, %rax | |
| + and \$0x3ffffff, %rax | |
| + mov %eax, $_A0_($state) | |
| + | |
| + shrd \$26, %r9, %r8 | |
| + shrd \$26, %r10, %r9 | |
| + | |
| + mov %r8, %rax | |
| + and \$0x3ffffff, %rax | |
| + mov %eax, $_A1_($state) | |
| + | |
| + shrd \$26, %r9, %r8 | |
| + shr \$26, %r9 | |
| + | |
| + mov %r8, %rax | |
| + and \$0x3ffffff, %rax | |
| + mov %eax, $_A2_($state) | |
| + | |
| + shrd \$26, %r9, %r8 | |
| + | |
| + mov %r8, %rax | |
| + and \$0x3ffffff, %rax | |
| + mov %eax, $_A3_($state) | |
| + | |
| + shr \$26, %r8 | |
| + | |
| + mov %r8, %rax | |
| + and \$0x3ffffff, %rax | |
| + mov %eax, $_A4_($state) | |
| + | |
| + # load and convert r | |
| + vmovq 8*3($state), $r0 | |
| + vmovq 8*4($state), $T0 | |
| + vpand .LrSet(%rip), $r0, $r0 | |
| + vpand .LrSet+32(%rip), $T0, $T0 | |
| + | |
| + vpsrlq \$26, $r0, $r1 | |
| + vpand .LandMask(%rip), $r0, $r0 | |
| + vpsrlq \$26, $r1, $r2 | |
| + vpand .LandMask(%rip), $r1, $r1 | |
| + vpsllq \$12, $T0, $T1 | |
| + vpxor $T1, $r2, $r2 | |
| + vpsrlq \$26, $r2, $r3 | |
| + vpsrlq \$40, $T0, $r4 | |
| + vpand .LandMask(%rip), $r2, $r2 | |
| + vpand .LandMask(%rip), $r3, $r3 | |
| + # SQR R | |
| + vpmuludq $r0, $r0, $A0 | |
| + vpmuludq $r1, $r0, $A1 | |
| + vpmuludq $r2, $r0, $A2 | |
| + vpmuludq $r3, $r0, $A3 | |
| + vpmuludq $r4, $r0, $A4 | |
| + | |
| + vpsllq \$1, $A1, $A1 | |
| + vpsllq \$1, $A2, $A2 | |
| + vpmuludq $r1, $r1, $T0 | |
| + vpaddq $T0, $A2, $A2 | |
| + vpmuludq $r2, $r1, $T0 | |
| + vpaddq $T0, $A3, $A3 | |
| + vpmuludq $r3, $r1, $T0 | |
| + vpaddq $T0, $A4, $A4 | |
| + vpmuludq $r4, $r1, $A5 | |
| + | |
| + vpsllq \$1, $A3, $A3 | |
| + vpsllq \$1, $A4, $A4 | |
| + vpmuludq $r2, $r2, $T0 | |
| + vpaddq $T0, $A4, $A4 | |
| + vpmuludq $r3, $r2, $T0 | |
| + vpaddq $T0, $A5, $A5 | |
| + vpmuludq $r4, $r2, $A6 | |
| + | |
| + vpsllq \$1, $A5, $A5 | |
| + vpsllq \$1, $A6, $A6 | |
| + vpmuludq $r3, $r3, $T0 | |
| + vpaddq $T0, $A6, $A6 | |
| + vpmuludq $r4, $r3, $A7 | |
| + | |
| + vpsllq \$1, $A7, $A7 | |
| + vpmuludq $r4, $r4, $A8 | |
| + | |
| + # Reduce | |
| + vpsrlq \$26, $A4, $T0 | |
| + vpand .LandMask(%rip), $A4, $A4 | |
| + vpaddq $T0, $A5, $A5 | |
| + | |
| + vpsllq \$2, $A5, $T0 | |
| + vpaddq $T0, $A5, $A5 | |
| + vpsllq \$2, $A6, $T0 | |
| + vpaddq $T0, $A6, $A6 | |
| + vpsllq \$2, $A7, $T0 | |
| + vpaddq $T0, $A7, $A7 | |
| + vpsllq \$2, $A8, $T0 | |
| + vpaddq $T0, $A8, $A8 | |
| + | |
| + vpaddq $A5, $A0, $A0 | |
| + vpaddq $A6, $A1, $A1 | |
| + vpaddq $A7, $A2, $A2 | |
| + vpaddq $A8, $A3, $A3 | |
| + | |
| + vpsrlq \$26, $A0, $T0 | |
| + vpand .LandMask(%rip), $A0, $A0 | |
| + vpaddq $T0, $A1, $A1 | |
| + vpsrlq \$26, $A1, $T0 | |
| + vpand .LandMask(%rip), $A1, $A1 | |
| + vpaddq $T0, $A2, $A2 | |
| + vpsrlq \$26, $A2, $T0 | |
| + vpand .LandMask(%rip), $A2, $A2 | |
| + vpaddq $T0, $A3, $A3 | |
| + vpsrlq \$26, $A3, $T0 | |
| + vpand .LandMask(%rip), $A3, $A3 | |
| + vpaddq $T0, $A4, $A4 | |
| + | |
| + vpunpcklqdq $r0, $A0, $r0 | |
| + vpunpcklqdq $r1, $A1, $r1 | |
| + vpunpcklqdq $r2, $A2, $r2 | |
| + vpunpcklqdq $r3, $A3, $r3 | |
| + vpunpcklqdq $r4, $A4, $r4 | |
| + | |
| + vmovdqu $r0, $_r0_+16($state) | |
| + vmovdqu $r1, $_r1_+16($state) | |
| + vmovdqu $r2, $_r2_+16($state) | |
| + vmovdqu $r3, $_r3_+16($state) | |
| + vmovdqu $r4, $_r4_+16($state) | |
| + | |
| + vpsllq \$2, $r1, $A1 | |
| + vpsllq \$2, $r2, $A2 | |
| + vpsllq \$2, $r3, $A3 | |
| + vpsllq \$2, $r4, $A4 | |
| + | |
| + vpaddq $A1, $r1, $A1 | |
| + vpaddq $A2, $r2, $A2 | |
| + vpaddq $A3, $r3, $A3 | |
| + vpaddq $A4, $r4, $A4 | |
| + | |
| + vmovdqu $A1, $_r1_x5+16($state) | |
| + vmovdqu $A2, $_r2_x5+16($state) | |
| + vmovdqu $A3, $_r3_x5+16($state) | |
| + vmovdqu $A4, $_r4_x5+16($state) | |
| + | |
| + # Compute r^3 and r^4 | |
| + vpshufd \$0x44, $r0, $A0 | |
| + vpshufd \$0x44, $r1, $A1 | |
| + vpshufd \$0x44, $r2, $A2 | |
| + vpshufd \$0x44, $r3, $A3 | |
| + vpshufd \$0x44, $r4, $A4 | |
| + | |
| + # Multiply input by R[0] | |
| + vmovdqu $_r0_+16($state), $T0 | |
| + vpmuludq $T0, $A0, $r0 | |
| + vpmuludq $T0, $A1, $r1 | |
| + vpmuludq $T0, $A2, $r2 | |
| + vpmuludq $T0, $A3, $r3 | |
| + vpmuludq $T0, $A4, $r4 | |
| + # Multiply input by R[1] (and R[1]*5) | |
| + vmovdqu $_r1_x5+16($state), $T0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $r0, $r0 | |
| + vmovdqu $_r1_+16($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $r1, $r1 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $r2, $r2 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $r3, $r3 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $r4, $r4 | |
| + # Etc | |
| + vmovdqu $_r2_x5+16($state), $T0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $r0, $r0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $r1, $r1 | |
| + vmovdqu $_r2_+16($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $r2, $r2 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $r3, $r3 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $r4, $r4 | |
| + | |
| + vmovdqu $_r3_x5+16($state), $T0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $r0, $r0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $r1, $r1 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $r2, $r2 | |
| + vmovdqu $_r3_+16($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $r3, $r3 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $r4, $r4 | |
| + | |
| + vmovdqu $_r4_x5+16($state), $T0 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $r0, $r0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $r1, $r1 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $r2, $r2 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $r3, $r3 | |
| + vmovdqu $_r4_+16($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $r4, $r4 | |
| + # Reduce | |
| + vpsrlq \$26, $r3, $T0 | |
| + vpaddq $T0, $r4, $r4 | |
| + vpand .LandMask(%rip), $r3, $r3 | |
| + vpsrlq \$26, $r4, $T0 | |
| + vpsllq \$2, $T0, $T1 | |
| + vpaddq $T1, $T0, $T0 | |
| + vpaddq $T0, $r0, $r0 | |
| + vpand .LandMask(%rip), $r4, $r4 | |
| + vpsrlq \$26, $r0, $T0 | |
| + vpand .LandMask(%rip), $r0, $r0 | |
| + vpaddq $T0, $r1, $r1 | |
| + vpsrlq \$26, $r1, $T0 | |
| + vpand .LandMask(%rip), $r1, $r1 | |
| + vpaddq $T0, $r2, $r2 | |
| + vpsrlq \$26, $r2, $T0 | |
| + vpand .LandMask(%rip), $r2, $r2 | |
| + vpaddq $T0, $r3, $r3 | |
| + vpsrlq \$26, $r3, $T0 | |
| + vpand .LandMask(%rip), $r3, $r3 | |
| + vpaddq $T0, $r4, $r4 | |
| + | |
| + vmovdqu $r0, $_r0_($state) | |
| + vmovdqu $r1, $_r1_($state) | |
| + vmovdqu $r2, $_r2_($state) | |
| + vmovdqu $r3, $_r3_($state) | |
| + vmovdqu $r4, $_r4_($state) | |
| + | |
| + vpsllq \$2, $r1, $A1 | |
| + vpsllq \$2, $r2, $A2 | |
| + vpsllq \$2, $r3, $A3 | |
| + vpsllq \$2, $r4, $A4 | |
| + | |
| + vpaddq $A1, $r1, $A1 | |
| + vpaddq $A2, $r2, $A2 | |
| + vpaddq $A3, $r3, $A3 | |
| + vpaddq $A4, $r4, $A4 | |
| + | |
| + vmovdqu $A1, $_r1_x5($state) | |
| + vmovdqu $A2, $_r2_x5($state) | |
| + vmovdqu $A3, $_r3_x5($state) | |
| + vmovdqu $A4, $_r4_x5($state) | |
| + | |
| + movq \$1, 8*7($state) | |
| + | |
| + ret | |
| +.size poly1305_init_avx2,.-poly1305_init_avx2 | |
| +___ | |
| +} | |
| + | |
| +{ | |
| + | |
| +my ($A0, $A1, $A2, $A3, $A4, | |
| + $T0, $T1, $R0, $R1, $R2, | |
| + $R3, $R4, $AND_MASK, $PERM_MASK, $SET_MASK) | |
| + =map("%ymm$_",(0..14)); | |
| + | |
| +my ($A0_x, $A1_x, $A2_x, $A3_x, $A4_x, | |
| + $T0_x, $T1_x, $R0_x, $R1_x, $R2_x, | |
| + $R3_x, $R4_x, $AND_MASK_x, $PERM_MASK_x, $SET_MASK_x) | |
| + =map("%xmm$_",(0..14)); | |
| + | |
| +my ($state, $in, $in_len, $hlp, $rsp_save) | |
| + =("%rdi", "%rsi", "%rdx", "%rcx", "%rax"); | |
| + | |
| +$code.=<<___; | |
| + | |
| +############################################################################### | |
| +# void poly1305_update_avx2(void* $state, void* in, uint64_t in_len) | |
| +.globl poly1305_update_avx2 | |
| +.type poly1305_update_avx2, \@function, 2 | |
| +.align 64 | |
| +poly1305_update_avx2: | |
| + | |
| + | |
| + test $in_len, $in_len | |
| + jz 6f | |
| + | |
| + cmpq \$0, 8*7($state) | |
| + jne 1f # This means we already started avx2, and must finish | |
| + | |
| + # If we are here we need to check in_len is longer than the min for avx2 | |
| + | |
| + cmp \$512, $in_len | |
| + jge 2f | |
| + | |
| + jmp poly1305_update_x64 | |
| + | |
| +2: | |
| + call poly1305_init_avx2 | |
| + | |
| +1: | |
| + vmovd $_A0_($state), $A0_x | |
| + vmovd $_A1_($state), $A1_x | |
| + vmovd $_A2_($state), $A2_x | |
| + vmovd $_A3_($state), $A3_x | |
| + vmovd $_A4_($state), $A4_x | |
| + | |
| + vmovdqa .LandMask(%rip), $AND_MASK | |
| +1: | |
| + cmp \$32*4, $in_len | |
| + jb 1f | |
| + sub \$32*2, $in_len | |
| + | |
| + # load the next four blocks | |
| + vmovdqu 32*0($in), $R2 | |
| + vmovdqu 32*1($in), $R3 | |
| + add \$32*2, $in | |
| + | |
| + vpunpcklqdq $R3, $R2, $R0 | |
| + vpunpckhqdq $R3, $R2, $R1 | |
| + | |
| + vpermq \$0xD8, $R0, $R0 # it is possible to rearrange the precomputations, and save this shuffle | |
| + vpermq \$0xD8, $R1, $R1 | |
| + | |
| + vpsrlq \$26, $R0, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A0, $A0 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpand $AND_MASK, $R2, $R2 | |
| + vpaddq $R2, $A1, $A1 | |
| + | |
| + vpsllq \$12, $R1, $R2 | |
| + vpxor $R2, $R0, $R0 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A2, $A2 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpsrlq \$40, $R1, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpxor .LsetBit(%rip), $R2, $R2 | |
| + vpaddq $R0, $A3, $A3 | |
| + vpaddq $R2, $A4, $A4 | |
| + | |
| + # Multiply input by R[0] | |
| + vpbroadcastq $_r0_($state), $T0 | |
| + vpmuludq $T0, $A0, $R0 | |
| + vpmuludq $T0, $A1, $R1 | |
| + vpmuludq $T0, $A2, $R2 | |
| + vpmuludq $T0, $A3, $R3 | |
| + vpmuludq $T0, $A4, $R4 | |
| + # Multiply input by R[1] (and R[1]*5) | |
| + vpbroadcastq $_r1_x5($state), $T0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpbroadcastq $_r1_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Etc | |
| + vpbroadcastq $_r2_x5($state), $T0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpbroadcastq $_r2_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vpbroadcastq $_r3_x5($state), $T0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpbroadcastq $_r3_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vpbroadcastq $_r4_x5($state), $T0 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpbroadcastq $_r4_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Reduce | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpaddq $T0, $R4, $R4 | |
| + vpand $AND_MASK, $R3, $R3 | |
| + | |
| + vpsrlq \$26, $R4, $T0 | |
| + vpsllq \$2, $T0, $T1 | |
| + vpaddq $T1, $T0, $T0 | |
| + vpaddq $T0, $R0, $R0 | |
| + vpand $AND_MASK, $R4, $R4 | |
| + | |
| + vpsrlq \$26, $R0, $T0 | |
| + vpand $AND_MASK, $R0, $A0 | |
| + vpaddq $T0, $R1, $R1 | |
| + vpsrlq \$26, $R1, $T0 | |
| + vpand $AND_MASK, $R1, $A1 | |
| + vpaddq $T0, $R2, $R2 | |
| + vpsrlq \$26, $R2, $T0 | |
| + vpand $AND_MASK, $R2, $A2 | |
| + vpaddq $T0, $R3, $R3 | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpand $AND_MASK, $R3, $A3 | |
| + vpaddq $T0, $R4, $A4 | |
| + jmp 1b | |
| +1: | |
| + | |
| + cmp \$32*2, $in_len | |
| + jb 1f | |
| + sub \$32*2, $in_len | |
| + # load the next four blocks | |
| + vmovdqu 32*0($in), $R2 | |
| + vmovdqu 32*1($in), $R3 | |
| + add \$32*2, $in | |
| + | |
| + vpunpcklqdq $R3, $R2, $R0 | |
| + vpunpckhqdq $R3, $R2, $R1 | |
| + | |
| + vpermq \$0xD8, $R0, $R0 | |
| + vpermq \$0xD8, $R1, $R1 | |
| + | |
| + vpsrlq \$26, $R0, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A0, $A0 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpand $AND_MASK, $R2, $R2 | |
| + vpaddq $R2, $A1, $A1 | |
| + | |
| + vpsllq \$12, $R1, $R2 | |
| + vpxor $R2, $R0, $R0 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A2, $A2 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpsrlq \$40, $R1, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpxor .LsetBit(%rip), $R2, $R2 | |
| + vpaddq $R0, $A3, $A3 | |
| + vpaddq $R2, $A4, $A4 | |
| + | |
| + # Multiply input by R[0] | |
| + vmovdqu $_r0_($state), $T0 | |
| + vpmuludq $T0, $A0, $R0 | |
| + vpmuludq $T0, $A1, $R1 | |
| + vpmuludq $T0, $A2, $R2 | |
| + vpmuludq $T0, $A3, $R3 | |
| + vpmuludq $T0, $A4, $R4 | |
| + # Multiply input by R[1] (and R[1]*5) | |
| + vmovdqu $_r1_x5($state), $T0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vmovdqu $_r1_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Etc | |
| + vmovdqu $_r2_x5($state), $T0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vmovdqu $_r2_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovdqu $_r3_x5($state), $T0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vmovdqu $_r3_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovdqu $_r4_x5($state), $T0 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vmovdqu $_r4_($state), $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Reduce | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpaddq $T0, $R4, $R4 | |
| + vpand $AND_MASK, $R3, $R3 | |
| + vpsrlq \$26, $R4, $T0 | |
| + vpsllq \$2, $T0, $T1 | |
| + vpaddq $T1, $T0, $T0 | |
| + vpaddq $T0, $R0, $R0 | |
| + vpand $AND_MASK, $R4, $R4 | |
| + vpsrlq \$26, $R0, $T0 | |
| + vpand $AND_MASK, $R0, $A0 | |
| + vpaddq $T0, $R1, $R1 | |
| + vpsrlq \$26, $R1, $T0 | |
| + vpand $AND_MASK, $R1, $A1 | |
| + vpaddq $T0, $R2, $R2 | |
| + vpsrlq \$26, $R2, $T0 | |
| + vpand $AND_MASK, $R2, $A2 | |
| + vpaddq $T0, $R3, $R3 | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpand $AND_MASK, $R3, $A3 | |
| + vpaddq $T0, $R4, $A4 | |
| + | |
| + vpsrldq \$8, $A0, $R0 | |
| + vpsrldq \$8, $A1, $R1 | |
| + vpsrldq \$8, $A2, $R2 | |
| + vpsrldq \$8, $A3, $R3 | |
| + vpsrldq \$8, $A4, $R4 | |
| + | |
| + vpaddq $R0, $A0, $A0 | |
| + vpaddq $R1, $A1, $A1 | |
| + vpaddq $R2, $A2, $A2 | |
| + vpaddq $R3, $A3, $A3 | |
| + vpaddq $R4, $A4, $A4 | |
| + | |
| + vpermq \$0xAA, $A0, $R0 | |
| + vpermq \$0xAA, $A1, $R1 | |
| + vpermq \$0xAA, $A2, $R2 | |
| + vpermq \$0xAA, $A3, $R3 | |
| + vpermq \$0xAA, $A4, $R4 | |
| + | |
| + vpaddq $R0, $A0, $A0 | |
| + vpaddq $R1, $A1, $A1 | |
| + vpaddq $R2, $A2, $A2 | |
| + vpaddq $R3, $A3, $A3 | |
| + vpaddq $R4, $A4, $A4 | |
| +1: | |
| + test $in_len, $in_len | |
| + jz 5f | |
| + # In case 1,2 or 3 blocks remain, we want to multiply them correctly | |
| + vmovq $A0_x, $A0_x | |
| + vmovq $A1_x, $A1_x | |
| + vmovq $A2_x, $A2_x | |
| + vmovq $A3_x, $A3_x | |
| + vmovq $A4_x, $A4_x | |
| + | |
| + mov .LsetBit(%rip), $hlp | |
| + mov %rsp, $rsp_save | |
| + test \$15, $in_len | |
| + jz 1f | |
| + xor $hlp, $hlp | |
| + sub \$64, %rsp | |
| + vpxor $R0, $R0, $R0 | |
| + vmovdqu $R0, (%rsp) | |
| + vmovdqu $R0, 32(%rsp) | |
| +3: | |
| + movb ($in, $hlp), %r8b | |
| + movb %r8b, (%rsp, $hlp) | |
| + inc $hlp | |
| + cmp $hlp, $in_len | |
| + jne 3b | |
| + | |
| + movb \$1, (%rsp, $hlp) | |
| + xor $hlp, $hlp | |
| + mov %rsp, $in | |
| + | |
| +1: | |
| + | |
| + cmp \$16, $in_len | |
| + ja 2f | |
| + vmovq 8*0($in), $R0_x | |
| + vmovq 8*1($in), $R1_x | |
| + vmovq $hlp, $SET_MASK_x | |
| + vmovdqa .LpermFix(%rip), $PERM_MASK | |
| + jmp 1f | |
| +2: | |
| + cmp \$32, $in_len | |
| + ja 2f | |
| + vmovdqu 16*0($in), $R2_x | |
| + vmovdqu 16*1($in), $R3_x | |
| + vmovq .LsetBit(%rip), $SET_MASK_x | |
| + vpinsrq \$1, $hlp, $SET_MASK_x, $SET_MASK_x | |
| + vmovdqa .LpermFix+32(%rip), $PERM_MASK | |
| + | |
| + vpunpcklqdq $R3, $R2, $R0 | |
| + vpunpckhqdq $R3, $R2, $R1 | |
| + jmp 1f | |
| +2: | |
| + cmp \$48, $in_len | |
| + ja 2f | |
| + vmovdqu 32*0($in), $R2 | |
| + vmovdqu 32*1($in), $R3_x | |
| + vmovq .LsetBit(%rip), $SET_MASK_x | |
| + vpinsrq \$1, $hlp, $SET_MASK_x, $SET_MASK_x | |
| + vpermq \$0xc4, $SET_MASK, $SET_MASK | |
| + vmovdqa .LpermFix+64(%rip), $PERM_MASK | |
| + | |
| + vpunpcklqdq $R3, $R2, $R0 | |
| + vpunpckhqdq $R3, $R2, $R1 | |
| + jmp 1f | |
| +2: | |
| + vmovdqu 32*0($in), $R2 | |
| + vmovdqu 32*1($in), $R3 | |
| + vmovq .LsetBit(%rip), $SET_MASK_x | |
| + vpinsrq \$1, $hlp, $SET_MASK_x, $SET_MASK_x | |
| + vpermq \$0x40, $SET_MASK, $SET_MASK | |
| + vmovdqa .LpermFix+96(%rip), $PERM_MASK | |
| + | |
| + vpunpcklqdq $R3, $R2, $R0 | |
| + vpunpckhqdq $R3, $R2, $R1 | |
| + | |
| +1: | |
| + mov $rsp_save, %rsp | |
| + | |
| + vpsrlq \$26, $R0, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A0, $A0 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpand $AND_MASK, $R2, $R2 | |
| + vpaddq $R2, $A1, $A1 | |
| + | |
| + vpsllq \$12, $R1, $R2 | |
| + vpxor $R2, $R0, $R0 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpaddq $R0, $A2, $A2 | |
| + | |
| + vpsrlq \$26, $R2, $R0 | |
| + vpsrlq \$40, $R1, $R2 | |
| + vpand $AND_MASK, $R0, $R0 | |
| + vpxor $SET_MASK, $R2, $R2 | |
| + vpaddq $R0, $A3, $A3 | |
| + vpaddq $R2, $A4, $A4 | |
| + | |
| + # Multiply input by R[0] | |
| + vmovdqu $_r0_($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A0, $R0 | |
| + vpmuludq $T0, $A1, $R1 | |
| + vpmuludq $T0, $A2, $R2 | |
| + vpmuludq $T0, $A3, $R3 | |
| + vpmuludq $T0, $A4, $R4 | |
| + # Multiply input by R[1] (and R[1]*5) | |
| + vmovdqu $_r1_x5($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vmovdqu $_r1_($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Etc | |
| + vmovdqu $_r2_x5($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vmovdqu $_r2_($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovdqu $_r3_x5($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vmovdqu $_r3_($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + | |
| + vmovdqu $_r4_x5($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A1, $T1 | |
| + vpaddq $T1, $R0, $R0 | |
| + vpmuludq $T0, $A2, $T1 | |
| + vpaddq $T1, $R1, $R1 | |
| + vpmuludq $T0, $A3, $T1 | |
| + vpaddq $T1, $R2, $R2 | |
| + vpmuludq $T0, $A4, $T1 | |
| + vpaddq $T1, $R3, $R3 | |
| + vmovdqu $_r4_($state), $T0 | |
| + vpermd $T0, $PERM_MASK, $T0 | |
| + vpmuludq $T0, $A0, $T1 | |
| + vpaddq $T1, $R4, $R4 | |
| + # Reduce | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpaddq $T0, $R4, $R4 | |
| + vpand $AND_MASK, $R3, $R3 | |
| + vpsrlq \$26, $R4, $T0 | |
| + vpsllq \$2, $T0, $T1 | |
| + vpaddq $T1, $T0, $T0 | |
| + vpaddq $T0, $R0, $R0 | |
| + vpand $AND_MASK, $R4, $R4 | |
| + vpsrlq \$26, $R0, $T0 | |
| + vpand $AND_MASK, $R0, $A0 | |
| + vpaddq $T0, $R1, $R1 | |
| + vpsrlq \$26, $R1, $T0 | |
| + vpand $AND_MASK, $R1, $A1 | |
| + vpaddq $T0, $R2, $R2 | |
| + vpsrlq \$26, $R2, $T0 | |
| + vpand $AND_MASK, $R2, $A2 | |
| + vpaddq $T0, $R3, $R3 | |
| + vpsrlq \$26, $R3, $T0 | |
| + vpand $AND_MASK, $R3, $A3 | |
| + vpaddq $T0, $R4, $A4 | |
| + | |
| + vpsrldq \$8, $A0, $R0 | |
| + vpsrldq \$8, $A1, $R1 | |
| + vpsrldq \$8, $A2, $R2 | |
| + vpsrldq \$8, $A3, $R3 | |
| + vpsrldq \$8, $A4, $R4 | |
| + | |
| + vpaddq $R0, $A0, $A0 | |
| + vpaddq $R1, $A1, $A1 | |
| + vpaddq $R2, $A2, $A2 | |
| + vpaddq $R3, $A3, $A3 | |
| + vpaddq $R4, $A4, $A4 | |
| + | |
| + vpermq \$0xAA, $A0, $R0 | |
| + vpermq \$0xAA, $A1, $R1 | |
| + vpermq \$0xAA, $A2, $R2 | |
| + vpermq \$0xAA, $A3, $R3 | |
| + vpermq \$0xAA, $A4, $R4 | |
| + | |
| + vpaddq $R0, $A0, $A0 | |
| + vpaddq $R1, $A1, $A1 | |
| + vpaddq $R2, $A2, $A2 | |
| + vpaddq $R3, $A3, $A3 | |
| + vpaddq $R4, $A4, $A4 | |
| + | |
| +5: | |
| + vmovd $A0_x, $_A0_($state) | |
| + vmovd $A1_x, $_A1_($state) | |
| + vmovd $A2_x, $_A2_($state) | |
| + vmovd $A3_x, $_A3_($state) | |
| + vmovd $A4_x, $_A4_($state) | |
| +6: | |
| + ret | |
| +.size poly1305_update_avx2,.-poly1305_update_avx2 | |
| +############################################################################### | |
| +# void poly1305_finish_avx2(void* $state, uint8_t mac[16]); | |
| +.type poly1305_finish_avx2,\@function,2 | |
| +.globl poly1305_finish_avx2 | |
| +poly1305_finish_avx2: | |
| +___ | |
| +my $mac="%rsi"; | |
| +my ($A0, $A1, $A2, $A3, $A4, $T0, $T1) | |
| + =map("%xmm$_",(0..6)); | |
| + | |
| +$code.=<<___; | |
| + cmpq \$0, 8*7($state) | |
| + jne 1f | |
| + jmp poly1305_finish_x64 | |
| + | |
| +1: | |
| + mov $_A0_($state), %r8d | |
| + mov $_A1_($state), %eax | |
| + mov $_A2_($state), %r9d | |
| + mov $_A3_($state), %ecx | |
| + mov $_A4_($state), %r10d | |
| + | |
| + shl \$26, %rax | |
| + add %rax, %r8 | |
| + | |
| + mov %r9, %rax | |
| + shl \$52, %rax | |
| + shr \$12, %r9 | |
| + add %rax, %r8 | |
| + adc \$0, %r9 | |
| + | |
| + mov %r10, %rax | |
| + shl \$14, %rcx | |
| + shl \$40, %rax | |
| + shr \$24, %r10 | |
| + | |
| + add %rcx, %r9 | |
| + add %rax, %r9 | |
| + adc \$0, %r10 | |
| + | |
| + mov %r10, %rax | |
| + shr \$2, %rax | |
| + and \$3, %r10 | |
| + | |
| + mov %rax, %rcx | |
| + shl \$2, %rax | |
| + add %rcx, %rax | |
| + | |
| + add %rax, %r8 | |
| + adc \$0, %r9 | |
| + adc \$0, %r10 | |
| + | |
| + mov %r8, %rax | |
| + mov %r9, %rcx | |
| + sub \$-5, %rax | |
| + sbb \$-1, %rcx | |
| + sbb \$3, %r10 | |
| + | |
| + cmovc %r8, %rax | |
| + cmovc %r9, %rcx | |
| + add $_k_($state), %rax | |
| + adc $_k_+8($state), %rcx | |
| + mov %rax, ($mac) | |
| + mov %rcx, 8($mac) | |
| + | |
| + ret | |
| +.size poly1305_finish_avx2,.-poly1305_finish_avx2 | |
| +___ | |
| +} | |
| +}} | |
| + | |
| +$code =~ s/\`([^\`]*)\`/eval(\$1)/gem; | |
| +print $code; | |
| +close STDOUT; | |
| + | |
| diff --git a/crypto/chacha20poly1305/asm/poly1305_x64.pl b/crypto/chacha20poly1305/asm/poly1305_x64.pl | |
| new file mode 100644 | |
| index 0000000..31c4c47 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/asm/poly1305_x64.pl | |
| @@ -0,0 +1,281 @@ | |
| + | |
| +############################################################################## | |
| +# # | |
| +# Copyright 2016 CloudFlare LTD # | |
| +# # | |
| +# Licensed under the Apache License, Version 2.0 (the "License"); # | |
| +# you may not use this file except in compliance with the License. # | |
| +# You may obtain a copy of the License at # | |
| +# # | |
| +# http://www.apache.org/licenses/LICENSE-2.0 # | |
| +# # | |
| +# Unless required by applicable law or agreed to in writing, software # | |
| +# distributed under the License is distributed on an "AS IS" BASIS, # | |
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | |
| +# See the License for the specific language governing permissions and # | |
| +# limitations under the License. # | |
| +# # | |
| +############################################################################## | |
| +# # | |
| +# Author: Vlad Krasnov # | |
| +# # | |
| +############################################################################## | |
| + | |
| +$flavour = shift; | |
| +$output = shift; | |
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |
| + | |
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | |
| + | |
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |
| +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | |
| +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | |
| +die "can't locate x86_64-xlate.pl"; | |
| + | |
| +open OUT,"| \"$^X\" $xlate $flavour $output"; | |
| +*STDOUT=*OUT; | |
| + | |
| +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` | |
| + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.19) + ($1>=2.22); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && | |
| + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { | |
| + $avx = ($1>=2.09) + ($1>=2.10); | |
| +} | |
| + | |
| +if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && | |
| + `ml64 2>&1` =~ /Version ([0-9]+)\./) { | |
| + $avx = ($1>=10) + ($1>=11); | |
| +} | |
| + | |
| +if (`$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { | |
| + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 | |
| + $avx = ($ver>=3.0) + ($ver>=3.01); | |
| +} | |
| + | |
| + | |
| +{ | |
| +{ | |
| + | |
| +my ($state, $key) | |
| + =("%rdi", "%rsi"); | |
| + | |
| +$code.=<<___; | |
| + | |
| +.LrSet: | |
| +.align 16 | |
| +.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC | |
| +############################################################################### | |
| +# void poly1305_init_x64(void *state, uint8_t key[32]) | |
| + | |
| +.globl poly1305_init_x64 | |
| +.type poly1305_init_x64, \@function, 2 | |
| +.align 64 | |
| +poly1305_init_x64: | |
| + | |
| + xor %rax, %rax | |
| + mov %rax, 8*0($state) | |
| + mov %rax, 8*1($state) | |
| + mov %rax, 8*2($state) | |
| + | |
| + movdqu 16*0($key), %xmm0 | |
| + movdqu 16*1($key), %xmm1 | |
| + pand .LrSet(%rip), %xmm0 | |
| + | |
| + movdqu %xmm0, 8*3($state) | |
| + movdqu %xmm1, 8*3+16($state) | |
| + movq \$0, 8*7($state) | |
| + | |
| + ret | |
| +.size poly1305_init_x64,.-poly1305_init_x64 | |
| +___ | |
| +} | |
| + | |
| +{ | |
| + | |
| +my ($state, $inp) | |
| + =("%rdi", "%rsi"); | |
| + | |
| +my ($acc0, $acc1, $acc2, $inl, $t0, $t1, $t2, $t3, $r0) | |
| + =("%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"); | |
| + | |
| +my ($r1) | |
| + =("8*4($state)"); | |
| + | |
| +$code.=<<___; | |
| +############################################################################### | |
| +# void* poly1305_update_x64(void* state, void* in, uint64_t in_len) | |
| +.globl poly1305_update_x64 | |
| +.type poly1305_update_x64, \@function, 2 | |
| +.align 64 | |
| +poly1305_update_x64: | |
| + | |
| + push %r11 | |
| + push %r12 | |
| + push %r13 | |
| + push %r14 | |
| + push %r15 | |
| + | |
| + mov %rdx, $inl | |
| + | |
| + mov 8*0($state), $acc0 | |
| + mov 8*1($state), $acc1 | |
| + mov 8*2($state), $acc2 | |
| + mov 8*3($state), $r0 | |
| + | |
| + cmp \$16, $inl | |
| + jb 2f | |
| + jmp 1f | |
| + | |
| +.align 64 | |
| +1: | |
| +############################ | |
| + add 8*0($inp), $acc0 | |
| + adc 8*1($inp), $acc1 | |
| + lea 16($inp), $inp | |
| + adc \$1, $acc2 | |
| + | |
| +5: | |
| + mov $r0, %rax | |
| + mulq $acc0 | |
| + mov %rax, $t0 | |
| + mov %rdx, $t1 | |
| + | |
| + mov $r0, %rax | |
| + mulq $acc1 | |
| + add %rax, $t1 | |
| + adc \$0, %rdx | |
| + | |
| + mov $r0, $t2 | |
| + imul $acc2, $t2 | |
| + add %rdx, $t2 | |
| +############################ | |
| + mov $r1, %rax | |
| + mulq $acc0 | |
| + add %rax, $t1 | |
| + adc \$0, %rdx | |
| + mov %rdx, $acc0 | |
| + | |
| + mov $r1, %rax | |
| + mulq $acc1 | |
| + add $acc0, $t2 | |
| + adc \$0, %rdx | |
| + add %rax, $t2 | |
| + adc \$0, %rdx | |
| + | |
| + mov $r1, $t3 | |
| + imul $acc2, $t3 | |
| + add %rdx, $t3 | |
| +############################ | |
| + | |
| + mov $t0, $acc0 | |
| + mov $t1, $acc1 | |
| + mov $t2, $acc2 | |
| + and \$3, $acc2 | |
| + | |
| + mov $t2, $t0 | |
| + mov $t3, $t1 | |
| + | |
| + and \$-4, $t0 | |
| + shrd \$2, $t3, $t2 | |
| + shr \$2, $t3 | |
| + | |
| + add $t0, $acc0 | |
| + adc $t1, $acc1 | |
| + adc \$0, $acc2 | |
| + | |
| + add $t2, $acc0 | |
| + adc $t3, $acc1 | |
| + adc \$0, $acc2 | |
| + | |
| + sub \$16, $inl | |
| + cmp \$16, $inl | |
| + jae 1b | |
| + | |
| +2: | |
| + test $inl, $inl | |
| + jz 3f | |
| + | |
| + mov \$1, $t0 | |
| + xor $t1, $t1 | |
| + xor $t2, $t2 | |
| + add $inl, $inp | |
| + | |
| +4: | |
| + shld \$8, $t0, $t1 | |
| + shl \$8, $t0 | |
| + movzxb -1($inp), $t2 | |
| + xor $t2, $t0 | |
| + dec $inp | |
| + dec $inl | |
| + jnz 4b | |
| + | |
| + add $t0, $acc0 | |
| + adc $t1, $acc1 | |
| + adc \$0, $acc2 | |
| + | |
| + mov \$16, $inl | |
| + jmp 5b | |
| + | |
| +3: | |
| + | |
| + mov $acc0, 8*0($state) | |
| + mov $acc1, 8*1($state) | |
| + mov $acc2, 8*2($state) | |
| + | |
| + pop %r15 | |
| + pop %r14 | |
| + pop %r13 | |
| + pop %r12 | |
| + pop %r11 | |
| + ret | |
| +.size poly1305_update_x64, .-poly1305_update_x64 | |
| +___ | |
| +} | |
| + | |
| +{ | |
| + | |
| +my ($mac, $state)=("%rsi", "%rdi"); | |
| + | |
| +my ($acc0, $acc1, $acc2, $t0, $t1, $t2) | |
| + =("%rcx", "%rax", "%rdx", "%r8", "%r9", "%r10"); | |
| + | |
| +$code.=<<___; | |
| +############################################################################### | |
| +# void poly1305_finish_x64(void* state, uint64_t mac[2]); | |
| +.type poly1305_finish_x64,\@function, 2 | |
| +.align 64 | |
| +.globl poly1305_finish_x64 | |
| +poly1305_finish_x64: | |
| + | |
| + mov 8*0($state), $acc0 | |
| + mov 8*1($state), $acc1 | |
| + mov 8*2($state), $acc2 | |
| + | |
| + mov $acc0, $t0 | |
| + mov $acc1, $t1 | |
| + mov $acc2, $t2 | |
| + | |
| + sub \$-5, $acc0 | |
| + sbb \$-1, $acc1 | |
| + sbb \$3, $acc2 | |
| + | |
| + cmovc $t0, $acc0 | |
| + cmovc $t1, $acc1 | |
| + cmovc $t2, $acc2 | |
| + | |
| + add 8*5($state), $acc0 | |
| + adc 8*6($state), $acc1 | |
| + mov $acc0, ($mac) | |
| + mov $acc1, 8($mac) | |
| + | |
| + ret | |
| +.size poly1305_finish_x64, .-poly1305_finish_x64 | |
| +___ | |
| +} | |
| +} | |
| +$code =~ s/\`([^\`]*)\`/eval($1)/gem; | |
| +print $code; | |
| +close STDOUT; | |
| diff --git a/crypto/chacha20poly1305/chacha20.c b/crypto/chacha20poly1305/chacha20.c | |
| new file mode 100644 | |
| index 0000000..3044751 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/chacha20.c | |
| @@ -0,0 +1,162 @@ | |
| +/* Copyright (c) 2014, Google Inc. | |
| + * | |
| + * Permission to use, copy, modify, and/or distribute this software for any | |
| + * purpose with or without fee is hereby granted, provided that the above | |
| + * copyright notice and this permission notice appear in all copies. | |
| + * | |
| + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
| + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | |
| + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
| + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ | |
| + | |
| +/* Adapted from the public domain, estream code by D. Bernstein. */ | |
| + | |
| +#include "chacha20poly1305.h" | |
| + | |
| +/* sigma contains the ChaCha constants, which happen to be an ASCII string. */ | |
| +static const char sigma[16] = "expand 32-byte k"; | |
| + | |
| +#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) | |
| +#define XOR(v, w) ((v) ^ (w)) | |
| +#define PLUS(x, y) ((x) + (y)) | |
| +#define PLUSONE(v) (PLUS((v), 1)) | |
| + | |
| +#define U32TO8_LITTLE(p, v) \ | |
| + { \ | |
| + (p)[0] = (v >> 0) & 0xff; \ | |
| + (p)[1] = (v >> 8) & 0xff; \ | |
| + (p)[2] = (v >> 16) & 0xff; \ | |
| + (p)[3] = (v >> 24) & 0xff; \ | |
| + } | |
| + | |
| +#define U8TO32_LITTLE(p) \ | |
| + (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | \ | |
| + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) | |
| + | |
| +/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ | |
| +#define QUARTERROUND(a,b,c,d) \ | |
| + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ | |
| + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ | |
| + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ | |
| + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); | |
| + | |
| +/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in | |
| + * |input| and writes the 64 output bytes to |output|. */ | |
| +static void chacha_core(uint8_t output[64], const uint32_t input[16]) { | |
| + uint32_t x[16]; | |
| + int i; | |
| + | |
| + memcpy(x, input, sizeof(uint32_t) * 16); | |
| + for (i = 20; i > 0; i -= 2) { | |
| + QUARTERROUND(0, 4, 8, 12) | |
| + QUARTERROUND(1, 5, 9, 13) | |
| + QUARTERROUND(2, 6, 10, 14) | |
| + QUARTERROUND(3, 7, 11, 15) | |
| + QUARTERROUND(0, 5, 10, 15) | |
| + QUARTERROUND(1, 6, 11, 12) | |
| + QUARTERROUND(2, 7, 8, 13) | |
| + QUARTERROUND(3, 4, 9, 14) | |
| + } | |
| + | |
| + for (i = 0; i < 16; ++i) { | |
| + x[i] = PLUS(x[i], input[i]); | |
| + } | |
| + for (i = 0; i < 16; ++i) { | |
| + U32TO8_LITTLE(output + 4 * i, x[i]); | |
| + } | |
| +} | |
| + | |
| +void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, | |
| + uint8_t nonce[48]) { | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| + const int AVX2_threshold = 256; | |
| + const int AVX2_min_buf = 128; | |
| + const int AVX_min_buf = 64; | |
| + | |
| + uint8_t buf[128]; | |
| + size_t buf_size; | |
| + | |
| + void (*core_func)(uint8_t *out, const uint8_t *in, size_t in_len, | |
| + uint8_t nonce[48]) = NULL; | |
| +#else /* !CHAPOLY_x86_64_ASM */ | |
| + | |
| + uint8_t buf[64]; | |
| + | |
| +#endif | |
| + | |
| + uint32_t input[16]; | |
| + size_t todo, i; | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| + if (in_len >= AVX2_threshold && ((OPENSSL_ia32cap_loc()[1] >> 5) & 1)) { | |
| + buf_size = AVX2_min_buf; | |
| + core_func = chacha_20_core_avx2; | |
| + } else if ((OPENSSL_ia32cap_loc()[0] >> 60) & 1) { | |
| + buf_size = AVX_min_buf; | |
| + core_func = chacha_20_core_avx; | |
| + } else goto do_legacy; | |
| + | |
| + core_func(out, in, in_len, nonce); | |
| + todo = in_len & (buf_size - 1); | |
| + | |
| + if(todo) { | |
| + out += in_len - todo; | |
| + in += in_len - todo; | |
| + memcpy(buf, in, todo); | |
| + | |
| + core_func(buf, buf, buf_size, nonce); | |
| + | |
| + memcpy(out, buf, todo); | |
| + memset(buf, 0, buf_size); | |
| + } | |
| + return; | |
| + | |
| +do_legacy: | |
| +#endif /* CHAPOLY_x86_64_ASM */ | |
| + | |
| + input[0] = U8TO32_LITTLE(sigma + 0); | |
| + input[1] = U8TO32_LITTLE(sigma + 4); | |
| + input[2] = U8TO32_LITTLE(sigma + 8); | |
| + input[3] = U8TO32_LITTLE(sigma + 12); | |
| + | |
| + input[4] = U8TO32_LITTLE(nonce + 0); | |
| + input[5] = U8TO32_LITTLE(nonce + 4); | |
| + input[6] = U8TO32_LITTLE(nonce + 8); | |
| + input[7] = U8TO32_LITTLE(nonce + 12); | |
| + | |
| + input[8] = U8TO32_LITTLE(nonce + 16); | |
| + input[9] = U8TO32_LITTLE(nonce + 20); | |
| + input[10] = U8TO32_LITTLE(nonce + 24); | |
| + input[11] = U8TO32_LITTLE(nonce + 28); | |
| + | |
| + input[12] = U8TO32_LITTLE(nonce + 32); | |
| + input[13] = U8TO32_LITTLE(nonce + 36); | |
| + input[14] = U8TO32_LITTLE(nonce + 40); | |
| + input[15] = U8TO32_LITTLE(nonce + 44); | |
| + | |
| + while (in_len > 0) { | |
| + todo = 64; | |
| + if (in_len < todo) { | |
| + todo = in_len; | |
| + } | |
| + | |
| + chacha_core(buf, input); | |
| + for (i = 0; i < todo; i++) { | |
| + out[i] = in[i] ^ buf[i]; | |
| + } | |
| + | |
| + out += todo; | |
| + in += todo; | |
| + in_len -= todo; | |
| + | |
| + ((uint64_t*)input)[6]++; | |
| + } | |
| + | |
| + U32TO8_LITTLE(nonce + 32, input[12]); | |
| + U32TO8_LITTLE(nonce + 36, input[13]); | |
| +} | |
| + | |
| diff --git a/crypto/chacha20poly1305/chacha20poly1305.h b/crypto/chacha20poly1305/chacha20poly1305.h | |
| new file mode 100644 | |
| index 0000000..09b0450 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/chacha20poly1305.h | |
| @@ -0,0 +1,79 @@ | |
| +/* Copyright (c) 2014, Google Inc. | |
| + * | |
| + * Permission to use, copy, modify, and/or distribute this software for any | |
| + * purpose with or without fee is hereby granted, provided that the above | |
| + * copyright notice and this permission notice appear in all copies. | |
| + * | |
| + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
| + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | |
| + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
| + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ | |
| + | |
| +#ifndef OPENSSL_HEADER_POLY1305_H | |
| +#define OPENSSL_HEADER_POLY1305_H | |
| + | |
| +#include <stdint.h> | |
| +#include <stddef.h> | |
| +#include <string.h> | |
| +#include "crypto.h" | |
| + | |
| +#ifdef __cplusplus | |
| +extern "C" { | |
| +#endif | |
| + | |
| +#define POLY1305_MAC_LEN (16) | |
| +#define POLY1305_PAD_LEN (16) | |
| + | |
| +typedef unsigned char poly1305_state[372]; | |
| + | |
| + | |
| +/* CRYPTO_poly1305_init sets up |state| so that it can be used to calculate an | |
| + * authentication tag with the one-time key |key|. Note that |key| is a | |
| + * one-time key and therefore there is no `reset' method because that would | |
| + * enable several messages to be authenticated with the same key. */ | |
| +void CRYPTO_poly1305_init(poly1305_state* state, const uint8_t key[32]); | |
| + | |
| +/* CRYPTO_poly1305_update processes |in_len| bytes from |in|. It can be called | |
| + * zero or more times after poly1305_init. */ | |
| +void CRYPTO_poly1305_update(poly1305_state* state, const uint8_t* in, | |
| + size_t in_len); | |
| + | |
| +/* CRYPTO_poly1305_finish completes the poly1305 calculation and writes a 16 | |
| + * byte authentication tag to |mac|. */ | |
| +void CRYPTO_poly1305_finish(poly1305_state* state, | |
| + uint8_t mac[POLY1305_MAC_LEN]); | |
| + | |
| +/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and | |
| + * nonce and writes the result to |out|, which may be equal to |in|. The | |
| + * initial block counter is specified by |counter|. */ | |
| +void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, | |
| + uint8_t nonce[48]); | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| +void poly1305_init_x64(poly1305_state* state, const uint8_t key[32]); | |
| +void poly1305_update_x64(poly1305_state* state, const uint8_t *in, size_t in_len); | |
| +void poly1305_finish_x64(poly1305_state* state, uint8_t mac[16]); | |
| + | |
| +void poly1305_init_avx(poly1305_state* state, const uint8_t key[32]); | |
| +void poly1305_update_avx(poly1305_state* state, const uint8_t *in, size_t in_len); | |
| +void poly1305_finish_avx(poly1305_state* state, uint8_t mac[16]); | |
| + | |
| +void poly1305_update_avx2(poly1305_state* state, const uint8_t *in, size_t in_len); | |
| +void poly1305_finish_avx2(poly1305_state* state, uint8_t mac[16]); | |
| + | |
| +void chacha_20_core_avx(uint8_t *out, const uint8_t *in, size_t in_len, | |
| + uint8_t nonce[48]); | |
| + | |
| +void chacha_20_core_avx2(uint8_t *out, const uint8_t *in, size_t in_len, | |
| + uint8_t nonce[48]); | |
| +#endif | |
| + | |
| + | |
| +#if defined(__cplusplus) | |
| +} /* extern C */ | |
| +#endif | |
| + | |
| +#endif /* OPENSSL_HEADER_POLY1305_H */ | |
| diff --git a/crypto/chacha20poly1305/chapolytest.c b/crypto/chacha20poly1305/chapolytest.c | |
| new file mode 100644 | |
| index 0000000..7e2933f | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/chapolytest.c | |
| @@ -0,0 +1,470 @@ | |
| +/* ==================================================================== | |
| + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| + * | |
| + * Redistribution and use in source and binary forms, with or without | |
| + * modification, are permitted provided that the following conditions | |
| + * are met: | |
| + * | |
| + * 1. Redistributions of source code must retain the above copyright | |
| + * notice, this list of conditions and the following disclaimer. | |
| + * | |
| + * 2. Redistributions in binary form must reproduce the above copyright | |
| + * notice, this list of conditions and the following disclaimer in | |
| + * the documentation and/or other materials provided with the | |
| + * distribution. | |
| + * | |
| + * 3. All advertising materials mentioning features or use of this | |
| + * software must display the following acknowledgment: | |
| + * "This product includes software developed by the OpenSSL Project | |
| + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| + * | |
| + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| + * endorse or promote products derived from this software without | |
| + * prior written permission. For written permission, please contact | |
| + * licensing@OpenSSL.org. | |
| + * | |
| + * 5. Products derived from this software may not be called "OpenSSL" | |
| + * nor may "OpenSSL" appear in their names without prior written | |
| + * permission of the OpenSSL Project. | |
| + * | |
| + * 6. Redistributions of any form whatsoever must retain the following | |
| + * acknowledgment: | |
| + * "This product includes software developed by the OpenSSL Project | |
| + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| + * | |
| + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| + * ==================================================================== | |
| + */ | |
| + | |
| + | |
| +#include <stdio.h> | |
| +#include <stdlib.h> | |
| +#include <string.h> | |
| +#include <stdint.h> | |
| + | |
| +#include <openssl/chacha20poly1305.h> | |
| + | |
| +struct chacha_test { | |
| + const char *noncehex; | |
| + const char *outhex; | |
| +}; | |
| + | |
| +struct poly1305_test { | |
| + const char *inputhex; | |
| + const char *keyhex; | |
| + const char *outhex; | |
| +}; | |
| + | |
| +static const struct chacha_test chacha_tests[] = { | |
| + { | |
| + "00000000000000000000000000000000""00000000000000000000000000000000" | |
| + "00000000000000000000000000000000", | |
| + "76b8e0ada0f13d90405d6ae55386bd28""bdd219b8a08ded1aa836efcc8b770dc7" | |
| + "da41597c5157488d7724e03fb8d84a37""6a43b8f41518a11cc387b669b2ee6586", | |
| + }, | |
| + { | |
| + "00000000000000000000000000000000""00000000000000000000000000000001" | |
| + "00000000000000000000000000000000", | |
| + "4540f05a9f1fb296d7736e7b208e3c96""eb4fe1834688d2604f450952ed432d41" | |
| + "bbe2a0b6ea7566d2a5d1e7e20d42af2c""53d792b1c43fea817e9ad275ae546963", | |
| + }, | |
| + { | |
| + "00000000000000000000000000000000""00000000000000000000000000000000" | |
| + "00000000000000000000000000000001", | |
| + "de9cba7bf3d69ef5e786dc63973f653a""0b49e015adbff7134fcb7df137821031" | |
| + "e85a050278a7084527214f73efc7fa5b""5277062eb7a0433e445f41e31afab757", | |
| + }, | |
| + { | |
| + "00000000000000000000000000000000""00000000000000000000000000000000" | |
| + "00000000000000000100000000000000", | |
| + "ef3fdfd6c61578fbf5cf35bd3dd33b80""09631634d21e42ac33960bd138e50d32" | |
| + "111e4caf237ee53ca8ad6426194a8854""5ddc497a0b466e7d6bbdb0041b2f586b", | |
| + }, | |
| + { | |
| + "000102030405060708090a0b0c0d0e0f""101112131415161718191a1b1c1d1e1f" | |
| + "00000000000000000001020304050607", | |
| + "f798a189f195e66982105ffb640bb775""7f579da31602fc93ec01ac56f85ac3c1" | |
| + "34a4547b733b46413042c94400491769""05d3be59ea1c53f15916155c2be8241a" | |
| + "38008b9a26bc35941e2444177c8ade66""89de95264986d95889fb60e84629c9bd" | |
| + "9a5acb1cc118be563eb9b3a4a472f82e""09a7e778492b562ef7130e88dfe031c7" | |
| + "9db9d4f7c7a899151b9a475032b63fc3""85245fe054e3dd5a97a5f576fe064025" | |
| + "d3ce042c566ab2c507b138db853e3d69""59660996546cc9c4a6eafdc777c040d7" | |
| + "0eaf46f76dad3979e5c5360c3317166a""1c894c94a371876a94df7628fe4eaaf2" | |
| + "ccb27d5aaae0ad7ad0f9d4b6ad3b5409""8746d4524d38407a6deb", | |
| + }, | |
| +}; | |
| + | |
| +static const struct poly1305_test poly1305_tests[] = { | |
| + { | |
| + "", | |
| + "c8afaac331ee372cd6082de134943b17""4710130e9f6fea8d72293850a667d86c", | |
| + "4710130e9f6fea8d72293850a667d86c", | |
| + }, | |
| + { | |
| + "48656c6c6f20776f726c6421", | |
| + "746869732069732033322d6279746520""6b657920666f7220506f6c7931333035", | |
| + "a6f745008f81c916a20dcc74eef2b2f0", | |
| + }, | |
| + { | |
| + "00000000000000000000000000000000""00000000000000000000000000000000", | |
| + "746869732069732033322d6279746520""6b657920666f7220506f6c7931333035", | |
| + "49ec78090e481ec6c26b33b91ccc0307", | |
| + }, | |
| + { | |
| + "43727970746f6772617068696320466f""72756d2052657365617263682047726f" | |
| + "7570", | |
| + "85d6be7857556d337f4452fe42d506a8""0103808afb0db2fd4abff6af4149f51b", | |
| + "a8061dc1305136c6c22b8baf0c0127a9" | |
| + }, | |
| + { | |
| + "f3f6", | |
| + "851fc40c3467ac0be05cc20404f3f700""580b3b0f9447bb1e69d095b5928b6dbc", | |
| + "f4c633c3044fc145f84f335cb81953de" | |
| + }, | |
| + { | |
| + "", | |
| + "a0f3080000f46400d0c7e9076c834403""dd3fab2251f11ac759f0887129cc2ee7", | |
| + "dd3fab2251f11ac759f0887129cc2ee7" | |
| + }, | |
| + { | |
| + "663cea190ffb83d89593f3f476b6bc24""d7e679107ea26adb8caf6652d0656136", | |
| + "48443d0bb0d21109c89a100b5ce2c208""83149c69b561dd88298a1798b10716ef", | |
| + "0ee1c16bb73f0f4fd19881753c01cdbe" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d1""36c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67""fa83e158c994d961c4cb21095c1bf9", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "5154ad0d2cb26e01274fc51148491f1b" | |
| + }, | |
| + /* | |
| + * self-generated | |
| + */ | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d1""36c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67""fa83e158c994d961c4cb21095c1bf9af", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "812059a5da198637cac7c4a631bee466" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d1""36c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "5b88d7f6228b11e2e28579a5c0c1f761" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "bbb613b2b6d753ba07395b916aaece15" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "c794d7057d1778c4bbee0a39b3d97342" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "ffbcb9b371423152d7fca5ad042fbaa9" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136" | |
| + "812059a5da198637cac7c4a631bee466", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "069ed6b8ef0f207b3e243bb1019fe632" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136" | |
| + "812059a5da198637cac7c4a631bee4665b88d7f6228b11e2e28579a5c0c1f761", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "cca339d9a45fa2368c2c68b3a4179133" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136" | |
| + "812059a5da198637cac7c4a631bee4665b88d7f6228b11e2e28579a5c0c1f761" | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "53f6e828a2f0fe0ee815bf0bd5841a34" | |
| + }, | |
| + { | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136" | |
| + "812059a5da198637cac7c4a631bee4665b88d7f6228b11e2e28579a5c0c1f761" | |
| + "ab0812724a7f1e342742cbed374d94d136c6b8795d45b3819830f2c04491faf0" | |
| + "990c62e48b8018b2c3e4a0fa3134cb67fa83e158c994d961c4cb21095c1bf9af" | |
| + "48443d0bb0d21109c89a100b5ce2c20883149c69b561dd88298a1798b10716ef" | |
| + "663cea190ffb83d89593f3f476b6bc24d7e679107ea26adb8caf6652d0656136" | |
| + "812059a5da198637cac7c4a631bee4665b88d7f6228b11e2e28579a5c0c1f761", | |
| + "12976a08c4426d0ce8a82407c4f48207""80f8c20aa71202d1e29179cbcb555a57", | |
| + "b846d44e9bbd53cedffbfbb6b7fa4933" | |
| + }, | |
| + { | |
| + /* | |
| + * poly1305_ieee754.c failed this in final stage | |
| + */ | |
| + "842364e156336c0998b933a6237726180d9e3fdcbde4cd5d17080fc3beb49614" | |
| + "d7122c037463ff104d73f19c12704628d417c4c54a3fe30d3c3d7714382d43b0" | |
| + "382a50a5dee54be844b076e8df88201a1cd43b90eb21643fa96f39b518aa8340" | |
| + "c942ff3c31baf7c9bdbf0f31ae3fa096bf8c63030609829fe72e179824890bc8" | |
| + "e08c315c1cce2a83144dbbff09f74e3efc770b54d0984a8f19b14719e6363564" | |
| + "1d6b1eedf63efbf080e1783d32445412114c20de0b837a0dfa33d6b82825fff4" | |
| + "4c9a70ea54ce47f07df698e6b03323b53079364a5fc3e9dd034392bdde86dccd" | |
| + "da94321c5e44060489336cb65bf3989c36f7282c2f5d2b882c171e74", | |
| + "95d5c005503e510d8cd0aa072c4a4d06""6eabc52d11653df47fbf63ab198bcc26", | |
| + "f248312e578d9d58f8b7bb4d19105431" | |
| + }, | |
| + /* | |
| + * test vectors from Google | |
| + */ | |
| + { | |
| + "", | |
| + "c8afaac331ee372cd6082de134943b17""4710130e9f6fea8d72293850a667d86c", | |
| + "4710130e9f6fea8d72293850a667d86c", | |
| + }, | |
| + { | |
| + "48656c6c6f20776f726c6421", | |
| + "746869732069732033322d6279746520""6b657920666f7220506f6c7931333035", | |
| + "a6f745008f81c916a20dcc74eef2b2f0" | |
| + }, | |
| + { | |
| + "0000000000000000000000000000000000000000000000000000000000000000", | |
| + "746869732069732033322d6279746520""6b657920666f7220506f6c7931333035", | |
| + "49ec78090e481ec6c26b33b91ccc0307" | |
| + }, | |
| + /* | |
| + * test vectors from Andrew Moon | |
| + */ | |
| + { /* nacl */ | |
| + "8e993b9f48681273c29650ba32fc76ce48332ea7164d96a4476fb8c531a1186a" | |
| + "c0dfc17c98dce87b4da7f011ec48c97271d2c20f9b928fe2270d6fb863d51738" | |
| + "b48eeee314a7cc8ab932164548e526ae90224368517acfeabd6bb3732bc0e9da" | |
| + "99832b61ca01b6de56244a9e88d5f9b37973f622a43d14a6599b1f654cb45a74" | |
| + "e355a5", | |
| + "eea6a7251c1e72916d11c2cb214d3c25""2539121d8e234e652d651fa4c8cff880", | |
| + "f3ffc7703f9400e52a7dfb4b3d3305d9" | |
| + }, | |
| + { /* wrap 2^130-5 */ | |
| + "ffffffffffffffffffffffffffffffff", | |
| + "02000000000000000000000000000000""00000000000000000000000000000000", | |
| + "03000000000000000000000000000000" | |
| + }, | |
| + { /* wrap 2^128 */ | |
| + "02000000000000000000000000000000", | |
| + "02000000000000000000000000000000""ffffffffffffffffffffffffffffffff", | |
| + "03000000000000000000000000000000" | |
| + }, | |
| + { /* limb carry */ | |
| + "fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff" | |
| + "11000000000000000000000000000000", | |
| + "01000000000000000000000000000000""00000000000000000000000000000000", | |
| + "05000000000000000000000000000000" | |
| + }, | |
| + { /* 2^130-5 */ | |
| + "fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe" | |
| + "01010101010101010101010101010101", | |
| + "01000000000000000000000000000000""00000000000000000000000000000000", | |
| + "00000000000000000000000000000000" | |
| + }, | |
| + { /* 2^130-6 */ | |
| + "fdffffffffffffffffffffffffffffff", | |
| + "02000000000000000000000000000000""00000000000000000000000000000000", | |
| + "faffffffffffffffffffffffffffffff" | |
| + }, | |
| + { /* 5*H+L reduction intermediate */ | |
| + "e33594d7505e43b900000000000000003394d7505e4379cd0100000000000000" | |
| + "0000000000000000000000000000000001000000000000000000000000000000", | |
| + "01000000000000000400000000000000""00000000000000000000000000000000", | |
| + "14000000000000005500000000000000" | |
| + }, | |
| + { /* 5*H+L reduction final */ | |
| + "e33594d7505e43b900000000000000003394d7505e4379cd0100000000000000" | |
| + "00000000000000000000000000000000", | |
| + "01000000000000000400000000000000""00000000000000000000000000000000", | |
| + "13000000000000000000000000000000" | |
| + } | |
| +}; | |
| + | |
| +static unsigned char hex_digit(char h) | |
| +{ | |
| + if (h >= '0' && h <= '9') | |
| + return h - '0'; | |
| + else if (h >= 'a' && h <= 'f') | |
| + return h - 'a' + 10; | |
| + else if (h >= 'A' && h <= 'F') | |
| + return h - 'A' + 10; | |
| + else | |
| + abort(); | |
| +} | |
| + | |
| +static void hex_decode(unsigned char *out, const char* hex) | |
| +{ | |
| + size_t j = 0; | |
| + | |
| + while (*hex != 0) { | |
| + unsigned char v = hex_digit(*hex++); | |
| + v <<= 4; | |
| + v |= hex_digit(*hex++); | |
| + out[j++] = v; | |
| + } | |
| +} | |
| + | |
| +static void hexdump(unsigned char *a, size_t len) | |
| +{ | |
| + size_t i; | |
| + | |
| + for (i = 0; i < len; i++) { | |
| + printf("%02x", a[i]); | |
| + } | |
| +} | |
| + | |
| +/* misalign returns a pointer that points 0 to 15 bytes into |in| such that the | |
| + * returned pointer has alignment 1 mod 16. */ | |
| +static void* misalign(void* in) | |
| +{ | |
| + intptr_t x = (intptr_t) in; | |
| + x += (17 - (x % 16)) % 16; | |
| + return (void*) x; | |
| +} | |
| + | |
| +int main() | |
| +{ | |
| + | |
| + unsigned num_tests = | |
| + sizeof(chacha_tests) / sizeof(struct chacha_test); | |
| + unsigned i; | |
| + unsigned char nonce_bytes[48 + 16] = {0}; | |
| + | |
| + for (i = 0; i < num_tests; i++) { | |
| + unsigned char *nonce = misalign(nonce_bytes); | |
| + | |
| + printf("ChaCha20 test #%d\n", i); | |
| + const struct chacha_test *test = &chacha_tests[i]; | |
| + unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros; | |
| + size_t len = strlen(test->outhex); | |
| + | |
| + if (strlen(test->noncehex) != 48*2 || (len & 1) == 1) | |
| + return 1; | |
| + | |
| + len /= 2; | |
| + | |
| + hex_decode(nonce, test->noncehex); | |
| + | |
| + expected = malloc(len); | |
| + out_bytes = malloc(len+16); | |
| + zero_bytes = malloc(len+16); | |
| + /* Attempt to test unaligned inputs. */ | |
| + out = misalign(out_bytes); | |
| + zeros = misalign(zero_bytes); | |
| + memset(zeros, 0, len); | |
| + | |
| + hex_decode(expected, test->outhex); | |
| + CRYPTO_chacha_20(out, zeros, len, nonce); | |
| + | |
| + if (memcmp(out, expected, len) != 0) { | |
| + printf("ChaCha20 test #%d failed.\n", i); | |
| + printf("got: "); | |
| + hexdump(out, len); | |
| + printf("\nexpected: "); | |
| + hexdump(expected, len); | |
| + printf("\n"); | |
| + return 1; | |
| + } | |
| + | |
| + | |
| + free(expected); | |
| + free(zero_bytes); | |
| + free(out_bytes); | |
| + } | |
| + | |
| + num_tests = | |
| + sizeof(poly1305_tests) / sizeof(struct poly1305_test); | |
| + unsigned char key[32], out[16], expected[16]; | |
| + poly1305_state poly1305; | |
| + | |
| + for (i = 0; i < num_tests; i++) { | |
| + printf("Poly1305 test #%d\n", i); | |
| + const struct poly1305_test *test = &poly1305_tests[i]; | |
| + unsigned char *in; | |
| + size_t inlen = strlen(test->inputhex); | |
| + | |
| + if (strlen(test->keyhex) != sizeof(key)*2 || | |
| + strlen(test->outhex) != sizeof(out)*2 || | |
| + (inlen & 1) == 1) | |
| + return 1; | |
| + | |
| + inlen /= 2; | |
| + | |
| + hex_decode(key, test->keyhex); | |
| + hex_decode(expected, test->outhex); | |
| + | |
| + in = malloc(inlen); | |
| + | |
| + hex_decode(in, test->inputhex); | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| + if((OPENSSL_ia32cap_loc()[1] >> 5) & 1) { | |
| + poly1305_init_x64(&poly1305, key); | |
| + poly1305_update_avx2(&poly1305, in, inlen); | |
| + poly1305_finish_avx2(&poly1305, out); | |
| + } else { | |
| + poly1305_init_x64(&poly1305, key); | |
| + poly1305_update_x64(&poly1305, in, inlen); | |
| + poly1305_finish_x64(&poly1305, out); | |
| + } | |
| +#else | |
| + { | |
| + CRYPTO_poly1305_init(&poly1305, key); | |
| + CRYPTO_poly1305_update(&poly1305, in, inlen); | |
| + CRYPTO_poly1305_finish(&poly1305, out); | |
| + } | |
| +#endif | |
| + if (memcmp(out, expected, sizeof(expected)) != 0) { | |
| + printf("Poly1305 test #%d failed.\n", i); | |
| + printf("got: "); | |
| + hexdump(out, sizeof(out)); | |
| + printf("\nexpected: "); | |
| + hexdump(expected, sizeof(expected)); | |
| + printf("\n"); | |
| + return 1; | |
| + } | |
| + | |
| + free(in); | |
| + } | |
| + | |
| + printf("PASS\n"); | |
| + return 0; | |
| +} | |
| + | |
| diff --git a/crypto/chacha20poly1305/poly1305.c b/crypto/chacha20poly1305/poly1305.c | |
| new file mode 100644 | |
| index 0000000..50bc4a0 | |
| --- /dev/null | |
| +++ b/crypto/chacha20poly1305/poly1305.c | |
| @@ -0,0 +1,287 @@ | |
| +/* Copyright (c) 2014, Google Inc. | |
| + * | |
| + * Permission to use, copy, modify, and/or distribute this software for any | |
| + * purpose with or without fee is hereby granted, provided that the above | |
| + * copyright notice and this permission notice appear in all copies. | |
| + * | |
| + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
| + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | |
| + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
| + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ | |
| + | |
| +/* This implementation of poly1305 is by Andrew Moon | |
| + * (https://github.com/floodyberry/poly1305-donna) and released as public | |
| + * domain. */ | |
| + | |
| +#include "chacha20poly1305.h" | |
| + | |
| +#include <string.h> | |
| + | |
| +#if !defined(B_ENDIAN) | |
| +/* We can assume little-endian. */ | |
| +static uint32_t U8TO32_LE(const uint8_t *m) { | |
| + uint32_t r; | |
| + memcpy(&r, m, sizeof(r)); | |
| + return r; | |
| +} | |
| + | |
| +static void U32TO8_LE(uint8_t *m, uint32_t v) { memcpy(m, &v, sizeof(v)); } | |
| +#else | |
| +static uint32_t U8TO32_LE(const uint8_t *m) { | |
| + return (uint32_t)m[0] | (uint32_t)m[1] << 8 | (uint32_t)m[2] << 16 | | |
| + (uint32_t)m[3] << 24; | |
| +} | |
| + | |
| +static void U32TO8_LE(uint8_t *m, uint32_t v) { | |
| + m[0] = v; | |
| + m[1] = v >> 8; | |
| + m[2] = v >> 16; | |
| + m[3] = v >> 24; | |
| +} | |
| +#endif | |
| + | |
| +static uint64_t mul32x32_64(uint32_t a, uint32_t b) { return (uint64_t)a * b; } | |
| + | |
| +struct poly1305_state_st { | |
| + uint32_t r0, r1, r2, r3, r4; | |
| + uint32_t s1, s2, s3, s4; | |
| + uint32_t h0, h1, h2, h3, h4; | |
| + uint8_t buf[16]; | |
| + unsigned int buf_used; | |
| + uint8_t key[16]; | |
| +}; | |
| + | |
| +/* poly1305_blocks updates |state| given some amount of input data. This | |
| + * function may only be called with a |len| that is not a multiple of 16 at the | |
| + * end of the data. Otherwise the input must be buffered into 16 byte blocks. */ | |
| +static void poly1305_update(struct poly1305_state_st *state, const uint8_t *in, | |
| + size_t len) { | |
| + uint32_t t0, t1, t2, t3; | |
| + uint64_t t[5]; | |
| + uint32_t b; | |
| + uint64_t c; | |
| + size_t j; | |
| + uint8_t mp[16]; | |
| + | |
| + if (len < 16) { | |
| + goto poly1305_donna_atmost15bytes; | |
| + } | |
| + | |
| +poly1305_donna_16bytes: | |
| + t0 = U8TO32_LE(in); | |
| + t1 = U8TO32_LE(in + 4); | |
| + t2 = U8TO32_LE(in + 8); | |
| + t3 = U8TO32_LE(in + 12); | |
| + | |
| + in += 16; | |
| + len -= 16; | |
| + | |
| + state->h0 += t0 & 0x3ffffff; | |
| + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
| + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
| + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
| + state->h4 += (t3 >> 8) | (1 << 24); | |
| + | |
| +poly1305_donna_mul: | |
| + t[0] = mul32x32_64(state->h0, state->r0) + mul32x32_64(state->h1, state->s4) + | |
| + mul32x32_64(state->h2, state->s3) + mul32x32_64(state->h3, state->s2) + | |
| + mul32x32_64(state->h4, state->s1); | |
| + t[1] = mul32x32_64(state->h0, state->r1) + mul32x32_64(state->h1, state->r0) + | |
| + mul32x32_64(state->h2, state->s4) + mul32x32_64(state->h3, state->s3) + | |
| + mul32x32_64(state->h4, state->s2); | |
| + t[2] = mul32x32_64(state->h0, state->r2) + mul32x32_64(state->h1, state->r1) + | |
| + mul32x32_64(state->h2, state->r0) + mul32x32_64(state->h3, state->s4) + | |
| + mul32x32_64(state->h4, state->s3); | |
| + t[3] = mul32x32_64(state->h0, state->r3) + mul32x32_64(state->h1, state->r2) + | |
| + mul32x32_64(state->h2, state->r1) + mul32x32_64(state->h3, state->r0) + | |
| + mul32x32_64(state->h4, state->s4); | |
| + t[4] = mul32x32_64(state->h0, state->r4) + mul32x32_64(state->h1, state->r3) + | |
| + mul32x32_64(state->h2, state->r2) + mul32x32_64(state->h3, state->r1) + | |
| + mul32x32_64(state->h4, state->r0); | |
| + | |
| + state->h0 = (uint32_t)t[0] & 0x3ffffff; | |
| + c = (t[0] >> 26); | |
| + t[1] += c; | |
| + state->h1 = (uint32_t)t[1] & 0x3ffffff; | |
| + b = (uint32_t)(t[1] >> 26); | |
| + t[2] += b; | |
| + state->h2 = (uint32_t)t[2] & 0x3ffffff; | |
| + b = (uint32_t)(t[2] >> 26); | |
| + t[3] += b; | |
| + state->h3 = (uint32_t)t[3] & 0x3ffffff; | |
| + b = (uint32_t)(t[3] >> 26); | |
| + t[4] += b; | |
| + state->h4 = (uint32_t)t[4] & 0x3ffffff; | |
| + b = (uint32_t)(t[4] >> 26); | |
| + state->h0 += b * 5; | |
| + | |
| + if (len >= 16) | |
| + goto poly1305_donna_16bytes; | |
| + | |
| +/* final bytes */ | |
| +poly1305_donna_atmost15bytes: | |
| + if (!len) | |
| + return; | |
| + | |
| + for (j = 0; j < len; j++) | |
| + mp[j] = in[j]; | |
| + mp[j++] = 1; | |
| + for (; j < 16; j++) | |
| + mp[j] = 0; | |
| + len = 0; | |
| + | |
| + t0 = U8TO32_LE(mp + 0); | |
| + t1 = U8TO32_LE(mp + 4); | |
| + t2 = U8TO32_LE(mp + 8); | |
| + t3 = U8TO32_LE(mp + 12); | |
| + | |
| + state->h0 += t0 & 0x3ffffff; | |
| + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
| + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
| + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
| + state->h4 += (t3 >> 8); | |
| + | |
| + goto poly1305_donna_mul; | |
| +} | |
| + | |
| +void CRYPTO_poly1305_init(poly1305_state *statep, const uint8_t key[32]) { | |
| + struct poly1305_state_st *state = (struct poly1305_state_st *)statep; | |
| + uint32_t t0, t1, t2, t3; | |
| + | |
| + t0 = U8TO32_LE(key + 0); | |
| + t1 = U8TO32_LE(key + 4); | |
| + t2 = U8TO32_LE(key + 8); | |
| + t3 = U8TO32_LE(key + 12); | |
| + | |
| + /* precompute multipliers */ | |
| + state->r0 = t0 & 0x3ffffff; | |
| + t0 >>= 26; | |
| + t0 |= t1 << 6; | |
| + state->r1 = t0 & 0x3ffff03; | |
| + t1 >>= 20; | |
| + t1 |= t2 << 12; | |
| + state->r2 = t1 & 0x3ffc0ff; | |
| + t2 >>= 14; | |
| + t2 |= t3 << 18; | |
| + state->r3 = t2 & 0x3f03fff; | |
| + t3 >>= 8; | |
| + state->r4 = t3 & 0x00fffff; | |
| + | |
| + state->s1 = state->r1 * 5; | |
| + state->s2 = state->r2 * 5; | |
| + state->s3 = state->r3 * 5; | |
| + state->s4 = state->r4 * 5; | |
| + | |
| + /* init state */ | |
| + state->h0 = 0; | |
| + state->h1 = 0; | |
| + state->h2 = 0; | |
| + state->h3 = 0; | |
| + state->h4 = 0; | |
| + | |
| + state->buf_used = 0; | |
| + memcpy(state->key, key + 16, sizeof(state->key)); | |
| +} | |
| + | |
| +void CRYPTO_poly1305_update(poly1305_state *statep, const uint8_t *in, | |
| + size_t in_len) { | |
| + unsigned int i; | |
| + struct poly1305_state_st *state = (struct poly1305_state_st *)statep; | |
| + | |
| + if (state->buf_used) { | |
| + unsigned int todo = 16 - state->buf_used; | |
| + if (todo > in_len) | |
| + todo = in_len; | |
| + for (i = 0; i < todo; i++) | |
| + state->buf[state->buf_used + i] = in[i]; | |
| + state->buf_used += todo; | |
| + in_len -= todo; | |
| + in += todo; | |
| + | |
| + if (state->buf_used == 16) { | |
| + poly1305_update(state, state->buf, 16); | |
| + state->buf_used = 0; | |
| + } | |
| + } | |
| + | |
| + if (in_len >= 16) { | |
| + size_t todo = in_len & ~0xf; | |
| + poly1305_update(state, in, todo); | |
| + in += todo; | |
| + in_len &= 0xf; | |
| + } | |
| + | |
| + if (in_len) { | |
| + for (i = 0; i < in_len; i++) | |
| + state->buf[i] = in[i]; | |
| + state->buf_used = in_len; | |
| + } | |
| +} | |
| + | |
| +void CRYPTO_poly1305_finish(poly1305_state *statep, uint8_t mac[16]) { | |
| + struct poly1305_state_st *state = (struct poly1305_state_st *)statep; | |
| + uint64_t f0, f1, f2, f3; | |
| + uint32_t g0, g1, g2, g3, g4; | |
| + uint32_t b, nb; | |
| + | |
| + if (state->buf_used) | |
| + poly1305_update(state, state->buf, state->buf_used); | |
| + | |
| + b = state->h0 >> 26; | |
| + state->h0 = state->h0 & 0x3ffffff; | |
| + state->h1 += b; | |
| + b = state->h1 >> 26; | |
| + state->h1 = state->h1 & 0x3ffffff; | |
| + state->h2 += b; | |
| + b = state->h2 >> 26; | |
| + state->h2 = state->h2 & 0x3ffffff; | |
| + state->h3 += b; | |
| + b = state->h3 >> 26; | |
| + state->h3 = state->h3 & 0x3ffffff; | |
| + state->h4 += b; | |
| + b = state->h4 >> 26; | |
| + state->h4 = state->h4 & 0x3ffffff; | |
| + state->h0 += b * 5; | |
| + | |
| + g0 = state->h0 + 5; | |
| + b = g0 >> 26; | |
| + g0 &= 0x3ffffff; | |
| + g1 = state->h1 + b; | |
| + b = g1 >> 26; | |
| + g1 &= 0x3ffffff; | |
| + g2 = state->h2 + b; | |
| + b = g2 >> 26; | |
| + g2 &= 0x3ffffff; | |
| + g3 = state->h3 + b; | |
| + b = g3 >> 26; | |
| + g3 &= 0x3ffffff; | |
| + g4 = state->h4 + b - (1 << 26); | |
| + | |
| + b = (g4 >> 31) - 1; | |
| + nb = ~b; | |
| + state->h0 = (state->h0 & nb) | (g0 & b); | |
| + state->h1 = (state->h1 & nb) | (g1 & b); | |
| + state->h2 = (state->h2 & nb) | (g2 & b); | |
| + state->h3 = (state->h3 & nb) | (g3 & b); | |
| + state->h4 = (state->h4 & nb) | (g4 & b); | |
| + | |
| + f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); | |
| + f1 = ((state->h1 >> 6) | (state->h2 << 20)) + | |
| + (uint64_t)U8TO32_LE(&state->key[4]); | |
| + f2 = ((state->h2 >> 12) | (state->h3 << 14)) + | |
| + (uint64_t)U8TO32_LE(&state->key[8]); | |
| + f3 = ((state->h3 >> 18) | (state->h4 << 8)) + | |
| + (uint64_t)U8TO32_LE(&state->key[12]); | |
| + | |
| + U32TO8_LE(&mac[0], f0); | |
| + f1 += (f0 >> 32); | |
| + U32TO8_LE(&mac[4], f1); | |
| + f2 += (f1 >> 32); | |
| + U32TO8_LE(&mac[8], f2); | |
| + f3 += (f2 >> 32); | |
| + U32TO8_LE(&mac[12], f3); | |
| +} | |
| + | |
| diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c | |
| index 1925428..3446c6a 100644 | |
| --- a/crypto/cryptlib.c | |
| +++ b/crypto/cryptlib.c | |
| @@ -656,16 +656,6 @@ const char *CRYPTO_get_lock_name(int type) | |
| extern unsigned int OPENSSL_ia32cap_P[4]; | |
| unsigned long *OPENSSL_ia32cap_loc(void) | |
| { | |
| - if (sizeof(long) == 4) | |
| - /* | |
| - * If 32-bit application pulls address of OPENSSL_ia32cap_P[0] | |
| - * clear second element to maintain the illusion that vector | |
| - * is 32-bit. | |
| - */ | |
| - OPENSSL_ia32cap_P[1] = 0; | |
| - | |
| - OPENSSL_ia32cap_P[2] = 0; | |
| - | |
| return (unsigned long *)OPENSSL_ia32cap_P; | |
| } | |
| diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile | |
| index fa138d0..7eee6c7 100644 | |
| --- a/crypto/evp/Makefile | |
| +++ b/crypto/evp/Makefile | |
| @@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_cnf.c \ | |
| c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ | |
| evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ | |
| e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c \ | |
| - e_aes_cbc_hmac_sha1.c e_aes_cbc_hmac_sha256.c e_rc4_hmac_md5.c | |
| + e_aes_cbc_hmac_sha1.c e_aes_cbc_hmac_sha256.c e_rc4_hmac_md5.c \ | |
| + e_chacha20poly1305.c | |
| LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ | |
| e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ | |
| @@ -42,7 +43,8 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ | |
| c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ | |
| evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ | |
| e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o \ | |
| - e_aes_cbc_hmac_sha1.o e_aes_cbc_hmac_sha256.o e_rc4_hmac_md5.o | |
| + e_aes_cbc_hmac_sha1.o e_aes_cbc_hmac_sha256.o e_rc4_hmac_md5.o \ | |
| + e_chacha20poly1305.o | |
| SRC= $(LIBSRC) | |
| @@ -264,6 +266,7 @@ e_cast.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h | |
| e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
| e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
| e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h | |
| +e_chacha20poly1305.o: ../../include/openssl/chacha20poly1305.h e_chacha20poly1305.c | |
| e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
| e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | |
| e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h | |
| diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c | |
| new file mode 100644 | |
| index 0000000..17f8cb4 | |
| --- /dev/null | |
| +++ b/crypto/evp/e_chacha20poly1305.c | |
| @@ -0,0 +1,435 @@ | |
| +/* ==================================================================== | |
| + * Copyright (c) 2001-2014 The OpenSSL Project. All rights reserved. | |
| + * | |
| + * Redistribution and use in source and binary forms, with or without | |
| + * modification, are permitted provided that the following conditions | |
| + * are met: | |
| + * | |
| + * 1. Redistributions of source code must retain the above copyright | |
| + * notice, this list of conditions and the following disclaimer. | |
| + * | |
| + * 2. Redistributions in binary form must reproduce the above copyright | |
| + * notice, this list of conditions and the following disclaimer in | |
| + * the documentation and/or other materials provided with the | |
| + * distribution. | |
| + * | |
| + * 3. All advertising materials mentioning features or use of this | |
| + * software must display the following acknowledgment: | |
| + * "This product includes software developed by the OpenSSL Project | |
| + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | |
| + * | |
| + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| + * endorse or promote products derived from this software without | |
| + * prior written permission. For written permission, please contact | |
| + * openssl-core@openssl.org. | |
| + * | |
| + * 5. Products derived from this software may not be called "OpenSSL" | |
| + * nor may "OpenSSL" appear in their names without prior written | |
| + * permission of the OpenSSL Project. | |
| + * | |
| + * 6. Redistributions of any form whatsoever must retain the following | |
| + * acknowledgment: | |
| + * "This product includes software developed by the OpenSSL Project | |
| + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | |
| + * | |
| + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| + * ==================================================================== | |
| + * | |
| + */ | |
| + | |
| +#include <openssl/opensslconf.h> | |
| +#ifndef OPENSSL_NO_CHACHA_POLY | |
| +#include <openssl/evp.h> | |
| +#include <openssl/err.h> | |
| +#include <openssl/chacha20poly1305.h> | |
| +#include "evp_locl.h" | |
| +#include <openssl/rand.h> | |
| + | |
| +#define FILL_BUFFER ((size_t)128) | |
| + | |
| +typedef struct { | |
| + uint8_t iv[12]; | |
| + uint8_t nonce[48]; | |
| + size_t aad_l; | |
| + size_t ct_l; | |
| + unsigned valid:1; | |
| + unsigned draft:1; | |
| + uint8_t poly_buffer[FILL_BUFFER]; | |
| + uint8_t chacha_buffer[FILL_BUFFER]; | |
| + uint16_t poly_buffer_used; | |
| + uint16_t chacha_used; | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| + void (*poly1305_init_ptr)(poly1305_state *, const uint8_t *); | |
| + void (*poly1305_update_ptr)(poly1305_state *, const uint8_t *, size_t); | |
| + void (*poly1305_finish_ptr)(poly1305_state *, uint8_t *); | |
| + poly1305_state poly_state; | |
| + #define poly_init aead_ctx->poly1305_init_ptr | |
| + #define poly_update poly1305_update_wrapper | |
| + #define poly_finish poly1305_finish_wrapper | |
| +#else | |
| + #define poly_init CRYPTO_poly1305_init | |
| + #define poly_update(c,i,l) CRYPTO_poly1305_update(&c->poly_state,i,l) | |
| + #define poly_finish(c,m) CRYPTO_poly1305_finish(&c->poly_state,m) | |
| +#endif | |
| +} EVP_CHACHA20_POLY1305_CTX; | |
| + | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| +#include <immintrin.h> | |
| + | |
| +static void poly1305_update_wrapper(EVP_CHACHA20_POLY1305_CTX *ctx, | |
| + const uint8_t *in, | |
| + size_t in_len) | |
| +{ | |
| + int todo; | |
| + /* Attempt to fill as many bytes as possible before calling the update | |
| + function */ | |
| + if (in_len < FILL_BUFFER || ctx->poly_buffer_used) { | |
| + todo = FILL_BUFFER - ctx->poly_buffer_used; | |
| + todo = in_len < todo? in_len : todo; | |
| + memcpy(ctx->poly_buffer + ctx->poly_buffer_used, in, todo); | |
| + ctx->poly_buffer_used += todo; | |
| + in += todo; | |
| + in_len -= todo; | |
| + | |
| + if (ctx->poly_buffer_used == FILL_BUFFER) { | |
| + ctx->poly1305_update_ptr(&ctx->poly_state, | |
| + ctx->poly_buffer, | |
| + FILL_BUFFER); | |
| + ctx->poly_buffer_used = 0; | |
| + } | |
| + } | |
| + | |
| + if (in_len >= FILL_BUFFER) { | |
| + ctx->poly1305_update_ptr(&ctx->poly_state, in, in_len & (-FILL_BUFFER)); | |
| + in += in_len & (-FILL_BUFFER); | |
| + in_len &= (FILL_BUFFER - 1); | |
| + } | |
| + | |
| + if (in_len) { | |
| + memcpy(ctx->poly_buffer, in, in_len); | |
| + ctx->poly_buffer_used = in_len; | |
| + } | |
| +} | |
| + | |
| + | |
| +static void poly1305_finish_wrapper(EVP_CHACHA20_POLY1305_CTX *ctx, | |
| + uint8_t mac[POLY1305_MAC_LEN]) | |
| +{ | |
| + if (ctx->poly_buffer_used) { | |
| + | |
| + if (ctx->poly_buffer_used % POLY1305_PAD_LEN) { | |
| + memset(ctx->poly_buffer + ctx->poly_buffer_used, 0, | |
| + POLY1305_PAD_LEN - (ctx->poly_buffer_used % POLY1305_PAD_LEN)); | |
| + } | |
| + | |
| + ctx->poly1305_update_ptr(&ctx->poly_state, | |
| + ctx->poly_buffer, | |
| + ctx->poly_buffer_used); | |
| + } | |
| + | |
| + ctx->poly1305_finish_ptr(&ctx->poly_state, mac); | |
| + memset(ctx->poly_buffer, 0, FILL_BUFFER); | |
| +} | |
| +#endif | |
| + | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| +static void EVP_chacha20_poly1305_cpuid(EVP_CHACHA20_POLY1305_CTX *ctx) | |
| +{ | |
| + if ((OPENSSL_ia32cap_loc()[1] >> 5) & 1) { /* AVX2 */ | |
| + ctx->poly1305_init_ptr = poly1305_init_x64; /* Lazy init */ | |
| + ctx->poly1305_update_ptr = poly1305_update_avx2; | |
| + ctx->poly1305_finish_ptr = poly1305_finish_avx2; | |
| +/* | |
| + } else if (0 && (OPENSSL_ia32cap_loc()[0] >> 60) & 1) { // AVX -disabled | |
| + ctx->poly1305_init_ptr = poly1305_init_avx; | |
| + ctx->poly1305_update_ptr = poly1305_update_avx; | |
| + ctx->poly1305_finish_ptr = poly1305_finish_avx; | |
| +*/ | |
| + } else { /* x64 code */ | |
| + ctx->poly1305_init_ptr = poly1305_init_x64; | |
| + ctx->poly1305_update_ptr = poly1305_update_x64; | |
| + ctx->poly1305_finish_ptr = poly1305_finish_x64; | |
| + } | |
| +} | |
| +#endif | |
| + | |
| + | |
| +static int EVP_chacha20_poly1305_init_draft(EVP_CIPHER_CTX *ctx, | |
| + const unsigned char *key, | |
| + const unsigned char *iv, | |
| + int enc) | |
| +{ | |
| + EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; | |
| + memcpy(aead_ctx->nonce, key, 32); | |
| + aead_ctx->valid = 0; | |
| + aead_ctx->draft = 1; | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| + EVP_chacha20_poly1305_cpuid(aead_ctx); | |
| +#endif | |
| + | |
| + return 1; | |
| +} | |
| + | |
| + | |
| +static int EVP_chacha20_poly1305_init(EVP_CIPHER_CTX *ctx, | |
| + const unsigned char *key, | |
| + const unsigned char *iv, | |
| + int enc) | |
| +{ | |
| + EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; | |
| + memcpy(aead_ctx->nonce, key, 32); | |
| + memcpy(aead_ctx->iv, iv, 12); | |
| + aead_ctx->valid = 0; | |
| + aead_ctx->draft = 0; | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| + EVP_chacha20_poly1305_cpuid(aead_ctx); | |
| +#endif | |
| + | |
| + return 1; | |
| +} | |
| + | |
| + | |
| +static int EVP_chacha20_poly1305_cipher(EVP_CIPHER_CTX *ctx, | |
| + unsigned char *out, | |
| + const unsigned char *in, | |
| + size_t inl) | |
| +{ | |
| + EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; | |
| + uint8_t poly_mac[POLY1305_MAC_LEN]; | |
| + uint8_t zero[POLY1305_PAD_LEN] = {0}; | |
| + uint64_t cmp; | |
| + int i, todo; | |
| + | |
| + if (!aead_ctx->valid) | |
| + return 0; | |
| + | |
| + if (inl < POLY1305_MAC_LEN) | |
| + return -1; | |
| + | |
| + /* Fix for MAC */ | |
| + inl -= POLY1305_MAC_LEN; | |
| + | |
| + if (!ctx->encrypt) { | |
| + poly_update(aead_ctx, in, inl); | |
| + } | |
| + | |
| + i = 0; | |
| + if (inl < 256) { | |
| + /* Consume the buffer we computed during poly initialization */ | |
| + todo = inl > (FILL_BUFFER - aead_ctx->chacha_used) ? | |
| + FILL_BUFFER - aead_ctx->chacha_used : | |
| + inl; | |
| + | |
| +#ifdef CHAPOLY_x86_64_ASM | |
| + for (; i <= todo - 16; i+=16) { | |
| + _mm_storeu_si128((__m128i*)&out[i], | |
| + _mm_xor_si128(_mm_loadu_si128((__m128i *)&in[i]), | |
| + _mm_loadu_si128((__m128i *)&aead_ctx->chacha_buffer[i + 64]))); | |
| + } | |
| +#endif | |
| + for (; i < todo; i++) { | |
| + out[i] = in[i] ^ aead_ctx->chacha_buffer[i + 64 /*aead_ctx->chacha_used*/]; | |
| + } | |
| + | |
| + } else { | |
| + /* For long messages don't use precomputed buffer */ | |
| + ((uint64_t *)(aead_ctx->nonce))[4]--; | |
| + } | |
| + | |
| + todo = inl - i; | |
| + | |
| + if (todo) { | |
| + CRYPTO_chacha_20(&out[i], &in[i], todo, aead_ctx->nonce); | |
| + } | |
| + | |
| + if (ctx->encrypt) { | |
| + poly_update(aead_ctx, out, inl); | |
| + } | |
| + | |
| + aead_ctx->ct_l += inl; | |
| + | |
| + if (!aead_ctx->draft) { | |
| + /* For RFC padd ciphertext with zeroes, then mac len(aad)||len(ct) */ | |
| + todo = aead_ctx->ct_l % POLY1305_PAD_LEN ? | |
| + POLY1305_PAD_LEN - (aead_ctx->ct_l % POLY1305_PAD_LEN) : | |
| + 0; | |
| + | |
| + if (todo) { | |
| + poly_update(aead_ctx, zero, todo); | |
| + } | |
| + | |
| + poly_update(aead_ctx, (uint8_t*)&aead_ctx->aad_l, sizeof(uint64_t)); | |
| + poly_update(aead_ctx, (uint8_t*)&aead_ctx->ct_l, sizeof(uint64_t)); | |
| + | |
| + } else { | |
| + /* For the draft don't pad, mac len(ct) */ | |
| + poly_update(aead_ctx, (uint8_t*)&aead_ctx->ct_l, sizeof(uint64_t)); | |
| + } | |
| + aead_ctx->valid = 0; | |
| + | |
| + if (ctx->encrypt) { | |
| + poly_finish(aead_ctx, &out[inl]); | |
| + return inl + POLY1305_MAC_LEN; | |
| + | |
| + } else { /* Decryption */ | |
| + poly_finish(aead_ctx, poly_mac); | |
| + /* Constant time comparison */ | |
| + cmp = (*(uint64_t *)(poly_mac)) ^ (*(uint64_t *)(in + inl)); | |
| + cmp |= (*(uint64_t *)(poly_mac + 8)) ^ (*(uint64_t *)(in + inl + 8)); | |
| + | |
| + if (cmp) { | |
| + OPENSSL_cleanse(out, inl); | |
| + return -1; | |
| + } | |
| + | |
| + return inl; | |
| + } | |
| +} | |
| + | |
| + | |
| +static int EVP_chacha20_poly1305_cleanup(EVP_CIPHER_CTX *ctx) | |
| +{ | |
| + return 1; | |
| +} | |
| + | |
| + | |
| +static int EVP_chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, | |
| + int type, | |
| + int arg, | |
| + void *ptr) | |
| +{ | |
| + EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; | |
| + uint8_t aad[EVP_AEAD_TLS1_AAD_LEN + 8]; | |
| + uint64_t thirteen = EVP_AEAD_TLS1_AAD_LEN; | |
| + | |
| + switch (type) { | |
| + case EVP_CTRL_AEAD_TLS1_AAD: | |
| + | |
| + if (arg != EVP_AEAD_TLS1_AAD_LEN) | |
| + return 0; | |
| + | |
| + /* Initialize poly keys */ | |
| + memset(aead_ctx->chacha_buffer, 0, FILL_BUFFER); | |
| + | |
| + if (!aead_ctx->draft) { | |
| + /* RFC IV = (0 || iv) ^ seq_num */ | |
| + memset(aead_ctx->nonce + 32, 0, 4); | |
| + memcpy(aead_ctx->nonce + 36, aead_ctx->iv, 12); | |
| + *(uint64_t *)(aead_ctx->nonce + 40) ^= *(uint64_t *)(ptr); | |
| + | |
| + } else { | |
| + /* draft IV = 0 || seq_num */ | |
| + memset(aead_ctx->nonce + 32, 0, 8); | |
| + memcpy(aead_ctx->nonce + 40, ptr, 8); | |
| + } | |
| + /* Poly keys = ENC(0) */ | |
| + CRYPTO_chacha_20(aead_ctx->chacha_buffer, | |
| + aead_ctx->chacha_buffer, | |
| + FILL_BUFFER, | |
| + aead_ctx->nonce); | |
| + | |
| + poly_init(&aead_ctx->poly_state, aead_ctx->chacha_buffer); | |
| + | |
| + aead_ctx->chacha_used = 64; | |
| + aead_ctx->poly_buffer_used = 0; | |
| + aead_ctx->aad_l = arg; | |
| + aead_ctx->ct_l = 0; | |
| + | |
| + /* Absorb AAD */ | |
| + memcpy(aad, ptr, arg); | |
| + memset(aad + arg, 0, sizeof(aad) - arg); | |
| + | |
| + /* If decrypting fix length for tag */ | |
| + if (!ctx->encrypt) { | |
| + unsigned int len = (aad[arg-2] << 8) | aad[arg-1]; | |
| + len -= POLY1305_MAC_LEN; | |
| + aad[arg-2] = len>>8; | |
| + aad[arg-1] = len & 0xff; | |
| + } | |
| + | |
| + if (!aead_ctx->draft) { | |
| + /* In the RFC, AAD is padded with zeroes */ | |
| + poly_update(aead_ctx, aad, POLY1305_PAD_LEN); | |
| + | |
| + } else { | |
| + /* In the draft AAD is followed by len(AAD) */ | |
| + memcpy(&aad[arg], &thirteen, sizeof(thirteen)); | |
| + poly_update(aead_ctx, aad, arg + sizeof(thirteen)); | |
| + } | |
| + | |
| + aead_ctx->valid = 1; | |
| + return POLY1305_MAC_LEN; | |
| + | |
| + break; | |
| + | |
| + default: | |
| + return 0; | |
| + break; | |
| + } | |
| + | |
| + return 0; | |
| +} | |
| + | |
| + | |
| +#define CUSTOM_FLAGS (\ | |
| + EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ | |
| + | EVP_CIPH_ALWAYS_CALL_INIT \ | |
| + | EVP_CIPH_CUSTOM_COPY) | |
| + | |
| + | |
| +static const EVP_CIPHER chacha20_poly1305_d = { | |
| + 0, /* nid ??? */ | |
| + 1, /* block size, sorta */ | |
| + 32, /* key len */ | |
| + 0, /* iv len */ | |
| + CUSTOM_FLAGS|EVP_CIPH_FLAG_AEAD_CIPHER, /* flags */ | |
| + EVP_chacha20_poly1305_init_draft, | |
| + EVP_chacha20_poly1305_cipher, | |
| + EVP_chacha20_poly1305_cleanup, | |
| + sizeof(EVP_CHACHA20_POLY1305_CTX), /* ctx size */ | |
| + NULL, | |
| + NULL, | |
| + EVP_chacha20_poly1305_ctrl, | |
| + NULL | |
| + }; | |
| + | |
| + | |
| +static const EVP_CIPHER chacha20_poly1305 = { | |
| + 0, /* nid ??? */ | |
| + 1, /* block size, sorta */ | |
| + 32, /* key len */ | |
| + 12, /* iv len */ | |
| + CUSTOM_FLAGS|EVP_CIPH_FLAG_AEAD_CIPHER, /* flags */ | |
| + EVP_chacha20_poly1305_init, | |
| + EVP_chacha20_poly1305_cipher, | |
| + EVP_chacha20_poly1305_cleanup, | |
| + sizeof(EVP_CHACHA20_POLY1305_CTX), /* ctx size */ | |
| + NULL, | |
| + NULL, | |
| + EVP_chacha20_poly1305_ctrl, | |
| + NULL | |
| + }; | |
| + | |
| + | |
| +const EVP_CIPHER *EVP_chacha20_poly1305_draft(void) | |
| +{ return &chacha20_poly1305_d; } | |
| + | |
| + | |
| +const EVP_CIPHER *EVP_chacha20_poly1305(void) | |
| +{ return &chacha20_poly1305; } | |
| +#endif | |
| diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h | |
| index 39ab793..53ed671 100644 | |
| --- a/crypto/evp/evp.h | |
| +++ b/crypto/evp/evp.h | |
| @@ -893,6 +893,10 @@ const EVP_CIPHER *EVP_camellia_256_cfb128(void); | |
| # define EVP_camellia_256_cfb EVP_camellia_256_cfb128 | |
| const EVP_CIPHER *EVP_camellia_256_ofb(void); | |
| # endif | |
| +# ifndef OPENSSL_NO_CHACHA_POLY | |
| +const EVP_CIPHER *EVP_chacha20_poly1305(void); | |
| +const EVP_CIPHER *EVP_chacha20_poly1305_draft(void); | |
| +# endif | |
| # ifndef OPENSSL_NO_SEED | |
| const EVP_CIPHER *EVP_seed_ecb(void); | |
| diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c | |
| index 0385e03..a29f325 100644 | |
| --- a/ssl/s3_lib.c | |
| +++ b/ssl/s3_lib.c | |
| @@ -2945,6 +2945,111 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { | |
| 256}, | |
| #endif | |
| +#if !defined(OPENSSL_NO_CHACHA_POLY) | |
| + /* Draft ciphers */ | |
| + { | |
| + 1, | |
| + TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305_D, | |
| + TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305_D, | |
| + SSL_kEECDH, | |
| + SSL_aRSA, | |
| + SSL_CHACHA20POLY1305_D, | |
| + SSL_AEAD, | |
| + SSL_TLSV1_2, | |
| + SSL_HIGH, | |
| + SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, | |
| + 256, | |
| + 256}, | |
| + { | |
| + 1, | |
| + TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D, | |
| + TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D, | |
| + SSL_kEECDH, | |
| + SSL_aECDSA, | |
| + SSL_CHACHA20POLY1305_D, | |
| + SSL_AEAD, | |
| + SSL_TLSV1_2, | |
| + SSL_HIGH, | |
| + SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, | |
| + 256, | |
| + 256}, | |
| + { | |
| + 1, | |
| + TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305_D, | |
| + TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305_D, | |
| + SSL_kEDH, | |
| + SSL_aRSA, | |
| + SSL_CHACHA20POLY1305_D, | |
| + SSL_AEAD, | |
| + SSL_TLSV1_2, | |
| + SSL_HIGH, | |
| + SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, | |
| + 256, | |
| + 256}, | |
| + /* RFC ciphers */ | |
| + /* Cipher CCA8 as per draft-ietf-tls-chacha20-poly1305-03 */ | |
| + { | |
| + 1, | |
| + TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, | |
| + TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305, | |
| + SSL_kECDHE, | |
| + SSL_aRSA, | |
| + SSL_CHACHA20POLY1305, | |
| + SSL_AEAD, | |
| + SSL_TLSV1_2, | |
| + SSL_HIGH, | |
| + SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, | |
| + 256, | |
| + 256, | |
| + }, | |
| + /* Cipher CCA9 */ | |
| + { | |
| + 1, | |
| + TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, | |
| + TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, | |
| + SSL_kECDHE, | |
| + SSL_aECDSA, | |
| + SSL_CHACHA20POLY1305, | |
| + SSL_AEAD, | |
| + SSL_TLSV1_2, | |
| + SSL_HIGH, | |
| + SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, | |
| + 256, | |
| + 256, | |
| + }, | |
| + /* Cipher CCAA */ | |
| + { | |
| + 1, | |
| + TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, | |
| + TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305, | |
| + SSL_kDHE, | |
| + SSL_aRSA, | |
| + SSL_CHACHA20POLY1305, | |
| + SSL_AEAD, | |
| + SSL_TLSV1_2, | |
| + SSL_HIGH, | |
| + SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, | |
| + 256, | |
| + 256, | |
| + }, | |
| + /* Cipher CCAB */ | |
| + { | |
| + 1, | |
| + TLS1_TXT_PSK_WITH_CHACHA20_POLY1305, | |
| + TLS1_CK_PSK_WITH_CHACHA20_POLY1305, | |
| + SSL_kPSK, | |
| + SSL_aPSK, | |
| + SSL_CHACHA20POLY1305, | |
| + SSL_AEAD, | |
| + SSL_TLSV1_2, | |
| + SSL_HIGH, | |
| + SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, | |
| + 256, | |
| + 256, | |
| + }, | |
| +#endif | |
| + | |
| + | |
| /* end of list */ | |
| }; | |
| @@ -4090,6 +4195,7 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, | |
| int i, ii, ok; | |
| CERT *cert; | |
| unsigned long alg_k, alg_a, mask_k, mask_a, emask_k, emask_a; | |
| + int use_chacha = 0; | |
| /* Let's see which ciphers we can support */ | |
| cert = s->cert; | |
| @@ -4123,9 +4229,17 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, | |
| if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || tls1_suiteb(s)) { | |
| prio = srvr; | |
| allow = clnt; | |
| + /* Use ChaCha20+Poly1305 iff it's client's most preferred cipher suite */ | |
| + if (sk_SSL_CIPHER_num(clnt) > 0) { | |
| + c = sk_SSL_CIPHER_value(clnt, 0); | |
| + if (c->algorithm_enc == SSL_CHACHA20POLY1305 || | |
| + c->algorithm_enc == SSL_CHACHA20POLY1305_D) | |
| + use_chacha = 1; | |
| + } | |
| } else { | |
| prio = clnt; | |
| allow = srvr; | |
| + use_chacha = 1; | |
| } | |
| tls1_set_cert_validity(s); | |
| @@ -4137,6 +4251,11 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, | |
| if ((c->algorithm_ssl & SSL_TLSV1_2) && !SSL_USE_TLS1_2_CIPHERS(s)) | |
| continue; | |
| + /* Skip ChaCha unless top client priority */ | |
| + if ((c->algorithm_enc == SSL_CHACHA20POLY1305 || | |
| + c->algorithm_enc == SSL_CHACHA20POLY1305_D) && !use_chacha) | |
| + continue; | |
| + | |
| ssl_set_cert_masks(cert, c); | |
| mask_k = cert->mask_k; | |
| mask_a = cert->mask_a; | |
| diff --git a/ssl/ssl.h b/ssl/ssl.h | |
| index 90aeb0c..dc1cde2 100644 | |
| --- a/ssl/ssl.h | |
| +++ b/ssl/ssl.h | |
| @@ -294,6 +294,8 @@ extern "C" { | |
| # define SSL_TXT_AES256 "AES256" | |
| # define SSL_TXT_AES "AES" | |
| # define SSL_TXT_AES_GCM "AESGCM" | |
| +# define SSL_TXT_CHACHA20_D "CHACHA20-draft" | |
| +# define SSL_TXT_CHACHA20 "CHACHA20" | |
| # define SSL_TXT_CAMELLIA128 "CAMELLIA128" | |
| # define SSL_TXT_CAMELLIA256 "CAMELLIA256" | |
| # define SSL_TXT_CAMELLIA "CAMELLIA" | |
| diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c | |
| index 2ad8f43..09a162a 100644 | |
| --- a/ssl/ssl_ciph.c | |
| +++ b/ssl/ssl_ciph.c | |
| @@ -150,25 +150,27 @@ | |
| #endif | |
| #include "ssl_locl.h" | |
| -#define SSL_ENC_DES_IDX 0 | |
| -#define SSL_ENC_3DES_IDX 1 | |
| -#define SSL_ENC_RC4_IDX 2 | |
| -#define SSL_ENC_RC2_IDX 3 | |
| -#define SSL_ENC_IDEA_IDX 4 | |
| -#define SSL_ENC_NULL_IDX 5 | |
| -#define SSL_ENC_AES128_IDX 6 | |
| -#define SSL_ENC_AES256_IDX 7 | |
| -#define SSL_ENC_CAMELLIA128_IDX 8 | |
| -#define SSL_ENC_CAMELLIA256_IDX 9 | |
| -#define SSL_ENC_GOST89_IDX 10 | |
| -#define SSL_ENC_SEED_IDX 11 | |
| -#define SSL_ENC_AES128GCM_IDX 12 | |
| -#define SSL_ENC_AES256GCM_IDX 13 | |
| -#define SSL_ENC_NUM_IDX 14 | |
| +#define SSL_ENC_DES_IDX 0 | |
| +#define SSL_ENC_3DES_IDX 1 | |
| +#define SSL_ENC_RC4_IDX 2 | |
| +#define SSL_ENC_RC2_IDX 3 | |
| +#define SSL_ENC_IDEA_IDX 4 | |
| +#define SSL_ENC_NULL_IDX 5 | |
| +#define SSL_ENC_AES128_IDX 6 | |
| +#define SSL_ENC_AES256_IDX 7 | |
| +#define SSL_ENC_CAMELLIA128_IDX 8 | |
| +#define SSL_ENC_CAMELLIA256_IDX 9 | |
| +#define SSL_ENC_GOST89_IDX 10 | |
| +#define SSL_ENC_SEED_IDX 11 | |
| +#define SSL_ENC_AES128GCM_IDX 12 | |
| +#define SSL_ENC_AES256GCM_IDX 13 | |
| +#define SSL_ENC_CHACHA20POLY1305_DRAFT_IDX 14 | |
| +#define SSL_ENC_CHACHA20POLY1305_IDX 15 | |
| +#define SSL_ENC_NUM_IDX 16 | |
| static const EVP_CIPHER *ssl_cipher_methods[SSL_ENC_NUM_IDX] = { | |
| NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
| - NULL, NULL | |
| + NULL, NULL, NULL, NULL | |
| }; | |
| #define SSL_COMP_NULL_IDX 0 | |
| @@ -362,6 +364,9 @@ static const SSL_CIPHER cipher_aliases[] = { | |
| {0, SSL3_TXT_DHE_RSA_DES_192_CBC3_SHA, 0, | |
| SSL_kDHE, SSL_aRSA, SSL_3DES, SSL_SHA1, SSL_SSLV3, | |
| SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, 0, 0, 0,}, | |
| + | |
| + {0, SSL_TXT_CHACHA20_D, 0, 0, 0, SSL_CHACHA20POLY1305_D, 0, 0, 0, 0, 0, 0}, | |
| + {0, SSL_TXT_CHACHA20, 0, 0, 0, SSL_CHACHA20POLY1305, 0, 0, 0, 0, 0, 0}, | |
| }; | |
| /* | |
| @@ -431,6 +436,11 @@ void ssl_load_ciphers(void) | |
| ssl_cipher_methods[SSL_ENC_AES256GCM_IDX] = | |
| EVP_get_cipherbyname(SN_aes_256_gcm); | |
| + ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_DRAFT_IDX] = | |
| + EVP_chacha20_poly1305_draft(); | |
| + ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_IDX] = | |
| + EVP_chacha20_poly1305(); | |
| + | |
| ssl_digest_methods[SSL_MD_MD5_IDX] = EVP_get_digestbyname(SN_md5); | |
| ssl_mac_secret_size[SSL_MD_MD5_IDX] = | |
| EVP_MD_size(ssl_digest_methods[SSL_MD_MD5_IDX]); | |
| @@ -581,6 +591,12 @@ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, | |
| case SSL_AES256GCM: | |
| i = SSL_ENC_AES256GCM_IDX; | |
| break; | |
| + case SSL_CHACHA20POLY1305_D: | |
| + i = SSL_ENC_CHACHA20POLY1305_DRAFT_IDX; | |
| + break; | |
| + case SSL_CHACHA20POLY1305: | |
| + i = SSL_ENC_CHACHA20POLY1305_IDX; | |
| + break; | |
| default: | |
| i = -1; | |
| break; | |
| @@ -796,6 +812,12 @@ static void ssl_cipher_get_disabled(unsigned long *mkey, unsigned long *auth, | |
| (ssl_cipher_methods[SSL_ENC_AES256GCM_IDX] == | |
| NULL) ? SSL_AES256GCM : 0; | |
| *enc |= | |
| + (ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_DRAFT_IDX] == | |
| + NULL) ? SSL_CHACHA20POLY1305_D : 0; | |
| + *enc |= | |
| + (ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_IDX] == | |
| + NULL) ? SSL_CHACHA20POLY1305 : 0; | |
| + *enc |= | |
| (ssl_cipher_methods[SSL_ENC_CAMELLIA128_IDX] == | |
| NULL) ? SSL_CAMELLIA128 : 0; | |
| *enc |= | |
| @@ -1812,6 +1834,12 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) | |
| case SSL_AES256GCM: | |
| enc = "AESGCM(256)"; | |
| break; | |
| + case SSL_CHACHA20POLY1305_D: | |
| + enc = "ChaCha20-Poly1305-draft"; | |
| + break; | |
| + case SSL_CHACHA20POLY1305: | |
| + enc = "ChaCha20-Poly1305"; | |
| + break; | |
| case SSL_CAMELLIA128: | |
| enc = "Camellia(128)"; | |
| break; | |
| diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h | |
| index 6df725f..5e2fbe4 100644 | |
| --- a/ssl/ssl_locl.h | |
| +++ b/ssl/ssl_locl.h | |
| @@ -354,6 +354,8 @@ | |
| # define SSL_SEED 0x00000800L | |
| # define SSL_AES128GCM 0x00001000L | |
| # define SSL_AES256GCM 0x00002000L | |
| +# define SSL_CHACHA20POLY1305_D 0x00004000L | |
| +# define SSL_CHACHA20POLY1305 0x00080000U /* Value from openssl */ | |
| # define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL_AES256GCM) | |
| # define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) | |
| diff --git a/ssl/tls1.h b/ssl/tls1.h | |
| index 7e237d0..fb0c981 100644 | |
| --- a/ssl/tls1.h | |
| +++ b/ssl/tls1.h | |
| @@ -563,6 +563,20 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) | |
| # define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 | |
| # define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 | |
| +/* ChaCha20-Poly1305 ciphersuites draft-agl-tls-chacha20poly1305-01 */ | |
| +# define TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305_D 0x0300CC13 | |
| +# define TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D 0x0300CC14 | |
| +# define TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305_D 0x0300CC15 | |
| + | |
| +/* ChaCha20-Poly1305 ciphersuites from RFC */ | |
| +# define TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305 0x0300CCA8 | |
| +# define TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 0x0300CCA9 | |
| +# define TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305 0x0300CCAA | |
| +# define TLS1_CK_PSK_WITH_CHACHA20_POLY1305 0x0300CCAB | |
| +# define TLS1_CK_ECDHE_PSK_WITH_CHACHA20_POLY1305 0x0300CCAC | |
| +# define TLS1_CK_DHE_PSK_WITH_CHACHA20_POLY1305 0x0300CCAD | |
| +# define TLS1_CK_RSA_PSK_WITH_CHACHA20_POLY1305 0x0300CCAE | |
| + | |
| /* | |
| * XXX * Backward compatibility alert: + * Older versions of OpenSSL gave | |
| * some DHE ciphers names with "EDH" + * instead of "DHE". Going forward, we | |
| @@ -713,6 +727,20 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) | |
| # define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" | |
| # define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" | |
| +/* ChaCha20-Poly1305 ciphersuites draft-agl-tls-chacha20poly1305-01 */ | |
| +# define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305_D "ECDHE-RSA-CHACHA20-POLY1305-D" | |
| +# define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D "ECDHE-ECDSA-CHACHA20-POLY1305-D" | |
| +# define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305_D "DHE-RSA-CHACHA20-POLY1305-D" | |
| + | |
| +/* Chacha20-Poly1305 ciphersuites from RFC */ | |
| +# define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY1305" | |
| +# define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-POLY1305" | |
| +# define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA20-POLY1305" | |
| +# define TLS1_TXT_PSK_WITH_CHACHA20_POLY1305 "PSK-CHACHA20-POLY1305" | |
| +# define TLS1_TXT_ECDHE_PSK_WITH_CHACHA20_POLY1305 "ECDHE-PSK-CHACHA20-POLY1305" | |
| +# define TLS1_TXT_DHE_PSK_WITH_CHACHA20_POLY1305 "DHE-PSK-CHACHA20-POLY1305" | |
| +# define TLS1_TXT_RSA_PSK_WITH_CHACHA20_POLY1305 "RSA-PSK-CHACHA20-POLY1305" | |
| + | |
| # define TLS_CT_RSA_SIGN 1 | |
| # define TLS_CT_DSS_SIGN 2 | |
| # define TLS_CT_RSA_FIXED_DH 3 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment