Last active
February 12, 2018 01:05
-
-
Save laruence/f293d9e39ab932e20cbf62c2ec072356 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/Zend/zend_cpuinfo.h b/Zend/zend_cpuinfo.h | |
index d0d3a93..f0c298d 100644 | |
--- a/Zend/zend_cpuinfo.h | |
+++ b/Zend/zend_cpuinfo.h | |
@@ -106,42 +106,42 @@ ZEND_API int zend_cpu_supports(zend_cpu_feature feature); | |
* before all PLT symbols are resloved. in other words, | |
* resolver functions should not depends any external | |
* functions */ | |
-static zend_always_inline int zend_cpu_support_sse2() { | |
+static zend_always_inline int zend_cpu_supports_sse2() { | |
#if PHP_HAVE_BUILTIN_CPU_INIT | |
__builtin_cpu_init(); | |
#endif | |
return __builtin_cpu_supports("sse2"); | |
} | |
-static zend_always_inline int zend_cpu_support_sse3() { | |
+static zend_always_inline int zend_cpu_supports_ssse3() { | |
#if PHP_HAVE_BUILTIN_CPU_INIT | |
__builtin_cpu_init(); | |
#endif | |
- return __builtin_cpu_supports("sse3"); | |
+ return __builtin_cpu_supports("ssse3"); | |
} | |
-static zend_always_inline int zend_cpu_support_sse41() { | |
+static zend_always_inline int zend_cpu_supports_sse41() { | |
#if PHP_HAVE_BUILTIN_CPU_INIT | |
__builtin_cpu_init(); | |
#endif | |
return __builtin_cpu_supports("sse4.1"); | |
} | |
-static zend_always_inline int zend_cpu_support_sse42() { | |
+static zend_always_inline int zend_cpu_supports_sse42() { | |
#if PHP_HAVE_BUILTIN_CPU_INIT | |
__builtin_cpu_init(); | |
#endif | |
return __builtin_cpu_supports("sse4.2"); | |
} | |
-static zend_always_inline int zend_cpu_support_avx() { | |
+static zend_always_inline int zend_cpu_supports_avx() { | |
#if PHP_HAVE_BUILTIN_CPU_INIT | |
__builtin_cpu_init(); | |
#endif | |
return __builtin_cpu_supports("avx"); | |
} | |
-static zend_always_inline int zend_cpu_support_avx2() { | |
+static zend_always_inline int zend_cpu_supports_avx2() { | |
#if PHP_HAVE_BUILTIN_CPU_INIT | |
__builtin_cpu_init(); | |
#endif | |
@@ -149,29 +149,28 @@ static zend_always_inline int zend_cpu_support_avx2() { | |
} | |
#else | |
-static zend_always_inline int zend_cpu_support_sse2() { | |
+static zend_always_inline int zend_cpu_supports_sse2() { | |
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE2); | |
} | |
-static zend_always_inline int zend_cpu_support_sse3() { | |
- return zend_cpu_supports(ZEND_CPU_FEATURE_SSE3); | |
+static zend_always_inline int zend_cpu_supports_ssse3() { | |
+ return zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3); | |
} | |
-static zend_always_inline int zend_cpu_support_sse41() { | |
+static zend_always_inline int zend_cpu_supports_sse41() { | |
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE41); | |
} | |
-static zend_always_inline int zend_cpu_support_sse42() { | |
+static zend_always_inline int zend_cpu_supports_sse42() { | |
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE42); | |
} | |
-static zend_always_inline int zend_cpu_support_avx() { | |
+static zend_always_inline int zend_cpu_supports_avx() { | |
return zend_cpu_supports(ZEND_CPU_FEATURE_AVX); | |
} | |
-static zend_always_inline int zend_cpu_support_avx2() { | |
- /* TODO */ | |
- return 0; | |
+static zend_always_inline int zend_cpu_supports_avx2() { | |
+ return zend_cpu_supports(ZEND_CPU_FEATURE_AVX2); | |
} | |
#endif | |
diff --git a/Zend/zend_portability.h b/Zend/zend_portability.h | |
index bd3e23e..abf7dcc 100644 | |
--- a/Zend/zend_portability.h | |
+++ b/Zend/zend_portability.h | |
@@ -520,10 +520,48 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */ | |
# define ZEND_INTRIN_HAVE_IFUNC_TARGET 1 | |
#endif | |
+#if (defined(__i386__) || defined(__x86_64__)) | |
+# if PHP_HAVE_SSSE3_INSTRUCTIONS && defined(HAVE_TMMINTRIN_H) | |
+# define PHP_HAVE_SSSE3 | |
+# endif | |
+ | |
+# if PHP_HAVE_SSE4_2_INSTRUCTIONS && defined(HAVE_NMMINTRIN_H) | |
+# define PHP_HAVE_SSE4_2 | |
+# endif | |
+ | |
+# if PHP_HAVE_AVX2_INSTRUCTIONS && defined(HAVE_IMMINTRIN_H) | |
+# define PHP_HAVE_AVX2 | |
+# endif | |
+#endif | |
+ | |
+#ifdef __SSSE3__ | |
+/* Instructions compiled directly. */ | |
+# define ZEND_INTRIN_SSSE3_NATIVE 1 | |
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSSE3)) || defined(ZEND_WIN32) | |
+/* Function resolved by ifunc or MINIT. */ | |
+# define ZEND_INTRIN_SSSE3_RESOLVER 1 | |
+#endif | |
+ | |
+#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER) | |
+# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1 | |
+#elif ZEND_INTRIN_SSSE3_RESOLVER | |
+# define ZEND_INTRIN_SSSE3_FUNC_PTR 1 | |
+#endif | |
+ | |
+#if ZEND_INTRIN_SSSE3_RESOLVER | |
+# if defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) | |
+# else | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) func | |
+# endif | |
+#else | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) | |
+#endif | |
+ | |
#ifdef __SSE4_2__ | |
/* Instructions compiled directly. */ | |
# define ZEND_INTRIN_SSE4_2_NATIVE 1 | |
-#elif (defined(__i386__) || defined(__x86_64__)) && defined(HAVE_NMMINTRIN_H) || defined(ZEND_WIN32) | |
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSE4_2)) || defined(ZEND_WIN32) | |
/* Function resolved by ifunc or MINIT. */ | |
# define ZEND_INTRIN_SSE4_2_RESOLVER 1 | |
#endif | |
@@ -544,6 +582,30 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */ | |
# define ZEND_INTRIN_SSE4_2_FUNC_DECL(func) | |
#endif | |
+#ifdef __AVX2__ | |
+/* Instructions compiled directly. */ | |
+# define ZEND_INTRIN_AVX2_NATIVE 1 | |
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_AVX2)) || defined(ZEND_WIN32) | |
+/* Function resolved by ifunc or MINIT. */ | |
+# define ZEND_INTRIN_AVX2_RESOLVER 1 | |
+#endif | |
+ | |
+#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER) | |
+# define ZEND_INTRIN_AVX2_FUNC_PROTO 1 | |
+#elif ZEND_INTRIN_AVX2_RESOLVER | |
+# define ZEND_INTRIN_AVX2_FUNC_PTR 1 | |
+#endif | |
+ | |
+#if ZEND_INTRIN_AVX2_RESOLVER | |
+# if defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+# define ZEND_INTRIN_AVX2_FUNC_DECL(func) ZEND_API func __attribute__((target("avx2"))) | |
+# else | |
+# define ZEND_INTRIN_AVX2_FUNC_DECL(func) func | |
+# endif | |
+#else | |
+# define ZEND_INTRIN_AVX2_FUNC_DECL(func) | |
+#endif | |
+ | |
/* Intrinsics macros end. */ | |
#ifdef ZEND_WIN32 | |
diff --git a/acinclude.m4 b/acinclude.m4 | |
index 5c6a5c5..b8902f0 100644 | |
--- a/acinclude.m4 | |
+++ b/acinclude.m4 | |
@@ -3271,7 +3271,7 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [ | |
AC_MSG_CHECKING([for __builtin_cpu_supports]) | |
AC_TRY_LINK(, [ | |
- return __builtin_cpu_supports("sse2")? 1 : 0; | |
+ return __builtin_cpu_supports("sse")? 1 : 0; | |
], [ | |
have_builtin_cpu_supports=1 | |
AC_MSG_RESULT([yes]) | |
@@ -3282,7 +3282,28 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [ | |
AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_SUPPORTS], | |
[$have_builtin_cpu_supports], [Whether the compiler supports __builtin_cpu_supports]) | |
+]) | |
+dnl PHP_CHECK_CPU_SUPPORTS | |
+AC_DEFUN([PHP_CHECK_CPU_SUPPORTS], [ | |
+ AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_INIT]) | |
+ AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_SUPPORTS]) | |
+ have_ext_instructions=0 | |
+ if test $have_builtin_cpu_supports = 1; then | |
+ AC_MSG_CHECKING([for $1 instructions supports]) | |
+ AC_TRY_RUN([ | |
+int main() { | |
+ return __builtin_cpu_supports("$1")? 0 : 1; | |
+} | |
+ ], [ | |
+ have_ext_instructions=1 | |
+ AC_MSG_RESULT([yes]) | |
+ ], [ | |
+ AC_MSG_RESULT([no]) | |
+ ]) | |
+ fi | |
+ AC_DEFINE_UNQUOTED(AS_TR_CPP([PHP_HAVE_$1_INSTRUCTIONS]), | |
+ [$have_ext_instructions], [Whether the compiler supports $1 instructions]) | |
]) | |
dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive. | |
diff --git a/configure.ac b/configure.ac | |
index 811faa9..531e6b4 100644 | |
--- a/configure.ac | |
+++ b/configure.ac | |
@@ -496,7 +496,9 @@ sys/utsname.h \ | |
sys/ipc.h \ | |
dlfcn.h \ | |
assert.h \ | |
-nmmintrin.h | |
+tmmintrin.h \ | |
+nmmintrin.h \ | |
+immintrin.h | |
],[],[],[ | |
#ifdef HAVE_SYS_PARAM_H | |
#include <sys/param.h> | |
@@ -571,6 +573,12 @@ PHP_CHECK_BUILTIN_CPU_INIT | |
dnl Check __builtin_cpu_supports | |
PHP_CHECK_BUILTIN_CPU_SUPPORTS | |
+dnl Check instructions | |
+PHP_CHECK_CPU_SUPPORTS([ssse3]) | |
+PHP_CHECK_CPU_SUPPORTS([sse4.2]) | |
+PHP_CHECK_CPU_SUPPORTS([avx]) | |
+PHP_CHECK_CPU_SUPPORTS([avx2]) | |
+ | |
dnl Check for members of the stat structure | |
AC_STRUCT_ST_BLKSIZE | |
dnl AC_STRUCT_ST_BLOCKS will screw QNX because fileblocks.o does not exists | |
@@ -590,7 +598,6 @@ AC_TYPE_UID_T | |
dnl Checks for sockaddr_storage and sockaddr.sa_len | |
PHP_SOCKADDR_CHECKS | |
-AC_MSG_CHECKING([checking building environment]) | |
AX_GCC_FUNC_ATTRIBUTE([ifunc]) | |
AX_GCC_FUNC_ATTRIBUTE([target]) | |
diff --git a/ext/standard/base64.c b/ext/standard/base64.c | |
index 06856b8..bbe21d8 100644 | |
--- a/ext/standard/base64.c | |
+++ b/ext/standard/base64.c | |
@@ -53,47 +53,439 @@ static const short base64_reverse_table[256] = { | |
}; | |
/* }}} */ | |
-PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) /* {{{ */ | |
+static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */ | |
{ | |
- const unsigned char *current = str; | |
- unsigned char *p; | |
- zend_string *result; | |
- | |
- result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); | |
- p = (unsigned char *)ZSTR_VAL(result); | |
- while (length > 2) { /* keep going until we have less than 24 bits */ | |
- *p++ = base64_table[current[0] >> 2]; | |
- *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; | |
- *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)]; | |
- *p++ = base64_table[current[2] & 0x3f]; | |
+ while (inl > 2) { /* keep going until we have less than 24 bits */ | |
+ *out++ = base64_table[in[0] >> 2]; | |
+ *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; | |
+ *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)]; | |
+ *out++ = base64_table[in[2] & 0x3f]; | |
- current += 3; | |
- length -= 3; /* we just handle 3 octets of data */ | |
+ in += 3; | |
+ inl -= 3; /* we just handle 3 octets of data */ | |
} | |
/* now deal with the tail end of things */ | |
- if (length != 0) { | |
- *p++ = base64_table[current[0] >> 2]; | |
- if (length > 1) { | |
- *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; | |
- *p++ = base64_table[(current[1] & 0x0f) << 2]; | |
- *p++ = base64_pad; | |
+ if (inl != 0) { | |
+ *out++ = base64_table[in[0] >> 2]; | |
+ if (inl > 1) { | |
+ *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; | |
+ *out++ = base64_table[(in[1] & 0x0f) << 2]; | |
+ *out++ = base64_pad; | |
} else { | |
- *p++ = base64_table[(current[0] & 0x03) << 4]; | |
- *p++ = base64_pad; | |
- *p++ = base64_pad; | |
+ *out++ = base64_table[(in[0] & 0x03) << 4]; | |
+ *out++ = base64_pad; | |
+ *out++ = base64_pad; | |
} | |
} | |
- *p = '\0'; | |
- ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); | |
+ *out = '\0'; | |
+ | |
+ return out; | |
+} | |
+/* }}} */ | |
+ | |
+/* Scalar base64 decoder. | |
+ * | |
+ * Reads `inl` bytes from `in` and writes the decoded bytes to `out`, starting | |
+ * at index `*outl`; bytes of `out` below that index are not touched. The | |
+ * vectorised php_base64_decode_ex variants call this for whatever input their | |
+ * vector loop left unconsumed, passing the count they already produced. | |
+ * | |
+ * in     - input characters (need not be NUL-terminated) | |
+ * inl    - number of input characters to consume | |
+ * out    - destination buffer; no bounds checking is done here, so the caller | |
+ *          must size it (a partially filled byte and a trailing NUL are | |
+ *          written at the current output index) | |
+ * outl   - in: index in `out` at which to start writing; | |
+ *          out: index one past the last decoded byte (only updated on success) | |
+ * strict - non-zero: characters mapped to -2 by base64_reverse_table, data | |
+ *          following a pad character, a trailing group of a single symbol and | |
+ *          malformed padding are all errors; characters mapped to -1 are | |
+ *          skipped. Zero: every character with a negative table entry is | |
+ *          skipped and padding is counted but never validated. | |
+ * | |
+ * Returns 1 on success, 0 on failure. | |
+ * | |
+ * The counters are size_t rather than int so that an offset or symbol count | |
+ * beyond INT_MAX is neither truncated nor turned into a negative index. | |
+ */ | |
+static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */ | |
+{ | |
+	int ch; /* must stay signed: the reverse table yields -1 / -2 markers */ | |
+	size_t i = 0, padding = 0, j = *outl; | |
+ | |
+	/* run through the whole string, converting as we go */ | |
+	while (inl-- > 0) { | |
+		ch = *in++; | |
+		if (ch == base64_pad) { | |
+			padding++; | |
+			continue; | |
+		} | |
+ | |
+		ch = base64_reverse_table[ch]; | |
+		if (!strict) { | |
+			/* skip unknown characters and whitespace */ | |
+			if (ch < 0) { | |
+				continue; | |
+			} | |
+		} else { | |
+			/* skip whitespace */ | |
+			if (ch == -1) { | |
+				continue; | |
+			} | |
+			/* fail on bad characters or if any data follows padding */ | |
+			if (ch == -2 || padding) { | |
+				goto fail; | |
+			} | |
+		} | |
+ | |
+		/* i counts accepted symbols; each group of four yields three bytes */ | |
+		switch (i % 4) { | |
+			case 0: | |
+				out[j] = ch << 2; | |
+				break; | |
+			case 1: | |
+				out[j++] |= ch >> 4; | |
+				out[j] = (ch & 0x0f) << 4; | |
+				break; | |
+			case 2: | |
+				out[j++] |= ch >>2; | |
+				out[j] = (ch & 0x03) << 6; | |
+				break; | |
+			case 3: | |
+				out[j++] |= ch; | |
+				break; | |
+		} | |
+		i++; | |
+	} | |
+ | |
+	/* fail if the input is truncated (only one char in last group) */ | |
+	if (strict && i % 4 == 1) { | |
+		goto fail; | |
+	} | |
+ | |
+	/* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding | |
+	 * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ | |
+	if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { | |
+		goto fail; | |
+	} | |
+ | |
+	*outl = j; | |
+	out[j] = '\0'; | |
+ | |
+	return 1; | |
+ | |
+fail: | |
+	return 0; | |
+} | |
+/* }}} */ | |
+ | |
+/* {{{ php_base64_encode */ | |
+ | |
+#if ZEND_INTRIN_AVX2_NATIVE | |
+# undef ZEND_INTRIN_SSSE3_NATIVE | |
+# undef ZEND_INTRIN_SSSE3_RESOLVER | |
+# undef ZEND_INTRIN_SSSE3_FUNC_PROTO | |
+# undef ZEND_INTRIN_SSSE3_FUNC_PTR | |
+#elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE | |
+# undef ZEND_INTRIN_SSSE3_NATIVE | |
+# define ZEND_INTRIN_SSSE3_RESOLVER 1 | |
+# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1 | |
+# undef ZEND_INTRIN_SSSE3_FUNC_DECL | |
+# ifdef HAVE_FUNC_ATTRIBUTE_TARGET | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) | |
+# else | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func | |
+# endif | |
+#elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE | |
+# undef ZEND_INTRIN_SSSE3_NATIVE | |
+# undef ZEND_INTRIN_SSSE3_RESOLVER | |
+# define ZEND_INTRIN_SSSE3_RESOLVER 1 | |
+# define ZEND_INTRIN_SSSE3_FUNC_PTR 1 | |
+# undef ZEND_INTRIN_SSSE3_FUNC_DECL | |
+# ifdef HAVE_FUNC_ATTRIBUTE_TARGET | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) | |
+# else | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func | |
+# endif | |
+#endif | |
+ | |
+#if ZEND_INTRIN_AVX2_NATIVE | |
+# include <immintrin.h> | |
+#elif ZEND_INTRIN_SSSE3_NATIVE | |
+# include <tmmintrin.h> | |
+#elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER) | |
+# if ZEND_INTRIN_AVX2_RESOLVER | |
+# include <immintrin.h> | |
+# else | |
+# include <tmmintrin.h> | |
+# endif /* ZEND_INTRIN_AVX2_RESOLVER */ | |
+# include "Zend/zend_cpuinfo.h" | |
+ | |
+# if ZEND_INTRIN_AVX2_RESOLVER | |
+ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)); | |
+ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict)); | |
+# endif | |
+ | |
+# if ZEND_INTRIN_SSSE3_RESOLVER | |
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)); | |
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)); | |
+# endif | |
+ | |
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length); | |
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict); | |
+ | |
+# if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) | |
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode"))); | |
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode"))); | |
+ | |
+static void *resolve_base64_encode() { | |
+# if ZEND_INTRIN_AVX2_FUNC_PROTO | |
+ if (zend_cpu_supports_avx2()) { | |
+ return php_base64_encode_avx2; | |
+ } else | |
+# endif | |
+ if (zend_cpu_supports_ssse3()) { | |
+ return php_base64_encode_ssse3; | |
+ } | |
+ return php_base64_encode_default; | |
+} | |
+ | |
+static void *resolve_base64_decode() { | |
+# if ZEND_INTRIN_AVX2_FUNC_PROTO | |
+ if (zend_cpu_supports_avx2()) { | |
+ return php_base64_decode_ex_avx2; | |
+ } else | |
+# endif | |
+ if (zend_cpu_supports_ssse3()) { | |
+ return php_base64_decode_ex_ssse3; | |
+ } | |
+ return php_base64_decode_ex_default; | |
+} | |
+# else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ | |
+ | |
+PHPAPI zend_string *(*php_base64_encode)(const unsigned char *str, size_t length) = NULL; | |
+PHPAPI zend_string *(*php_base64_decode_ex)(const unsigned char *str, size_t length, zend_bool strict) = NULL; | |
+ | |
+PHP_MINIT_FUNCTION(base64_intrin) | |
+{ | |
+# if ZEND_INTRIN_AVX2_FUNC_PTR | |
+ if (zend_cpu_supports_avx2()) { | |
+ php_base64_encode = php_base64_encode_avx2; | |
+ php_base64_decode_ex = php_base64_decode_ex_avx2; | |
+ } else | |
+# endif | |
+ if (zend_cpu_supports_ssse3()) { | |
+ php_base64_encode = php_base64_encode_ssse3; | |
+ php_base64_decode_ex = php_base64_decode_ex_ssse3; | |
+ } else { | |
+ php_base64_encode = php_base64_encode_default; | |
+ php_base64_decode_ex = php_base64_decode_ex_default; | |
+ } | |
+ return SUCCESS; | |
+} | |
+# endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ | |
+#endif /* ZEND_INTRIN_AVX2_NATIVE */ | |
+ | |
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER | |
+# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); | |
+static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2"))); | |
+# endif | |
+static __m256i php_base64_encode_avx2_reshuffle(__m256i in) | |
+{ | |
+ /* This one works with shifted (4 bytes) input in order to | |
+ * be able to work efficiently in the 2 128-bit lanes */ | |
+ __m256i t0, t1, t2, t3; | |
+ | |
+ /* input, bytes MSB to LSB: | |
+ * 0 0 0 0 x w v u t s r q p o n m | |
+ * l k j i h g f e d c b a 0 0 0 0 */ | |
+ in = _mm256_shuffle_epi8(in, _mm256_set_epi8( | |
+ 10, 11, 9, 10, | |
+ 7, 8, 6, 7, | |
+ 4, 5, 3, 4, | |
+ 1, 2, 0, 1, | |
+ | |
+ 14, 15, 13, 14, | |
+ 11, 12, 10, 11, | |
+ 8, 9, 7, 8, | |
+ 5, 6, 4, 5)); | |
+ | |
+ t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); | |
+ | |
+ t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); | |
+ | |
+ t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); | |
+ | |
+ t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); | |
+ | |
+ return _mm256_or_si256(t1, t3); | |
+ /* 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV | |
+ * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS | |
+ * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP | |
+ * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM | |
+ * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ | |
+ * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG | |
+ * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD | |
+ * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */ | |
+} | |
+ | |
+static __m256i php_base64_encode_avx2_translate(__m256i in) | |
+{ | |
+ __m256i lut, indices, mask; | |
+ | |
+ lut = _mm256_setr_epi8( | |
+ 65, 71, -4, -4, -4, -4, -4, -4, | |
+ -4, -4, -4, -4, -19, -16, 0, 0, | |
+ 65, 71, -4, -4, -4, -4, -4, -4, | |
+ -4, -4, -4, -4, -19, -16, 0, 0); | |
+ | |
+ indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51)); | |
+ | |
+ mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25)); | |
+ | |
+ indices = _mm256_sub_epi8(indices, mask); | |
+ | |
+ return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices)); | |
+ | |
+} | |
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */ | |
+ | |
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER | |
+# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); | |
+static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3"))); | |
+# endif | |
+ | |
+static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) | |
+{ | |
+ __m128i t0, t1, t2, t3; | |
+ | |
+ /* input, bytes MSB to LSB: | |
+ * 0 0 0 0 l k j i h g f e d c b a */ | |
+ in = _mm_shuffle_epi8(in, _mm_set_epi8( | |
+ 10, 11, 9, 10, | |
+ 7, 8, 6, 7, | |
+ 4, 5, 3, 4, | |
+ 1, 2, 0, 1)); | |
+ | |
+ t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); | |
+ | |
+ t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); | |
+ | |
+ t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); | |
+ | |
+ t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); | |
+ | |
+ /* output (upper case are MSB, lower case are LSB): | |
+ * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ | |
+ * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG | |
+ * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD | |
+ * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */ | |
+ return _mm_or_si128(t1, t3); | |
+} | |
+ | |
+static __m128i php_base64_encode_ssse3_translate(__m128i in) | |
+{ | |
+ __m128i mask, indices; | |
+ __m128i lut = _mm_setr_epi8( | |
+ 65, 71, -4, -4, | |
+ -4, -4, -4, -4, | |
+ -4, -4, -4, -4, | |
+ -19, -16, 0, 0 | |
+ ); | |
+ | |
+ /* Translate values 0..63 to the Base64 alphabet. There are five sets: | |
+ * # From To Abs Index Characters | |
+ * 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ | |
+ * 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz | |
+ * 2 [52..61] [48..57] -4 [2..11] 0123456789 | |
+ * 3 [62] [43] -19 12 + | |
+ * 4 [63] [47] -16 13 / */ | |
+ | |
+ /* Create LUT indices from input: | |
+ * the index for range #0 is right, others are 1 less than expected: */ | |
+ indices = _mm_subs_epu8(in, _mm_set1_epi8(51)); | |
+ | |
+ /* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */ | |
+ mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25)); | |
+ | |
+ /* subtracting the mask (-1) adds 1 to the indices for ranges #[1..4]; all indices are now correct: */ | |
+ indices = _mm_sub_epi8(indices, mask); | |
+ | |
+ /* Add offsets to input values: */ | |
+ return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices)); | |
+} | |
+#endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ | |
+ | |
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER | |
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE | |
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) | |
+# elif ZEND_INTRIN_AVX2_RESOLVER | |
+zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length) | |
+# elif ZEND_INTRIN_SSSE3_RESOLVER | |
+zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length) | |
+# endif | |
+{ | |
+ const unsigned char *c = str; | |
+ unsigned char *o; | |
+ zend_string *result; | |
+ | |
+ result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); | |
+ o = (unsigned char *)ZSTR_VAL(result); | |
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER | |
+ if (length > 31) { | |
+ __m256i s = _mm256_loadu_si256((__m256i *)c); | |
+ | |
+ s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); | |
+ | |
+ for (;;) { | |
+ s = php_base64_encode_avx2_reshuffle(s); | |
+ | |
+ s = php_base64_encode_avx2_translate(s); | |
+ | |
+ _mm256_storeu_si256((__m256i *)o, s); | |
+ c += 24; | |
+ o += 32; | |
+ length -= 24; | |
+ if (length < 28) { | |
+ break; | |
+ } | |
+ s = _mm256_loadu_si256((__m256i *)(c - 4)); | |
+ } | |
+ } | |
+# else | |
+ while (length > 15) { | |
+ __m128i s = _mm_loadu_si128((__m128i *)c); | |
+ | |
+ s = php_base64_encode_ssse3_reshuffle(s); | |
+ | |
+ s = php_base64_encode_ssse3_translate(s); | |
+ | |
+ _mm_storeu_si128((__m128i *)o, s); | |
+ c += 12; | |
+ o += 16; | |
+ length -= 12; | |
+ } | |
+# endif | |
+ | |
+ o = php_base64_encode_impl(c, length, o); | |
+ | |
+ ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); | |
return result; | |
} | |
+ | |
+# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER | |
+zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length) | |
+{ | |
+ const unsigned char *c = str; | |
+ unsigned char *o; | |
+ zend_string *result; | |
+ | |
+ result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); | |
+ o = (unsigned char *)ZSTR_VAL(result); | |
+ while (length > 15) { | |
+ __m128i s = _mm_loadu_si128((__m128i *)c); | |
+ | |
+ s = php_base64_encode_ssse3_reshuffle(s); | |
+ | |
+ s = php_base64_encode_ssse3_translate(s); | |
+ | |
+ _mm_storeu_si128((__m128i *)o, s); | |
+ c += 12; | |
+ o += 16; | |
+ length -= 12; | |
+ } | |
+ | |
+ o = php_base64_encode_impl(c, length, o); | |
+ | |
+ ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); | |
+ | |
+ return result; | |
+} | |
+# endif | |
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ | |
+ | |
/* }}} */ | |
-/* {{{ */ | |
+/* {{{ php_base64_decode_ex */ | |
/* generate reverse table (do not set index 0 to 64) | |
static unsigned short base64_reverse_table[256]; | |
#define rt base64_reverse_table | |
@@ -125,78 +517,300 @@ void php_base64_init(void) | |
efree(s); | |
} | |
*/ | |
-/* }}} */ | |
-PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) /* {{{ */ | |
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER | |
+# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); | |
+# endif | |
+ | |
+static __m256i php_base64_decode_avx2_reshuffle(__m256i in) | |
{ | |
- const unsigned char *current = str; | |
- int ch, i = 0, j = 0, padding = 0; | |
+ __m256i merge_ab_and_bc, out; | |
+ | |
+ merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140)); | |
+ | |
+ out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000)); | |
+ | |
+ out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( | |
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, | |
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); | |
+ | |
+ return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1)); | |
+} | |
+#endif | |
+ | |
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER | |
+# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); | |
+# endif | |
+ | |
+static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) | |
+{ | |
+ __m128i merge_ab_and_bc, out; | |
+ | |
+ merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140)); | |
+ /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK | |
+ * 0000hhhh IIiiiiii 0000GGGG GGggHHHH | |
+ * 0000eeee FFffffff 0000DDDD DDddEEEE | |
+ * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */ | |
+ | |
+ out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000)); | |
+ /* 00000000 JJJJJJjj KKKKkkkk LLllllll | |
+ * 00000000 GGGGGGgg HHHHhhhh IIiiiiii | |
+ * 00000000 DDDDDDdd EEEEeeee FFffffff | |
+ * 00000000 AAAAAAaa BBBBbbbb CCcccccc */ | |
+ | |
+ return _mm_shuffle_epi8(out, _mm_setr_epi8( | |
+ 2, 1, 0, | |
+ 6, 5, 4, | |
+ 10, 9, 8, | |
+ 14, 13, 12, | |
+ -1, -1, -1, -1)); | |
+ /* 00000000 00000000 00000000 00000000 | |
+ * LLllllll KKKKkkkk JJJJJJjj IIiiiiii | |
+ * HHHHhhhh GGGGGGgg FFffffff EEEEeeee | |
+ * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */ | |
+} | |
+#endif | |
+ | |
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER | |
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE | |
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) | |
+# elif ZEND_INTRIN_AVX2_RESOLVER | |
+zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict) | |
+# else | |
+zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict) | |
+# endif | |
+{ | |
+ const unsigned char *c = str; | |
+ unsigned char *o; | |
+ size_t outl = 0; | |
zend_string *result; | |
result = zend_string_alloc(length, 0); | |
+ o = (unsigned char *)ZSTR_VAL(result); | |
- /* run through the whole string, converting as we go */ | |
- while (length-- > 0) { | |
- ch = *current++; | |
- if (ch == base64_pad) { | |
- padding++; | |
- continue; | |
- } | |
+ /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions" | |
+ * https://arxiv.org/pdf/1704.00605.pdf */ | |
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER | |
+ while (length > 31 + 2) { | |
+ __m256i lut_lo, lut_hi, lut_roll; | |
+ __m256i hi_nibbles, lo_nibbles, hi, lo; | |
+ __m256i str = _mm256_loadu_si256((__m256i *)c); | |
- ch = base64_reverse_table[ch]; | |
- if (!strict) { | |
- /* skip unknown characters and whitespace */ | |
- if (ch < 0) { | |
- continue; | |
- } | |
+ lut_lo = _mm256_setr_epi8( | |
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, | |
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, | |
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, | |
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); | |
+ | |
+ lut_hi = _mm256_setr_epi8( | |
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, | |
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, | |
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, | |
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); | |
+ | |
+ lut_roll = _mm256_setr_epi8( | |
+ 0, 16, 19, 4, -65, -65, -71, -71, | |
+ 0, 0, 0, 0, 0, 0, 0, 0, | |
+ 0, 16, 19, 4, -65, -65, -71, -71, | |
+ 0, 0, 0, 0, 0, 0, 0, 0); | |
+ | |
+ hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f)); | |
+ lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f)); | |
+ hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); | |
+ lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); | |
+ | |
+ if (!_mm256_testz_si256(lo, hi)) { | |
+ break; | |
} else { | |
- /* skip whitespace */ | |
- if (ch == -1) { | |
- continue; | |
- } | |
- /* fail on bad characters or if any data follows padding */ | |
- if (ch == -2 || padding) { | |
- goto fail; | |
- } | |
+ __m256i eq_2f, roll; | |
+ eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f)); | |
+ roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles)); | |
+ | |
+ | |
+ str = _mm256_add_epi8(str, roll); | |
+ | |
+ str = php_base64_decode_avx2_reshuffle(str); | |
+ | |
+ _mm256_storeu_si256((__m256i *)o, str); | |
+ | |
+ c += 32; | |
+ o += 24; | |
+ outl += 24; | |
+ length -= 32; | |
} | |
+ } | |
+# else | |
+ while (length > 15 + 2) { | |
+ __m128i lut_lo, lut_hi, lut_roll; | |
+ __m128i hi_nibbles, lo_nibbles, hi, lo; | |
- switch(i % 4) { | |
- case 0: | |
- ZSTR_VAL(result)[j] = ch << 2; | |
- break; | |
- case 1: | |
- ZSTR_VAL(result)[j++] |= ch >> 4; | |
- ZSTR_VAL(result)[j] = (ch & 0x0f) << 4; | |
- break; | |
- case 2: | |
- ZSTR_VAL(result)[j++] |= ch >>2; | |
- ZSTR_VAL(result)[j] = (ch & 0x03) << 6; | |
- break; | |
- case 3: | |
- ZSTR_VAL(result)[j++] |= ch; | |
+ __m128i s = _mm_loadu_si128((__m128i *)c); | |
+ | |
+ lut_lo = _mm_setr_epi8( | |
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, | |
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); | |
+ | |
+ lut_hi = _mm_setr_epi8( | |
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, | |
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); | |
+ | |
+ lut_roll = _mm_setr_epi8( | |
+ 0, 16, 19, 4, -65, -65, -71, -71, | |
+ 0, 0, 0, 0, 0, 0, 0, 0); | |
+ | |
+ hi_nibbles = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); | |
+ lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); | |
+ hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); | |
+ lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); | |
+ | |
+ /* Check for invalid input: if the bitwise AND of the lo and hi lookups is nonzero in any byte, | |
+ fall back on bytewise code to do error checking and reporting: */ | |
+ if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) { | |
break; | |
+ } else { | |
+ __m128i eq_2f, roll; | |
+ | |
+ eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); | |
+ roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); | |
+ | |
+ s = _mm_add_epi8(s, roll); | |
+ | |
+ s = php_base64_decode_ssse3_reshuffle(s); | |
+ | |
+ _mm_storeu_si128((__m128i *)o, s); | |
+ | |
+ c += 16; | |
+ o += 12; | |
+ outl += 12; | |
+ length -= 16; | |
} | |
- i++; | |
} | |
- /* fail if the input is truncated (only one char in last group) */ | |
- if (strict && i % 4 == 1) { | |
- goto fail; | |
+# endif | |
+ | |
+ if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { | |
+ zend_string_free(result); | |
+ return NULL; | |
} | |
- /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding | |
- * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ | |
- if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { | |
- goto fail; | |
+ | |
+ ZSTR_LEN(result) = outl; | |
+ | |
+ return result; | |
+} | |
+ | |
+# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER | |
+zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict) | |
+{ | |
+ const unsigned char *c = str; | |
+ unsigned char *o; | |
+ size_t outl = 0; | |
+ zend_string *result; | |
+ | |
+ result = zend_string_alloc(length, 0); | |
+ o = (unsigned char *)ZSTR_VAL(result); | |
+ | |
+ while (length > 15 + 2) { | |
+ __m128i lut_lo, lut_hi, lut_roll; | |
+ __m128i hi_nibbles, lo_nibbles, hi, lo; | |
+ | |
+ __m128i s = _mm_loadu_si128((__m128i *)c); | |
+ | |
+ lut_lo = _mm_setr_epi8( | |
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, | |
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); | |
+ | |
+ lut_hi = _mm_setr_epi8( | |
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, | |
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); | |
+ | |
+ lut_roll = _mm_setr_epi8( | |
+ 0, 16, 19, 4, -65, -65, -71, -71, | |
+ 0, 0, 0, 0, 0, 0, 0, 0); | |
+ | |
+ hi_nibbles = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); | |
+ lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); | |
+ hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); | |
+ lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); | |
+ | |
+ /* Check for invalid input: if the bitwise AND of the lo and hi lookups is nonzero in any byte, | |
+ fall back on bytewise code to do error checking and reporting: */ | |
+ if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) { | |
+ break; | |
+ } else { | |
+ __m128i eq_2f, roll; | |
+ | |
+ eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); | |
+ roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); | |
+ | |
+ s = _mm_add_epi8(s, roll); | |
+ | |
+ s = php_base64_decode_ssse3_reshuffle(s); | |
+ | |
+ _mm_storeu_si128((__m128i *)o, s); | |
+ | |
+ c += 16; | |
+ o += 12; | |
+ outl += 12; | |
+ length -= 16; | |
+ } | |
+ } | |
+ | |
+ if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { | |
+ zend_string_free(result); | |
+ return NULL; | |
} | |
- ZSTR_LEN(result) = j; | |
- ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0'; | |
+ ZSTR_LEN(result) = outl; | |
return result; | |
+} | |
+# endif | |
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ | |
-fail: | |
- zend_string_free(result); | |
- return NULL; | |
+#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE | |
+#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER | |
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length) | |
+#else | |
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) | |
+#endif | |
+{ | |
+ unsigned char *p; | |
+ zend_string *result; | |
+ | |
+ result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); | |
+ p = (unsigned char *)ZSTR_VAL(result); | |
+ | |
+ p = php_base64_encode_impl(str, length, p); | |
+ | |
+ ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); | |
+ | |
+ return result; | |
+} | |
+#endif | |
+ | |
+#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE | |
+#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER | |
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict) | |
+#else | |
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) | |
+#endif | |
+{ | |
+ zend_string *result; | |
+ size_t outl = 0; | |
+ | |
+ result = zend_string_alloc(length, 0); | |
+ | |
+ if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { | |
+ zend_string_free(result); | |
+ return NULL; | |
+ } | |
+ | |
+ ZSTR_LEN(result) = outl; | |
+ | |
+ return result; | |
} | |
+#endif | |
/* }}} */ | |
/* {{{ proto string base64_encode(string str) | |
diff --git a/ext/standard/base64.h b/ext/standard/base64.h | |
index f380d3c..c9fe26e 100644 | |
--- a/ext/standard/base64.h | |
+++ b/ext/standard/base64.h | |
@@ -21,16 +21,56 @@ | |
#ifndef BASE64_H | |
#define BASE64_H | |
+/* | |
+ * The SSSE3 and AVX2 implementations are based on https://github.com/aklomp/base64 | |
+ * which is copyrighted by: | |
+ * | |
+ * Copyright (c) 2005-2007, Nick Galbreath | |
+ * Copyright (c) 2013-2017, Alfred Klomp | |
+ * Copyright (c) 2015-2017, Wojciech Mula | |
+ * Copyright (c) 2016-2017, Matthieu Darbois | |
+ * All rights reserved. | |
+ * | |
+ * Redistribution and use in source and binary forms, with or without | |
+ * modification, are permitted provided that the following conditions are | |
+ * met: | |
+ * | |
+ * - Redistributions of source code must retain the above copyright notice, | |
+ * this list of conditions and the following disclaimer. | |
+ * | |
+ * - Redistributions in binary form must reproduce the above copyright | |
+ * notice, this list of conditions and the following disclaimer in the | |
+ * documentation and/or other materials provided with the distribution. | |
+ * | |
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | |
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | |
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | |
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
+ */ | |
+ | |
PHP_FUNCTION(base64_decode); | |
PHP_FUNCTION(base64_encode); | |
+#if (ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR) && !ZEND_INTRIN_AVX2_NATIVE | |
+PHP_MINIT_FUNCTION(base64_intrin); | |
+PHPAPI extern zend_string *(*php_base64_encode)(const unsigned char *, size_t); | |
+PHPAPI extern zend_string *(*php_base64_decode_ex)(const unsigned char *, size_t, zend_bool); | |
+#else | |
PHPAPI extern zend_string *php_base64_encode(const unsigned char *, size_t); | |
+PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool); | |
+#endif | |
+ | |
static inline zend_string *php_base64_encode_str(const zend_string *str) { | |
return php_base64_encode((const unsigned char*)(ZSTR_VAL(str)), ZSTR_LEN(str)); | |
} | |
-PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool); | |
- | |
static inline zend_string *php_base64_decode(const unsigned char *str, size_t len) { | |
return php_base64_decode_ex(str, len, 0); | |
} | |
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c | |
index b322caa..4404ad1 100644 | |
--- a/ext/standard/basic_functions.c | |
+++ b/ext/standard/basic_functions.c | |
@@ -3692,6 +3692,10 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */ | |
BASIC_MINIT_SUBMODULE(string_intrin) | |
#endif | |
+#if ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR | |
+ BASIC_MINIT_SUBMODULE(base64_intrin) | |
+#endif | |
+ | |
BASIC_MINIT_SUBMODULE(crypt) | |
BASIC_MINIT_SUBMODULE(lcg) | |
diff --git a/ext/standard/string.c b/ext/standard/string.c | |
index 73bbb24..0023c4b 100644 | |
--- a/ext/standard/string.c | |
+++ b/ext/standard/string.c | |
@@ -3873,7 +3873,7 @@ zend_string *php_addslashes_default(zend_string *str, int should_free); | |
PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes"))); | |
static void *resolve_addslashes() { | |
- if (zend_cpu_support_sse42()) { | |
+ if (zend_cpu_supports_sse42()) { | |
return php_addslashes_sse42; | |
} | |
return php_addslashes_default; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment