Last active
February 10, 2018 02:00
-
-
Save laruence/bba84da5e1a837bae666cc6f08cae5e9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/Zend/zend_portability.h b/Zend/zend_portability.h | |
index bd3e23e..5b91e5a 100644 | |
--- a/Zend/zend_portability.h | |
+++ b/Zend/zend_portability.h | |
@@ -520,6 +520,30 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */ | |
# define ZEND_INTRIN_HAVE_IFUNC_TARGET 1 | |
#endif | |
+#ifdef __SSSE3__ | |
+/* Instructions compiled directly; no resolver macro is defined in this case. */ | |
+# define ZEND_INTRIN_SSSE3_NATIVE 1 | |
+#elif (defined(__i386__) || defined(__x86_64__)) && defined(HAVE_TMMINTRIN_H) || defined(ZEND_WIN32) | |
+/* Function resolved by ifunc or MINIT. Note that && binds tighter than ||, | |
+ * so ZEND_WIN32 on its own is enough to select this branch. */ | |
+# define ZEND_INTRIN_SSSE3_RESOLVER 1 | |
+#endif | |
+ | |
+/* FUNC_PROTO is set when ZEND_INTRIN_HAVE_IFUNC_TARGET is available; | |
+ * FUNC_PTR is set for resolver builds without it. */ | |
+#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER) | |
+# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1 | |
+#elif ZEND_INTRIN_SSSE3_RESOLVER | |
+# define ZEND_INTRIN_SSSE3_FUNC_PTR 1 | |
+#endif | |
+ | |
+/* Wrapper for the declaration of an SSSE3-specific function: | |
+ * - resolver build with HAVE_FUNC_ATTRIBUTE_TARGET: adds ZEND_API and target("ssse3"); | |
+ * - resolver build without it: passes the declaration through unchanged; | |
+ * - no resolver: expands to nothing. */ | |
+#if ZEND_INTRIN_SSSE3_RESOLVER | |
+# if defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) | |
+# else | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) func | |
+# endif | |
+#else | |
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) | |
+#endif | |
+ | |
#ifdef __SSE4_2__ | |
/* Instructions compiled directly. */ | |
# define ZEND_INTRIN_SSE4_2_NATIVE 1 | |
diff --git a/configure.ac b/configure.ac | |
index 7c0d007..7f14d6b 100644 | |
--- a/configure.ac | |
+++ b/configure.ac | |
@@ -496,6 +496,7 @@ sys/utsname.h \ | |
sys/ipc.h \ | |
dlfcn.h \ | |
assert.h \ | |
+tmmintrin.h \ | |
nmmintrin.h | |
],[],[],[ | |
#ifdef HAVE_SYS_PARAM_H | |
diff --git a/ext/standard/base64.c b/ext/standard/base64.c | |
index 06856b8..565a40b 100644 | |
--- a/ext/standard/base64.c | |
+++ b/ext/standard/base64.c | |
@@ -22,6 +22,39 @@ | |
#include "php.h" | |
#include "base64.h" | |
+/* This file integrates several modified parts from https://github.com/aklomp/base64 | |
+ * which is copyrighted to: | |
+ * | |
+ * Copyright (c) 2005-2007, Nick Galbreath | |
+ * Copyright (c) 2013-2017, Alfred Klomp | |
+ * Copyright (c) 2015-2017, Wojciech Mula | |
+ * Copyright (c) 2016-2017, Matthieu Darbois | |
+ * All rights reserved. | |
+ * | |
+ * Redistribution and use in source and binary forms, with or without | |
+ * modification, are permitted provided that the following conditions are | |
+ * met: | |
+ * | |
+ * - Redistributions of source code must retain the above copyright notice, | |
+ * this list of conditions and the following disclaimer. | |
+ * | |
+ * - Redistributions in binary form must reproduce the above copyright | |
+ * notice, this list of conditions and the following disclaimer in the | |
+ * documentation and/or other materials provided with the distribution. | |
+ * | |
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | |
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | |
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | |
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
+ */ | |
+ | |
/* {{{ base64 tables */ | |
static const char base64_table[] = { | |
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', | |
@@ -53,47 +86,263 @@ static const short base64_reverse_table[256] = { | |
}; | |
/* }}} */ | |
-PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) /* {{{ */ | |
+/* Scalar base64 encoder shared by the encode entry points. | |
+ * Reads inl bytes from in, writes the encoded characters (with '=' padding | |
+ * for a 1- or 2-byte tail) to out and terminates them with a NUL byte. | |
+ * Returns a pointer to that NUL, i.e. one past the last encoded character, | |
+ * so callers can derive the encoded length from it. | |
+ * out must be large enough for the encoded text plus the terminator. */ | |
+static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */ | |
{ | |
- const unsigned char *current = str; | |
- unsigned char *p; | |
- zend_string *result; | |
- | |
- result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); | |
- p = (unsigned char *)ZSTR_VAL(result); | |
- while (length > 2) { /* keep going until we have less than 24 bits */ | |
- *p++ = base64_table[current[0] >> 2]; | |
- *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; | |
- *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)]; | |
- *p++ = base64_table[current[2] & 0x3f]; | |
+ while (inl > 2) { /* keep going until we have less than 24 bits */ | |
+ *out++ = base64_table[in[0] >> 2]; | |
+ *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; | |
+ *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)]; | |
+ *out++ = base64_table[in[2] & 0x3f]; | |
- current += 3; | |
- length -= 3; /* we just handle 3 octets of data */ | |
+ in += 3; | |
+ inl -= 3; /* we just handle 3 octets of data */ | |
} | |
/* now deal with the tail end of things */ | |
- if (length != 0) { | |
- *p++ = base64_table[current[0] >> 2]; | |
- if (length > 1) { | |
- *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; | |
- *p++ = base64_table[(current[1] & 0x0f) << 2]; | |
- *p++ = base64_pad; | |
+ if (inl != 0) { | |
+ *out++ = base64_table[in[0] >> 2]; | |
+ if (inl > 1) { | |
+ *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; | |
+ *out++ = base64_table[(in[1] & 0x0f) << 2]; | |
+ *out++ = base64_pad; | |
} else { | |
- *p++ = base64_table[(current[0] & 0x03) << 4]; | |
- *p++ = base64_pad; | |
- *p++ = base64_pad; | |
+ *out++ = base64_table[(in[0] & 0x03) << 4]; | |
+ *out++ = base64_pad; | |
+ *out++ = base64_pad; | |
} | |
} | |
- *p = '\0'; | |
- ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); | |
+ *out = '\0'; /* terminator is written but not stepped over */ | |
+ | |
+ return out; | |
+} | |
+/* }}} */ | |
+ | |
+/* Scalar base64 decoder shared by the decode entry points. | |
+ * | |
+ * in, inl - encoded input and its length in bytes | |
+ * out - caller-provided destination buffer | |
+ * outl - on entry: number of bytes already stored in out (decoding | |
+ * continues at out[*outl]); on success: total decoded length | |
+ * strict - when zero, '=' and every character that is not in the alphabet | |
+ * are skipped and decoding cannot fail; when non-zero, only | |
+ * whitespace is skipped and invalid characters, data after | |
+ * padding, a truncated last group or wrong padding are errors | |
+ * | |
+ * Returns 1 on success (out is NUL-terminated and *outl updated), 0 on | |
+ * failure (*outl is left untouched). */ | |
+static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */ | |
+{ | |
+ int ch; | |
+ /* size_t, not int: *outl and the character counts are bounded by inl, | |
+ * which may exceed INT_MAX; int would truncate the offset or overflow */ | |
+ size_t i = 0, padding = 0, j = *outl; | |
+ | |
+ /* run through the whole string, converting as we go */ | |
+ while (inl-- > 0) { | |
+ ch = *in++; | |
+ if (ch == base64_pad) { | |
+ padding++; | |
+ continue; | |
+ } | |
+ | |
+ ch = base64_reverse_table[ch]; | |
+ if (!strict) { | |
+ /* skip unknown characters and whitespace */ | |
+ if (ch < 0) { | |
+ continue; | |
+ } | |
+ } else { | |
+ /* skip whitespace */ | |
+ if (ch == -1) { | |
+ continue; | |
+ } | |
+ /* fail on bad characters or if any data follows padding */ | |
+ if (ch == -2 || padding) { | |
+ goto fail; | |
+ } | |
+ } | |
+ | |
+ /* i counts accepted characters; four of them make three output bytes */ | |
+ switch (i % 4) { | |
+ case 0: | |
+ out[j] = ch << 2; | |
+ break; | |
+ case 1: | |
+ out[j++] |= ch >> 4; | |
+ out[j] = (ch & 0x0f) << 4; | |
+ break; | |
+ case 2: | |
+ out[j++] |= ch >> 2; | |
+ out[j] = (ch & 0x03) << 6; | |
+ break; | |
+ case 3: | |
+ out[j++] |= ch; | |
+ break; | |
+ } | |
+ i++; | |
+ } | |
+ | |
+ /* fail if the input is truncated (only one char in last group) */ | |
+ if (strict && i % 4 == 1) { | |
+ goto fail; | |
+ } | |
+ | |
+ /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding | |
+ * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ | |
+ if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { | |
+ goto fail; | |
+ } | |
+ | |
+ *outl = j; | |
+ out[j] = '\0'; | |
+ | |
+ return 1; | |
+ | |
+fail: | |
+ return 0; | |
+} | |
+/* }}} */ | |
+ | |
+/* {{{ php_base64_encode */ | |
+ | |
+#if ZEND_INTRIN_SSSE3_NATIVE | |
+# include <tmmintrin.h> | |
+#elif ZEND_INTRIN_SSSE3_RESOLVER | |
+# include <tmmintrin.h> | |
+# include "Zend/zend_cpuinfo.h" | |
+ | |
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)); | |
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)); | |
+ | |
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length); | |
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict); | |
+ | |
+# if ZEND_INTRIN_SSSE3_FUNC_PROTO | |
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode"))); | |
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode"))); | |
+ | |
+/* ifunc resolver: picks the implementation bound to php_base64_encode. */ | |
+static void *resolve_base64_encode() { | |
+ if (!zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3)) { | |
+ /* SSSE3 not reported: use the portable encoder */ | |
+ return php_base64_encode_default; | |
+ } | |
+ return php_base64_encode_ssse3; | |
+} | |
+ | |
+/* ifunc resolver: picks the implementation bound to php_base64_decode_ex. */ | |
+static void *resolve_base64_decode() { | |
+ if (!zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3)) { | |
+ /* SSSE3 not reported: use the portable decoder */ | |
+ return php_base64_decode_ex_default; | |
+ } | |
+ return php_base64_decode_ex_ssse3; | |
+} | |
+# else /* ZEND_INTRIN_SSSE3_FUNC_PROTO */ | |
+ | |
+PHPAPI zend_string *(*php_base64_encode)(const unsigned char *str, size_t length) = NULL; | |
+PHPAPI zend_string *(*php_base64_decode_ex)(const unsigned char *str, size_t length, zend_bool strict) = NULL; | |
+ | |
+/* Startup hook for builds that dispatch through function pointers: binds | |
+ * php_base64_encode and php_base64_decode_ex to the SSSE3 or the portable | |
+ * implementations, depending on what zend_cpu_supports() reports. */ | |
+PHP_MINIT_FUNCTION(base64_intrin) | |
+{ | |
+ if (!zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3)) { | |
+ php_base64_encode = php_base64_encode_default; | |
+ php_base64_decode_ex = php_base64_decode_ex_default; | |
+ } else { | |
+ php_base64_encode = php_base64_encode_ssse3; | |
+ php_base64_decode_ex = php_base64_decode_ex_ssse3; | |
+ } | |
+ return SUCCESS; | |
+} | |
+ | |
+# endif /* ZEND_INTRIN_SSSE3_FUNC_PROTO */ | |
+#endif /* ZEND_INTRIN_SSSE3_NATIVE */ | |
+ | |
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER | |
+ | |
+/* __attribute__((target)) is only usable when HAVE_FUNC_ATTRIBUTE_TARGET is | |
+ * defined. The resolver can be enabled without it (ZEND_WIN32), so apply the | |
+ * same guard as ZEND_INTRIN_SSSE3_FUNC_DECL to keep such builds parsing. */ | |
+#if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+static __m128i php_base64_encode_reshuffle(__m128i in) __attribute__((target("ssse3"))); | |
+static __m128i php_base64_encode_translate(__m128i in) __attribute__((target("ssse3"))); | |
+#endif | |
+ | |
+/* Spreads the 12 input bytes held in the low part of in over 16 bytes, so | |
+ * that every output byte carries one 6-bit group in its low six bits (see | |
+ * the layout comments below). The top four input bytes are not used. */ | |
+static __m128i php_base64_encode_reshuffle(__m128i in) | |
+{ | |
+ __m128i t0, t1, t2, t3; | |
+ | |
+ /* input, bytes MSB to LSB: | |
+ * 0 0 0 0 l k j i h g f e d c b a */ | |
+ in = _mm_shuffle_epi8(in, _mm_set_epi8( | |
+ 10, 11, 9, 10, | |
+ 7, 8, 6, 7, | |
+ 4, 5, 3, 4, | |
+ 1, 2, 0, 1)); | |
+ /* t0: the 6-bit fields that have to move right inside their 16-bit lane */ | |
+ t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); | |
+ /* high half of a multiply by 2^6 / 2^10 == shift right by 10 (low lane) / 6 (high lane) */ | |
+ t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); | |
+ /* t2: the 6-bit fields that have to move left inside their 16-bit lane */ | |
+ t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); | |
+ /* low half of a multiply by 2^4 / 2^8 == shift left by 4 (low lane) / 8 (high lane) */ | |
+ t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); | |
+ | |
+ /* output (upper case are MSB, lower case are LSB): | |
+ * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ | |
+ * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG | |
+ * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD | |
+ * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */ | |
+ return _mm_or_si128(t1, t3); | |
+} | |
+ | |
+/* Maps sixteen 6-bit values (one per byte, 0..63) to their Base64 alphabet | |
+ * characters by adding a per-range offset taken from a 16-entry table. */ | |
+static __m128i php_base64_encode_translate(__m128i in) | |
+{ | |
+ __m128i mask, indices; | |
+ __m128i lut = _mm_setr_epi8( | |
+ 65, 71, -4, -4, | |
+ -4, -4, -4, -4, | |
+ -4, -4, -4, -4, | |
+ -19, -16, 0, 0 | |
+ ); | |
+ | |
+ /* Translate values 0..63 to the Base64 alphabet. There are five sets: | |
+ * # From To Offset Index Characters | |
+ * 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ | |
+ * 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz | |
+ * 2 [52..61] [48..57] -4 [2..11] 0123456789 | |
+ * 3 [62] [43] -19 12 + | |
+ * 4 [63] [47] -16 13 / */ | |
+ | |
+ /* Create LUT indices from input: | |
+ * the index for range #0 is right, others are 1 less than expected: */ | |
+ indices = _mm_subs_epu8(in, _mm_set1_epi8(51)); | |
+ | |
+ /* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */ | |
+ mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25)); | |
+ | |
+ /* subtract -1, i.e. add 1 to the indices for range #[1..4]; all indices are now correct: */ | |
+ indices = _mm_sub_epi8(indices, mask); | |
+ | |
+ /* Add offsets to input values: */ | |
+ return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices)); | |
+} | |
+ | |
+/* SSSE3 base64 encoder. It is the public php_base64_encode in native builds | |
+ * and php_base64_encode_ssse3 (selected at run time) in resolver builds. | |
+ * Returns a newly allocated zend_string holding the encoded text. */ | |
+# if ZEND_INTRIN_SSSE3_NATIVE | |
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) | |
+# else | |
+zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length) | |
+# endif | |
+{ | |
+ const unsigned char *c = str; | |
+ unsigned char *o; | |
+ zend_string *result; | |
+ | |
+ result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); | |
+ o = (unsigned char *)ZSTR_VAL(result); | |
+ /* each round loads 16 bytes but encodes only 12 of them, hence "> 15" */ | |
+ while (length > 15) { | |
+ __m128i s = _mm_loadu_si128((__m128i *)c); | |
+ | |
+ s = php_base64_encode_reshuffle(s); | |
+ | |
+ s = php_base64_encode_translate(s); | |
+ | |
+ _mm_storeu_si128((__m128i *)o, s); | |
+ c += 12; | |
+ o += 16; | |
+ length -= 12; | |
+ } | |
+ /* the scalar encoder finishes the remaining bytes and writes the NUL */ | |
+ o = php_base64_encode_impl(c, length, o); | |
+ | |
+ ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); | |
+ return result; | |
+} | |
+#endif | |
+ | |
/* }}} */ | |
-/* {{{ */ | |
+/* {{{ php_base64_decode_ex */ | |
/* generate reverse table (do not set index 0 to 64) | |
static unsigned short base64_reverse_table[256]; | |
#define rt base64_reverse_table | |
@@ -125,78 +374,157 @@ void php_base64_init(void) | |
efree(s); | |
} | |
*/ | |
-/* }}} */ | |
-PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) /* {{{ */ | |
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER | |
+ | |
+/* __attribute__((target)) is only usable when HAVE_FUNC_ATTRIBUTE_TARGET is | |
+ * defined. The resolver can be enabled without it (ZEND_WIN32), so apply the | |
+ * same guard as ZEND_INTRIN_SSSE3_FUNC_DECL to keep such builds parsing. */ | |
+#if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) | |
+static __m128i php_base64_decode_reshuffle(__m128i in) __attribute__((target("ssse3"))); | |
+#endif | |
+ | |
+/* Packs sixteen 6-bit values (one per input byte) into 12 consecutive bytes | |
+ * in the low part of the result; the top four bytes are zeroed by the -1 | |
+ * shuffle indices. Bit layouts after each step are shown below. */ | |
+static __m128i php_base64_decode_reshuffle(__m128i in) | |
+{ | |
+ __m128i merge_ab_and_bc, out; | |
+ /* per byte pair: first byte * 0x40 + second byte, i.e. two 6-bit values -> 12 bits */ | |
+ merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140)); | |
+ /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK | |
+ * 0000hhhh IIiiiiii 0000GGGG GGggHHHH | |
+ * 0000eeee FFffffff 0000DDDD DDddEEEE | |
+ * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */ | |
+ /* per 16-bit pair: low word * 0x1000 + high word, i.e. two 12-bit values -> 24 bits */ | |
+ out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000)); | |
+ /* 00000000 JJJJJJjj KKKKkkkk LLllllll | |
+ * 00000000 GGGGGGgg HHHHhhhh IIiiiiii | |
+ * 00000000 DDDDDDdd EEEEeeee FFffffff | |
+ * 00000000 AAAAAAaa BBBBbbbb CCcccccc */ | |
+ | |
+ return _mm_shuffle_epi8(out, _mm_setr_epi8( | |
+ 2, 1, 0, | |
+ 6, 5, 4, | |
+ 10, 9, 8, | |
+ 14, 13, 12, | |
+ -1, -1, -1, -1)); | |
+ /* 00000000 00000000 00000000 00000000 | |
+ * LLllllll KKKKkkkk JJJJJJjj IIiiiiii | |
+ * HHHHhhhh GGGGGGgg FFffffff EEEEeeee | |
+ * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */ | |
+} | |
+ | |
+/* SSSE3 base64 decoder. It is the public php_base64_decode_ex in native | |
+ * builds and php_base64_decode_ex_ssse3 (selected at run time) in resolver | |
+ * builds. Blocks of 16 characters that are all alphabet characters are | |
+ * decoded with SIMD; everything else is left to php_base64_decode_impl. | |
+ * Returns a newly allocated zend_string, or NULL when decoding fails. */ | |
+#if ZEND_INTRIN_SSSE3_NATIVE | |
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) | |
+#else | |
+zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict) | |
+#endif | |
{ | |
- const unsigned char *current = str; | |
- int ch, i = 0, j = 0, padding = 0; | |
+ const unsigned char *c = str; | |
+ unsigned char *o; | |
+ size_t outl = 0; | |
zend_string *result; | |
result = zend_string_alloc(length, 0); | |
+ o = (unsigned char *)ZSTR_VAL(result); | |
- /* run through the whole string, converting as we go */ | |
- while (length-- > 0) { | |
- ch = *current++; | |
- if (ch == base64_pad) { | |
- padding++; | |
- continue; | |
- } | |
+ while (length > 15 + 2) { /* SIMD rounds need at least 18 input bytes left */ | |
+ __m128i lut_lo, lut_hi, lut_roll; | |
+ __m128i hi_nibbles, lo_nibbles, hi, lo; | |
- ch = base64_reverse_table[ch]; | |
- if (!strict) { | |
- /* skip unknown characters and whitespace */ | |
- if (ch < 0) { | |
- continue; | |
- } | |
- } else { | |
- /* skip whitespace */ | |
- if (ch == -1) { | |
- continue; | |
- } | |
- /* fail on bad characters or if any data follows padding */ | |
- if (ch == -2 || padding) { | |
- goto fail; | |
- } | |
- } | |
+ __m128i s = _mm_loadu_si128((__m128i *)c); | |
- switch(i % 4) { | |
- case 0: | |
- ZSTR_VAL(result)[j] = ch << 2; | |
- break; | |
- case 1: | |
- ZSTR_VAL(result)[j++] |= ch >> 4; | |
- ZSTR_VAL(result)[j] = (ch & 0x0f) << 4; | |
- break; | |
- case 2: | |
- ZSTR_VAL(result)[j++] |= ch >>2; | |
- ZSTR_VAL(result)[j] = (ch & 0x03) << 6; | |
- break; | |
- case 3: | |
- ZSTR_VAL(result)[j++] |= ch; | |
+ /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions" | |
+ * https://arxiv.org/pdf/1704.00605.pdf */ | |
+ lut_lo = _mm_setr_epi8( | |
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, | |
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); | |
+ | |
+ lut_hi = _mm_setr_epi8( | |
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, | |
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); | |
+ | |
+ lut_roll = _mm_setr_epi8( | |
+ 0, 16, 19, 4, -65, -65, -71, -71, | |
+ 0, 0, 0, 0, 0, 0, 0, 0); | |
+ /* per-byte table lookups keyed on the high and on the low nibble of each character */ | |
+ hi_nibbles = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); | |
+ lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); | |
+ hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); | |
+ lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); | |
+ | |
+ /* Check for invalid input: if any "and" values from lo and hi are not zero, | |
+ fall back on bytewise code to do error checking and reporting: */ | |
+ if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) { | |
break; | |
+ } else { | |
+ __m128i eq_2f, roll; | |
+ | |
+ eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); | |
+ roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); | |
+ /* adding the per-character offset turns each ASCII code into its 6-bit value */ | |
+ s = _mm_add_epi8(s, roll); | |
+ | |
+ s = php_base64_decode_reshuffle(s); | |
+ | |
+ _mm_storeu_si128((__m128i *)o, s); | |
+ /* 16 characters consumed, 12 decoded bytes kept */ | |
+ c += 16; | |
+ o += 12; | |
+ outl += 12; | |
+ length -= 16; | |
} | |
- i++; | |
- } | |
- /* fail if the input is truncated (only one char in last group) */ | |
- if (strict && i % 4 == 1) { | |
- goto fail; | |
} | |
- /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding | |
- * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ | |
- if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { | |
- goto fail; | |
+ /* the scalar decoder handles the rest, appending at offset outl of the result buffer */ | |
+ if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { | |
+ zend_string_free(result); | |
+ return NULL; | |
} | |
- ZSTR_LEN(result) = j; | |
- ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0'; | |
+ ZSTR_LEN(result) = outl; | |
+ | |
return result; | |
+} | |
+#endif | |
+ | |
+/* Portable base64 encoder: php_base64_encode_default in resolver builds, | |
+ * the public php_base64_encode when no SSSE3 variant is compiled at all. | |
+ * Returns a newly allocated zend_string holding the encoded text. */ | |
+#if !ZEND_INTRIN_SSSE3_NATIVE | |
+#if ZEND_INTRIN_SSSE3_RESOLVER | |
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length) | |
+#else | |
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) | |
+#endif | |
+{ | |
+ zend_string *encoded = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); | |
+ unsigned char *first = (unsigned char *)ZSTR_VAL(encoded); | |
+ /* the shared encoder returns the position of the terminating NUL */ | |
+ unsigned char *last = php_base64_encode_impl(str, length, first); | |
+ | |
+ ZSTR_LEN(encoded) = (last - first); | |
+ return encoded; | |
+} | |
+#endif | |
-fail: | |
- zend_string_free(result); | |
- return NULL; | |
+/* Portable base64 decoder: php_base64_decode_ex_default in resolver builds, | |
+ * the public php_base64_decode_ex when no SSSE3 variant is compiled at all. | |
+ * Returns a newly allocated zend_string, or NULL when decoding fails. */ | |
+#if !ZEND_INTRIN_SSSE3_NATIVE | |
+#if ZEND_INTRIN_SSSE3_RESOLVER | |
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict) | |
+#else | |
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) | |
+#endif | |
+{ | |
+ size_t decoded_len = 0; | |
+ zend_string *decoded = zend_string_alloc(length, 0); | |
+ | |
+ if (php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(decoded), &decoded_len, strict)) { | |
+ ZSTR_LEN(decoded) = decoded_len; | |
+ return decoded; | |
+ } | |
+ | |
+ /* decoding failed: release the buffer and report the error as NULL */ | |
+ zend_string_free(decoded); | |
+ return NULL; | |
} | |
+#endif | |
/* }}} */ | |
/* {{{ proto string base64_encode(string str) | |
diff --git a/ext/standard/base64.h b/ext/standard/base64.h | |
index f380d3c..2f9a8a0 100644 | |
--- a/ext/standard/base64.h | |
+++ b/ext/standard/base64.h | |
@@ -24,7 +24,16 @@ | |
PHP_FUNCTION(base64_decode); | |
PHP_FUNCTION(base64_encode); | |
+#if ZEND_INTRIN_SSSE3_FUNC_PTR | |
+PHP_MINIT_FUNCTION(base64_intrin); | |
+#endif | |
+ | |
+#if ZEND_INTRIN_SSSE3_FUNC_PTR | |
+/* Function-pointer build: base64.c defines this pointer and assigns it in PHP_MINIT_FUNCTION(base64_intrin). */ | |
+PHPAPI extern zend_string *(*php_base64_encode)(const unsigned char *, size_t); | |
+#else | |
PHPAPI extern zend_string *php_base64_encode(const unsigned char *, size_t); | |
+#endif | |
+ | |
static inline zend_string *php_base64_encode_str(const zend_string *str) { | |
return php_base64_encode((const unsigned char*)(ZSTR_VAL(str)), ZSTR_LEN(str)); | |
} | |
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c | |
index b322caa..0be3eda 100644 | |
--- a/ext/standard/basic_functions.c | |
+++ b/ext/standard/basic_functions.c | |
@@ -3692,6 +3692,10 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */ | |
BASIC_MINIT_SUBMODULE(string_intrin) | |
#endif | |
+#if ZEND_INTRIN_SSSE3_FUNC_PTR | |
+ BASIC_MINIT_SUBMODULE(base64_intrin) | |
+#endif | |
+ | |
BASIC_MINIT_SUBMODULE(crypt) | |
BASIC_MINIT_SUBMODULE(lcg) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment