Created
February 14, 2018 01:09
-
-
Save laruence/ab7245d3160bebe1385d7bc78055c8ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c | |
index 1c870e9..d9d2cdb 100644 | |
--- a/ext/opcache/ZendAccelerator.c | |
+++ b/ext/opcache/ZendAccelerator.c | |
@@ -1358,7 +1358,7 @@ static zend_persistent_script *store_script_in_file_cache(zend_persistent_script | |
memory_used = zend_accel_script_persist_calc(new_persistent_script, NULL, 0, 0); | |
/* Allocate memory block */ | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align to 64-byte boundary */ | |
ZCG(mem) = zend_arena_alloc(&CG(arena), memory_used + 64); | |
ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L); | |
@@ -1465,7 +1465,7 @@ static zend_persistent_script *cache_script_in_shared_memory(zend_persistent_scr | |
memory_used = zend_accel_script_persist_calc(new_persistent_script, key, key_length, 1); | |
/* Allocate shared memory */ | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align to 64-byte boundary */ | |
ZCG(mem) = zend_shared_alloc(memory_used + 64); | |
if (ZCG(mem)) { | |
diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c | |
index bd46a86..bed05e7 100644 | |
--- a/ext/opcache/zend_accelerator_util_funcs.c | |
+++ b/ext/opcache/zend_accelerator_util_funcs.c | |
@@ -596,8 +596,74 @@ static void zend_accel_class_hash_copy(HashTable *target, HashTable *source, uni | |
return; | |
} | |
-#ifdef __SSE2__ | |
-# include <mmintrin.h> | |
+#if defined(__AVX__) | |
+# include <immintrin.h> | |
+# include <nmmintrin.h> | |
+# if defined(__GNUC__) && defined(__i386__) | |
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
+{ /* AVX copy for 32-bit x86 (GCC inline asm): moves 64 bytes from src to dest per loop pass. */ | |
+ size_t delta = (char*)dest - (char*)src; /* dest is addressed as src + delta, so only src is advanced */ | |
+ /* The loop body runs before size is tested, so at least 64 bytes are always copied. */ | |
+ __asm__ volatile ( | |
+ ".align 16\n\t" /* align the loop entry */ | |
+ ".LL0%=:\n\t" /* %= expands to a number unique to this asm instance, keeping the label unique when inlined */ | |
+ "prefetchnta 0x40(%1)\n\t" /* non-temporal prefetch of the next 64-byte chunk of src */ | |
+ "vmovaps (%1), %%ymm0\n\t" /* aligned 32-byte load of src bytes 0..31 */ | |
+ "vmovaps 0x20(%1), %%ymm1\n\t" /* aligned 32-byte load of src bytes 32..63 */ | |
+ "vmovaps %%ymm0, (%1,%2)\n\t" /* aligned 32-byte store to src + delta, i.e. dest bytes 0..31 */ | |
+ "vmovaps %%ymm1, 0x20(%1,%2)\n\t" /* aligned 32-byte store to dest bytes 32..63 */ | |
+ "addl $0x40, %1\n\t" /* src += 64 */ | |
+ "subl $0x40, %0\n\t" /* size -= 64 */ | |
+ "ja .LL0%=" /* repeat while the subtraction neither borrowed nor reached zero */ | |
+ : "+r"(size), /* %0: remaining byte count, read and written */ | |
+ "+r"(src) /* %1: current source pointer, read and written */ | |
+ : "r"(delta) /* %2: dest - src, read only */ | |
+ : "cc", "memory", "%ymm0", "%ymm1"); /* flags, memory and both scratch vector registers are clobbered */ | |
+} | |
+# elif defined(__GNUC__) && defined(__x86_64__) | |
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
+{ /* AVX copy for x86-64 (GCC inline asm): moves 64 bytes from src to dest per loop pass. */ | |
+ size_t delta = (char*)dest - (char*)src; /* dest is addressed as src + delta, so only src is advanced */ | |
+ /* The loop body runs before size is tested, so at least 64 bytes are always copied. */ | |
+ __asm__ volatile ( | |
+ ".align 16\n\t" /* align the loop entry */ | |
+ ".LL0%=:\n\t" /* %= expands to a number unique to this asm instance, keeping the label unique when inlined */ | |
+ "prefetchnta 0x40(%1)\n\t" /* non-temporal prefetch of the next 64-byte chunk of src */ | |
+ "vmovaps (%1), %%ymm0\n\t" /* aligned 32-byte load of src bytes 0..31 */ | |
+ "vmovaps 0x20(%1), %%ymm1\n\t" /* aligned 32-byte load of src bytes 32..63 */ | |
+ "vmovaps %%ymm0, (%1,%2)\n\t" /* aligned 32-byte store to src + delta, i.e. dest bytes 0..31 */ | |
+ "vmovaps %%ymm1, 0x20(%1,%2)\n\t" /* aligned 32-byte store to dest bytes 32..63 */ | |
+ "addq $0x40, %1\n\t" /* src += 64 */ | |
+ "subq $0x40, %0\n\t" /* size -= 64 */ | |
+ "ja .LL0%=" /* repeat while the subtraction neither borrowed nor reached zero */ | |
+ : "+r"(size), /* %0: remaining byte count, read and written */ | |
+ "+r"(src) /* %1: current source pointer, read and written */ | |
+ : "r"(delta) /* %2: dest - src, read only */ | |
+ : "cc", "memory", "%ymm0", "%ymm1"); /* flags, memory and both scratch vector registers are clobbered */ | |
+} | |
+# else | |
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
+{ /* AVX intrinsics fallback: copies size bytes from src to dest, 64 bytes per pass; both pointers must be 32-byte aligned. */ | |
+	__m256i *dqdest = (__m256i*)dest; | |
+	const __m256i *dqsrc = (const __m256i*)src; | |
+	const __m256i *end = (const __m256i*)((const char*)src + size); | |
+	/* The body runs before the bound is tested, so at least 64 bytes are always copied. */ | |
+	do { | |
+#ifdef PHP_WIN32 | |
+		_mm_prefetch((const char *)(dqsrc + 2), _MM_HINT_NTA); /* prefetch the next 64-byte chunk */ | |
+#else | |
+		_mm_prefetch(dqsrc + 2, _MM_HINT_NTA); /* prefetch the next 64-byte chunk */ | |
+#endif | |
+		/* Integer load/store intrinsics match the __m256i operands; the _ps forms expect float pointers and __m256 values. */ | |
+		__m256i ymm0 = _mm256_load_si256(dqsrc + 0); | |
+		__m256i ymm1 = _mm256_load_si256(dqsrc + 1); | |
+		dqsrc += 2; | |
+		_mm256_store_si256(dqdest + 0, ymm0); | |
+		_mm256_store_si256(dqdest + 1, ymm1); | |
+		dqdest += 2; | |
+	} while (dqsrc < end); /* '<' also terminates when size is not a multiple of 64 */ | |
+} | |
+# endif | |
+#elif defined(__SSE2__) | |
# include <emmintrin.h> | |
# if defined(__GNUC__) && defined(__i386__) | |
static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
@@ -691,7 +757,7 @@ zend_op_array* zend_accel_load_script(zend_persistent_script *persistent_script, | |
ZCG(current_persistent_script) = persistent_script; | |
ZCG(arena_mem) = NULL; | |
if (EXPECTED(persistent_script->arena_size)) { | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Target address must be aligned to 64-byte boundary */ | |
_mm_prefetch(persistent_script->arena_mem, _MM_HINT_NTA); | |
ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size + 64); | |
diff --git a/ext/opcache/zend_file_cache.c b/ext/opcache/zend_file_cache.c | |
index de54949..e48ccc2 100644 | |
--- a/ext/opcache/zend_file_cache.c | |
+++ b/ext/opcache/zend_file_cache.c | |
@@ -809,7 +809,7 @@ int zend_file_cache_script_store(zend_persistent_script *script, int in_shm) | |
return FAILURE; | |
} | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align to 64-byte boundary */ | |
mem = emalloc(script->size + 64); | |
buf = (void*)(((zend_uintptr_t)mem + 63L) & ~63L); | |
@@ -1391,7 +1391,7 @@ zend_persistent_script *zend_file_cache_script_load(zend_file_handle *file_handl | |
} | |
checkpoint = zend_arena_checkpoint(CG(arena)); | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align to 64-byte boundary */ | |
mem = zend_arena_alloc(&CG(arena), info.mem_size + info.str_size + 64); | |
mem = (void*)(((zend_uintptr_t)mem + 63L) & ~63L); | |
@@ -1452,7 +1452,7 @@ zend_persistent_script *zend_file_cache_script_load(zend_file_handle *file_handl | |
goto use_process_mem; | |
} | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align to 64-byte boundary */ | |
buf = zend_shared_alloc(info.mem_size + 64); | |
buf = (void*)(((zend_uintptr_t)buf + 63L) & ~63L); | |
diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c | |
index 8c16538..9fc6da7 100644 | |
--- a/ext/opcache/zend_persist.c | |
+++ b/ext/opcache/zend_persist.c | |
@@ -870,7 +870,7 @@ zend_persistent_script *zend_accel_script_persist(zend_persistent_script *script | |
} | |
zend_accel_store_interned_string(script->script.filename); | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align to 64-byte boundary */ | |
ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L); | |
#else | |
diff --git a/ext/opcache/zend_persist_calc.c b/ext/opcache/zend_persist_calc.c | |
index eb80232..27d9fef 100644 | |
--- a/ext/opcache/zend_persist_calc.c | |
+++ b/ext/opcache/zend_persist_calc.c | |
@@ -415,7 +415,7 @@ uint32_t zend_accel_script_persist_calc(zend_persistent_script *new_persistent_s | |
} | |
ADD_STRING(new_persistent_script->script.filename); | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align size to 64-byte boundary */ | |
new_persistent_script->size = (new_persistent_script->size + 63) & ~63; | |
#endif | |
@@ -430,7 +430,7 @@ uint32_t zend_accel_script_persist_calc(zend_persistent_script *new_persistent_s | |
zend_hash_persist_calc(&new_persistent_script->script.function_table, zend_persist_op_array_calc); | |
zend_persist_op_array_calc_ex(&new_persistent_script->script.main_op_array); | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
/* Align size to 64-byte boundary */ | |
new_persistent_script->arena_size = (new_persistent_script->arena_size + 63) & ~63; | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment