Created
June 27, 2020 15:25
-
-
Save shibayan/8839930e07898cae4de7be91bff9d5f6 to your computer and use it in GitHub Desktop.
libwebp patch for Windows on ARM (ARM64)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/Makefile.vc b/Makefile.vc | |
index 886f981f..d9b1ba4b 100644 | |
--- a/Makefile.vc | |
+++ b/Makefile.vc | |
@@ -11,6 +11,8 @@ LIBWEBPDEMUX_BASENAME = libwebpdemux | |
ARCH = x86 | |
!ELSE IF ! [ cl 2>&1 | find "x64" > NUL ] | |
ARCH = x64 | |
+!ELSE IF ! [ cl 2>&1 | find "ARM64" > NUL ] | |
+ARCH = ARM64 | |
!ELSE IF ! [ cl 2>&1 | find "ARM" > NUL ] | |
ARCH = ARM | |
!ELSE | |
diff --git a/src/dec/tree_dec.c b/src/dec/tree_dec.c | |
index 1c6fdea2..4c9f48b6 100644 | |
--- a/src/dec/tree_dec.c | |
+++ b/src/dec/tree_dec.c | |
@@ -15,7 +15,7 @@ | |
#include "src/utils/bit_reader_inl_utils.h" | |
#if !defined(USE_GENERIC_TREE) | |
-#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) | |
+#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) | |
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then. | |
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE | |
#else | |
diff --git a/src/dsp/cost_neon.c b/src/dsp/cost_neon.c | |
index 8cc8ce58..da1b561e 100644 | |
--- a/src/dsp/cost_neon.c | |
+++ b/src/dsp/cost_neon.c | |
@@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, | |
const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1)); | |
const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position)); | |
-#ifdef __aarch64__ | |
+#if defined(__aarch64__) || defined(_M_ARM64) | |
res->last = vmaxvq_u8(masked) - 1; | |
#else | |
const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked)); | |
@@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, | |
vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0); | |
--res->last; | |
-#endif // __aarch64__ | |
+#endif // defined(__aarch64__) || defined(_M_ARM64) | |
res->coeffs = coeffs; | |
} | |
diff --git a/src/dsp/dec_neon.c b/src/dsp/dec_neon.c | |
index fa851707..2b387e81 100644 | |
--- a/src/dsp/dec_neon.c | |
+++ b/src/dsp/dec_neon.c | |
@@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { | |
if (do_top) { | |
const uint8x8_t A = vld1_u8(dst - BPS); // top row | |
-#if defined(__aarch64__) | |
+#if defined(__aarch64__) || defined(_M_ARM64) | |
const uint16_t p2 = vaddlv_u8(A); | |
sum_top = vdupq_n_u16(p2); | |
#else | |
@@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) { | |
if (do_top) { | |
const uint8x16_t A = vld1q_u8(dst - BPS); // top row | |
-#if defined(__aarch64__) | |
+#if defined(__aarch64__) || defined(_M_ARM64) | |
const uint16_t p3 = vaddlvq_u8(A); | |
sum_top = vdupq_n_u16(p3); | |
#else | |
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h | |
index 78fc20a7..6a34e564 100644 | |
--- a/src/dsp/dsp.h | |
+++ b/src/dsp/dsp.h | |
@@ -95,6 +95,11 @@ extern "C" { | |
#define WEBP_USE_INTRINSICS | |
#endif | |
+#if defined(_MSC_VER) && _MSC_VER >= 1927 && defined(_M_ARM64) | |
+#define WEBP_USE_NEON | |
+#define WEBP_USE_INTRINSICS | |
+#endif | |
+ | |
#if defined(__mips__) && !defined(__mips64) && \ | |
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) | |
#define WEBP_USE_MIPS32 | |
diff --git a/src/utils/bit_reader_utils.c b/src/utils/bit_reader_utils.c | |
index 857cd609..935f6003 100644 | |
--- a/src/utils/bit_reader_utils.c | |
+++ b/src/utils/bit_reader_utils.c | |
@@ -121,7 +121,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits, | |
#define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits. | |
-#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ | |
+#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || defined(_M_ARM64) || \ | |
defined(__i386__) || defined(_M_IX86) || \ | |
defined(__x86_64__) || defined(_M_X64) | |
#define VP8L_USE_FAST_LOAD | |
diff --git a/src/utils/bit_reader_utils.h b/src/utils/bit_reader_utils.h | |
index e64156e3..46bd8c81 100644 | |
--- a/src/utils/bit_reader_utils.h | |
+++ b/src/utils/bit_reader_utils.h | |
@@ -58,17 +58,17 @@ extern "C" { | |
// BITS can be any multiple of 8 from 8 to 56 (inclusive). | |
// Pick values that fit natural register size. | |
-#if defined(__i386__) || defined(_M_IX86) // x86 32bit | |
+#if defined(__i386__) || defined(_M_IX86) // x86 32bit | |
#define BITS 24 | |
-#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit | |
+#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit | |
#define BITS 56 | |
-#elif defined(__arm__) || defined(_M_ARM) // ARM | |
+#elif defined(__arm__) || defined(_M_ARM) // ARM | |
#define BITS 24 | |
-#elif defined(__aarch64__) // ARM 64bit | |
+#elif defined(__aarch64__) || defined(_M_ARM64) // ARM 64bit | |
#define BITS 56 | |
-#elif defined(__mips__) // MIPS | |
+#elif defined(__mips__) // MIPS | |
#define BITS 24 | |
-#else // reasonable default | |
+#else // reasonable default | |
#define BITS 24 | |
#endif | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment