Created
August 16, 2018 14:08
-
-
Save mdouze/682b5af4931dc8e4b3c6bdc3c1580daf to your computer and use it in GitHub Desktop.
Make SSE code compatible with AMD CPUs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/tests/test_build_blocks.py b/tests/test_build_blocks.py | |
index 2492eac..ffbec50 100644 | |
--- a/tests/test_build_blocks.py | |
+++ b/tests/test_build_blocks.py | |
@@ -213,5 +213,24 @@ class TestOrthognalReconstruct(unittest.TestCase): | |
else: | |
self.assertFalse('should do an exception') | |
+class TestMAdd(unittest.TestCase):
+    """Check faiss.fvec_madd_and_argmin against a NumPy reference."""
+
+    def test_1(self):
+        """Compare the output vector and the returned index with NumPy.
+
+        The reference is ref_c = a + b * bf; the returned index is
+        expected to equal ref_c.argmin().
+        """
+        # try with dimensions that are multiples of 16 or not
+        rs = np.random.RandomState(123)
+        swig_ptr = faiss.swig_ptr
+        for dim in 16, 32, 20, 25:
+            for _ in range(5):
+                a = rs.rand(dim).astype('float32')
+                b = rs.rand(dim).astype('float32')
+                c = np.zeros(dim, dtype='float32')
+                # Scale factor drawn from [-2.5, 2.5). The bounds are
+                # spelled out because uniform(5.0) means low=5.0 with the
+                # default high=1.0, and NumPy leaves high < low undefined.
+                bf = rs.uniform(0.0, 5.0) - 2.5
+                idx = faiss.fvec_madd_and_argmin(
+                    dim, swig_ptr(a), bf, swig_ptr(b),
+                    swig_ptr(c))
+                ref_c = a + b * bf
+                # unittest assertions still run under `python -O`,
+                # unlike bare assert statements.
+                self.assertLess(np.abs(c - ref_c).max(), 1e-5)
+                self.assertEqual(idx, ref_c.argmin())
+ | |
if __name__ == '__main__': | |
unittest.main() | |
diff --git a/utils.cpp b/utils.cpp | |
index 5c8930c..7a0cf12 100644 | |
--- a/utils.cpp | |
+++ b/utils.cpp | |
@@ -1955,7 +1955,7 @@ static inline int fvec_madd_and_argmin_sse (size_t n, const float *a, | |
_mm_andnot_si128 (mask, imin4)); | |
// vmin4 = _mm_min_ps (vmin4, vc4); | |
} | |
- return _mm_extract_epi32 (imin4, 0); | |
+ return _mm_cvtsi128_si32 (imin4); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment