Created
March 30, 2014 04:37
-
-
Save dberlin/9867614 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp | |
index b78e9f5..f6aa264 100644 | |
--- a/lib/Basic/SourceManager.cpp | |
+++ b/lib/Basic/SourceManager.cpp | |
@@ -1200,8 +1200,11 @@ unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc, | |
if (isInvalid(Loc, Invalid)) return 0; | |
return getPresumedLoc(Loc).getColumn(); | |
} | |
- | |
-#ifdef __SSE2__ | |
+#ifdef __SSE4_2__ | |
+#include <nmmintrin.h> | |
+#elif __AVX2__ | |
+#include <immintrin.h> | |
+#elif __SSE2__ | |
#include <emmintrin.h> | |
#endif | |
@@ -1232,7 +1235,63 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI, | |
// Skip over the contents of the line. | |
const unsigned char *NextBuf = (const unsigned char *)Buf; | |
-#ifdef __SSE2__ | |
+#ifdef __SSE4_2__ | |
+ __m128i CRLF = _mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0,0,0,'\r','\n'); | |
+ // First fix up the alignment to 16 bytes. | |
+ while (((uintptr_t)NextBuf & (0xf)) != 0) { | |
+ if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0') | |
+ goto FoundSpecialChar; | |
+ ++NextBuf; | |
+ } | |
+ // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'. | |
+ while (NextBuf+16 <= End) { | |
+ const __m128i Chunk = *(const __m128i*)NextBuf; | |
+ int index = _mm_cmpestri(CRLF, 2, Chunk, 16, | |
+ _SIDD_UBYTE_OPS| | |
+ _SIDD_CMP_EQUAL_ANY| | |
+ _SIDD_MASKED_POSITIVE_POLARITY| | |
+ _SIDD_LEAST_SIGNIFICANT); | |
+ | |
+ // If we found a newline, adjust the pointer and jump to the | |
+ // handling code. | |
+ if (index != 16) { | |
+ NextBuf += index; | |
+ goto FoundSpecialChar; | |
+ } | |
+ NextBuf += 16; | |
+ } | |
+#elif __AVX2__ | |
+ // Try to skip to the next newline using AVX2 instructions. This is very | |
+ // performance sensitive for programs with lots of diagnostics and in -E | |
+ // mode. | |
+ __m128i CRs128 = _mm_set1_epi8('\r'); | |
+ __m256i CRs = _mm256_broadcastb_epi8(CRs128); | |
+ __m128i LFs128 = _mm_set1_epi8('\n'); | |
+ __m256i LFs = _mm256_broadcastb_epi8(LFs128); | |
+ | |
+ // First fix up the alignment to 32 bytes. | |
+ while (((uintptr_t)NextBuf & (0x1f)) != 0) { | |
+ if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0') | |
+ goto FoundSpecialChar; | |
+ ++NextBuf; | |
+ } | |
+ | |
+ // Scan 32 byte chunks for '\r' and '\n'. Ignore '\0'. | |
+ while (NextBuf+32 <= End) { | |
+ const __m256i Chunk = *(const __m256i*)NextBuf; | |
+ __m256i Cmp = _mm256_or_si256(_mm256_cmpeq_epi8(Chunk, CRs), | |
+ _mm256_cmpeq_epi8(Chunk, LFs)); | |
+ unsigned Mask = _mm256_movemask_epi8(Cmp); | |
+ | |
+ // If we found a newline, adjust the pointer and jump to the handling code. | |
+ if (Mask != 0) { | |
+ NextBuf += llvm::countTrailingZeros(Mask); | |
+ goto FoundSpecialChar; | |
+ } | |
+ NextBuf += 32; | |
+ } | |
+ | |
+#elif __SSE2__ | |
// Try to skip to the next newline using SSE instructions. This is very | |
// performance sensitive for programs with lots of diagnostics and in -E | |
// mode. | |
@@ -1265,7 +1324,7 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI, | |
while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') | |
++NextBuf; | |
-#ifdef __SSE2__ | |
+#if defined(__SSE2__) || defined(__AVX2__) | |
FoundSpecialChar: | |
#endif | |
Offs += NextBuf-Buf; | |
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp | |
index 0955cc5..10805bb 100644 | |
--- a/lib/Lex/Lexer.cpp | |
+++ b/lib/Lex/Lexer.cpp | |
@@ -2249,7 +2249,9 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, | |
return true; | |
} | |
-#ifdef __SSE2__ | |
+#ifdef __AVX2__ | |
+#include <immintrin.h> | |
+#elif __SSE2__ | |
#include <emmintrin.h> | |
#elif __ALTIVEC__ | |
#include <altivec.h> | |
@@ -2306,13 +2308,32 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, | |
// If there is a code-completion point avoid the fast scan because it | |
// doesn't check for '\0'. | |
!(PP && PP->getCodeCompletionFileLoc() == FileLoc)) { | |
- // While not aligned to a 16-byte boundary. | |
- while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) | |
+ | |
+#ifdef __AVX2__ | |
+ const int alignment = 32; | |
+#else | |
+ const int alignment = 16; | |
+#endif | |
+ // While CurPtr is not yet aligned to the boundary the vector loads need. | |
+ while (C != '/' && ((intptr_t)CurPtr & (alignment-1)) != 0) | |
C = *CurPtr++; | |
if (C == '/') goto FoundSlash; | |
- | |
-#ifdef __SSE2__ | |
+#ifdef __AVX2__ | |
+ __m128i Slashes128 = _mm_set1_epi8('/'); | |
+ __m256i Slashes = _mm256_broadcastb_epi8(Slashes128); | |
+ while (CurPtr+32 <= BufferEnd) { | |
+ int cmp = _mm256_movemask_epi8(_mm256_cmpeq_epi8(*(const __m256i*)CurPtr, Slashes)); | |
+ if (cmp != 0) { | |
+ // Adjust the pointer to point directly after the first slash. It's | |
+ // not necessary to set C here, it will be overwritten at the end of | |
+ // the outer loop. | |
+ CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1; | |
+ goto FoundSlash; | |
+ } | |
+ CurPtr += 32; | |
+ } | |
+#elif __SSE2__ | |
__m128i Slashes = _mm_set1_epi8('/'); | |
while (CurPtr+16 <= BufferEnd) { | |
int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr, |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment