Created
February 6, 2019 06:06
-
-
Save easyaspi314/9d31e5c0f9cead66aba2ede248b74d64 to your computer and use it in GitHub Desktop.
SIMD functions to apply toupper/tolower to each character in a string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Created by easyaspi314. Released into the public domain. */ | |
#include <ctype.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#ifdef __SSE2__ | |
#include <immintrin.h> | |
/* SSE2 implementation: converts every uppercase ASCII letter ('A'..'Z') of
 * the NUL-terminated string `str` to lowercase, in place.
 *
 * The string is processed 16 bytes at a time; the remaining (< 16) bytes
 * are handled one by one with tolower(), which follows the current C
 * locale, whereas the vector path only ever touches 'A'..'Z'. */
void StringToLower(char *str)
{
    size_t len = strlen(str);
    /* _mm_cmpgt_epi8 / _mm_cmplt_epi8 are strict comparisons, so the bounds
     * sit one outside the range: 'A' - 1 < x < 'Z' + 1. The comparisons are
     * signed, so bytes >= 0x80 compare as negative and are never selected. */
    const __m128i belowA = _mm_set1_epi8('A' - 1);
    const __m128i aboveZ = _mm_set1_epi8('Z' + 1);
    const __m128i diff = _mm_set1_epi8('a' - 'A');

    while (len >= 16) {
        __m128i inp = _mm_loadu_si128((const __m128i *)str);
        /* x >= 'A': 0xFF, else 0x00 */
        __m128i atLeastA = _mm_cmpgt_epi8(inp, belowA);
        /* x <= 'Z': 0xFF, else 0x00 */
        __m128i atMostZ = _mm_cmplt_epi8(inp, aboveZ);
        /* 'A' <= x <= 'Z': 0xFF, else 0x00 */
        __m128i mask = _mm_and_si128(atLeastA, atMostZ);
        /* 'A' <= x <= 'Z': 'a' - 'A', else 0x00 */
        __m128i toAdd = _mm_and_si128(mask, diff);
        /* add to change to lowercase */
        __m128i lowered = _mm_add_epi8(inp, toAdd);
        _mm_storeu_si128((__m128i *)str, lowered);
        len -= 16;
        str += 16;
    }
    while (len-- > 0) {
        /* <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a negative plain char is undefined. */
        *str = (char)tolower((unsigned char)*str);
        ++str;
    }
}
/* SSE2 implementation: converts every lowercase ASCII letter ('a'..'z') of
 * the NUL-terminated string `str` to uppercase, in place.
 *
 * The string is processed 16 bytes at a time; the remaining (< 16) bytes
 * are handled one by one with toupper(), which follows the current C
 * locale, whereas the vector path only ever touches 'a'..'z'. */
void StringToUpper(char *str)
{
    size_t len = strlen(str);
    /* _mm_cmpgt_epi8 / _mm_cmplt_epi8 are strict comparisons, so the bounds
     * sit one outside the range: 'a' - 1 < x < 'z' + 1. The comparisons are
     * signed, so bytes >= 0x80 compare as negative and are never selected. */
    const __m128i belowa = _mm_set1_epi8('a' - 1);
    const __m128i abovez = _mm_set1_epi8('z' + 1);
    const __m128i diff = _mm_set1_epi8('a' - 'A');

    while (len >= 16) {
        __m128i inp = _mm_loadu_si128((const __m128i *)str);
        /* x >= 'a': 0xFF, else 0x00 */
        __m128i atLeasta = _mm_cmpgt_epi8(inp, belowa);
        /* x <= 'z': 0xFF, else 0x00 */
        __m128i atMostz = _mm_cmplt_epi8(inp, abovez);
        /* 'a' <= x <= 'z': 0xFF, else 0x00 */
        __m128i mask = _mm_and_si128(atLeasta, atMostz);
        /* 'a' <= x <= 'z': 'a' - 'A', else 0x00 */
        __m128i toSub = _mm_and_si128(mask, diff);
        /* subtract to change to uppercase */
        __m128i raised = _mm_sub_epi8(inp, toSub);
        _mm_storeu_si128((__m128i *)str, raised);
        len -= 16;
        str += 16;
    }
    while (len-- > 0) {
        /* <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a negative plain char is undefined. */
        *str = (char)toupper((unsigned char)*str);
        ++str;
    }
}
#elif defined(__ARM_NEON) || defined(__ARM_NEON__) | |
#include <arm_neon.h> | |
/* NEON implementation: converts every uppercase ASCII letter ('A'..'Z') of
 * the NUL-terminated string `str` to lowercase, in place.
 *
 * The string is processed 16 bytes at a time; the remaining (< 16) bytes
 * are handled one by one with tolower(), which follows the current C
 * locale, whereas the vector path only ever touches 'A'..'Z'. */
void StringToLower(char *str)
{
    size_t len = strlen(str);
    const uint8x16_t asciiA = vdupq_n_u8('A');
    const uint8x16_t asciiZ = vdupq_n_u8('Z');
    const uint8x16_t diff = vdupq_n_u8('a' - 'A');

    while (len >= 16) {
        uint8x16_t inp = vld1q_u8((const uint8_t *)str);
        /* Inclusive comparisons so that 'A' and 'Z' themselves match.
         * x >= 'A': 0xFF, else 0x00 */
        uint8x16_t atLeastA = vcgeq_u8(inp, asciiA);
        /* x <= 'Z': 0xFF, else 0x00 */
        uint8x16_t atMostZ = vcleq_u8(inp, asciiZ);
        /* 'A' <= x <= 'Z': 0xFF, else 0x00 */
        uint8x16_t mask = vandq_u8(atLeastA, atMostZ);
        /* 'A' <= x <= 'Z': 'a' - 'A', else 0x00 */
        uint8x16_t toAdd = vandq_u8(mask, diff);
        /* add to change to lowercase */
        uint8x16_t lowered = vaddq_u8(inp, toAdd);
        vst1q_u8((uint8_t *)str, lowered);
        len -= 16;
        str += 16;
    }
    while (len-- > 0) {
        /* <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a negative plain char is undefined. */
        *str = (char)tolower((unsigned char)*str);
        ++str;
    }
}
/* NEON implementation: converts every lowercase ASCII letter ('a'..'z') of
 * the NUL-terminated string `str` to uppercase, in place.
 *
 * The string is processed 16 bytes at a time; the remaining (< 16) bytes
 * are handled one by one with toupper(), which follows the current C
 * locale, whereas the vector path only ever touches 'a'..'z'. */
void StringToUpper(char *str)
{
    size_t len = strlen(str);
    const uint8x16_t asciia = vdupq_n_u8('a');
    const uint8x16_t asciiz = vdupq_n_u8('z');
    const uint8x16_t diff = vdupq_n_u8('a' - 'A');

    while (len >= 16) {
        uint8x16_t inp = vld1q_u8((const uint8_t *)str);
        /* Inclusive comparisons so that 'a' and 'z' themselves match.
         * x >= 'a': 0xFF, else 0x00 */
        uint8x16_t atLeasta = vcgeq_u8(inp, asciia);
        /* x <= 'z': 0xFF, else 0x00 */
        uint8x16_t atMostz = vcleq_u8(inp, asciiz);
        /* 'a' <= x <= 'z': 0xFF, else 0x00 */
        uint8x16_t mask = vandq_u8(atLeasta, atMostz);
        /* 'a' <= x <= 'z': 'a' - 'A', else 0x00 */
        uint8x16_t toSub = vandq_u8(mask, diff);
        /* subtract to change to uppercase */
        uint8x16_t raised = vsubq_u8(inp, toSub);
        vst1q_u8((uint8_t *)str, raised);
        len -= 16;
        str += 16;
    }
    while (len-- > 0) {
        /* <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a negative plain char is undefined. */
        *str = (char)toupper((unsigned char)*str);
        ++str;
    }
}
#else | |
/* Scalar fallback: lowercases the NUL-terminated string `str` in place,
 * one byte at a time, using tolower() (which follows the current C locale). */
void StringToLower(char *str)
{
    for (; *str != '\0'; ++str) {
        /* <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a negative plain char is undefined. */
        *str = (char)tolower((unsigned char)*str);
    }
}
/* Scalar fallback: uppercases the NUL-terminated string `str` in place,
 * one byte at a time, using toupper() (which follows the current C locale). */
void StringToUpper(char *str)
{
    for (; *str != '\0'; ++str) {
        /* <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a negative plain char is undefined. */
        *str = (char)toupper((unsigned char)*str);
    }
}
#endif | |
#ifdef TEST | |
#include <assert.h> | |
/* Self-test (built only when TEST is defined): runs StringToLower and
 * StringToUpper over sample inputs, asserts the results and prints them. */
int main(void)
{
    char str[] = "Hello world 12345 HI AZaz";
    /* The range-boundary letters (A, Z, a, z) and their ASCII neighbours
     * (@, [, `, {) all sit in the first 16 bytes, so a 16-bytes-per-step
     * implementation handles them in its vector loop rather than in the
     * byte-by-byte tail. */
    char edges[] = "AZaz @[`{ HELLO world 12345 ABXYZ abxyz";

    StringToLower(str);
    assert(!strcmp(str, "hello world 12345 hi azaz"));
    puts(str);
    StringToUpper(str);
    assert(!strcmp(str, "HELLO WORLD 12345 HI AZAZ"));
    puts(str);

    StringToLower(edges);
    assert(!strcmp(edges, "azaz @[`{ hello world 12345 abxyz abxyz"));
    puts(edges);
    StringToUpper(edges);
    assert(!strcmp(edges, "AZAZ @[`{ HELLO WORLD 12345 ABXYZ ABXYZ"));
    puts(edges);

    return 0;
}
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, thanks for the useful code example.
It seems `TEST` does not work as intended. Since `strlen(str) == 25`, the last part (`HI AZaz`) is processed by the scalar loop.
If it is passed `str[] = "Hello world 12345 hi ABXYZ abxyz"` or a longer string, `TEST` will fail properly.
As for the actual functions, as @bb1950328 suggested, the following patch may work:
patch