-
-
Save siqin/4201667 to your computer and use it in GitHub Desktop.
// Xcode 4.2.1
@implementation NSString (EmojiExtension)

/**
 * Returns a copy of the receiver with emoji-like characters removed.
 *
 * The receiver is walked one composed character sequence at a time and only
 * the FIRST code point of each sequence is classified; when it is classified
 * as emoji the whole sequence (including trailing variation selectors, skin
 * tone modifiers, joiners, ...) is dropped. A sequence is dropped when its
 * first code point lies in one of these coarse ranges:
 *
 *   - U+2100  – U+27BF   BMP symbols, from Letterlike Symbols through Dingbats
 *                        (this includes arrows and mathematical operators).
 *   - U+1D000 – U+1FAFF  supplementary-plane symbols and pictographs.
 *
 * Emoji that start outside these ranges (for example keycap sequences that
 * start with an ASCII digit, or U+2B50) are left untouched.
 *
 * @return A new immutable string; the receiver is not modified.
 */
- (NSString *)removeEmoji {
    NSMutableString *result = [NSMutableString stringWithCapacity:self.length];

    [self enumerateSubstringsInRange:NSMakeRange(0, self.length)
                             options:NSStringEnumerationByComposedCharacterSequences
                          usingBlock:^(NSString *substring,
                                       NSRange substringRange,
                                       NSRange enclosingRange,
                                       BOOL *stop) {
        const unichar high = [substring characterAtIndex:0];
        BOOL isEmoji = NO;

        if (0xd800 <= high && high <= 0xdbff) {
            // High surrogate: only decode when a real low surrogate follows.
            // An unpaired surrogate has no second unit, so reading index 1
            // unconditionally would raise NSRangeException.
            if (substring.length > 1) {
                const unichar low = [substring characterAtIndex:1];
                if (0xdc00 <= low && low <= 0xdfff) {
                    const UTF32Char codePoint =
                        (((UTF32Char)high - 0xd800) * 0x400) + ((UTF32Char)low - 0xdc00) + 0x10000;
                    isEmoji = (0x1d000 <= codePoint && codePoint <= 0x1faff); // U+1D000-1FAFF
                }
            }
        } else {
            // Single UTF-16 unit in the Basic Multilingual Plane.
            isEmoji = (0x2100 <= high && high <= 0x27bf); // U+2100-27BF
        }

        if (!isEmoji) {
            [result appendString:substring];
        }
    }];

    return [result copy];
}

@end
'Measuring length of a string' in the Apple docs https://developer.apple.com/documentation/swift/string brought me to another solution that does not require any knowledge of the Unicode blocks. I just want letters to remain in the string and to skip everything that is an icon:
#include <string.h>
/**
 * Returns a copy of `origString` with emoji and other pictographic symbols
 * removed, while keeping ordinary text in any script.
 *
 * The string is decoded one Unicode scalar at a time. A scalar is kept when:
 *   - it is below U+0800 (its UTF-8 form is at most 2 bytes), or
 *   - it belongs to the alphanumeric (letters, marks, numbers), punctuation or
 *     whitespace/newline character sets, and is not one of the combining
 *     characters that only decorate emoji (variation selectors
 *     U+FE00–U+FE0F, combining marks for symbols U+20D0–U+20FF).
 *
 * Everything else is dropped, including unpaired surrogates.
 *
 * NOTE(review): symbols above U+07FF that are not pictographs (for example the
 * euro sign U+20AC) are still removed, because the symbol character set does
 * not separate them from emoji — confirm this is acceptable for callers.
 *
 * @param origString The string to filter.
 * @return A new immutable string containing only the kept characters.
 */
inline static NSString * _Nonnull nsstring_remove_emoji_v2(NSString * const _Nonnull origString) {
    NSUInteger const len = origString.length;
    NSMutableString * const result = [NSMutableString stringWithCapacity:len];

    // Text classes that are preserved above U+07FF.
    NSMutableCharacterSet * const keptCharacters = [NSMutableCharacterSet alphanumericCharacterSet];
    [keptCharacters formUnionWithCharacterSet:[NSCharacterSet punctuationCharacterSet]];
    [keptCharacters formUnionWithCharacterSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];

    NSUInteger index = 0;
    while (index < len) {
        unichar const unit = [origString characterAtIndex:index];
        UTF32Char scalar = unit;
        NSUInteger unitCount = 1;
        BOOL wellFormed = YES;

        if (unit >= 0xD800 && unit <= 0xDBFF) {
            // High surrogate: combine with the following low surrogate, if any.
            unichar const next = (index + 1 < len) ? [origString characterAtIndex:index + 1] : 0;
            if (next >= 0xDC00 && next <= 0xDFFF) {
                scalar = 0x10000 + (((UTF32Char)unit - 0xD800) << 10) + ((UTF32Char)next - 0xDC00);
                unitCount = 2;
            } else {
                wellFormed = NO;
            }
        } else if (unit >= 0xDC00 && unit <= 0xDFFF) {
            // Low surrogate without a preceding high surrogate.
            wellFormed = NO;
        }

        BOOL keep = NO;
        if (wellFormed) {
            BOOL const decoratesEmoji = (scalar >= 0xFE00 && scalar <= 0xFE0F) ||
                                        (scalar >= 0x20D0 && scalar <= 0x20FF);
            if (scalar < 0x800) {
                keep = YES;
            } else if (!decoratesEmoji) {
                keep = [keptCharacters longCharacterIsMember:scalar];
            }
        }

        if (keep) {
            [result appendString:[origString substringWithRange:NSMakeRange(index, unitCount)]];
        }
        index += unitCount;
    }

    return [result copy];
}
I have no clue what this does with Chinese or Japanese text, but it works for all German letters.
I have no clue what this does with Chinese or Japanese text, but it works for all German letters.
I came across this gist, and I happen to have strings with Chinese + emoji. This code will remove all Chinese characters, because their UTF-8 strlen is 3 :)
A much simpler way is to use a string transform -- this will remove all emoji code points and preserve non-Latin characters, accents, etc.
Eg
[@"🤯!!! ক❤️testé🧡💚💛せぬ❤️🔥👩🏿🦰" stringByApplyingTransform: @"[:emoji:] remove" reverse:NO]
returns
!!! ক️testéせぬ️
Just to follow up -- apparently the [:emoji:]
property used in the ICU transform includes digits, some punctuation, and other things not generally thought to be emoji.
I am finding that this method on an NSString category works better:
/**
 * Returns a copy of the receiver with every match of a cached emoji regular
 * expression removed.
 *
 * The expression is compiled once and reused. It matches characters in the
 * [:emoji:] set after subtracting digits, punctuation, letters, the Latin-1
 * Supplement block and the Letterlike Symbols block, plus U+FE0F.
 *
 * If the expression cannot be compiled, the failure is logged and an
 * unmodified copy of the receiver is returned instead of nil.
 *
 * @return A new string with all matches replaced by the empty string.
 */
- (NSString *)stringByRemovingEmoji {
    static NSRegularExpression *regex = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        // Remove all emoji except those that are digits, punctuation, letters,
        // Latin-1 Supplement or letter-like symbols; also remove U+FE0F.
        NSError *error = nil;
        regex = [NSRegularExpression regularExpressionWithPattern:@"([[:emoji:]--[:digit:]--[:punctuation:]--[:letter:]--[:block=Latin-1_sup:]--[:block=letter-like-symbols:]]|\\uFE0F)" options:0 error:&error];
        // Judge success by the return value; the error pointer is only
        // meaningful when creation failed.
        if (regex == nil) {
            NSLog(@"Error forming regex: %@", error);
        }
    });

    if (regex == nil) {
        // Messaging a nil regex would make this method return nil.
        return [self copy];
    }
    return [regex stringByReplacingMatchesInString:self
                                           options:0
                                             range:NSMakeRange(0, self.length)
                                      withTemplate:@""];
}
The heart emoji ❤️ does not work!