Created
May 23, 2023 01:06
-
-
Save stephancasas/ac4c0a6b3af04f9a26e20dd434be25a4 to your computer and use it in GitHub Desktop.
Convert literal escaped unicode character to NSString.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// main.m
// EscapedUnicodeCharConvertExample
//
// Created by Stephan Casas on 5/22/23.
//
#import <Foundation/Foundation.h> | |
/// Converts a literal escaped Unicode code point into an `NSString` holding
/// that character. For example, the ten-character string written in source
/// as @"\\U0001D670" becomes the single character U+1D670.
///
/// The first two characters of the input are treated as the escape prefix and
/// are skipped without being inspected. Up to eight hexadecimal digits that
/// follow are parsed as the code point; parsing stops at the first character
/// that is not a hex digit.
///
/// @param escapedUnicharString The escaped form: a two-character prefix
///        followed by one to eight hex digits.
/// @return A string containing the decoded character, or `nil` when the input
///         is `nil`, contains no hex digits after the prefix, or names a value
///         that is not a Unicode scalar (a surrogate, or above U+10FFFF).
NSString* escapedUnicharToString(NSString* escapedUnicharString) {
    static const NSUInteger kPrefixLength = 2;
    static const NSUInteger kMaxHexDigits = 8;

    /// Messaging `nil` yields a length of zero, so this guard also covers a
    /// `nil` argument.
    NSUInteger inputLength = [escapedUnicharString length];
    if (inputLength <= kPrefixLength) {
        return nil;
    }

    /// Never read past the end of the input, and never take more than eight
    /// digits — eight hex digits always fit in 32 bits.
    NSUInteger scanEnd = MIN(inputLength, kPrefixLength + kMaxHexDigits);

    uint32_t codePoint = 0;
    NSUInteger digitCount = 0;
    for (NSUInteger i = kPrefixLength; i < scanEnd; i++) {
        unichar c = [escapedUnicharString characterAtIndex:i];
        uint32_t nibble;
        if (c >= '0' && c <= '9') {
            nibble = (uint32_t)(c - '0');
        } else if (c >= 'a' && c <= 'f') {
            nibble = (uint32_t)(c - 'a') + 10;
        } else if (c >= 'A' && c <= 'F') {
            nibble = (uint32_t)(c - 'A') + 10;
        } else {
            break;
        }
        codePoint = (codePoint << 4) | nibble;
        digitCount++;
    }

    if (digitCount == 0) {
        return nil;
    }

    /// Reject values that cannot be represented as a single Unicode scalar.
    BOOL isSurrogate = (codePoint >= 0xD800 && codePoint <= 0xDFFF);
    if (codePoint > 0x10FFFF || isSurrogate) {
        return nil;
    }

    /// Code points in the Basic Multilingual Plane are a single UTF-16 unit.
    if (codePoint < 0x10000) {
        unichar unit = (unichar)codePoint;
        return [NSString stringWithCharacters:&unit length:1];
    }

    /// Supplementary-plane code points are stored as a surrogate pair: the
    /// 20-bit offset from U+10000 is split into a high and a low 10-bit half.
    uint32_t offset = codePoint - 0x10000;
    unichar surrogatePair[2] = {
        (unichar)(0xD800 + (offset >> 10)),
        (unichar)(0xDC00 + (offset & 0x3FF)),
    };
    return [NSString stringWithCharacters:surrogatePair length:2];
}
/// Program entry point: decodes one sample escaped code point and logs the
/// resulting string.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSString *decoded = escapedUnicharToString(@"\\U0001D670");
        NSLog(@"%@", decoded);
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Oh boy, I didn't think this would take me nearly all day.
Please, someone tell me I missed something obvious and that there is an easier way to do this without calling an external program.