Skip to content

Instantly share code, notes, and snippets.

@stephancasas
Created May 23, 2023 01:06
Show Gist options
  • Save stephancasas/ac4c0a6b3af04f9a26e20dd434be25a4 to your computer and use it in GitHub Desktop.
Save stephancasas/ac4c0a6b3af04f9a26e20dd434be25a4 to your computer and use it in GitHub Desktop.
Convert literal escaped unicode character to NSString.
//
// main.m
// EscapedUnicodeCharConvertExample
//
// Created by Stephan Casas on 5/22/23.
//
#import <Foundation/Foundation.h>
/// Converts a literal, escaped Unicode code point such as `\U0001D670`
/// (or `\u0041`) into an `NSString` containing that character.
///
/// The first two characters of the input (the escape prefix) are skipped
/// without being inspected. Up to eight hexadecimal digits following the
/// prefix are read as the code point; anything after the eighth digit is
/// ignored.
///
/// Every Unicode scalar value is supported, not just those in the
/// supplementary planes.
///
/// @param escapedUnicharString The escaped text, e.g. `@"\\U0001D670"`.
/// @return The decoded character, or `nil` when the input is `nil`, has no
///         digits after the prefix, contains a non-hexadecimal digit, or
///         names a value that is not a Unicode scalar value (a surrogate,
///         or anything above U+10FFFF).
NSString* escapedUnicharToString(NSString* escapedUnicharString) {
    static const NSUInteger prefixLength = 2;
    static const NSUInteger maxHexDigits = 8;

    /// Messaging `nil` yields a length of zero, so a `nil` argument is
    /// rejected here as well.
    NSUInteger length = [escapedUnicharString length];
    if (length <= prefixLength) {
        return nil;
    }

    /// Never read past the end of the input, and never consume more
    /// than eight digits (which is what keeps the value within 32 bits).
    NSUInteger digitCount = length - prefixLength;
    if (digitCount > maxHexDigits) {
        digitCount = maxHexDigits;
    }

    /// Accumulate the hex digits into the code point value.
    uint32_t codePoint = 0;
    for (NSUInteger i = 0; i < digitCount; i++) {
        unichar digit = [escapedUnicharString characterAtIndex:(prefixLength + i)];
        uint32_t nibble;
        if (digit >= '0' && digit <= '9') {
            nibble = (uint32_t)(digit - '0');
        } else if (digit >= 'a' && digit <= 'f') {
            nibble = (uint32_t)(digit - 'a') + 10;
        } else if (digit >= 'A' && digit <= 'F') {
            nibble = (uint32_t)(digit - 'A') + 10;
        } else {
            return nil;
        }
        codePoint = (codePoint << 4) | nibble;
    }

    /// Surrogates and values beyond the last plane are not characters.
    BOOL isSurrogate = (codePoint >= 0xD800 && codePoint <= 0xDFFF);
    if (isSurrogate || codePoint > 0x10FFFF) {
        return nil;
    }

    /// `NSString` stores UTF-16, so hand it code units directly: one
    /// unit for the Basic Multilingual Plane, a surrogate pair above it.
    unichar codeUnits[2];
    NSUInteger codeUnitCount;
    if (codePoint < 0x10000) {
        codeUnits[0] = (unichar)codePoint;
        codeUnitCount = 1;
    } else {
        uint32_t offset = codePoint - 0x10000;
        codeUnits[0] = (unichar)(0xD800 + (offset >> 10));
        codeUnits[1] = (unichar)(0xDC00 + (offset & 0x3FF));
        codeUnitCount = 2;
    }

    return [NSString stringWithCharacters:codeUnits length:codeUnitCount];
}
/// Program entry point: decodes one sample escape sequence and logs the
/// resulting string.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        /// The doubled backslash keeps the escape literal, so the
        /// function receives the ten characters `\U0001D670`.
        NSString *decoded = escapedUnicharToString(@"\\U0001D670");
        NSLog(@"%@", decoded);
    }
    return 0;
}
@stephancasas
Copy link
Author

Oh boy, I didn't think this would take me nearly all day.

Please, someone tell me I missed something obvious and that there was an easier way to do this without calling an external program.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment