Last active
October 4, 2023 17:27
-
-
Save wearhere/97528e81da82387d5fa4aa788856cdc1 to your computer and use it in GitHub Desktop.
Apply skin tone modifiers to emojis per the Unicode spec.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import CoreText
import Foundation
import UIKit
// You can run this by copying this file into a Swift playground.
extension String {
    /// The number of glyphs Core Text uses to render this string.
    ///
    /// If this string should behave like a single emoji character (regardless
    /// of whether it is, internally, a sequence of scalars), this property
    /// returns `1`.
    ///
    /// https://crunchybagel.com/using-emoji-skin-tone-modifiers-in-swift/
    /// suggests enumerating composed character sequences instead, but that
    /// approach reports `1` even when the sequence won't actually be rendered
    /// as a single character, so the string is typeset and its glyphs counted.
    var visibleCount: Int {
        let typesetLine = CTLineCreateWithAttributedString(NSAttributedString(string: self))
        return CTLineGetGlyphCount(typesetLine)
    }

    /// Returns a copy of this string with `modifier` inserted after every
    /// emoji modifier base, replacing any modifier that was already there.
    ///
    /// If this string contains multiple modifier bases, the same modifier is
    /// applied to all of them, e.g.
    /// `"\u{1F46B}".modifiedBy("\u{1F3FB}")` -> `"\u{1F46B}\u{1F3FB}"`
    /// (woman and man holding hands, light skin tone).
    /// The spec https://unicode.org/reports/tr51/#multiperson_skintones and
    /// implementation (at least of macOS 10.15.4) allow each base to take a
    /// different modifier, e.g.
    /// "\u{1F469}\u{1F3FB}\u{200D}\u{1F91D}\u{200D}\u{1F469}\u{1F3FF}"
    /// (women holding hands, light and dark skin tones), but this function
    /// does not yet support that. When support is added, it should enforce
    /// that modifying any base requires modifying all bases (in some way or
    /// another), per the spec.
    ///
    /// This function doesn't attempt to assess RGI status; some modified
    /// multi-person groupings, in particular, may not be rendered as of Emoji
    /// 13.0 https://unicode.org/reports/tr51/#multiperson_skintones .
    /// You can check `visibleCount` to guard against that, although the
    /// platform may ignore unsupported modifiers anyway, e.g.
    /// "\u{0001F46F}\u{0001F3FB}\u{200D}\u{2640}" and
    /// "\u{0001F46F}\u{200D}\u{2640}" both render as "women with bunny ears"
    /// on macOS 10.15.4.
    ///
    /// - Parameter modifier: The emoji modifier (skin tone) scalar to apply.
    ///   Checked with `assert`, so only enforced in debug builds.
    /// - Returns: The modified string; unchanged if it contains no emoji
    ///   modifier base and no modifier-following selectors/modifiers.
    func modifiedBy(_ modifier: Unicode.Scalar) -> String {
        assert(modifier.properties.isEmojiModifier, "Scalar must be emoji modifier.")

        var modifiedScalars = String.UnicodeScalarView()
        for scalar in unicodeScalars {
            let properties = scalar.properties

            // Strip variation selectors following modifiers because a) they'll
            // break ZWJ sequences, at least on macOS 10.15.4, and b) the spec
            // says that "the emoji modifier automatically implies the emoji
            // presentation" style https://unicode.org/reports/tr51/#Diversity
            // and so recommends against including emoji presentation
            // selectors. It's silent on whether text presentation selectors
            // should be included, but insofar as that would break the emoji
            // modifier, those are removed too.
            //
            // Strip modifiers following modifiers because those are now
            // invalid (the modifier just inserted supersedes them).
            let followsModifier = modifiedScalars.last?.properties.isEmojiModifier ?? false
            if followsModifier, properties.isVariationSelector || properties.isEmojiModifier {
                continue
            }

            // If the current scalar is an emoji presentation selector and an
            // emoji earlier in the sequence (but not immediately preceding)
            // has been modified, it may not be strictly necessary to preserve
            // this selector, depending on the preceding emoji's presentation
            // style. However, such selectors are preserved to support
            // modifying sequences generated by input devices, which the spec
            // https://unicode.org/reports/tr51/#Emoji_Variation_Selector_Notes
            // recommends only generate fully-qualified sequences.
            //
            // Also, if this scalar is a *text* presentation selector, it
            // *must* be preserved, as it will break a ZWJ sequence as
            // described in
            // https://unicode.org/reports/tr51/#Emoji_Variation_Selector_Notes.
            modifiedScalars.append(scalar)

            // Emoji modifiers immediately follow base characters per
            // https://unicode.org/reports/tr51/#Diversity , for single emojis
            // as well as in ZWJ sequences
            // https://unicode.org/reports/tr51/#multiperson_skintones .
            if properties.isEmojiModifierBase {
                modifiedScalars.append(modifier)
            }
        }
        return String(modifiedScalars)
    }
}
// Skin tone modifiers used by the checks below: light (U+1F3FB) and dark
// (U+1F3FF). The explicit type annotation makes each literal a non-optional
// `Unicode.Scalar`.
let modifier: Unicode.Scalar = "\u{0001F3FB}"
let modifier2: Unicode.Scalar = "\u{0001F3FF}"

// Check that we're correctly applying the spec https://unicode.org/reports/tr51/#Diversity.
// All emojis are written as escape sequences so that the exact scalars under
// test are visible and survive re-encoding of this file.

// Grinning face (not a modifier base) -> unchanged.
assert("\u{0001F600}".modifiedBy(modifier) == "\u{0001F600}", "Emojis that do not accept modifiers should not be modified.")
// Waving hand -> waving hand, light skin tone.
assert("\u{0001F44B}".modifiedBy(modifier) == "\u{0001F44B}\u{0001F3FB}", "Emojis should be modified.")
// Man getting massage (ZWJ sequence) -> same sequence, light skin tone.
assert("\u{0001F486}\u{200D}\u{2642}".modifiedBy(modifier) == "\u{0001F486}\u{0001F3FB}\u{200D}\u{2642}", "Emoji sequences should be modified.")
// Woman and man holding hands -> same grouping, light skin tone.
assert("\u{0001F46B}".modifiedBy(modifier) == "\u{0001F46B}\u{0001F3FB}", "Multi-person groupings should be modified.")
// Waving hand, light skin tone -> waving hand, dark skin tone.
assert("\u{0001F44B}\u{0001F3FB}".modifiedBy(modifier2) == "\u{0001F44B}\u{0001F3FF}", "Previous modifiers should be overridden.")
// Hand with fingers splayed, light skin tone -> dark skin tone.
assert("\u{0001F590}\u{0001F3FB}".modifiedBy(modifier2) == "\u{0001F590}\u{0001F3FF}", "Previous modifiers should be stripped.")

// The checks below pin down when variation selectors are kept or dropped.
// Comments name the emoji each sequence renders as.

// Male sign, emoji presentation -> unchanged.
assert("\u{2642}\u{FE0F}".modifiedBy(modifier) == "\u{2642}\u{FE0F}", "Variation selectors should be preserved when emojis are not modified.")
// Male sign, text presentation -> unchanged.
assert("\u{2642}\u{FE0E}".modifiedBy(modifier) == "\u{2642}\u{FE0E}", "Variation selectors should be preserved when emojis are not modified.")
// Hand with fingers splayed, emoji presentation -> light skin tone, no selector.
assert("\u{0001F590}\u{FE0F}".modifiedBy(modifier) == "\u{0001F590}\u{0001F3FB}", "Variation selectors should be stripped when an emoji is modified.")
// Hand with fingers splayed, text presentation (though macOS forces emoji, it
// seems) -> light skin tone, no selector.
assert("\u{0001F590}\u{FE0E}".modifiedBy(modifier) == "\u{0001F590}\u{0001F3FB}", "Variation selectors should be stripped when an emoji is modified.")
// Man detective -> man detective, light skin tone.
// Note that this preserves the final selector i.e. outputs a fully-qualified
// sequence; see the note inside `modifiedBy(_:)`.
assert("\u{0001F575}\u{FE0F}\u{200D}\u{2642}\u{FE0F}".modifiedBy(modifier) == "\u{0001F575}\u{0001F3FB}\u{200D}\u{2642}\u{FE0F}", "Variation selectors should be stripped when an emoji sequence is modified.")

// Check that modifications never decompose into multiple emojis.
assert("\u{0001F590}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F486}\u{200D}\u{2642}".modifiedBy(modifier).visibleCount == 1)
assert("\u{2642}\u{FE0F}".modifiedBy(modifier).visibleCount == 1)
assert("\u{2642}\u{FE0E}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F590}\u{FE0F}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F590}\u{FE0E}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F575}\u{FE0F}\u{200D}\u{2642}\u{FE0F}".modifiedBy(modifier).visibleCount == 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment