Skip to content

Instantly share code, notes, and snippets.

@wearhere
Last active October 4, 2023 17:27
Show Gist options
  • Save wearhere/97528e81da82387d5fa4aa788856cdc1 to your computer and use it in GitHub Desktop.
Save wearhere/97528e81da82387d5fa4aa788856cdc1 to your computer and use it in GitHub Desktop.
Apply skin tone modifiers to emojis per the Unicode spec.
import Foundation
import UIKit
import CoreText
// You can run this by copying this file into a Swift playground.
extension String {
    /// The number of glyphs used to render this string.
    ///
    /// When the string should behave like a single emoji character (even if it
    /// is stored as a sequence of several characters), this is `1`.
    ///
    /// https://crunchybagel.com/using-emoji-skin-tone-modifiers-in-swift/
    /// suggests enumerating composed character sequences instead, but that
    /// approach reports `1` even when the sequence won't actually be rendered
    /// as a single character, so the string is typeset with Core Text and the
    /// resulting glyphs are counted.
    var visibleCount: Int {
        let attributed = NSAttributedString(string: self)
        let line = CTLineCreateWithAttributedString(attributed)
        return CTLineGetGlyphCount(line)
    }

    /// Returns a copy of this string in which `modifier` has been inserted
    /// after every emoji modifier base.
    ///
    /// If the string contains several modifier bases, the same modifier is
    /// applied to all of them, e.g. U+1F46D modified by U+1F3FB becomes
    /// U+1F46D U+1F3FB. The spec
    /// https://unicode.org/reports/tr51/#multiperson_skintones and the
    /// implementation (at least on macOS 10.15.4) allow each base to take a
    /// different modifier, e.g.
    /// U+1F469 U+1F3FB U+200D U+1F91D U+200D U+1F469 U+1F3FF, but this function
    /// does not support that yet. When support is added, it should enforce
    /// that modifying any base requires modifying all bases (in some way or
    /// another), per the spec.
    ///
    /// No attempt is made to assess RGI status; some modified multi-person
    /// groupings in particular may not be rendered as of Emoji 13.0
    /// https://unicode.org/reports/tr51/#multiperson_skintones . Checking
    /// `visibleCount` can guard against that, although the platform may ignore
    /// unsupported modifiers anyway: "\u{0001F46F}\u{0001F3FB}\u{200D}\u{2640}"
    /// and "\u{0001F46F}\u{200D}\u{2640}" render identically on macOS 10.15.4.
    ///
    /// - Parameter modifier: The skin tone modifier to apply. Asserted (in
    ///   debug builds) to be an emoji modifier.
    /// - Returns: The string with the modifier applied to every modifier base,
    ///   and with any variation selectors or older modifiers that directly
    ///   followed a modifier removed.
    func modifiedBy(_ modifier: Unicode.Scalar) -> String {
        assert(modifier.properties.isEmojiModifier, "Scalar must be emoji modifier.")

        var output = String.UnicodeScalarView()
        for current in unicodeScalars {
            let traits = current.properties

            // Drop a variation selector that directly follows a modifier:
            // a) it would break ZWJ sequences, at least on macOS 10.15.4, and
            // b) the spec says "the emoji modifier automatically implies the
            //    emoji presentation" style
            //    https://unicode.org/reports/tr51/#Diversity and so recommends
            //    against including emoji presentation selectors. It is silent
            //    on text presentation selectors, but since one would break the
            //    emoji modifier, those are removed as well.
            //
            // Drop a modifier that directly follows a modifier too, because
            // the earlier one has superseded it.
            let followsModifier = output.last?.properties.isEmojiModifier ?? false
            let isSupersededByModifier = traits.isVariationSelector || traits.isEmojiModifier
            if followsModifier && isSupersededByModifier {
                continue
            }

            // An emoji presentation selector that follows an unmodified
            // element may not be strictly necessary once an earlier (not
            // immediately preceding) emoji has been modified, depending on the
            // preceding emoji's presentation style. It is kept regardless, to
            // support modifying sequences generated by input devices, which
            // the spec recommends only generate fully-qualified sequences:
            // https://unicode.org/reports/tr51/#Emoji_Variation_Selector_Notes
            //
            // A *text* presentation selector in that position *must* be kept,
            // because it breaks a ZWJ sequence as described at the same link.
            output.append(current)

            // Emoji modifiers immediately follow their base character, both
            // for single emojis https://unicode.org/reports/tr51/#Diversity
            // and inside ZWJ sequences
            // https://unicode.org/reports/tr51/#multiperson_skintones .
            if traits.isEmojiModifierBase {
                output.append(modifier)
            }
        }
        return String(output)
    }
}
// Playground checks for `String.modifiedBy(_:)` and `String.visibleCount`.
//
// Every emoji is written as a Unicode escape sequence so the checks do not
// depend on how this file is encoded, and so that it is easy to see, and
// guarantee, when a string does / doesn't contain variation selectors.
// Comments describe what the strings actually are.

// Light skin tone modifier (U+1F3FB).
let modifier = Unicode.Scalar("\u{0001F3FB}")
// Dark skin tone modifier (U+1F3FF).
let modifier2 = Unicode.Scalar("\u{0001F3FF}")

// Check that we're correctly applying the spec https://unicode.org/reports/tr51/#Diversity.
// Party popper (U+1F389) is not a modifier base, so it is left unchanged.
assert("\u{0001F389}".modifiedBy(modifier) == "\u{0001F389}", "Emojis that do not accept modifiers should not be modified.")
// Raised hand with fingers splayed (U+1F590) -> same hand with light skin tone.
assert("\u{0001F590}".modifiedBy(modifier) == "\u{0001F590}\u{0001F3FB}", "Emojis should be modified.")
// Man getting massage (U+1F486 ZWJ U+2642) -> the modifier follows the base, before the ZWJ.
assert("\u{0001F486}\u{200D}\u{2642}".modifiedBy(modifier) == "\u{0001F486}\u{0001F3FB}\u{200D}\u{2642}", "Emoji sequences should be modified.")
// Two women holding hands (U+1F46D) -> same grouping with light skin tone.
assert("\u{0001F46D}".modifiedBy(modifier) == "\u{0001F46D}\u{0001F3FB}", "Multi-person groupings should be modified.")
// Hand with light skin tone -> hand with dark skin tone.
assert("\u{0001F590}\u{0001F3FB}".modifiedBy(modifier2) == "\u{0001F590}\u{0001F3FF}", "Previous modifiers should be overridden.")
assert("\u{0001F590}\u{0001F3FB}".modifiedBy(modifier2) == "\u{0001F590}\u{0001F3FF}", "Previous modifiers should be stripped.")

// Male sign with emoji presentation selector (U+2642 U+FE0F) -> unchanged.
assert("\u{2642}\u{FE0F}".modifiedBy(modifier) == "\u{2642}\u{FE0F}", "Variation selectors should be preserved when emojis are not modified.")
// Male sign with text presentation selector (U+2642 U+FE0E) -> unchanged.
assert("\u{2642}\u{FE0E}".modifiedBy(modifier) == "\u{2642}\u{FE0E}", "Variation selectors should be preserved when emojis are not modified.")
// Hand with emoji presentation selector (U+1F590 U+FE0F) -> hand with light skin tone, selector removed.
assert("\u{0001F590}\u{FE0F}".modifiedBy(modifier) == "\u{0001F590}\u{0001F3FB}", "Variation selectors should be stripped when an emoji is modified.")
// Hand with text presentation selector (U+1F590 U+FE0E; should be text
// presentation, though macOS forces emoji it seems) -> hand with light skin
// tone, selector removed.
assert("\u{0001F590}\u{FE0E}".modifiedBy(modifier) == "\u{0001F590}\u{0001F3FB}", "Variation selectors should be stripped when an emoji is modified.")
// Man detective (U+1F575 U+FE0F ZWJ U+2642 U+FE0F) -> light skin tone man detective.
// Note that this preserves the final selector i.e. outputs a fully-qualified
// sequence: only a selector that directly follows a modifier is removed.
assert("\u{0001F575}\u{FE0F}\u{200D}\u{2642}\u{FE0F}".modifiedBy(modifier) == "\u{0001F575}\u{0001F3FB}\u{200D}\u{2642}\u{FE0F}", "Variation selectors should be stripped when an emoji sequence is modified.")

// Check that modifications never decompose into multiple emojis.
assert("\u{0001F590}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F486}\u{200D}\u{2642}".modifiedBy(modifier).visibleCount == 1)
assert("\u{2642}\u{FE0F}".modifiedBy(modifier).visibleCount == 1)
assert("\u{2642}\u{FE0E}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F590}\u{FE0F}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F590}\u{FE0E}".modifiedBy(modifier).visibleCount == 1)
assert("\u{0001F575}\u{FE0F}\u{200D}\u{2642}\u{FE0F}".modifiedBy(modifier).visibleCount == 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment