Skip to content

Instantly share code, notes, and snippets.

@goooseman
Created November 26, 2020 16:09
Show Gist options
  • Save goooseman/b67c9440b05fcd4f22c5b25c327492df to your computer and use it in GitHub Desktop.
Save goooseman/b67c9440b05fcd4f22c5b25c327492df to your computer and use it in GitHub Desktop.
TypeScript: get the Unicode-aware length of a string (each emoji, including modifier and joiner sequences, counts as 1 character)
import { getUnicodeAwareStringLength } from './strings';
// Spec for getUnicodeAwareStringLength. Emoji are written as \u{...} escapes so
// the expectations do not depend on the encoding this file is saved or served in.
describe('strings -> getUnicodeAwareStringLength', () => {
  it('should work for a regular string', () => {
    expect(getUnicodeAwareStringLength('foo')).toBe(3);
  });

  it('should work for an emoji string', () => {
    // Two U+1F697 AUTOMOBILE characters.
    expect(getUnicodeAwareStringLength('\u{1F697}\u{1F697}')).toBe(2);
  });

  it('should return 1 for a simple emoji', () => {
    // U+1F47E ALIEN MONSTER.
    expect(getUnicodeAwareStringLength('\u{1F47E}')).toBe(1);
  });

  it('should return 1 for an emoji with a skin tone modifier (2 code points)', () => {
    // U+1F64B PERSON RAISING HAND + U+1F3FD medium skin tone modifier.
    expect(getUnicodeAwareStringLength('\u{1F64B}\u{1F3FD}')).toBe(1);
  });

  it('should return 1 for an emoji sequence joined by a zero-width joiner (3 code points)', () => {
    // U+1F9D1 PERSON + U+200D ZWJ + U+1F4BB LAPTOP.
    expect(getUnicodeAwareStringLength('\u{1F9D1}\u{200D}\u{1F4BB}')).toBe(1);
  });

  it('should return 1 for a four-emoji sequence joined by 3 zero-width joiners (7 code points)', () => {
    // MAN + ZWJ + WOMAN + ZWJ + GIRL + ZWJ + BOY.
    expect(
      getUnicodeAwareStringLength('\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}'),
    ).toBe(1);
  });
});
// https://www.mmbyte.com/article/41645.html (Answer 2)

/** U+200D ZERO WIDTH JOINER: glues the code points around it into one glyph. */
const ZERO_WIDTH_JOINER = '\u{200D}';
/** Variation selectors U+FE00-U+FE0F (e.g. the "emoji presentation" selector). */
const REGEXP_EMOJI_VARIATION = /[\ufe00-\ufe0f]/g;
// http://www.unicode.org/reports/tr51/#Diversity
/** Skin tone modifiers U+1F3FB-U+1F3FF, which attach to the preceding emoji. */
const REGEXP_SKIN_TONE_MODIFIER = /[\u{1F3FB}-\u{1F3FF}]/gu;

/**
 * Counts the characters of a string the way a user perceives emoji: every
 * emoji counts as 1, including emoji carrying a variation selector or a skin
 * tone modifier and emoji sequences glued together with zero-width joiners.
 *
 * Variation selectors and skin tone modifiers are dropped, then the remaining
 * code points are counted, with each joiner and the code point it attaches
 * folded into the preceding glyph. A joiner with nothing before it is ignored.
 *
 * This is not full grapheme-cluster segmentation: other combining marks,
 * regional-indicator flag pairs and keycap sequences are still counted per
 * code point.
 *
 * @param str - The string to measure.
 * @returns The number of perceived characters; always a non-negative integer.
 */
export const getUnicodeAwareStringLength = (str: string): number => {
  const stripped = str
    .replace(REGEXP_EMOJI_VARIATION, '')
    .replace(REGEXP_SKIN_TONE_MODIFIER, '');

  let count = 0;
  let joinNext = false;
  // String iteration yields whole code points, so surrogate pairs stay intact.
  for (const codePoint of stripped) {
    if (codePoint === ZERO_WIDTH_JOINER) {
      // Only join when there is a preceding glyph to attach to.
      joinNext = count > 0;
      continue;
    }
    if (joinNext) {
      // This code point belongs to the glyph that was already counted.
      joinNext = false;
      continue;
    }
    count += 1;
  }
  return count;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment