Created
May 26, 2014 06:43
-
-
Save mathiasbynens/bbe7f870208abcfec860 to your computer and use it in GitHub Desktop.
A regular expression to match lone surrogates only
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var assert = require('assert');
// The goal is to match lone surrogates, i.e. any high surrogates
// (`[\uD800-\uDBFF]`) that are not directly followed by a low surrogate
// (`[\uDC00-\uDFFF]`), and any low surrogates (`[\uDC00-\uDFFF]`) that are not
// directly preceded by a high surrogate (`[\uD800-\uDBFF]`).
//
// The pattern has two alternatives:
//   1. A high surrogate with a negative lookahead for a low surrogate.
//   2. A low surrogate preceded by either the start of the input (`^`) or a
//      code unit that is not a high surrogate. This stands in for a
//      lookbehind, so for this alternative the matched text also includes
//      that preceding code unit (e.g. `'o\uDC00'` for `'foo\uDC00bar'`).
var regex = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/;

// Lone low surrogate (lower bound of the range): middle, start, end.
assert.strictEqual(regex.test('foo\uDC00bar'), true);
assert.strictEqual(regex.test('\uDC00bar'), true);
assert.strictEqual(regex.test('foo\uDC00'), true);

// Lone high surrogate (upper bound of the range): middle, start, end.
assert.strictEqual(regex.test('foo\uDBFFbar'), true);
assert.strictEqual(regex.test('\uDBFFbar'), true);
assert.strictEqual(regex.test('foo\uDBFF'), true);

// Lone high surrogate (lower bound of the range): middle, start, end.
assert.strictEqual(regex.test('foo\uD800bar'), true);
assert.strictEqual(regex.test('\uD800bar'), true);
assert.strictEqual(regex.test('foo\uD800'), true);

// Lone low surrogate (upper bound of the range): middle, start, end.
assert.strictEqual(regex.test('foo\uDFFFbar'), true);
assert.strictEqual(regex.test('\uDFFFbar'), true);
assert.strictEqual(regex.test('foo\uDFFF'), true);

// A well-formed surrogate pair must not match.
assert.strictEqual(regex.test('foo\uD834\uDF06bar'), false); // astral symbol
assert.strictEqual(regex.test('\uD834\uDF06bar'), false); // astral symbol
assert.strictEqual(regex.test('foo\uD834\uDF06'), false); // astral symbol

console.log('If you can see this, the implementation is correct.');
Incredibly helpful, thank you.
Thank you!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
As used in regenerate.