Last active
August 29, 2015 14:21
-
-
Save tanihiro/21cd8960c0b719c154f3 to your computer and use it in GitHub Desktop.
スマホ絵文字の正規表現文字列を作成するscript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Builds a regular-expression source string that matches smartphone emoji.
 *
 * The code points are scraped from the table published at
 * http://apps.timwhitlock.info/emoji/tables/unicode, so this function is
 * meant to be run on that page with jQuery available as `$`. For every `<tr>`
 * it reads the link text inside the first `.code` cell and extracts each
 * "U+XXXX" code point found there.
 *
 * The returned string is an alternation of, in this order:
 *   1. one group per multi-code-point row, e.g. `(\u0023\u20E3)`; code points
 *      above U+FFFF are written as a full surrogate pair so that the
 *      sequence only matches as a whole
 *   2. one character class holding every single BMP code point
 *   3. one group per high surrogate, `(\uHHHH[<low surrogates>])`, covering
 *      the rows that consist of a single code point above U+FFFF
 * Sequences come first because alternation is ordered and the longest
 * candidates have to be tried before their possible prefixes. Empty groups
 * are left out, so the result never contains an empty alternative (which
 * would match the empty string everywhere).
 *
 * @returns {string} Regular-expression source using `\uXXXX` escapes, or an
 *     empty string when the page yielded no code points.
 */
function createEmojiRegExpString() {
  // "+" followed by 4 to 6 hex digits; hex-only so that neighbouring text
  // (spaces, the next "U") can never leak into the captured value.
  var CODE_POINT_PATTERN = /\+[0-9A-Fa-f]{4,6}/g;
  var MAX_BMP = 0xFFFF;
  var MAX_CODE_POINT = 0x10FFFF;

  var sequences = [];        // '(…)' groups for multi-code-point rows
  var bmpEscapes = [];       // '\uXXXX' escapes for single BMP code points
  var lowsByHigh = {};       // '\uHHHH' (high surrogate) -> ['\uLLLL', …]

  // Formats one UTF-16 code unit as a zero-padded, upper-case \uXXXX escape.
  function escapeCodeUnit(unit) {
    var hex = unit.toString(16).toUpperCase();
    return '\\u' + ('0000' + hex).slice(-4);
  }

  // Splits a code point above U+FFFF into its [high, low] surrogate units.
  function toSurrogatePair(codePoint) {
    var offset = codePoint - 0x10000;
    return [0xD800 | (offset >> 10), 0xDC00 | (offset & 0x3FF)];
  }

  // Escapes a whole code point: one escape for BMP, two for astral.
  function escapeCodePoint(codePoint) {
    if (codePoint <= MAX_BMP) {
      return escapeCodeUnit(codePoint);
    }
    var pair = toSurrogatePair(codePoint);
    return escapeCodeUnit(pair[0]) + escapeCodeUnit(pair[1]);
  }

  // Collect the code points listed in each table row.
  $('tr').each(function () {
    var label = $(this).find('.code').eq(0).find('a').text();
    var matches = label.match(CODE_POINT_PATTERN);
    if (!matches) {
      return; // header row or a row without a code cell
    }

    var codePoints = matches.map(function (match) {
      return parseInt(match.slice(1), 16); // drop the leading "+"
    });
    var hasInvalid = codePoints.some(function (codePoint) {
      return codePoint > MAX_CODE_POINT;
    });
    if (hasInvalid) {
      return; // not a Unicode code point; surrogate math would be garbage
    }

    if (codePoints.length > 1) {
      sequences.push('(' + codePoints.map(escapeCodePoint).join('') + ')');
      return;
    }

    var codePoint = codePoints[0];
    if (codePoint <= MAX_BMP) {
      bmpEscapes.push(escapeCodeUnit(codePoint));
      return;
    }

    // Single astral code point: bucket the low surrogate under its high
    // surrogate so each high surrogate appears only once in the output.
    var pair = toSurrogatePair(codePoint);
    var highEscape = escapeCodeUnit(pair[0]);
    if (!lowsByHigh[highEscape]) {
      lowsByHigh[highEscape] = [];
    }
    lowsByHigh[highEscape].push(escapeCodeUnit(pair[1]));
  });

  // Assemble the alternation, skipping groups that ended up empty.
  var alternatives = sequences.slice();
  if (bmpEscapes.length > 0) {
    alternatives.push('[' + bmpEscapes.join('') + ']');
  }
  Object.keys(lowsByHigh).forEach(function (highEscape) {
    alternatives.push('(' + highEscape + '[' + lowsByHigh[highEscape].join('') + '])');
  });
  return alternatives.join('|');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment