This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import sys | |
def utf8_chr(cp): | |
if 0xFFFF < sys.maxunicode or cp < 0x10000: | |
return unichr(cp) | |
cp -= 0x10000 | |
high = cp >> 10 | 0xD800 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <map> | |
#include <vector> | |
void print_each_grapheme(std::string); | |
std::tuple<int, int> utf8_next(std::string, int, int); | |
bool in_char_class(int, std::vector<std::string>); | |
int main(void) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdbool.h> | |
enum { | |
Any = 2, | |
CR = 4, | |
LF = 8, | |
CONTROL = 16, | |
EXTEND = 32, | |
REGIONAL_INDICATOR = 64, | |
SPACINGMARK = 128, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <string.h> | |
#define is_trail(c) (c > 0x7F && c < 0xC0) | |
#define SUCCESS 1 | |
#define FAILURE -1 | |
enum { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php $expected = [[[0x0020],[0x0020]], | |
[[0x0020,0x0308],[0x0020]], | |
[[0x0020],[0x000D]], | |
[[0x0020,0x0308],[0x000D]], | |
[[0x0020],[0x000A]], | |
[[0x0020,0x0308],[0x000A]], | |
[[0x0020],[0x0001]], | |
[[0x0020,0x0308],[0x0001]], | |
[[0x0020,0x0300]], | |
[[0x0020,0x0308,0x0300]], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
array(3) { | |
["all cases: "]=> | |
int(348) | |
["skipped cases: "]=> | |
int(54) | |
["not pass: "]=> | |
int(17) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var benchmark = function(callback) { | |
var max = 1000000; | |
var start = Date.now(); | |
for (var i = 0; i < max; ++i) { | |
callback(); | |
} | |
var end = Date.now(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
$array = [ | |
0x300,0x301,0x302,0x303,0x304,0x305,0x306,0x307,0x308,0x309,0x30A,0x30B,0x30C,0x30D,0x30E,0x30F,0x310,0x311,0x312,0x313,0x314,0x315,0x316,0x317,0x318,0x319,0x31A,0x31B,0x31C,0x31D,0x31E,0x31F,0x320,0x321,0x322,0x323,0x324,0x325,0x326,0x327,0x328,0x329,0x32A,0x32B,0x32C,0x32D,0x32E,0x32F,0x330,0x331,0x332,0x333,0x334,0x335,0x336,0x337,0x338,0x339,0x33A,0x33B,0x33C,0x33D,0x33E,0x33F,0x340,0x341,0x342,0x343,0x344,0x345,0x346,0x347,0x348,0x349,0x34A,0x34B,0x34C,0x34D,0x34E,0x34F,0x350,0x351,0x352,0x353,0x354,0x355,0x356,0x357,0x358,0x359,0x35A,0x35B,0x35C,0x35D,0x35E,0x35F,0x360,0x361,0x362,0x363,0x364,0x365,0x366,0x367,0x368,0x369,0x36A,0x36B,0x36C,0x36D,0x36E,0x36F,0x483,0x484,0x485,0x486,0x487,0x488,0x489,0x591,0x592,0x593,0x594,0x595,0x596,0x597,0x598,0x599,0x59A,0x59B,0x59C,0x59D,0x59E,0x59F,0x5A0,0x5A1,0x5A2,0x5A3,0x5A4,0x5A5,0x5A6,0x5A7,0x5A8,0x5A9,0x5AA,0x5AB,0x5AC,0x5AD,0x5AE,0x5AF,0x5B0,0x5B1,0x5B2,0x5B3,0x5B4,0x5B5,0x5B6,0x5B7,0x5B8,0x5B9,0x5BA,0x5BB,0x5BC,0x5BD,0x5BF,0x5C1,0x5C2,0x |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$str = "葛\xF3\xA0\x84\x81飾区"; | |
var_dump( | |
3 === grapheme_length($str) | |
); | |
function grapheme_length($str) | |
{ | |
$length = mb_strlen($str, 'UTF-8'); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
$data = [0x300,0x301,0x302,0x303,0x304,0x305,0x306,0x307,0x308,0x309,0x30A,0x30B,0x30C,0x30D,0x30E,0x30F,0x310,0x311,0x312,0x313,0x314,0x315,0x316,0x317,0x318,0x319,0x31A,0x31B,0x31C,0x31D,0x31E,0x31F,0x320,0x321,0x322,0x323,0x324,0x325,0x326,0x327,0x328,0x329,0x32A,0x32B,0x32C,0x32D,0x32E,0x32F,0x330,0x331,0x332,0x333,0x334,0x335,0x336,0x337,0x338,0x339,0x33A,0x33B,0x33C,0x33D,0x33E,0x33F,0x340,0x341,0x342,0x343,0x344,0x345,0x346,0x347,0x348,0x349,0x34A,0x34B,0x34C,0x34D,0x34E,0x34F,0x350,0x351,0x352,0x353,0x354,0x355,0x356,0x357,0x358,0x359,0x35A,0x35B,0x35C,0x35D,0x35E,0x35F,0x360,0x361,0x362,0x363,0x364,0x365,0x366,0x367,0x368,0x369,0x36A,0x36B,0x36C,0x36D,0x36E,0x36F,0x483,0x484,0x485,0x486,0x487,0x488,0x489,0x591,0x592,0x593,0x594,0x595,0x596,0x597,0x598,0x599,0x59A,0x59B,0x59C,0x59D,0x59E,0x59F,0x5A0,0x5A1,0x5A2,0x5A3,0x5A4,0x5A5,0x5A6,0x5A7,0x5A8,0x5A9,0x5AA,0x5AB,0x5AC,0x5AD,0x5AE,0x5AF,0x5B0,0x5B1,0x5B2,0x5B3,0x5B4,0x5B5,0x5B6,0x5B7,0x5B8,0x5B9,0x5BA,0x5BB,0x5BC,0x5BD,0x5BF,0x5C1,0x5C2,0x5C4, |