This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php $expected = [[[0x0020],[0x0020]], | |
[[0x0020,0x0308],[0x0020]], | |
[[0x0020],[0x000D]], | |
[[0x0020,0x0308],[0x000D]], | |
[[0x0020],[0x000A]], | |
[[0x0020,0x0308],[0x000A]], | |
[[0x0020],[0x0001]], | |
[[0x0020,0x0308],[0x0001]], | |
[[0x0020,0x0300]], | |
[[0x0020,0x0308,0x0300]], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <string.h> | |
/*
 * is_trail(c): nonzero when c lies in 0x80..0xBF inclusive, the value range
 * of UTF-8 continuation (trail) bytes.
 *
 * The argument is parenthesized so that expressions such as is_trail(a | b)
 * are compared as a whole instead of being split by operator precedence.
 * The argument is evaluated twice, so do not pass expressions with side
 * effects (e.g. is_trail(*p++)).
 *
 * NOTE(review): if a caller passes a plain `char` on a platform where char is
 * signed, bytes >= 0x80 are negative and will never match -- confirm callers
 * pass unsigned values.
 */
#define is_trail(c) ((c) > 0x7F && (c) < 0xC0)
/*
 * Integer status codes (presumably returned by functions later in this
 * file -- their users are outside the visible part).
 * The negative literal is parenthesized so the macro always expands to a
 * single primary expression regardless of the surrounding operators.
 */
#define SUCCESS 1
#define FAILURE (-1)
enum { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdbool.h> | |
enum { | |
Any = 2, | |
CR = 4, | |
LF = 8, | |
CONTROL = 16, | |
EXTEND = 32, | |
REGIONAL_INDICATOR = 64, | |
SPACINGMARK = 128, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <map> | |
#include <vector> | |
void print_each_grapheme(std::string); | |
std::tuple<int, int> utf8_next(std::string, int, int); | |
bool in_char_class(int, std::vector<std::string>); | |
int main(void) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import sys | |
def utf8_chr(cp): | |
if 0xFFFF < sys.maxunicode or cp < 0x10000: | |
return unichr(cp) | |
cp -= 0x10000 | |
high = cp >> 10 | 0xD800 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"unicode/utf8" | |
) | |
func main() { | |
str := "葛\U000E0101飾区" | |
print_each_grapheme(str) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require('es6-shim'); | |
function in_char_class(cp, classes) { | |
// http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakProperty.txt | |
var db = { | |
'cr': [0xD], | |
'lf': [0xA], | |
'control': [0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,0xB,0xC,0xE,0xF,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,0x7F,0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,0xAD,0x600,0x601,0x602,0x603,0x604,0x605,0x61C,0x6DD,0x70F,0x180E,0x200B,0x200E,0x200F,0x2028,0x2029,0x202A,0x202B,0x202C,0x202D,0x202E,0x2060,0x2061,0x2062,0x2063,0x2064,0x2065,0x2066,0x2067,0x2068,0x2069,0x206A,0x206B,0x206C,0x206D,0x206E,0x206F,0xD800,0xD801,0xD802,0xD803,0xD804,0xD805,0xD806,0xD807,0xD808,0xD809,0xD80A,0xD80B,0xD80C,0xD80D,0xD80E,0xD80F,0xD810,0xD811,0xD812,0xD813,0xD814,0xD815,0xD816,0xD817,0xD818,0xD819,0xD81A,0xD81B,0xD81C,0xD81D,0xD81E,0xD81F,0xD820,0xD821,0xD822,0xD823,0xD8 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require('es6-shim'); | |
/**
 * Read the code point that starts at UTF-16 index `pos` of `str`.
 *
 * @param {string} str - String to read from.
 * @param {number} pos - Index into `str`, counted in UTF-16 code units.
 * @returns {{cp: (number|undefined), size: number}} `cp` is the value of
 *   `str.codePointAt(pos)`; `size` is how many UTF-16 code units it occupies
 *   (2 when `cp` is above 0xFFFF, otherwise 1). When `pos` is out of range,
 *   `codePointAt` yields `undefined` and `size` is 1.
 */
function next_char_info(str, pos) {
  var cp = str.codePointAt(pos);
  // Code points above 0xFFFF are stored as a surrogate pair (two code units).
  var size = cp > 0xFFFF ? 2 : 1;
  return {'cp': cp, 'size': size};
}
var ANY = 2; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class String | |
def grapheme_length | |
length = self.length | |
grapheme_length = 0 | |
pos = 0 | |
while (pos < length) do | |
next_length = grapheme_next_length(pos) | |
pos += next_length | |
grapheme_length += 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
define('ANY', 2); | |
define('CR', 4); | |
define('LF', 8); | |
define('CONTROL', 16); | |
define('EXTEND', 32); | |
define('REGIONAL_INDICATOR', 64); | |
define('SPACINGMARK', 128); | |
define('L', 256); | |
define('V', 512); |