Skip to content

Instantly share code, notes, and snippets.

# -*- coding: utf-8 -*-
import sys
def utf8_chr(cp):
if 0xFFFF < sys.maxunicode or cp < 0x10000:
return unichr(cp)
cp -= 0x10000
high = cp >> 10 | 0xD800
#include <iostream>
#include <string>
#include <map>
#include <vector>
void print_each_grapheme(std::string);
std::tuple<int, int> utf8_next(std::string, int, int);
bool in_char_class(int, std::vector<std::string>);
int main(void)
@masakielastic
masakielastic / breakprop.h
Last active August 29, 2015 14:11
print each grapheme
#include <stdbool.h>
enum {
Any = 2,
CR = 4,
LF = 8,
CONTROL = 16,
EXTEND = 32,
REGIONAL_INDICATOR = 64,
SPACINGMARK = 128,
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#define is_trail(c) (c > 0x7F && c < 0xC0)
#define SUCCESS 1
#define FAILURE -1
enum {
<?php $expected = [[[0x0020],[0x0020]],
[[0x0020,0x0308],[0x0020]],
[[0x0020],[0x000D]],
[[0x0020,0x0308],[0x000D]],
[[0x0020],[0x000A]],
[[0x0020,0x0308],[0x000A]],
[[0x0020],[0x0001]],
[[0x0020,0x0308],[0x0001]],
[[0x0020,0x0300]],
[[0x0020,0x0308,0x0300]],
@masakielastic
masakielastic / ret.php
Last active August 29, 2015 14:11
Test cases for extended grapheme cluster
array(3) {
["all cases: "]=>
int(348)
["skipped cases: "]=>
int(54)
["not pass: "]=>
int(17)
}
var benchmark = function(callback) {
var max = 1000000;
var start = Date.now();
for (var i = 0; i < max; ++i) {
callback();
}
var end = Date.now();
<?php
$array = [
0x300,0x301,0x302,0x303,0x304,0x305,0x306,0x307,0x308,0x309,0x30A,0x30B,0x30C,0x30D,0x30E,0x30F,0x310,0x311,0x312,0x313,0x314,0x315,0x316,0x317,0x318,0x319,0x31A,0x31B,0x31C,0x31D,0x31E,0x31F,0x320,0x321,0x322,0x323,0x324,0x325,0x326,0x327,0x328,0x329,0x32A,0x32B,0x32C,0x32D,0x32E,0x32F,0x330,0x331,0x332,0x333,0x334,0x335,0x336,0x337,0x338,0x339,0x33A,0x33B,0x33C,0x33D,0x33E,0x33F,0x340,0x341,0x342,0x343,0x344,0x345,0x346,0x347,0x348,0x349,0x34A,0x34B,0x34C,0x34D,0x34E,0x34F,0x350,0x351,0x352,0x353,0x354,0x355,0x356,0x357,0x358,0x359,0x35A,0x35B,0x35C,0x35D,0x35E,0x35F,0x360,0x361,0x362,0x363,0x364,0x365,0x366,0x367,0x368,0x369,0x36A,0x36B,0x36C,0x36D,0x36E,0x36F,0x483,0x484,0x485,0x486,0x487,0x488,0x489,0x591,0x592,0x593,0x594,0x595,0x596,0x597,0x598,0x599,0x59A,0x59B,0x59C,0x59D,0x59E,0x59F,0x5A0,0x5A1,0x5A2,0x5A3,0x5A4,0x5A5,0x5A6,0x5A7,0x5A8,0x5A9,0x5AA,0x5AB,0x5AC,0x5AD,0x5AE,0x5AF,0x5B0,0x5B1,0x5B2,0x5B3,0x5B4,0x5B5,0x5B6,0x5B7,0x5B8,0x5B9,0x5BA,0x5BB,0x5BC,0x5BD,0x5BF,0x5C1,0x5C2,0x
$str = "\xF3\xA0\x84\x81飾区";
var_dump(
3 === grapheme_length($str)
);
function grapheme_length($str)
{
$length = mb_strlen($str, 'UTF-8');
<?php
$data = [0x300,0x301,0x302,0x303,0x304,0x305,0x306,0x307,0x308,0x309,0x30A,0x30B,0x30C,0x30D,0x30E,0x30F,0x310,0x311,0x312,0x313,0x314,0x315,0x316,0x317,0x318,0x319,0x31A,0x31B,0x31C,0x31D,0x31E,0x31F,0x320,0x321,0x322,0x323,0x324,0x325,0x326,0x327,0x328,0x329,0x32A,0x32B,0x32C,0x32D,0x32E,0x32F,0x330,0x331,0x332,0x333,0x334,0x335,0x336,0x337,0x338,0x339,0x33A,0x33B,0x33C,0x33D,0x33E,0x33F,0x340,0x341,0x342,0x343,0x344,0x345,0x346,0x347,0x348,0x349,0x34A,0x34B,0x34C,0x34D,0x34E,0x34F,0x350,0x351,0x352,0x353,0x354,0x355,0x356,0x357,0x358,0x359,0x35A,0x35B,0x35C,0x35D,0x35E,0x35F,0x360,0x361,0x362,0x363,0x364,0x365,0x366,0x367,0x368,0x369,0x36A,0x36B,0x36C,0x36D,0x36E,0x36F,0x483,0x484,0x485,0x486,0x487,0x488,0x489,0x591,0x592,0x593,0x594,0x595,0x596,0x597,0x598,0x599,0x59A,0x59B,0x59C,0x59D,0x59E,0x59F,0x5A0,0x5A1,0x5A2,0x5A3,0x5A4,0x5A5,0x5A6,0x5A7,0x5A8,0x5A9,0x5AA,0x5AB,0x5AC,0x5AD,0x5AE,0x5AF,0x5B0,0x5B1,0x5B2,0x5B3,0x5B4,0x5B5,0x5B6,0x5B7,0x5B8,0x5B9,0x5BA,0x5BB,0x5BC,0x5BD,0x5BF,0x5C1,0x5C2,0x5C4,