Skip to content

Instantly share code, notes, and snippets.

<?php $expected = [[[0x0020],[0x0020]],
[[0x0020,0x0308],[0x0020]],
[[0x0020],[0x000D]],
[[0x0020,0x0308],[0x000D]],
[[0x0020],[0x000A]],
[[0x0020,0x0308],[0x000A]],
[[0x0020],[0x0001]],
[[0x0020,0x0308],[0x0001]],
[[0x0020,0x0300]],
[[0x0020,0x0308,0x0300]],
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
/*
 * is_trail(c): non-zero when c lies in 0x80..0xBF, the value range of a
 * UTF-8 continuation (trailing) byte.
 *
 * The argument is parenthesized so that expressions such as
 * is_trail(b & 0xFF) are evaluated as written; without the parentheses the
 * comparison operators bind tighter than `&` and the test is wrong.
 * The argument is still evaluated twice, so do not pass expressions with
 * side effects (e.g. is_trail(*p++)).
 * NOTE(review): pass the byte as an unsigned value; a plain `char` that is
 * signed on the target holds bytes >= 0x80 as negative numbers, which can
 * never satisfy `> 0x7F`.
 */
#define is_trail(c) ((c) > 0x7F && (c) < 0xC0)
/* Status codes; FAILURE is parenthesized so it expands safely inside larger expressions. */
#define SUCCESS 1
#define FAILURE (-1)
enum {
@masakielastic
masakielastic / breakprop.h
Last active August 29, 2015 14:11
print each grapheme
#include <stdbool.h>
enum {
Any = 2,
CR = 4,
LF = 8,
CONTROL = 16,
EXTEND = 32,
REGIONAL_INDICATOR = 64,
SPACINGMARK = 128,
#include <iostream>
#include <string>
#include <map>
#include <vector>
void print_each_grapheme(std::string);
std::tuple<int, int> utf8_next(std::string, int, int);
bool in_char_class(int, std::vector<std::string>);
int main(void)
# -*- coding: utf-8 -*-
import sys
def utf8_chr(cp):
if 0xFFFF < sys.maxunicode or cp < 0x10000:
return unichr(cp)
cp -= 0x10000
high = cp >> 10 | 0xD800
package main
import (
"unicode/utf8"
)
// main passes a fixed sample string to print_each_grapheme. The sample
// contains the code point U+E0101 (written with a \U escape) between its
// first and second visible characters.
func main() {
	print_each_grapheme("葛\U000E0101飾区")
}
@masakielastic
masakielastic / grapheme_length.js
Last active November 9, 2017 14:22
grapheme_length.js
require('es6-shim');
function in_char_class(cp, classes) {
// http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakProperty.txt
var db = {
'cr': [0xD],
'lf': [0xA],
'control': [0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,0xB,0xC,0xE,0xF,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,0x7F,0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,0xAD,0x600,0x601,0x602,0x603,0x604,0x605,0x61C,0x6DD,0x70F,0x180E,0x200B,0x200E,0x200F,0x2028,0x2029,0x202A,0x202B,0x202C,0x202D,0x202E,0x2060,0x2061,0x2062,0x2063,0x2064,0x2065,0x2066,0x2067,0x2068,0x2069,0x206A,0x206B,0x206C,0x206D,0x206E,0x206F,0xD800,0xD801,0xD802,0xD803,0xD804,0xD805,0xD806,0xD807,0xD808,0xD809,0xD80A,0xD80B,0xD80C,0xD80D,0xD80E,0xD80F,0xD810,0xD811,0xD812,0xD813,0xD814,0xD815,0xD816,0xD817,0xD818,0xD819,0xD81A,0xD81B,0xD81C,0xD81D,0xD81E,0xD81F,0xD820,0xD821,0xD822,0xD823,0xD8
@masakielastic
masakielastic / print_each_grapheme.js
Last active April 19, 2023 18:39
print_each_grapheme.js
require('es6-shim');
/**
 * Describe the character that starts at UTF-16 index `pos` of `str`.
 *
 * @param {string} str - String to inspect.
 * @param {number} pos - Index, in UTF-16 code units, to read from.
 * @returns {{cp: (number|undefined), size: number}} `cp` is the result of
 *   `str.codePointAt(pos)`; `size` is 2 when that code point is above
 *   U+FFFF (it occupies a surrogate pair) and 1 otherwise.
 */
function next_char_info(str, pos) {
  var codePoint = str.codePointAt(pos);
  // Anything beyond the Basic Multilingual Plane takes two code units.
  var unitCount = 1;
  if (codePoint > 0xFFFF) {
    unitCount = 2;
  }
  return {'cp': codePoint, 'size': unitCount};
}
var ANY = 2;
class String
def grapheme_length
length = self.length
grapheme_length = 0
pos = 0
while (pos < length) do
next_length = grapheme_next_length(pos)
pos += next_length
grapheme_length += 1
@masakielastic
masakielastic / print_each_grapheme.php
Last active August 29, 2015 14:10
print_each_grapheme.php
<?php
// Integer constants for character classes. Each value is a distinct power of
// two, so every constant occupies its own bit.
// NOTE(review): the names appear to mirror Unicode Grapheme_Cluster_Break
// property values (CR, LF, Control, Extend, ...), and the one-bit-per-class
// layout suggests they are meant to be combined/tested with bitwise
// operators — confirm against the code that uses them.
define('ANY', 2);
define('CR', 4);
define('LF', 8);
define('CONTROL', 16);
define('EXTEND', 32);
define('REGIONAL_INDICATOR', 64);
define('SPACINGMARK', 128);
define('L', 256);
define('V', 512);