Skip to content

Instantly share code, notes, and snippets.

@masakielastic
Last active August 29, 2015 14:11
Show Gist options
  • Save masakielastic/030431316dd177c46ce3 to your computer and use it in GitHub Desktop.
Save masakielastic/030431316dd177c46ce3 to your computer and use it in GitHub Desktop.
Test cases for extended grapheme clusters
array(3) {
["all cases: "]=>
int(348)
["skipped cases: "]=>
int(54)
["not pass: "]=>
int(17)
}
<?php
// https://gist.github.com/masakielastic/c3341a74aaffa941f518
require 'GraphemeBreakTest.php';
// Driver: for every test case, build the UTF-8 string from its code points and
// compare the number of \X matches with the number of expected entries.
// Cases containing surrogate code points are skipped, since they cannot be
// turned into a UTF-8 string by utf8_chr().
$total = count($input);
$skipped = 0;
$failed = 0;

for ($index = 0; $index < $total; $index++) {
    $codepoints = $input[$index];

    if (contains_surrogate($codepoints)) {
        $skipped++;
        continue;
    }

    $subject = generate_str_from_array($codepoints);
    $wanted = count($expected[$index]);
    $actual = grapheme_length($subject);

    if ($wanted === $actual) {
        continue;
    }

    // Mismatch: dump the offending case and count it as a failure.
    var_dump([
        'input: ' => $codepoints,
        'expected: ' => $expected[$index],
        'counting by \X: ' => $actual,
    ]);
    $failed++;
}

// Summary. Note that 'all cases: ' reports the number of cases that were
// actually run (total minus skipped), and 'not pass: ' the number of failures.
var_dump([
    'all cases: ' => $total - $skipped,
    'skipped cases: ' => $skipped,
    'not pass: ' => $failed,
]);
/**
 * Count how many times PCRE's \X escape matches in a UTF-8 string.
 *
 * @param string $str UTF-8 encoded subject string.
 * @return int|false Number of \X matches, or false if preg_match_all() fails.
 */
function grapheme_length($str)
{
    $clusters = preg_match_all('/\X/u', $str);

    return $clusters;
}
/**
 * Tell whether any value in the list lies in the surrogate range
 * U+D800..U+DFFF (inclusive).
 *
 * @param array $array List of code point values.
 * @return bool True as soon as one surrogate is seen, false otherwise
 *              (including for an empty list).
 */
function contains_surrogate($array)
{
    $found = false;

    foreach ($array as $codepoint) {
        $found = 0xD800 <= $codepoint && $codepoint <= 0xDFFF;
        if ($found) {
            break;
        }
    }

    return $found;
}
/**
 * Build a string by encoding each code point with utf8_chr() and joining
 * the results in order.
 *
 * @param array $array List of code points.
 * @return string Concatenation of the encoded code points ('' for an empty list).
 */
function generate_str_from_array($array)
{
    $pieces = [];

    foreach ($array as $codepoint) {
        $pieces[] = utf8_chr($codepoint);
    }

    return implode('', $pieces);
}
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * The encoding is done directly with bit operations, so the result does not
 * depend on the default_charset ini setting or on HTML entity decoding rules.
 *
 * The script terminates (exit) with a message when $cp is not an integer or
 * is not a Unicode scalar value (negative, a surrogate U+D800..U+DFFF, or
 * greater than U+10FFFF).
 *
 * @param int $cp Code point in the range 0..0x10FFFF, excluding surrogates.
 * @return string One to four bytes of UTF-8.
 */
function utf8_chr($cp)
{
    if (!is_int($cp)) {
        exit("$cp is not integer\n");
    }

    if ($cp < 0 || (0xD7FF < $cp && $cp < 0xE000) || 0x10FFFF < $cp) {
        exit("$cp is out of range\n");
    }

    // 1 byte: 0xxxxxxx
    if ($cp < 0x80) {
        return chr($cp);
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if ($cp < 0x800) {
        return chr(0xC0 | ($cp >> 6))
            . chr(0x80 | ($cp & 0x3F));
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($cp < 0x10000) {
        return chr(0xE0 | ($cp >> 12))
            . chr(0x80 | (($cp >> 6) & 0x3F))
            . chr(0x80 | ($cp & 0x3F));
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($cp >> 18))
        . chr(0x80 | (($cp >> 12) & 0x3F))
        . chr(0x80 | (($cp >> 6) & 0x3F))
        . chr(0x80 | ($cp & 0x3F));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment