Skip to content

Instantly share code, notes, and snippets.

@masakielastic
Last active August 29, 2015 14:11
Show Gist options
  • Save masakielastic/c3341a74aaffa941f518 to your computer and use it in GitHub Desktop.
Save masakielastic/c3341a74aaffa941f518 to your computer and use it in GitHub Desktop.
<?php
/**
 * Generates GraphemeBreakTest.php, a PHP fixture file, from the Unicode
 * grapheme cluster break test data.
 *
 * Input (expected in the current working directory):
 *   http://www.unicode.org/Public/7.0.0/ucd/auxiliary/GraphemeBreakTest.txt
 *
 * Each matching input line has its content between the outermost "÷" marks
 * split on "÷" into groups, and each group split on "×" into hexadecimal
 * code point strings. One row is emitted per line, e.g.
 *   "÷ 0020 × 0308 ÷ 0020 ÷ # ..."  =>  [[0x0020,0x0308],[0x0020]],
 *
 * The generated file also carries a short snippet (the nowdoc below) that
 * flattens every row of $expected into $input.
 *
 * Throws RuntimeException when the output file cannot be written
 * (SplFileObject itself throws when the input file cannot be opened).
 */
$file = new SplFileObject('GraphemeBreakTest.txt');
$data = '<?php $expected = [';

foreach ($file as $line) {
    // Lines that do not match this shape are skipped.
    if (preg_match('/÷\s(.+)\s÷\s+\#/u', $line, $matches) !== 1) {
        continue;
    }

    // Render every "÷"-separated group as a bracketed list of 0x-prefixed literals.
    $groups = array_map(function ($group) {
        $literals = array_map(function ($codePoint) {
            return '0x'.$codePoint;
        }, preg_split('/\s×\s/u', $group));

        return '['.implode(',', $literals).']';
    }, preg_split('/\s÷\s/u', $matches[1]));

    $data .= '['.implode(',', $groups).'],'.PHP_EOL;
}

// Drop the reference so the input file handle is released before writing.
$file = null;

$data .= '];'.PHP_EOL;
// Code appended verbatim to the generated file; it must stay at column 0.
$data .= <<<'EOD'
$input = [];
foreach ($expected as $key => $value) {
$it = new RecursiveIteratorIterator(new RecursiveArrayIterator($value));
foreach($it as $v) {
$input[$key][] = $v;
}
}
EOD;

// file_put_contents() returns false on failure; do not let that pass silently.
if (file_put_contents('GraphemeBreakTest.php', $data) === false) {
    throw new RuntimeException('Unable to write GraphemeBreakTest.php');
}
<?php
// Expected grapheme cluster segmentations, generated from the Unicode 7.0.0
// GraphemeBreakTest.txt data file.
//
// Each top-level entry is one test case. A test case is a list of groups, and
// each group is a list of code points (integer literals in hex). Code points
// inside one group were joined by "×" in the source line; separate groups
// were divided by "÷".
$expected = [[[0x0020],[0x0020]],
[[0x0020,0x0308],[0x0020]],
[[0x0020],[0x000D]],
[[0x0020,0x0308],[0x000D]],
[[0x0020],[0x000A]],
[[0x0020,0x0308],[0x000A]],
[[0x0020],[0x0001]],
[[0x0020,0x0308],[0x0001]],
[[0x0020,0x0300]],
[[0x0020,0x0308,0x0300]],
[[0x0020,0x0903]],
[[0x0020,0x0308,0x0903]],
[[0x0020],[0x1100]],
[[0x0020,0x0308],[0x1100]],
[[0x0020],[0x1160]],
[[0x0020,0x0308],[0x1160]],
[[0x0020],[0x11A8]],
[[0x0020,0x0308],[0x11A8]],
[[0x0020],[0xAC00]],
[[0x0020,0x0308],[0xAC00]],
[[0x0020],[0xAC01]],
[[0x0020,0x0308],[0xAC01]],
[[0x0020],[0x1F1E6]],
[[0x0020,0x0308],[0x1F1E6]],
[[0x0020],[0x0378]],
[[0x0020,0x0308],[0x0378]],
[[0x0020],[0xD800]],
[[0x0020,0x0308],[0xD800]],
[[0x000D],[0x0020]],
[[0x000D],[0x0308],[0x0020]],
[[0x000D],[0x000D]],
[[0x000D],[0x0308],[0x000D]],
[[0x000D,0x000A]],
[[0x000D],[0x0308],[0x000A]],
[[0x000D],[0x0001]],
[[0x000D],[0x0308],[0x0001]],
[[0x000D],[0x0300]],
[[0x000D],[0x0308,0x0300]],
[[0x000D],[0x0903]],
[[0x000D],[0x0308,0x0903]],
[[0x000D],[0x1100]],
[[0x000D],[0x0308],[0x1100]],
[[0x000D],[0x1160]],
[[0x000D],[0x0308],[0x1160]],
[[0x000D],[0x11A8]],
[[0x000D],[0x0308],[0x11A8]],
[[0x000D],[0xAC00]],
[[0x000D],[0x0308],[0xAC00]],
[[0x000D],[0xAC01]],
[[0x000D],[0x0308],[0xAC01]],
[[0x000D],[0x1F1E6]],
[[0x000D],[0x0308],[0x1F1E6]],
[[0x000D],[0x0378]],
[[0x000D],[0x0308],[0x0378]],
[[0x000D],[0xD800]],
[[0x000D],[0x0308],[0xD800]],
[[0x000A],[0x0020]],
[[0x000A],[0x0308],[0x0020]],
[[0x000A],[0x000D]],
[[0x000A],[0x0308],[0x000D]],
[[0x000A],[0x000A]],
[[0x000A],[0x0308],[0x000A]],
[[0x000A],[0x0001]],
[[0x000A],[0x0308],[0x0001]],
[[0x000A],[0x0300]],
[[0x000A],[0x0308,0x0300]],
[[0x000A],[0x0903]],
[[0x000A],[0x0308,0x0903]],
[[0x000A],[0x1100]],
[[0x000A],[0x0308],[0x1100]],
[[0x000A],[0x1160]],
[[0x000A],[0x0308],[0x1160]],
[[0x000A],[0x11A8]],
[[0x000A],[0x0308],[0x11A8]],
[[0x000A],[0xAC00]],
[[0x000A],[0x0308],[0xAC00]],
[[0x000A],[0xAC01]],
[[0x000A],[0x0308],[0xAC01]],
[[0x000A],[0x1F1E6]],
[[0x000A],[0x0308],[0x1F1E6]],
[[0x000A],[0x0378]],
[[0x000A],[0x0308],[0x0378]],
[[0x000A],[0xD800]],
[[0x000A],[0x0308],[0xD800]],
[[0x0001],[0x0020]],
[[0x0001],[0x0308],[0x0020]],
[[0x0001],[0x000D]],
[[0x0001],[0x0308],[0x000D]],
[[0x0001],[0x000A]],
[[0x0001],[0x0308],[0x000A]],
[[0x0001],[0x0001]],
[[0x0001],[0x0308],[0x0001]],
[[0x0001],[0x0300]],
[[0x0001],[0x0308,0x0300]],
[[0x0001],[0x0903]],
[[0x0001],[0x0308,0x0903]],
[[0x0001],[0x1100]],
[[0x0001],[0x0308],[0x1100]],
[[0x0001],[0x1160]],
[[0x0001],[0x0308],[0x1160]],
[[0x0001],[0x11A8]],
[[0x0001],[0x0308],[0x11A8]],
[[0x0001],[0xAC00]],
[[0x0001],[0x0308],[0xAC00]],
[[0x0001],[0xAC01]],
[[0x0001],[0x0308],[0xAC01]],
[[0x0001],[0x1F1E6]],
[[0x0001],[0x0308],[0x1F1E6]],
[[0x0001],[0x0378]],
[[0x0001],[0x0308],[0x0378]],
[[0x0001],[0xD800]],
[[0x0001],[0x0308],[0xD800]],
[[0x0300],[0x0020]],
[[0x0300,0x0308],[0x0020]],
[[0x0300],[0x000D]],
[[0x0300,0x0308],[0x000D]],
[[0x0300],[0x000A]],
[[0x0300,0x0308],[0x000A]],
[[0x0300],[0x0001]],
[[0x0300,0x0308],[0x0001]],
[[0x0300,0x0300]],
[[0x0300,0x0308,0x0300]],
[[0x0300,0x0903]],
[[0x0300,0x0308,0x0903]],
[[0x0300],[0x1100]],
[[0x0300,0x0308],[0x1100]],
[[0x0300],[0x1160]],
[[0x0300,0x0308],[0x1160]],
[[0x0300],[0x11A8]],
[[0x0300,0x0308],[0x11A8]],
[[0x0300],[0xAC00]],
[[0x0300,0x0308],[0xAC00]],
[[0x0300],[0xAC01]],
[[0x0300,0x0308],[0xAC01]],
[[0x0300],[0x1F1E6]],
[[0x0300,0x0308],[0x1F1E6]],
[[0x0300],[0x0378]],
[[0x0300,0x0308],[0x0378]],
[[0x0300],[0xD800]],
[[0x0300,0x0308],[0xD800]],
[[0x0903],[0x0020]],
[[0x0903,0x0308],[0x0020]],
[[0x0903],[0x000D]],
[[0x0903,0x0308],[0x000D]],
[[0x0903],[0x000A]],
[[0x0903,0x0308],[0x000A]],
[[0x0903],[0x0001]],
[[0x0903,0x0308],[0x0001]],
[[0x0903,0x0300]],
[[0x0903,0x0308,0x0300]],
[[0x0903,0x0903]],
[[0x0903,0x0308,0x0903]],
[[0x0903],[0x1100]],
[[0x0903,0x0308],[0x1100]],
[[0x0903],[0x1160]],
[[0x0903,0x0308],[0x1160]],
[[0x0903],[0x11A8]],
[[0x0903,0x0308],[0x11A8]],
[[0x0903],[0xAC00]],
[[0x0903,0x0308],[0xAC00]],
[[0x0903],[0xAC01]],
[[0x0903,0x0308],[0xAC01]],
[[0x0903],[0x1F1E6]],
[[0x0903,0x0308],[0x1F1E6]],
[[0x0903],[0x0378]],
[[0x0903,0x0308],[0x0378]],
[[0x0903],[0xD800]],
[[0x0903,0x0308],[0xD800]],
[[0x1100],[0x0020]],
[[0x1100,0x0308],[0x0020]],
[[0x1100],[0x000D]],
[[0x1100,0x0308],[0x000D]],
[[0x1100],[0x000A]],
[[0x1100,0x0308],[0x000A]],
[[0x1100],[0x0001]],
[[0x1100,0x0308],[0x0001]],
[[0x1100,0x0300]],
[[0x1100,0x0308,0x0300]],
[[0x1100,0x0903]],
[[0x1100,0x0308,0x0903]],
[[0x1100,0x1100]],
[[0x1100,0x0308],[0x1100]],
[[0x1100,0x1160]],
[[0x1100,0x0308],[0x1160]],
[[0x1100],[0x11A8]],
[[0x1100,0x0308],[0x11A8]],
[[0x1100,0xAC00]],
[[0x1100,0x0308],[0xAC00]],
[[0x1100,0xAC01]],
[[0x1100,0x0308],[0xAC01]],
[[0x1100],[0x1F1E6]],
[[0x1100,0x0308],[0x1F1E6]],
[[0x1100],[0x0378]],
[[0x1100,0x0308],[0x0378]],
[[0x1100],[0xD800]],
[[0x1100,0x0308],[0xD800]],
[[0x1160],[0x0020]],
[[0x1160,0x0308],[0x0020]],
[[0x1160],[0x000D]],
[[0x1160,0x0308],[0x000D]],
[[0x1160],[0x000A]],
[[0x1160,0x0308],[0x000A]],
[[0x1160],[0x0001]],
[[0x1160,0x0308],[0x0001]],
[[0x1160,0x0300]],
[[0x1160,0x0308,0x0300]],
[[0x1160,0x0903]],
[[0x1160,0x0308,0x0903]],
[[0x1160],[0x1100]],
[[0x1160,0x0308],[0x1100]],
[[0x1160,0x1160]],
[[0x1160,0x0308],[0x1160]],
[[0x1160,0x11A8]],
[[0x1160,0x0308],[0x11A8]],
[[0x1160],[0xAC00]],
[[0x1160,0x0308],[0xAC00]],
[[0x1160],[0xAC01]],
[[0x1160,0x0308],[0xAC01]],
[[0x1160],[0x1F1E6]],
[[0x1160,0x0308],[0x1F1E6]],
[[0x1160],[0x0378]],
[[0x1160,0x0308],[0x0378]],
[[0x1160],[0xD800]],
[[0x1160,0x0308],[0xD800]],
[[0x11A8],[0x0020]],
[[0x11A8,0x0308],[0x0020]],
[[0x11A8],[0x000D]],
[[0x11A8,0x0308],[0x000D]],
[[0x11A8],[0x000A]],
[[0x11A8,0x0308],[0x000A]],
[[0x11A8],[0x0001]],
[[0x11A8,0x0308],[0x0001]],
[[0x11A8,0x0300]],
[[0x11A8,0x0308,0x0300]],
[[0x11A8,0x0903]],
[[0x11A8,0x0308,0x0903]],
[[0x11A8],[0x1100]],
[[0x11A8,0x0308],[0x1100]],
[[0x11A8],[0x1160]],
[[0x11A8,0x0308],[0x1160]],
[[0x11A8,0x11A8]],
[[0x11A8,0x0308],[0x11A8]],
[[0x11A8],[0xAC00]],
[[0x11A8,0x0308],[0xAC00]],
[[0x11A8],[0xAC01]],
[[0x11A8,0x0308],[0xAC01]],
[[0x11A8],[0x1F1E6]],
[[0x11A8,0x0308],[0x1F1E6]],
[[0x11A8],[0x0378]],
[[0x11A8,0x0308],[0x0378]],
[[0x11A8],[0xD800]],
[[0x11A8,0x0308],[0xD800]],
[[0xAC00],[0x0020]],
[[0xAC00,0x0308],[0x0020]],
[[0xAC00],[0x000D]],
[[0xAC00,0x0308],[0x000D]],
[[0xAC00],[0x000A]],
[[0xAC00,0x0308],[0x000A]],
[[0xAC00],[0x0001]],
[[0xAC00,0x0308],[0x0001]],
[[0xAC00,0x0300]],
[[0xAC00,0x0308,0x0300]],
[[0xAC00,0x0903]],
[[0xAC00,0x0308,0x0903]],
[[0xAC00],[0x1100]],
[[0xAC00,0x0308],[0x1100]],
[[0xAC00,0x1160]],
[[0xAC00,0x0308],[0x1160]],
[[0xAC00,0x11A8]],
[[0xAC00,0x0308],[0x11A8]],
[[0xAC00],[0xAC00]],
[[0xAC00,0x0308],[0xAC00]],
[[0xAC00],[0xAC01]],
[[0xAC00,0x0308],[0xAC01]],
[[0xAC00],[0x1F1E6]],
[[0xAC00,0x0308],[0x1F1E6]],
[[0xAC00],[0x0378]],
[[0xAC00,0x0308],[0x0378]],
[[0xAC00],[0xD800]],
[[0xAC00,0x0308],[0xD800]],
[[0xAC01],[0x0020]],
[[0xAC01,0x0308],[0x0020]],
[[0xAC01],[0x000D]],
[[0xAC01,0x0308],[0x000D]],
[[0xAC01],[0x000A]],
[[0xAC01,0x0308],[0x000A]],
[[0xAC01],[0x0001]],
[[0xAC01,0x0308],[0x0001]],
[[0xAC01,0x0300]],
[[0xAC01,0x0308,0x0300]],
[[0xAC01,0x0903]],
[[0xAC01,0x0308,0x0903]],
[[0xAC01],[0x1100]],
[[0xAC01,0x0308],[0x1100]],
[[0xAC01],[0x1160]],
[[0xAC01,0x0308],[0x1160]],
[[0xAC01,0x11A8]],
[[0xAC01,0x0308],[0x11A8]],
[[0xAC01],[0xAC00]],
[[0xAC01,0x0308],[0xAC00]],
[[0xAC01],[0xAC01]],
[[0xAC01,0x0308],[0xAC01]],
[[0xAC01],[0x1F1E6]],
[[0xAC01,0x0308],[0x1F1E6]],
[[0xAC01],[0x0378]],
[[0xAC01,0x0308],[0x0378]],
[[0xAC01],[0xD800]],
[[0xAC01,0x0308],[0xD800]],
[[0x1F1E6],[0x0020]],
[[0x1F1E6,0x0308],[0x0020]],
[[0x1F1E6],[0x000D]],
[[0x1F1E6,0x0308],[0x000D]],
[[0x1F1E6],[0x000A]],
[[0x1F1E6,0x0308],[0x000A]],
[[0x1F1E6],[0x0001]],
[[0x1F1E6,0x0308],[0x0001]],
[[0x1F1E6,0x0300]],
[[0x1F1E6,0x0308,0x0300]],
[[0x1F1E6,0x0903]],
[[0x1F1E6,0x0308,0x0903]],
[[0x1F1E6],[0x1100]],
[[0x1F1E6,0x0308],[0x1100]],
[[0x1F1E6],[0x1160]],
[[0x1F1E6,0x0308],[0x1160]],
[[0x1F1E6],[0x11A8]],
[[0x1F1E6,0x0308],[0x11A8]],
[[0x1F1E6],[0xAC00]],
[[0x1F1E6,0x0308],[0xAC00]],
[[0x1F1E6],[0xAC01]],
[[0x1F1E6,0x0308],[0xAC01]],
[[0x1F1E6,0x1F1E6]],
[[0x1F1E6,0x0308],[0x1F1E6]],
[[0x1F1E6],[0x0378]],
[[0x1F1E6,0x0308],[0x0378]],
[[0x1F1E6],[0xD800]],
[[0x1F1E6,0x0308],[0xD800]],
[[0x0378],[0x0020]],
[[0x0378,0x0308],[0x0020]],
[[0x0378],[0x000D]],
[[0x0378,0x0308],[0x000D]],
[[0x0378],[0x000A]],
[[0x0378,0x0308],[0x000A]],
[[0x0378],[0x0001]],
[[0x0378,0x0308],[0x0001]],
[[0x0378,0x0300]],
[[0x0378,0x0308,0x0300]],
[[0x0378,0x0903]],
[[0x0378,0x0308,0x0903]],
[[0x0378],[0x1100]],
[[0x0378,0x0308],[0x1100]],
[[0x0378],[0x1160]],
[[0x0378,0x0308],[0x1160]],
[[0x0378],[0x11A8]],
[[0x0378,0x0308],[0x11A8]],
[[0x0378],[0xAC00]],
[[0x0378,0x0308],[0xAC00]],
[[0x0378],[0xAC01]],
[[0x0378,0x0308],[0xAC01]],
[[0x0378],[0x1F1E6]],
[[0x0378,0x0308],[0x1F1E6]],
[[0x0378],[0x0378]],
[[0x0378,0x0308],[0x0378]],
[[0x0378],[0xD800]],
[[0x0378,0x0308],[0xD800]],
[[0xD800],[0x0020]],
[[0xD800],[0x0308],[0x0020]],
[[0xD800],[0x000D]],
[[0xD800],[0x0308],[0x000D]],
[[0xD800],[0x000A]],
[[0xD800],[0x0308],[0x000A]],
[[0xD800],[0x0001]],
[[0xD800],[0x0308],[0x0001]],
[[0xD800],[0x0300]],
[[0xD800],[0x0308,0x0300]],
[[0xD800],[0x0903]],
[[0xD800],[0x0308,0x0903]],
[[0xD800],[0x1100]],
[[0xD800],[0x0308],[0x1100]],
[[0xD800],[0x1160]],
[[0xD800],[0x0308],[0x1160]],
[[0xD800],[0x11A8]],
[[0xD800],[0x0308],[0x11A8]],
[[0xD800],[0xAC00]],
[[0xD800],[0x0308],[0xAC00]],
[[0xD800],[0xAC01]],
[[0xD800],[0x0308],[0xAC01]],
[[0xD800],[0x1F1E6]],
[[0xD800],[0x0308],[0x1F1E6]],
[[0xD800],[0x0378]],
[[0xD800],[0x0308],[0x0378]],
[[0xD800],[0xD800]],
[[0xD800],[0x0308],[0xD800]],
[[0x0061],[0x1F1E6],[0x0062]],
[[0x1F1F7,0x1F1FA]],
[[0x1F1F7,0x1F1FA,0x1F1F8]],
[[0x1F1F7,0x1F1FA,0x1F1F8,0x1F1EA]],
[[0x1F1F7,0x1F1FA],[0x200B],[0x1F1F8,0x1F1EA]],
[[0x1F1E6,0x1F1E7,0x1F1E8]],
[[0x1F1E6,0x200D],[0x1F1E7,0x1F1E8]],
[[0x1F1E6,0x1F1E7,0x200D],[0x1F1E8]],
[[0x0020,0x200D],[0x0646]],
[[0x0646,0x200D],[0x0020]],
];
// Build $input: for every row of $expected, the flat list of its leaf values
// in traversal order, stored under the same row key.
$input = [];
foreach ($expected as $rowKey => $groups) {
    // LEAVES_ONLY is the iterator's default mode; it is spelled out here for clarity.
    $leaves = new RecursiveIteratorIterator(
        new RecursiveArrayIterator($groups),
        RecursiveIteratorIterator::LEAVES_ONLY
    );
    for ($leaves->rewind(); $leaves->valid(); $leaves->next()) {
        $input[$rowKey][] = $leaves->current();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment