Last active
December 18, 2015 14:08
-
-
Save masakielastic/5794615 to your computer and use it in GitHub Desktop.
Benchmark for validating ASCII characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ctype]
valid:true
time:4.9642469882965
[byte comparison]
valid:true
time:5.0199990272522
[preg_match]
valid:true
time:5.9788029193878
[mb_check_encoding]
valid:true
time:6.5968248844147
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Benchmark driver: runs every ASCII validator through timer() and prints,
// for each one, whether it accepted exactly the expected characters and how
// long the run took.

// for avoiding Mac OSX's bug in C library
setlocale(LC_ALL, 'C');

// Map of human-readable label => result array returned by timer()
// (keys 'valid' and 'time').
$result = [
    'ctype' => timer('is_ascii'),
    'byte comparison' => timer('is_ascii2'),
    'preg_match' => timer('is_ascii3'),
    'mb_check_encoding' => timer('is_ascii4'),
];

foreach ($result as $desc => $elem) {
    // Read from the loop variable $elem; the previous code read an undefined
    // $ret here, so every row reported "valid:false" and an empty time.
    echo '[', $desc, ']', PHP_EOL,
        'valid:', $elem['valid'] ? 'true' : 'false', PHP_EOL,
        'time:', $elem['time'], PHP_EOL;
}
/**
 * Run an ASCII validator over every Unicode scalar value and time it.
 *
 * Each code point from U+0000 to U+10FFFF is encoded with utf8_chr() and
 * handed to $callable; the characters it accepts are collected and compared
 * with the 128 single-byte ASCII characters. The measured time includes the
 * cost of utf8_chr() itself, since encoding happens inside the timed loop.
 *
 * @param callable $callable Validator taking one string and returning a
 *                           truthy value when the string is considered ASCII.
 *
 * @return array ['valid' => bool, 'time' => float] where 'valid' is true only
 *               when the accepted characters are exactly chr(0)..chr(0x7F), in
 *               order, and 'time' is the elapsed wall-clock time in seconds.
 */
function timer(callable $callable)
{
    $accepted = [];
    $expected = array_map('chr', range(0, 0x7F));

    $start = microtime(true);

    // "<=" so that the final code point U+10FFFF is exercised as well.
    for ($i = 0; $i <= 0x10FFFF; $i += 1) {
        $char = utf8_chr($i);

        // utf8_chr() returns false for surrogates. Passing false on would be
        // coerced to '' by strlen()/mb_check_encoding() and wrongly accepted,
        // so code points that cannot be encoded are skipped.
        if ($char === false) {
            continue;
        }

        if ($callable($char)) {
            $accepted[] = $char;
        }
    }

    $stop = microtime(true);

    // Compare against what the validator actually accepted; the previous
    // "$expected === $expected" was always true.
    return ['valid' => $expected === $accepted, 'time' => $stop - $start];
}
/**
 * Encode a Unicode code point as a UTF-8 string.
 *
 * The code point is written as a 4-byte big-endian (UTF-32BE) value and then
 * converted to UTF-8 with mbstring.
 *
 * @param int $code_point Code point to encode.
 *
 * @return string|false The UTF-8 encoded character, or false when the code
 *                      point is negative, above 0x10FFFF, or a surrogate
 *                      (0xD800-0xDFFF).
 */
function utf8_chr($code_point)
{
    // Outside the Unicode code space.
    if ($code_point < 0 || $code_point > 0x10FFFF) {
        return false;
    }

    // Surrogates are not encodable characters.
    if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
        return false;
    }

    // Left-pad the hex form to 8 digits so that it decodes to exactly 4 bytes.
    $hex = str_pad(dechex($code_point), 8, '0', STR_PAD_LEFT);
    $utf32 = hex2bin($hex);

    return mb_convert_encoding($utf32, 'UTF-8', 'UTF-32BE');
}
/**
 * ASCII check built on the ctype extension.
 *
 * Returns true when ctype_print() accepts the whole string or when
 * ctype_cntrl() accepts the whole string. Because each ctype call tests the
 * entire string against one class, a string mixing printable and control
 * characters, as well as the empty string, yields false; the check is meant
 * for single characters as used by the benchmark.
 *
 * @param string $str Text to test.
 *
 * @return bool True when the string is entirely printable or entirely control
 *              characters.
 */
function is_ascii($str)
{
    if (ctype_print($str)) {
        return true;
    }

    return ctype_cntrl($str);
}
/**
 * ASCII check by scanning the string byte by byte.
 *
 * @param string $str Text to test.
 *
 * @return bool False as soon as a byte greater than 0x7F is found, true
 *              otherwise (including for the empty string).
 */
function is_ascii2($str)
{
    $length = strlen($str);
    $offset = 0;

    while ($offset < $length) {
        // Single-byte string comparison: anything above DEL is non-ASCII.
        if ($str[$offset] > "\x7F") {
            return false;
        }
        $offset += 1;
    }

    return true;
}
/**
 * ASCII check using a regular expression.
 *
 * Looks for any byte outside 0x00-0x7F and reports the string as ASCII only
 * when none is found. The earlier pattern '/[\x00-\x7F]/' was unanchored and
 * therefore accepted any string that merely contained one ASCII byte.
 *
 * @param string $str Text to test.
 *
 * @return bool True when every byte is in the range 0x00-0x7F (the empty
 *              string counts as ASCII, matching the other validators); false
 *              otherwise or when preg_match() itself fails.
 */
function is_ascii3($str)
{
    // preg_match() returns 0 for "no match", 1 for a match and false on
    // error, so only an explicit 0 means no non-ASCII byte was found.
    return preg_match('/[^\x00-\x7F]/', $str) === 0;
}
/**
 * ASCII check delegated to mbstring.
 *
 * @param string $str Text to test.
 *
 * @return bool Result of mb_check_encoding() for the 'ASCII' encoding.
 */
function is_ascii4($str)
{
    $is_valid = mb_check_encoding($str, 'ASCII');

    return $is_valid;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment