Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save masakielastic/5794615 to your computer and use it in GitHub Desktop.
Save masakielastic/5794615 to your computer and use it in GitHub Desktop.
Benchmark for validating ASCII characters
[ctype]
valid:true
time:4.9642469882965
[byte comparison]
valid:true
time:5.0199990272522
[preg_match]
valid:true
time:5.9788029193878
[mb_check_encoding]
valid:true
time:6.5968248844147
<?php
// Use the "C" locale to avoid a bug in Mac OS X's C library
// (presumably one affecting the locale-dependent ctype_* checks; confirm).
setlocale(LC_ALL, 'C');

// Run every ASCII-validation strategy through the same benchmark harness.
// Keys are the labels printed in the report; each value is the
// ['valid' => ..., 'time' => ...] array returned by timer().
$result = [
    'ctype' => timer('is_ascii'),
    'byte comparison' => timer('is_ascii2'),
    'preg_match' => timer('is_ascii3'),
    'mb_check_encoding' => timer('is_ascii4')
];

// Print one "[label] / valid / time" section per strategy.
// The values must be read from $elem, the current entry of $result;
// no other variable holding a result exists at this scope.
foreach ($result as $desc => $elem) {
    echo '[', $desc, ']', PHP_EOL,
        'valid:', $elem['valid'] ? 'true' : 'false', PHP_EOL,
        'time:', $elem['time'], PHP_EOL;
}
/**
 * Benchmarks an ASCII validator against every Unicode scalar value.
 *
 * Each code point from U+0000 to U+10FFFF inclusive is encoded as UTF-8 with
 * utf8_chr() and handed to $callable. The characters the validator accepts
 * are collected and compared with the 128 single-byte ASCII characters, so
 * 'valid' is true only when the validator accepts exactly chr(0)..chr(0x7F).
 *
 * The measured time covers the whole loop, i.e. it includes the cost of
 * generating each character as well as the validator call.
 *
 * @param callable $callable Validator that receives a string and returns a
 *                           truthy value when it considers it ASCII.
 * @return array ['valid' => bool, 'time' => float seconds]
 */
function timer(callable $callable)
{
    $accepted = [];
    $expected = array_map('chr', range(0, 0x7F));

    $start = microtime(true);
    // "<=" so that the last code point, U+10FFFF, is tested as well.
    for ($i = 0; $i <= 0x10FFFF; $i += 1) {
        $char = utf8_chr($i);
        // utf8_chr() returns false for code points it cannot encode
        // (the surrogate range); those are not characters, so skip them
        // instead of feeding a boolean to the validator.
        if ($char === false) {
            continue;
        }
        if ($callable($char)) {
            $accepted[] = $char;
        }
    }
    $stop = microtime(true);

    // Compare what the validator accepted against the expected ASCII set.
    return ['valid' => $accepted === $expected, 'time' => $stop - $start];
}
/**
 * Encodes a Unicode code point as a UTF-8 string.
 *
 * The code point is written as an 8-digit hexadecimal number, turned into
 * its 4 raw bytes and converted from UTF-32BE to UTF-8.
 *
 * @param int $code_point Code point to encode.
 * @return string|false The UTF-8 string, or false when $code_point is
 *                      negative, above 0x10FFFF, or in the surrogate range
 *                      0xD800-0xDFFF.
 */
function utf8_chr($code_point)
{
    $isSurrogate = 0xD800 <= $code_point && $code_point <= 0xDFFF;
    $isOutOfRange = $code_point < 0 || 0x10FFFF < $code_point;
    if ($isOutOfRange || $isSurrogate) {
        return false;
    }

    // Left-pad the hex digits with zeros to 8 digits (4 bytes, big-endian).
    $hex = dechex($code_point);
    $padded = str_repeat('0', 8 - strlen($hex)).$hex;
    $utf32 = hex2bin($padded);

    return mb_convert_encoding($utf32, 'UTF-8', 'UTF-32BE');
}
/**
 * Checks whether a string consists solely of ASCII bytes, using ctype.
 *
 * Every byte must be classified as either printable or a control character.
 * The check is done byte by byte: ctype_print() and ctype_cntrl() each
 * require ALL characters of their argument to belong to one class, so
 * testing the whole string at once would reject ASCII text that mixes
 * printable and control characters (e.g. "Hello\n").
 *
 * The ctype classification is locale-dependent; this relies on the "C"
 * locale being active, as the script sets at start-up.
 *
 * @param string $str String to check.
 * @return bool True when every byte is ASCII. An empty string is accepted
 *              (it contains no non-ASCII byte); non-string input is rejected.
 */
function is_ascii($str)
{
    // ctype functions interpret integers as character codes rather than
    // text, so only genuine strings are examined.
    if (!is_string($str)) {
        return false;
    }

    $len = strlen($str);
    for ($i = 0; $i < $len; $i += 1) {
        $byte = $str[$i];
        if (!ctype_print($byte) && !ctype_cntrl($byte)) {
            return false;
        }
    }

    return true;
}
/**
 * Checks whether a string consists solely of ASCII bytes by comparing
 * each byte with "\x7F", the highest ASCII value.
 *
 * @param string $str String to check.
 * @return bool False as soon as a byte greater than "\x7F" is found,
 *              true otherwise (including for an empty string).
 */
function is_ascii2($str)
{
    $byteCount = strlen($str);
    $offset = 0;

    while ($offset < $byteCount) {
        $byte = $str[$offset];
        // "\x7F" is not a numeric string, so this is a plain string comparison.
        if ($byte > "\x7F") {
            return false;
        }
        $offset += 1;
    }

    return true;
}
/**
 * Checks whether a string consists solely of ASCII bytes, using a regular
 * expression.
 *
 * The pattern is anchored with \A and \z so that EVERY byte has to be in the
 * range 0x00-0x7F. An unanchored character class would succeed as soon as a
 * single ASCII byte appears anywhere in the string (e.g. in "a\xC3\xA9").
 *
 * @param string $str String to check.
 * @return int|false preg_match() result: 1 when the whole string is ASCII
 *                   (an empty string included), 0 otherwise, false on error.
 */
function is_ascii3($str)
{
    return preg_match('/\A[\x00-\x7F]*\z/', $str);
}
/**
 * Checks whether a string is valid ASCII by delegating to mbstring.
 *
 * @param string $str String to check.
 * @return bool Result of mb_check_encoding() for the 'ASCII' encoding.
 */
function is_ascii4($str)
{
    $encoding = 'ASCII';
    $isValid = mb_check_encoding($str, $encoding);

    return $isValid;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment