Created
February 5, 2017 13:41
-
-
Save jesseschalken/5b5798e87d049c7019af2a840b9b249d to your computer and use it in GitHub Desktop.
ord() and chr() for UTF-8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Decodes the UTF-8 encoded character that starts at byte offset $i of $s
 * and returns its Unicode code point.
 *
 * Only well-formed UTF-8 as defined by RFC 3629 is accepted: sequences are
 * one to four bytes long, must use the shortest possible form, and must not
 * encode a UTF-16 surrogate (U+D800..U+DFFF) or a value above U+10FFFF.
 *
 * @param string $s Byte string containing UTF-8 encoded text.
 * @param int    $i Byte offset of the first byte of the character. A negative
 *                  offset counts back from the end of the string.
 *
 * @return int The decoded code point, in the range 0..0x10FFFF.
 *
 * @throws \OutOfRangeException      If $i does not address a byte of $s
 *                                   (this includes an empty $s).
 * @throws \UnexpectedValueException If the bytes at $i are not a well-formed
 *                                   UTF-8 sequence (bad lead byte, truncated
 *                                   sequence, bad continuation byte, overlong
 *                                   form, surrogate, or value above U+10FFFF).
 */
function utf8_ord(string $s, int $i = 0): int {
    $len = \strlen($s);

    // Normalise a negative offset up front so that the continuation bytes are
    // read from the positions following the lead byte instead of wrapping
    // around to the start of the string.
    if ($i < 0) {
        $i += $len;
    }
    if ($i < 0 || $i >= $len) {
        throw new \OutOfRangeException('Offset is outside of the string');
    }

    $o = \ord($s[$i++]);

    // $l   = number of continuation bytes that must follow the lead byte
    // $r   = payload bits collected so far
    // $min = smallest code point that legitimately needs this sequence length
    if ($o < 0x80) {
        // 0xxxxxxx
        return $o;
    } elseif (($o & 0xE0) === 0xC0) {
        // 110xxxxx 10xxxxxx
        $l = 1;
        $r = $o & 0x1F;
        $min = 0x80;
    } elseif (($o & 0xF0) === 0xE0) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        $l = 2;
        $r = $o & 0x0F;
        $min = 0x800;
    } elseif (($o & 0xF8) === 0xF0) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $l = 3;
        $r = $o & 0x07;
        $min = 0x10000;
    } else {
        // A lone continuation byte (10xxxxxx) or a byte >= 0xF8.
        throw new \UnexpectedValueException('Invalid UTF-8 lead byte');
    }

    // $i now points at the first expected continuation byte.
    if ($i + $l > $len) {
        throw new \UnexpectedValueException('Truncated UTF-8 sequence');
    }

    for (; $l > 0; $l--) {
        $o = \ord($s[$i++]);
        if (($o & 0xC0) !== 0x80) {
            throw new \UnexpectedValueException('Invalid UTF-8 continuation byte');
        }
        // 10xxxxxx: append the six payload bits.
        $r = ($r << 6) | ($o & 0x3F);
    }

    // Reject encodings that use more bytes than necessary; accepting them
    // lets the same character hide behind several different byte sequences.
    if ($r < $min) {
        throw new \UnexpectedValueException('Overlong UTF-8 encoding');
    }
    if ($r > 0x10FFFF) {
        throw new \UnexpectedValueException('Code point is above U+10FFFF');
    }
    if ($r >= 0xD800 && $r <= 0xDFFF) {
        throw new \UnexpectedValueException('UTF-16 surrogates are not valid in UTF-8');
    }

    return $r;
}
/**
 * Encodes a Unicode code point as a UTF-8 byte string.
 *
 * Only Unicode scalar values are accepted, as required by RFC 3629:
 * 0..0x10FFFF excluding the UTF-16 surrogate range U+D800..U+DFFF.
 *
 * @param int $n The code point to encode.
 *
 * @return string The one- to four-byte UTF-8 encoding of $n.
 *
 * @throws \InvalidArgumentException If $n is negative, above 0x10FFFF, or a
 *                                   UTF-16 surrogate.
 */
function utf8_chr(int $n): string {
    if ($n < 0 || $n > 0x10FFFF) {
        throw new \InvalidArgumentException('Code point ' . $n . ' is outside the range 0..0x10FFFF');
    }
    if ($n >= 0xD800 && $n <= 0xDFFF) {
        throw new \InvalidArgumentException('Code point ' . $n . ' is a UTF-16 surrogate and cannot be encoded as UTF-8');
    }

    // $l = number of continuation bytes, $p = marker bits of the lead byte.
    if ($n < (1 << 7)) {
        // 7 bits
        // 0xxxxxxx
        return \chr($n);
    } elseif ($n < (1 << 11)) {
        // 11 bits
        // 110xxxxx 10xxxxxx
        $l = 1;
        $p = 0xC0;
    } elseif ($n < (1 << 16)) {
        // 16 bits
        // 1110xxxx 10xxxxxx 10xxxxxx
        $l = 2;
        $p = 0xE0;
    } else {
        // 21 bits
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $l = 3;
        $p = 0xF0;
    }

    // Emit the continuation bytes from the least significant six bits upward,
    // prepending each one so the final string is in most-significant-first order.
    $s = '';
    for (; $l > 0; $l--) {
        // 10xxxxxx
        $s = \chr(0x80 | ($n & 0x3F)) . $s;
        $n >>= 6;
    }

    // Whatever is left of $n fits in the payload bits of the lead byte.
    return \chr($p | $n) . $s;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment