Skip to content

Instantly share code, notes, and snippets.

@brunoczim
Created February 20, 2018 01:01
Show Gist options
  • Save brunoczim/e2802c0f3c81cb5da9ca6a06d7640d04 to your computer and use it in GitHub Desktop.
Save brunoczim/e2802c0f3c81cb5da9ca6a06d7640d04 to your computer and use it in GitHub Desktop.
<?php
/**
 * Decodes the single UTF-8 encoded character that starts at byte offset $i.
 *
 * Only the bit structure of the sequence is validated (lead-byte pattern and
 * the 10xxxxxx marker on every continuation byte). Overlong encodings,
 * surrogate code points and values above U+10FFFF are not rejected.
 *
 * @param string   $utf8 Byte string to read from.
 * @param int|null $i    Byte offset of the lead byte, passed by reference;
 *                       null is treated as 0. On success it is left on the
 *                       LAST byte of the decoded sequence (not one past it),
 *                       so callers iterating over a string must increment it
 *                       themselves. On failure it may have been advanced up to
 *                       the byte at which decoding stopped.
 *
 * @return int The decoded code point, or -1 if $i is at or beyond the end of
 *             the string, the lead byte is invalid, the sequence is truncated,
 *             or a continuation byte is malformed.
 */
function utf8_to_charcode(string $utf8, &$i = null): int {
    $i = (int) $i;
    $length = strlen($utf8);
    if ($i >= $length) {
        return -1;
    }

    $first = ord($utf8[$i]);

    // 0xxxxxxx: plain ASCII, the byte is the code point.
    if (($first >> 7) === 0x0) {
        return $first;
    }

    // Classify the lead byte: how many continuation bytes follow and which
    // of its low bits carry payload.
    if (($first >> 5) === 0x6) {            // 110xxxxx
        $continuations = 1;
        $code = $first & 0x1f;
    } elseif (($first >> 4) === 0xe) {      // 1110xxxx
        $continuations = 2;
        $code = $first & 0xf;
    } elseif (($first >> 3) === 0x1e) {     // 11110xxx
        $continuations = 3;
        $code = $first & 0x7;
    } else {
        // Stray continuation byte (10xxxxxx) or an unsupported lead (11111xxx).
        return -1;
    }

    for ($j = 0; $j < $continuations; $j++) {
        $i++;
        if ($i >= $length) {
            return -1; // sequence is cut off by the end of the string
        }
        $byte = ord($utf8[$i]);
        if (($byte >> 6) !== 0x2) {
            return -1; // not a 10xxxxxx continuation byte
        }
        // Parenthesised explicitly: in PHP `<<` binds tighter than `&`, which
        // binds tighter than `|`, so an unparenthesised mask-and-shift
        // expression is easy to get wrong.
        $code = ($code << 6) | ($byte & 0x3f);
    }

    return $code;
}
/**
 * Encodes an integer code point as a UTF-8 byte sequence of 1 to 4 bytes.
 *
 * Any value from 0 to 0x1fffff (the full range a four-byte sequence can hold)
 * is encoded; no check is made for surrogates or values above U+10FFFF.
 *
 * @param int $code Code point to encode.
 *
 * @return string The encoded bytes, or an empty string when $code is negative
 *                or greater than 0x1fffff.
 */
function utf8_from_charcode(int $code): string {
    if ($code < 0 || $code > 0x1fffff) {
        return '';
    }

    // Single byte: the value is its own encoding.
    if ($code <= 0x7f) {
        return chr($code);
    }

    // Pick the number of continuation bytes and the lead-byte marker bits.
    if ($code <= 0x7ff) {
        $trailing = 1;
        $marker = 0xc0;
    } elseif ($code <= 0xffff) {
        $trailing = 2;
        $marker = 0xe0;
    } else {
        $trailing = 3;
        $marker = 0xf0;
    }

    // Peel off six payload bits at a time, lowest first, prepending each
    // continuation byte so the final string is in most-significant order.
    $tail = '';
    $remaining = $code;
    while ($trailing > 0) {
        $tail = chr(0x80 | ($remaining & 0x3f)) . $tail;
        $remaining >>= 6;
        $trailing--;
    }

    // Whatever bits are left belong in the lead byte.
    return chr($marker | $remaining) . $tail;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment