Created
March 14, 2025 15:43
-
-
Save jonkerw85/583cdc366fd7c3c7652e870e682863ac to your computer and use it in GitHub Desktop.
Polyfill for PHP's Deprecated utf8_encode() Function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
namespace LegacySupport;
/**
 * Polyfill for PHP's deprecated utf8_encode() function.
 *
 * The built-in utf8_encode() was deprecated in PHP 8.2 and is scheduled for removal.
 * This is a direct PHP port of the original C algorithm, so it behaves the same way
 * as the native function for every possible input byte.
 *
 * The original C implementation can be found here:
 * @see https://github.com/php/php-src/blob/f75dd82866382bf5511193a77d8391639652607f/ext/standard/string.c#L6192
 *
 * The input is treated as a sequence of single-byte Latin-1 (ISO-8859-1) characters.
 * Bytes below 0x80 (ASCII) are copied unchanged; every byte from 0x80 to 0xFF is
 * expanded into the two-byte UTF-8 sequence for the code point with the same value.
 * No validation is performed: input that is already UTF-8 gets encoded a second time.
 *
 * @param string $s The input string encoded in Latin-1 (ISO-8859-1).
 *
 * @return string The UTF-8 encoded representation of the input string.
 */
function utf8_encode(string $s): string
{
    $encoded = '';
    $length = strlen($s); // byte length: Latin-1 is exactly one byte per character

    for ($i = 0; $i < $length; $i++) {
        $byte = ord($s[$i]);

        if ($byte < 0x80) {
            // ASCII is identical in Latin-1 and UTF-8, so copy the byte as-is.
            $encoded .= $s[$i];
            continue;
        }

        // Code points U+0080..U+00FF need two UTF-8 bytes:
        //   lead byte         110000xx  <- the top 2 bits of the input byte
        //   continuation byte 10xxxxxx  <- the low 6 bits of the input byte
        $encoded .= chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
    }

    return $encoded;
}
// Demo input: "Héllo Wörld" spelled out as raw Latin-1 bytes (233 = 0xE9 = é, 246 = 0xF6 = ö).
$latin1_string = chr(72) . chr(233) . chr(108) . chr(108) . chr(111) . " "
    . chr(87) . chr(246) . chr(114) . chr(108) . chr(100);

// The raw bytes 0xE9 and 0xF6 are not valid UTF-8 on their own, so a UTF-8
// terminal shows replacement characters here: H�llo W�rld
echo 'Latin-1 string : ' . $latin1_string . PHP_EOL;

// After conversion the same text is valid UTF-8 and prints as: Héllo Wörld
echo 'UTF-8 string : ' . \LegacySupport\utf8_encode($latin1_string) . PHP_EOL;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment