-
-
Save giriannamalai/d97a60d8e9f68c46307a706016fc8e5f to your computer and use it in GitHub Desktop.
Function to fix UTF-8 special characters displayed as 2 characters (UTF-8 interpreted as ISO-8859-1 or Windows-1252)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php header('Content-Type: text/html; charset=utf-8'); ?> | |
<html> | |
<head> | |
<title>Fix wrong encoded UTF8 characters</title> | |
<meta http-equiv="Content-type" content="text/html; charset=utf-8" /> | |
</head> | |
<body> | |
<pre> | |
<?php | |
/* Problem description: | |
A common problem is for characters encoded as UTF-8 to have their individual bytes interpreted as ISO-8859-1 or Windows-1252. | |
Instead of an expected character, a sequence of Latin characters is shown, typically starting with Ã or Â. For example, instead of "è" these characters occur: "Ã¨".
A Web page is encoded as UTF-8 characters. The Web server mistakenly declares the charset to be ISO-8859-1 in the HTTP protocol that delivers the page to the browser. | |
The browser will then display each of the UTF-8 bytes in the Web page as Latin-1 characters. | |
source: http://www.i18nqa.com/debug/bug-utf-8-latin1.html | |
code source: https://github.com/devgeniem/wp-sanitize-accented-uploads/blob/master/plugin.php#L152 | |
table source: http://www.i18nqa.com/debug/utf8-debug.html | |
https://www.kasperkamperman.com/ 2018-07-03 | |
*/ | |
// Sample input: UTF-8 text whose bytes were decoded as Windows-1252 and then
// saved as UTF-8 again, so each accented character appears as 2-3 characters.
// The literal must stay mis-encoded; if it reads correctly the demo shows nothing.
$str = 'BLÃ˜F - ZOUTELANDE, MÃ˜ - FINAL SONG, FÃ©dÃ©ration Camerounaise de Football, Itâ€™s Getting the Best of Me';
echo "original string: ".$str.'<br/>';
echo "fixed string: ".fixWrongUTF8Encoding($str).'<br/>';
// displays: BLØF - ZOUTELANDE, MØ - FINAL SONG, Fédération Camerounaise de Football, It’s Getting the Best of Me
/**
 * Repairs "mojibake": UTF-8 text whose individual bytes were decoded as
 * Windows-1252 or ISO-8859-1 and then stored as UTF-8 again (e.g. "Ã©" for "é").
 *
 * Covers every character Windows-1252 can display above ASCII (U+00A0-U+00FF
 * plus the 27 printable characters Windows-1252 places in 0x80-0x9F).
 * Text that is already correct passes through unchanged; only one level of
 * mis-encoding is undone per call.
 *
 * idea source: https://github.com/devgeniem/wp-sanitize-accented-uploads/blob/master/plugin.php#L152
 * table source: http://www.i18nqa.com/debug/utf8-debug.html
 *
 * @param string|array $inputString UTF-8 string to repair. An array is processed
 *                                  entry by entry (keys kept, nested arrays and
 *                                  objects left untouched), as str_replace() would.
 * @return string|array The repaired string, or an array of repaired strings.
 */
function fixWrongUTF8Encoding($inputString) {
    // The table depends only on constants, so build it once per request.
    static $fix_list = null;
    if ($fix_list === null) {
        $fix_list = fixWrongUTF8EncodingTable();
    }

    // strtr() with an array tries the longest key first and never re-scans text
    // it has already replaced, so the result does not depend on table order and
    // a repaired character can never be "repaired" a second time.
    if (is_array($inputString)) {
        $fixed = array();
        foreach ($inputString as $key => $value) {
            $fixed[$key] = (is_array($value) || is_object($value))
                ? $value
                : strtr((string) $value, $fix_list);
        }
        return $fixed;
    }

    return strtr((string) $inputString, $fix_list);
}

/**
 * Builds the mojibake => real character table used by fixWrongUTF8Encoding().
 *
 * The table is computed instead of being written as string literals so that it
 * cannot be damaged when this source file itself is re-encoded.
 *
 * @return array Map of mis-decoded sequence (UTF-8) to the intended character (UTF-8).
 */
function fixWrongUTF8EncodingTable() {
    // Windows-1252 byte => Unicode code point for the 0x80-0x9F range.
    // 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in Windows-1252; decoders
    // pass them through as the C1 control code with the same number.
    $cp1252 = array(
        0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E,
        0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6,
        0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152,
        0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
        0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
        0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
        0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178
    );

    // Characters to repair: everything Windows-1252 can display above ASCII.
    $targets = array_merge(array_values($cp1252), range(0xA0, 0xFF));

    $table = array();
    foreach ($targets as $codepoint) {
        $real = fixWrongUTF8EncodingChr($codepoint);

        // Re-read each UTF-8 byte of the real character as one single-byte
        // character, once per legacy charset, to get what the user sees.
        $asCp1252 = '';
        $asLatin1 = '';
        $length = strlen($real);
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($real[$i]);
            $asCp1252 .= fixWrongUTF8EncodingChr(isset($cp1252[$byte]) ? $cp1252[$byte] : $byte);
            $asLatin1 .= fixWrongUTF8EncodingChr($byte);
        }

        // Both readings are the same whenever no byte falls in 0x80-0x9F.
        $table[$asCp1252] = $real;
        $table[$asLatin1] = $real;
    }

    return $table;
}

/**
 * Encodes one Unicode code point (U+0000-U+FFFF) as a UTF-8 byte string.
 *
 * @param int $codepoint Code point in the Basic Multilingual Plane.
 * @return string UTF-8 encoding of the code point (1 to 3 bytes).
 */
function fixWrongUTF8EncodingChr($codepoint) {
    if ($codepoint < 0x80) {
        return chr($codepoint);
    }
    if ($codepoint < 0x800) {
        return chr(0xC0 | ($codepoint >> 6))
            . chr(0x80 | ($codepoint & 0x3F));
    }
    return chr(0xE0 | ($codepoint >> 12))
        . chr(0x80 | (($codepoint >> 6) & 0x3F))
        . chr(0x80 | ($codepoint & 0x3F));
}
?> | |
</pre> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment