<?php header('Content-Type: text/html; charset=utf-8'); ?>
<html>
<head>
<title>Fix wrong encoded UTF8 characters</title>
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
</head>
<body>
<pre>
<?php
/*
    Problem description:

    A common problem is for characters encoded as UTF-8 to have their
    individual bytes interpreted as ISO-8859-1 or Windows-1252. Instead of an
    expected character, a sequence of Latin characters is shown, typically
    starting with Ã or Â. For example, instead of "è" these characters
    occur: "Ã¨".

    A Web page is encoded as UTF-8 characters. The Web server mistakenly
    declares the charset to be ISO-8859-1 in the HTTP protocol that delivers
    the page to the browser. The browser will then display each of the UTF-8
    bytes in the Web page as Latin-1 characters.

    source: http://www.i18nqa.com/debug/bug-utf-8-latin1.html
    code source: https://github.com/devgeniem/wp-sanitize-accented-uploads/blob/master/plugin.php#L152
    table source: http://www.i18nqa.com/debug/utf8-debug.html

    https://www.kasperkamperman.com/
    2018-07-03
*/

// The wrongly decoded demo input is written with hex escapes so that it
// survives any re-encoding of this source file. Rendered as text it reads:
// "BLÃ˜F - ZOUTELANDE, MÃ˜ - FINAL SONG, FÃ©dÃ©ration ..., Itâ€™s Getting ...".
$str = "BL\xC3\x83\xCB\x9CF - ZOUTELANDE, M\xC3\x83\xCB\x9C - FINAL SONG, "
    . "F\xC3\x83\xC2\xA9d\xC3\x83\xC2\xA9ration Camerounaise de Football, "
    . "It\xC3\xA2\xE2\x82\xAC\xE2\x84\xA2s Getting the Best of Me";

echo "original string: ".$str.'<br/>';
echo "fixed string: ".fixWrongUTF8Encoding($str).'<br/>';
// displays: BLØF - ZOUTELANDE, MØ - FINAL SONG, Fédération Camerounaise de Football, It’s Getting the Best of Me

/**
 * Repairs text whose UTF-8 bytes were decoded as Windows-1252 or ISO-8859-1
 * and then re-encoded as UTF-8 ("mojibake" such as "Ã©" instead of "é").
 *
 * Every character that Windows-1252 can represent outside ASCII (U+00A0 to
 * U+00FF plus the 0x80-0x9F specials such as the euro sign and curly quotes)
 * is restored. Text that is already correct, including a lone "Ã" or "Â"
 * that is not followed by a misread continuation byte, is left untouched.
 *
 * The replacement is done in a single pass with strtr(), so the longest
 * broken sequence always wins and already repaired text is never re-scanned.
 *
 * @param string|array $inputString Text to repair; arrays are repaired
 *                                  element by element.
 *
 * @return string|array The repaired text, in the same shape as the input.
 */
function fixWrongUTF8Encoding($inputString)
{
    // Building the table costs a few hundred string operations; do it once.
    static $fixList = null;
    if ($fixList === null) {
        $fixList = buildUtf8MojibakeFixList();
    }

    // str_replace() accepted an array subject, so keep supporting that.
    if (is_array($inputString)) {
        return array_map('fixWrongUTF8Encoding', $inputString);
    }

    return strtr((string) $inputString, $fixList);
}

/**
 * Builds the "wrongly decoded sequence" => "intended character" map.
 *
 * The map is generated instead of being written as literals because several
 * broken sequences contain invisible or undefined characters (C1 controls,
 * no-break space, soft hyphen) that do not survive editors and copy/paste,
 * which leads to duplicate or no-op table entries.
 *
 * @return array Map of UTF-8 encoded broken sequences to UTF-8 characters.
 */
function buildUtf8MojibakeFixList()
{
    // Windows-1252 bytes 0x80-0x9F that differ from ISO-8859-1. The bytes
    // 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in Windows-1252; decoders
    // pass them through as the C1 control with the same value.
    $cp1252Specials = array(
        0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E,
        0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6,
        0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152,
        0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
        0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
        0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
        0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178,
    );

    // Characters to restore: everything Windows-1252 can express above ASCII.
    $targetCodePoints = array_merge(array_values($cp1252Specials), range(0xA0, 0xFF));

    $fixList = array();
    foreach ($targetCodePoints as $codePoint) {
        $realChar = encodeUtf8CodePoint($codePoint);

        $cp1252Misread = '';
        $latin1Misread = '';
        $byteCount = strlen($realChar);
        for ($i = 0; $i < $byteCount; $i++) {
            $byte = ord($realChar[$i]);
            $cp1252Misread .= encodeUtf8CodePoint(
                isset($cp1252Specials[$byte]) ? $cp1252Specials[$byte] : $byte
            );
            // ISO-8859-1 maps every byte to the code point of the same value.
            $latin1Misread .= encodeUtf8CodePoint($byte);
        }

        $fixList[$cp1252Misread] = $realChar;
        $fixList[$latin1Misread] = $realChar;
    }

    return $fixList;
}

/**
 * Encodes a Unicode code point from the Basic Multilingual Plane as UTF-8.
 *
 * @param int $codePoint Code point in the range U+0000 to U+FFFF.
 *
 * @return string The UTF-8 byte sequence (one to three bytes).
 */
function encodeUtf8CodePoint($codePoint)
{
    if ($codePoint < 0x80) {
        return chr($codePoint);
    }

    if ($codePoint < 0x800) {
        return chr(0xC0 | ($codePoint >> 6))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    return chr(0xE0 | ($codePoint >> 12))
        . chr(0x80 | (($codePoint >> 6) & 0x3F))
        . chr(0x80 | ($codePoint & 0x3F));
}
?>
</pre>
</body>
</html>