<?php
// Declare UTF-8 in the HTTP Content-Type header, matching the <meta> tag in the <head> below.
header('Content-Type: text/html; charset=utf-8');  ?>
<html>
<head>
    <title>Fix wrong encoded UTF8 characters</title>
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
</head>
<body>
<pre>
<?php
    /*  Problem description:

        A common problem is for characters encoded as UTF-8 to have their individual bytes interpreted as ISO-8859-1 or Windows-1252.
        Instead of an expected character, a sequence of Latin characters is shown, typically starting with Ã or Â. For example, instead of "è" these characters occur: "Ã¨".

        A Web page is encoded as UTF-8 characters. The Web server mistakenly declares the charset to be ISO-8859-1 in the HTTP protocol that delivers the page to the browser. 
        The browser will then display each of the UTF-8 bytes in the Web page as Latin-1 characters.
        
        source: http://www.i18nqa.com/debug/bug-utf-8-latin1.html
        
        code source:  https://github.com/devgeniem/wp-sanitize-accented-uploads/blob/master/plugin.php#L152
        table source: http://www.i18nqa.com/debug/utf8-debug.html
        
        https://www.kasperkamperman.com/ 2018-07-03

    */

    $str = 'BLØF - ZOUTELANDE, MØ - FINAL SONG, Fédération Camerounaise de Football, It’s Getting the Best of Me';

    // Print the sample before and after repair; the padded labels line both values up inside the surrounding <pre>.
    echo 'original string: ', $str, '<br/>';
    echo 'fixed string:    ', fixWrongUTF8Encoding($str), '<br/>';

    // displays: BLØF - ZOUTELANDE, MØ - FINAL SONG, Fédération Camerounaise de Football, It’s Getting the Best of Me
    
    /**
     * Repairs "mojibake": text whose UTF-8 bytes were decoded one byte at a time as
     * Windows-1252 (or ISO-8859-1) and then stored again as UTF-8.
     *
     * Every character of the Windows-1252 upper half (0x80-0xFF) is restored from its
     * mis-decoded multi-character form. The lookup table is derived from the code page
     * rather than typed in as literals, because several mis-decoded sequences end in
     * invisible characters (C1 controls, NBSP, soft hyphen) that do not survive editors
     * and re-encoding; a hand-typed table then degenerates into duplicate keys and
     * entries that rewrite a legitimate lone "Ã".
     *
     * Replacement uses strtr(), which prefers the longest match and never rescans
     * its own output, so the result does not depend on the order of the table.
     *
     * Known limitation: a genuine "Â" or "Ã" that is directly followed by a character
     * from the 0x80-0xBF range (for example "Â" then "©") is indistinguishable from
     * mojibake and will be collapsed.
     *
     * Reference table: http://www.i18nqa.com/debug/utf8-debug.html
     *
     * @param string|array $inputString UTF-8 text to repair. An array is processed
     *                                  element by element with its keys preserved.
     * @return string|array The repaired text, in the same shape as the input.
     */
    function fixWrongUTF8Encoding($inputString) {

        // The table never changes, so build it only once per request.
        static $fix_list = null;

        if ($fix_list === null) {
            $fix_list = fixWrongUTF8EncodingBuildTable();
        }

        // str_replace() accepted an array subject; keep that working for existing callers.
        if (is_array($inputString)) {
            return array_map('fixWrongUTF8Encoding', $inputString);
        }

        return strtr((string) $inputString, $fix_list);

    }

    /**
     * Builds the map "mis-decoded sequence => intended character" (both UTF-8 encoded).
     *
     * @return array Map with one Windows-1252 and one ISO-8859-1 variant per character.
     */
    function fixWrongUTF8EncodingBuildTable() {

        // Windows-1252 differs from ISO-8859-1 only in 0x80-0x9F; everything else maps
        // to the code point with the same value. 0x81, 0x8D, 0x8F, 0x90 and 0x9D are
        // unassigned in Windows-1252.
        $cp1252 = array(
            0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E,
            0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6,
            0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152,
            0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
            0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
            0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
            0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178,
        );

        $fix_list = array();

        for ($byte = 0x80; $byte <= 0xFF; $byte++) {
            if ($byte < 0xA0 && !isset($cp1252[$byte])) {
                continue; // unassigned slot: there is no character to restore
            }

            $code_point = isset($cp1252[$byte]) ? $cp1252[$byte] : $byte;
            $real_char  = fixWrongUTF8EncodingEncodeCodePoint($code_point);

            // Re-read each byte of the correct UTF-8 sequence as a standalone character,
            // exactly as a decoder assuming a single-byte charset would have done.
            $cp1252_error = '';
            $latin1_error = '';
            foreach (str_split($real_char) as $raw_byte) {
                $value = ord($raw_byte);
                // Bytes unassigned in Windows-1252 fall through to the C1 control with
                // the same value, which is also what ISO-8859-1 decoding produces.
                $cp1252_error .= fixWrongUTF8EncodingEncodeCodePoint(isset($cp1252[$value]) ? $cp1252[$value] : $value);
                $latin1_error .= fixWrongUTF8EncodingEncodeCodePoint($value);
            }

            $fix_list[$cp1252_error] = $real_char;
            $fix_list[$latin1_error] = $real_char;
        }

        return $fix_list;

    }

    /**
     * Encodes a single code point as UTF-8.
     *
     * @param int $codePoint Code point in the range U+0000-U+FFFF (all this table needs).
     * @return string The UTF-8 byte sequence.
     */
    function fixWrongUTF8EncodingEncodeCodePoint($codePoint) {

        if ($codePoint < 0x80) {
            return chr($codePoint);
        }

        if ($codePoint < 0x800) {
            return chr(0xC0 | ($codePoint >> 6))
                 . chr(0x80 | ($codePoint & 0x3F));
        }

        return chr(0xE0 | ($codePoint >> 12))
             . chr(0x80 | (($codePoint >> 6) & 0x3F))
             . chr(0x80 | ($codePoint & 0x3F));

    }

?>
</pre>
</body>
</html>