Created
September 7, 2017 01:10
-
-
Save liuxd/b20d3a92766aadb187851accda553ce0 to your computer and use it in GitHub Desktop.
remove_not_utf8.php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
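| /** | |
| * Extract every well-formed `UTF8` character with a regular expression and collect them in an array. | |
| * Join all characters in that array to build the result string. | |
| */ | |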
/**
 * Removes every byte sequence that is not well-formed UTF-8 from a string.
 *
 * The input is scanned with a byte-level regular expression that matches one
 * well-formed UTF-8 character at a time. The matched characters are joined
 * back together, so every byte the pattern does not recognise is dropped.
 *
 * ASCII control characters other than TAB, LF and CR (0x00-0x08, 0x0B, 0x0C,
 * 0x0E-0x1F and 0x7F) are not matched by the pattern and are therefore
 * removed as well.
 *
 * @param string $str String to filter.
 * @return string The input with all unmatched bytes removed.
 * @throws \RuntimeException If the PCRE engine fails while scanning the input.
 */
function utf8filter($str)
{
    // Nothing to scan; this also avoids handing null to preg_match_all().
    if ($str === null || $str === '') {
        return '';
    }

    // One alternative per well-formed UTF-8 byte layout, in this order:
    //   1. TAB, LF, CR and printable ASCII
    //   2. two-byte sequences (lead bytes C0/C1 are excluded: overlong forms)
    //   3. three-byte sequences starting with E0 (overlong forms excluded)
    //   4. ordinary three-byte sequences
    //   5. three-byte sequences starting with ED (UTF-16 surrogates excluded)
    //   6. four-byte sequences starting with F0 (overlong forms excluded)
    //   7. ordinary four-byte sequences (F1-F3)
    //   8. four-byte sequences starting with F4 (capped at U+10FFFF)
    // The group is non-capturing so each character is stored only once in
    // $matches. The "x" modifier lets the pattern span several lines.
    $pattern = '%(?:
          [\x09\x0A\x0D\x20-\x7E]
        | [\xC2-\xDF][\x80-\xBF]
        | \xE0[\xA0-\xBF][\x80-\xBF]
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
        | \xED[\x80-\x9F][\x80-\xBF]
        | \xF0[\x90-\xBF][\x80-\xBF]{2}
        | [\xF1-\xF3][\x80-\xBF]{3}
        | \xF4[\x80-\x8F][\x80-\xBF]{2}
    )%x';

    // Collect every well-formed character; a false return means the regex
    // engine itself failed, which must not be mistaken for "no matches".
    if (preg_match_all($pattern, $str, $matches) === false) {
        throw new \RuntimeException(
            'utf8filter(): PCRE matching failed with error code ' . preg_last_error()
        );
    }

    // Concatenate the matched characters into the filtered string.
    return implode('', $matches[0]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment