Last active
August 29, 2015 14:11
-
-
Save dojohnso/423e4f2b08304a11c9b9 to your computer and use it in GitHub Desktop.
function remove_non_utf8_characters() - just a handy function that can take an array or a string and strip out those pesky non-UTF-8 characters that you see (or don't) sometimes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Strip control characters and malformed UTF-8 byte sequences from strings.
 *
 * Accepts a single string, a flat array of strings, or an array of rows
 * (arrays). Only string values are modified; integers, floats, booleans and
 * null keep their value and type. Nested arrays are cleaned recursively.
 *
 * Note: the pattern treats lead bytes 0xF0-0xFF as invalid, so 4-byte UTF-8
 * sequences (e.g. emoji) are removed entirely.
 *
 * @param mixed $data String or array to clean. Any other type is returned unchanged.
 * @param mixed $key  Optional field name, or array of field names. When given,
 *                    only those fields of each row are cleaned; rows that are
 *                    not arrays (or ArrayAccess objects) and fields that are
 *                    missing are skipped. An "empty" value (null, '', 0, '0',
 *                    array()) means "no filter".
 *
 * @return mixed The cleaned copy of $data.
 */
function remove_non_utf8_characters( $data, $key = null )
{
    // from http://magp.ie/2011/01/06/remove-non-utf8-characters-from-string-with-php/
    // NOTE(review): \x10 is redundant (already inside \x0E-\x19) and \x1A-\x1F are
    // not stripped by the first class - confirm this is intended.
    // NOTE(review): the second alternative also consumes the ASCII byte that
    // precedes stray continuation bytes, so "a\x80b" becomes "b".
    $regex = '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]|[\x00-\x7F][\x80-\xBF]+|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S';

    // Cleans one value: strings are filtered, arrays are walked recursively
    // (keys preserved), everything else is handed back untouched.
    $clean = function ( $value ) use ( &$clean, $regex ) {
        if ( is_string( $value ) )
        {
            // Replace with '' rather than '$1': capture group 1 is the invalid
            // lead byte itself, so '$1' would put it straight back.
            return preg_replace( $regex, '', $value );
        }
        if ( is_array( $value ) )
        {
            return array_map( $clean, $value );
        }
        return $value;
    };

    if ( !is_array( $data ) )
    {
        return $clean( $data );
    }

    // empty() is kept from the original contract, so a key of 0 / '0' cannot be
    // used as a filter and behaves like "clean everything".
    $keys = array();
    if ( !empty( $key ) )
    {
        $keys = is_array( $key ) ? $key : array( $key );
    }

    foreach ( $data as &$row )
    {
        if ( empty( $keys ) )
        {
            if ( is_object( $row ) )
            {
                // By-reference foreach walks the accessible properties of a
                // plain object row, which the original implementation also did.
                foreach ( $row as &$value )
                {
                    $value = $clean( $value );
                }
                unset( $value );
            }
            else
            {
                $row = $clean( $row );
            }
            continue;
        }

        // A key filter only makes sense for rows that can be indexed.
        if ( !is_array( $row ) && !( $row instanceof ArrayAccess ) )
        {
            continue;
        }

        foreach ( $keys as $k )
        {
            // Skip absent (or null) fields instead of creating them.
            if ( isset( $row[$k] ) )
            {
                $row[$k] = $clean( $row[$k] );
            }
        }
    }
    unset( $row ); // drop the dangling reference into $data

    return $data;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment