Created
November 6, 2013 19:37
-
-
Save pjdietz/7342772 to your computer and use it in GitHub Desktop.
PHP Functions for converting string to ASCII
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Normalize non-ASCII characters to ASCII counterparts where possible.
 *
 * Every character listed in the internal map is replaced by its ASCII
 * transliteration; some expand to two characters (e.g. 'Æ' => 'Ae').
 * Characters that are not in the map are passed through unchanged.
 *
 * The map keys are multi-byte string literals, so a match requires $str to
 * use the same encoding as this source file (presumably UTF-8 -- confirm).
 *
 * @param string $str Text to transliterate.
 * @return string $str with every mapped character replaced.
 */
function squashCharacters($str)
{
    // The map is built on the first call only and reused afterwards.
    static $normalizeChars = null;
    if ($normalizeChars === null) {
        $normalizeChars = array(
            'À'=>'A', 'Á'=>'A', 'Â'=>'A', 'Ã'=>'A', 'Ä'=>'A', 'Å'=>'A', 'Æ'=>'Ae',
            'Ç'=>'C',
            'È'=>'E', 'É'=>'E', 'Ê'=>'E', 'Ë'=>'E',
            'Ì'=>'I', 'Í'=>'I', 'Î'=>'I', 'Ï'=>'I',
            // NOTE(review): 'Ð' maps to 'Dj' while 'ð' below maps to 'o'.
            // Kept as-is so existing output does not change; confirm intent.
            'Ð'=>'Dj',
            'Ñ'=>'N',
            'Ò'=>'O', 'Ó'=>'O', 'Ô'=>'O', 'Õ'=>'O', 'Ö'=>'O', 'Ø'=>'O',
            'Ù'=>'U', 'Ú'=>'U', 'Û'=>'U', 'Ü'=>'U',
            'Ý'=>'Y',
            'Þ'=>'B',
            'ß'=>'Ss',
            'à'=>'a', 'á'=>'a', 'â'=>'a', 'ã'=>'a', 'ä'=>'a', 'å'=>'a', 'æ'=>'ae',
            'ç'=>'c', 'è'=>'e', 'é'=>'e', 'ê'=>'e', 'ë'=>'e',
            'ì'=>'i', 'í'=>'i', 'î'=>'i', 'ï'=>'i',
            'ð'=>'o', 'ñ'=>'n', 'ò'=>'o', 'ó'=>'o', 'ô'=>'o', 'õ'=>'o', 'ö'=>'o', 'ø'=>'o',
            // 'ü' was missing although its uppercase form 'Ü' is mapped above.
            'ù'=>'u', 'ú'=>'u', 'û'=>'u', 'ü'=>'u',
            'ý'=>'y',
            'þ'=>'b',
            'ÿ'=>'y',
            // 'Ś' added so both cases of the letter are handled, like 'Š'/'š'.
            'Š'=>'S', 'š'=>'s', 'Ś'=>'S', 'ś'=>'s',
            'Ž'=>'Z', 'ž'=>'z',
            'ƒ'=>'f'
        );
    }

    // With an array argument strtr() tries the longest keys first and never
    // re-scans text it has already replaced.
    return strtr($str, $normalizeChars);
}
/**
 * Convert all string fields in $item to ASCII, in place.
 *
 * Each string field is first normalized with squashCharacters()
 * (á -> a, ñ -> n, etc.). If any non-ASCII bytes remain afterwards, the whole
 * field is replaced by the matching field from $template when one is set,
 * otherwise by $default.
 *
 * Fields whose value is not a string (numbers, booleans, null, nested arrays
 * or objects) are left untouched.
 *
 * @param array|object $item     Array or object containing fields to convert;
 *                               modified in place.
 * @param array|object $template Optional fallback values keyed by field name.
 * @param string       $default  Value to use when the field is not set in
 *                               $template (or no template is given).
 * @param array        $ignore   Optional list of field names to leave as-is.
 * @return void
 */
function convertFieldsToAscii(&$item, $template=null, $default='', array $ignore=null)
{
    // Field names are compared as strings with strict matching. A loose
    // in_array() on PHP < 8 treats an integer key such as 0 as equal to any
    // non-numeric string, which would skip fields that were never listed.
    $ignored = $ignore ? array_map('strval', $ignore) : array();

    foreach ($item as $field => &$value) {
        // Skip fields in the $ignore list.
        if ($ignored && in_array((string) $field, $ignored, true)) {
            continue;
        }

        // Only strings can be transliterated; passing arrays or null on to
        // strtr() raises errors or deprecations on current PHP versions.
        if (!is_string($value)) {
            continue;
        }

        // Normalize non-ASCII characters with ASCII counterparts.
        $value = squashCharacters($value);

        // Any byte outside 0x00-0x7F means a non-ASCII character survived.
        // The pattern has no /u modifier, so it inspects raw bytes and works
        // regardless of the string's encoding.
        if (preg_match('/[^\x00-\x7F]/', $value) !== 1) {
            continue;
        }

        // Prefer the template's value for this field; fall back to $default.
        if (is_object($template) && isset($template->{$field})) {
            $value = $template->{$field};
        } elseif (is_array($template) && isset($template[$field])) {
            $value = $template[$field];
        } else {
            $value = $default;
        }
    }

    // Break the reference so later writes to $value cannot touch $item.
    unset($value);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks. I'd like to mention that German umlauts are conventionally transliterated to two-character sequences:
[ 'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue', 'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue'] and also ['ß' => 'sz' ]