Created
November 16, 2010 13:13
-
-
Save gcoop/701814 to your computer and use it in GitHub Desktop.
Simple cleanse function to remove special chars from a string (amongst other things) in PHP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Normalise "smart" punctuation and common mis-encoded byte sequences to plain characters.
 *
 * The function works on raw bytes in three passes:
 *   1. strip slashes on PHP versions that can still have magic quotes enabled;
 *   2. repair a fixed list of mojibake sequences (UTF-8 punctuation that was
 *      decoded with a single-byte charset and then re-encoded);
 *   3. translate typographic quotes, dashes and a few symbols to simpler equivalents.
 *
 * All literals are written as byte escapes so the result does not depend on
 * the encoding this source file happens to be saved in.
 *
 * @param string $string      Text to clean.
 * @param array  $allowedTags Currently unused; only the commented-out kses call below reads it.
 *
 * @return string The cleaned text.
 */
function cleanse($string, $allowedTags = array())
{
    // get_magic_quotes_gpc() always returns false from PHP 5.4 on and no longer
    // exists in PHP 8, so it is only consulted on versions where it can be true.
    if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
        $string = stripslashes($string);
    }

    // $string = kses($string, $allowedTags); // For kses {@see http://sourceforge.net/projects/kses/}

    // ============
    // Remove MS Word Special Characters
    // ============

    // Mojibake repairs, applied one after another in this order.
    // The original lists also held pairs whose search and replacement were
    // identical as written (curly quotes, pound, not-sign, trademark, the ASCII
    // apostrophe); those were no-ops and are omitted.
    // NOTE(review): the hosting page warned that the file contained hidden
    // Unicode text, so those search strings may originally have been mojibake
    // that was lost in transit -- verify against the upstream file.
    $mojibake = array(
        "\xC3\xA2\xE2\x82\xAC"     => "\xE2\x80\x9D", // "a-circumflex + euro sign" -> right double quote (made plain below)
        "\xC3\xA2\xC2\x80\xC2\x99" => "'",            // double-encoded right single quote
        "\xC3\xA2\xC2\x80\xC2\x93" => ' - ',          // double-encoded en dash
        "\xC3\xA2\xC2\x80\xC2\x9D" => '"',            // double-encoded right double quote
        "\xC3\xA2\x3F\x3F"         => "'",            // a-circumflex followed by two question marks
    );
    $string = str_replace(array_keys($mojibake), array_values($mojibake), $string);

    // UTF-8 punctuation and symbols mapped to simpler characters.
    // The degree sign and n-tilde are deliberately absent: they pass through unchanged.
    $translations = array(
        "\xC2\xAB"     => '"',            // left guillemet
        "\xC2\xBB"     => '"',            // right guillemet
        "\xE2\x80\x98" => "'",            // left single quote
        "\xE2\x80\x99" => "'",            // right single quote
        "\xE2\x80\x9A" => "'",            // single low-9 quote
        "\xE2\x80\x9B" => "'",            // single high-reversed-9 quote
        "\xE2\x80\x9C" => '"',            // left double quote
        "\xE2\x80\x9D" => '"',            // right double quote
        "\xE2\x80\x9E" => '"',            // double low-9 quote
        "\xE2\x80\x9F" => '"',            // double high-reversed-9 quote
        "\xE2\x80\xB9" => "'",            // single left angle quote
        "\xE2\x80\xBA" => "'",            // single right angle quote
        "\xE2\x80\x93" => '-',            // en dash
        "\xC2\xBA"     => "\xC2\xB0",     // masculine ordinal indicator -> degree sign
        "\xE2\x81\x83" => "\xE2\x80\xA2", // hyphen bullet -> bullet
    );

    // Stray single bytes from a legacy one-byte charset. In well-formed UTF-8
    // these byte values only occur inside multi-byte characters (0x96 as a
    // continuation byte, 0xD5 as a lead byte), so translating them there would
    // corrupt the text. They are therefore applied only to input that is not
    // valid UTF-8. preg_match() with the /u modifier returns false on such input.
    if (preg_match('//u', $string) !== 1) {
        $translations += array(
            "\x96" => "\xC3\xB1", // -> n-tilde
            "\xD5" => "'",
        );
    }

    // strtr() with an array tries the longest key first at each position, so
    // the multi-byte keys win over the single-byte ones.
    $string = strtr($string, $translations);

    // To support another character, dump its bytes (for example with bin2hex())
    // and add the sequence to one of the tables above.

    // ============
    // END
    // ============

    return $string;
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment