Last active
March 4, 2016 16:30
-
-
Save dhaupin/9b6ea6f1e49f1d86ba46 to your computer and use it in GitHub Desktop.
Function - Filter rogue special characters for PDF/print output from a WYSIWYG "full-paste" or an arbitrary DB source
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// @last https://3v4l.org/0LHkO

// Start sample
// Runs clean() on a short string of special characters with both optional
// passes switched off ($strict = false, $punct = false) and prints the result.
$str = '® ‡ ¼';
echo clean($str, false, false);
// End sample
/**
 * Replace typographic/special characters with named HTML entities and
 * normalise whitespace, so pasted WYSIWYG or database text renders
 * predictably in PDF/print output.
 *
 * Processing order:
 *   1. (strict only) quotes become entities; CR, LF and TAB become a space.
 *   2. Characters from the base table become their HTML entity.
 *   3. (punct only) a space is inserted after any of ! ? , . that is
 *      directly followed by a non-whitespace character.
 *   4. Every run of whitespace is collapsed to a single space.
 *
 * Caveat: step 3 is purely textual, so "3.14" becomes "3. 14" and
 * "example.com" becomes "example. com". Pass $punct = false for such input.
 *
 * @param string|array $str    Text to clean. An array of strings is passed
 *                             through as-is by the underlying str_replace /
 *                             preg_replace calls.
 * @param bool         $strict Also encode ' and " and flatten CR/LF/TAB
 *                             (default true).
 * @param bool         $punct  Add a trailing space after punctuation
 *                             (default true).
 *
 * @return string|array|null The cleaned text; null only if preg_replace
 *                           reports an error.
 */
function clean($str, $strict = true, $punct = true) {
    // Special character => HTML entity.
    $base_pool = array(
        '‘' => '&lsquo;',
        '’' => '&rsquo;',
        '‚' => '&sbquo;',
        '“' => '&ldquo;',
        '”' => '&rdquo;',
        '„' => '&bdquo;',
        '©' => '&copy;',
        '®' => '&reg;',
        '™' => '&trade;',
        '°' => '&deg;',
        '•' => '&bull;',
        '–' => '&ndash;',
        '—' => '&mdash;',
        '…' => '&hellip;',
        '¼' => '&frac14;',
        '½' => '&frac12;',
        '¾' => '&frac34;',
        '‰' => '&permil;',
        '±' => '&plusmn;',
        '†' => '&dagger;',
        '‡' => '&Dagger;',
        // NOTE(review): this row was whitespace-only on both sides in the
        // recovered source; assumed to be U+00A0 (no-break space) => &nbsp;
        // - confirm against the original.
        "\xC2\xA0" => '&nbsp;',
        // Must stay last: repairs ampersands that were entity-encoded twice.
        '&amp;amp;' => '&amp;',
    );

    // Extra replacements applied only in strict mode.
    $strict_pool = array(
        '\'' => '&#39;',
        '"' => '&quot;',
        "\r" => ' ',
        "\n" => ' ',
        "\t" => ' ',
    );

    // Strict entries are merged in front so they are applied before the base
    // table; none of their outputs contain a base-table key.
    $pool = $strict ? array_merge($strict_pool, $base_pool) : $base_pool;

    // With array arguments str_replace applies the pairs one after another,
    // left to right, exactly like looping over the table.
    $str = str_replace(array_keys($pool), array_values($pool), $str);

    if ($punct) {
        // Insert the missing space after sentence punctuation.
        $str = preg_replace('/([!?,.])(\S)/', '$1 $2', $str);
    }

    // Collapse any run of whitespace into a single space.
    $str = preg_replace("/\s+/", ' ', $str);

    return $str;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment