Created
May 15, 2015 10:34
-
-
Save mcjwsk/9992903fc29d720b0c47 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
* Generate regexp for naughty words censoring
* Depends on whether installed PHP version supports unicode properties
*
* An asterisk in $word is a wildcard. Every other character is matched
* literally (the word is passed through preg_quote() first). The generated
* pattern uses "#" as delimiter, is case-insensitive, and captures the whole
* matched word in group 1.
*
* If $word is empty, or the wildcard expansion fails (preg_replace() returns
* null, e.g. because $word is not valid UTF-8), a pattern that can never
* match is returned, so a broken entry censors nothing instead of matching
* empty positions between non-word characters.
*
* @param string	$word			word template to be replaced
* @param bool	$use_unicode	whether or not to take advantage of PCRE supporting unicode
*
* @return string	$preg_expr		regex to use with word censor
*/
function get_censor_preg_expression($word, $use_unicode = true)
{
	// Result of the capability probe, cached across calls
	static $unicode_support = null;

	// Check whether PHP version supports unicode properties
	if (is_null($unicode_support))
	{
		$version_ok = version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='));

		// preg_match() returns false when the pattern cannot be compiled;
		// the warning is suppressed on purpose because this is only a feature probe
		$unicode_support = $version_ok && @preg_match('/\p{L}/u', 'a') !== false;
	}

	// Unescape the asterisk to simplify further conversions
	$word = str_replace('\*', '*', preg_quote($word, '#'));

	if ($use_unicode && $unicode_support)
	{
		// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes
		$word = preg_replace(
			array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'),
			array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'),
			$word
		);

		// The word must not be preceded or followed by a digit, letter, underscore or hyphen
		$prefix = '#(?<![\p{Nd}\p{L}_-])(';
		$suffix = ')(?![\p{Nd}\p{L}_-])#iu';
	}
	else
	{
		// Replace the asterisk inside the pattern, at the start and at the end of it with regexes
		$word = preg_replace(
			array('#(?<=\S)\*+(?=\S)#iu', '#^\*+#', '#\*+$#'),
			array('(\x20*?\S*?)', '\S*?', '\S*?'),
			$word
		);

		// The word must not be preceded or followed by a non-whitespace character
		$prefix = '#(?<!\S)(';
		$suffix = ')(?!\S)#iu';
	}

	// preg_replace() yields null on failure (e.g. malformed UTF-8 with the "u"
	// modifier) and an empty word yields an empty body. Both would build a
	// pattern with an empty group that matches zero-width positions, so
	// return a pattern that never matches instead.
	if ($word === null || $word === '')
	{
		return '#(?!)#';
	}

	// Generate the final substitution
	$preg_expr = $prefix . $word . $suffix;

	return $preg_expr;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment