Created
July 10, 2020 17:33
-
-
Save ArrayIterator/99c39ffe4a7419bbd775675f92ebbbe6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
declare(strict_types=1); | |
namespace ArrayIterator\App\Util; | |
/**
 * Stateless string-normalisation helpers: file names, HTML class names,
 * script stripping, directory separators, HTML tag balancing, cookie
 * domains and slugs.
 *
 * @package ArrayIterator\App\Util
 */
final class Normalizer
{
    /**
     * Transliteration map from the Latin-1 Supplement characters
     * U+00C0..U+00FF (stored here as UTF-8) to ASCII replacements.
     *
     * @var array<string, string>
     */
    private static $conversionTables = [
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A',
        'Æ' => 'AE', 'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', '×' => 'x',
        'Ø' => 'O', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y',
        'Þ' => 'Th', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a',
        'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
        'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ð' => 'd', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o',
        'ö' => 'o', '÷' => '+', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u', 'û' => 'u',
        'ü' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y',
    ];

    /**
     * Return the accent-to-ASCII transliteration map.
     *
     * @return array<string, string> UTF-8 character => ASCII replacement
     */
    public static function getConversionTables(): array
    {
        return self::$conversionTables;
    }

    /**
     * Reduce a file name to a conservative ASCII character set.
     *
     * Steps, in order:
     *  1. input that is not valid UTF-8 is re-encoded as if it were ISO-8859-1;
     *  2. tabs become spaces and the control characters 0x00-0x1F are removed;
     *  3. characters listed in the conversion table are transliterated;
     *  4. every byte outside `0-9 A-Z a-z - _ ( ) @ ~ . space` becomes `-`.
     *
     * Step 4 works byte-wise, so an untransliterated multi-byte character
     * yields one `-` per byte.
     *
     * @param string $string raw file name
     * @return string normalised file name
     * @throws \RuntimeException if a regular expression fails to execute
     */
    public static function normalizeFileName(
        string $string
    ) : string {
        // An empty pattern with the `u` modifier matches only when the
        // subject is well-formed UTF-8; anything else is taken to be Latin-1.
        if (preg_match('//u', $string) !== 1) {
            $string = self::latin1ToUtf8($string);
        }

        $string = str_replace("\t", ' ', $string);
        $string = self::pregReplace('~[\x00-\x1f]~', '', $string);
        $string = strtr($string, self::$conversionTables);

        // \x20 is the space character, which is kept on purpose.
        return self::pregReplace('~[^0-9A-Za-z\-_()@\~\x20.]~', '-', $string);
    }

    /**
     * Strip a string down to the characters A-Z, a-z, 0-9, `_` and `-`.
     *
     * Percent-encoded octets (`%xx`) are removed as a whole first, so their
     * hex digits do not leak into the result. When nothing is left and a
     * non-empty fallback is given, the sanitised fallback is returned.
     *
     * @param string $class    candidate class name
     * @param string $fallback value to sanitise when $class sanitises to ''
     * @return string sanitised class name, possibly ''
     * @throws \RuntimeException if a regular expression fails to execute
     */
    public static function normalizeHtmlClass(
        string $class,
        string $fallback = ''
    ) : string {
        $sanitized = self::pregReplace('|%[a-fA-F0-9][a-fA-F0-9]|', '', $class);
        $sanitized = self::pregReplace('/[^A-Za-z0-9_-]/', '', $sanitized);

        if ('' === $sanitized && '' !== $fallback) {
            return self::normalizeHtmlClass($fallback);
        }

        return $sanitized;
    }

    /**
     * Remove every `<script ...>...</script>` element, tags included.
     *
     * Matching is case-insensitive and spans newlines; the opening tag may
     * or may not carry attributes.
     *
     * NOTE(review): this is a regex filter, not an HTML sanitiser; do not
     * rely on it alone for untrusted markup.
     *
     * @param string $data markup to filter
     * @return string markup without script elements
     * @throws \RuntimeException if the regular expression fails to execute,
     *                           so unfiltered markup is never returned
     */
    public static function removeJSContent(string $data) : string
    {
        return self::pregReplace(
            '~<script\b[^>]*>.*?</script\s*>~is',
            '',
            $data
        );
    }

    /**
     * Normalize Directory Separator
     *
     * Every run of `/` and/or `\` becomes a single DIRECTORY_SEPARATOR.
     *
     * @param string $path
     * @return string
     */
    public static function normalizeSeparator(string $path) : string
    {
        // Split and join instead of preg_replace so that a backslash
        // separator is never interpreted as a replacement escape.
        $segments = preg_split('~[\\\\/]+~', $path);

        return false === $segments
            ? $path
            : implode(DIRECTORY_SEPARATOR, $segments);
    }

    /**
     * Balances tags of string using a modified stack.
     *
     * @author Leonard Lin <[email protected]>
     * @license GPL
     * @copyright November 4, 2001
     * @version 1.1
     *
     * Modified by Scott Reilly (coffee2code) 02 Aug 2004
     *      1.1  Fixed handling of append/stack pop order of end text
     *           Added Cleaning Hooks
     *      1.0  First Version
     *
     * Custom modification: every `<!` (doctype, comments) is hidden behind a
     * random placeholder while tags are processed and restored afterwards.
     *
     * @param string $text Text to be balanced.
     * @return string Balanced text.
     * @throws \RuntimeException if a regular expression fails to execute
     */
    public static function forceBalanceTags(string $text) : string
    {
        $tagStack  = [];
        $stackSize = 0;
        $tagQueue  = '';
        $newText   = '';

        // Known single-entity/self-closing tags
        $singleTags = [
            'area', 'base', 'basefont', 'br', 'col',
            'command', 'embed', 'frame', 'hr', 'img',
            'input', 'isindex', 'link', 'meta', 'param', 'source',
        ];
        // Single tags that are emitted exactly as written (no "/" appended)
        $rawSingleTags = ['img', 'meta', 'link', 'input'];
        // Tags that can be immediately nested within themselves
        $nestableTags = ['blockquote', 'div', 'object', 'q', 'span'];

        // fix doctype: drop whitespace around the "!" of "< ! DOCTYPE"
        $text = self::pregReplace('/<(\s+)?!(\s+)?(DOCTYPE)/i', '<!$3', $text);

        // Hide "<!" so that the tag pattern below skips doctype and comments.
        $rand      = sprintf('%1$s_%2$s_%1$s', '%', mt_rand(10000, 50000));
        $randQuote = preg_quote($rand, '~');
        $text      = str_replace('<!', '< ' . $rand, $text);

        // bug fix for comments - in case you REALLY meant to type '< !--'
        $text = str_replace('< !--', '<    !--', $text);
        // bug fix for LOVE <3 (and other situations with '<' before a number)
        $text = self::pregReplace('#<([0-9])#', '&lt;$1', $text);

        $pattern = "~<((?!(?:\s+){$randQuote})/?[\w:]*)\s*([^>]*)>~";

        while (preg_match($pattern, $text, $regex)) {
            $newText .= $tagQueue;

            $i = strpos($text, $regex[0]);
            $l = strlen($regex[0]);

            // clear the shifter
            $tagQueue = '';

            if (isset($regex[1][0]) && '/' === $regex[1][0]) {
                // End tag
                $tag = strtolower(substr($regex[1], 1));

                if ($stackSize <= 0) {
                    // too many closing tags: drop this one
                    $tag = '';
                } elseif ($tagStack[$stackSize - 1] === $tag) {
                    // closes the tag on top of the stack
                    $tag = '</' . $tag . '>';
                    array_pop($tagStack);
                    $stackSize--;
                } else {
                    // closing tag not at top: if it is open further down,
                    // close everything above it (and itself) via the queue
                    for ($j = $stackSize - 1; $j >= 0; $j--) {
                        if ($tagStack[$j] === $tag) {
                            for ($k = $stackSize - 1; $k >= $j; $k--) {
                                $tagQueue .= '</' . array_pop($tagStack) . '>';
                                $stackSize--;
                            }
                            break;
                        }
                    }
                    $tag = '';
                }
            } else {
                // Begin tag
                $tag = strtolower($regex[1]);

                if ('' === $tag || in_array($tag, $rawSingleTags, true)) {
                    // empty tag "< >" or raw single tag: emit unchanged
                } elseif (substr($regex[2], -1) === '/') {
                    // Presents itself as self-closing but is not a known
                    // single tag: close it immediately with a real end tag.
                    if (! in_array($tag, $singleTags, true)) {
                        $regex[2] = trim(substr($regex[2], 0, -1)) . "></$tag";
                    }
                } elseif (in_array($tag, $singleTags, true)) {
                    // known single tag that does not close itself: do so
                    $regex[2] .= '/';
                } else {
                    // Regular tag. If the same non-nestable tag is on top
                    // of the stack, close the previous one first.
                    if ($stackSize > 0
                        && ! in_array($tag, $nestableTags, true)
                        && $tagStack[$stackSize - 1] === $tag
                    ) {
                        $tagQueue = '</' . array_pop($tagStack) . '>';
                        $stackSize--;
                    }
                    $stackSize = array_push($tagStack, $tag);
                }

                // Attributes
                $attributes = $regex[2];
                if ('' !== $attributes && '>' !== $attributes[0]) {
                    $attributes = ' ' . $attributes;
                }

                $tag = '<' . $tag . $attributes . '>';
                // If already queuing a close tag, then put this tag on, too
                if ('' !== $tagQueue) {
                    $tagQueue .= $tag;
                    $tag = '';
                }
            }

            $newText .= substr($text, 0, $i) . $tag;
            $text     = substr($text, $i + $l);
        }

        // Flush the queue, then the remaining text
        $newText .= $tagQueue . $text;

        // Close whatever is still open, innermost first
        while (null !== ($open = array_pop($tagStack))) {
            $newText .= '</' . $open . '>';
        }

        // Restore the hidden "<!" and undo the comment work-arounds
        $newText = str_replace("< {$rand}", '<!', $newText);
        $newText = str_replace('< !--', '<!--', $newText);
        $newText = str_replace('<    !--', '< !--', $newText);

        return $newText;
    }

    /**
     * Set cookie domain with .domain.ext for multi sub domain
     *
     * Scheme, path and port are stripped. IP addresses, `localhost`, hosts
     * without a dot and hosts that fail validation are returned without a
     * leading dot. Any other host is cut down to its last two labels, so
     * `a.b.example.com` gives `.example.com`. Multi-part public suffixes
     * are not recognised: `example.co.uk` gives `.co.uk`.
     *
     * @param string $domain host name or URL
     * @return string|null cookie domain, or null when no host can be parsed
     * @throws \RuntimeException if a regular expression fails to execute
     */
    public static function wildcardCrossDomain(string $domain)
    {
        $domain = strtolower($domain);
        $domain = self::pregReplace('~^\s*(?:(http|ftp)s?|sftp|xmp)://~i', '', $domain);
        $domain = self::pregReplace('~/.*$~', '', $domain);

        // Bare IPv4 / IPv6 address
        if (false !== filter_var($domain, FILTER_VALIDATE_IP)) {
            return $domain;
        }

        $parsed = parse_url('http://' . $domain . '/');
        if (! is_array($parsed) || ! isset($parsed['host'])) {
            return null;
        }
        // parse_url() has dropped any ":port" suffix at this point
        $domain = $parsed['host'];

        $looksValid = 1 === preg_match(
            '/^((\[[0-9a-f:]+\])|(\d{1,3}(\.\d{1,3}){3})|[a-z0-9\-.]+)(:\d+)?$/i',
            $domain
        );
        if (! $looksValid
            || false !== filter_var($domain, FILTER_VALIDATE_IP) // was "ip:port"
            || 'localhost' === $domain
            || false === strpos($domain, '.')
        ) {
            return $domain;
        }

        $labels = explode('.', $domain);
        if (count($labels) > 2) {
            $labels = array_slice($labels, -2);
        }

        return '.' . implode('.', $labels);
    }

    /**
     * Turn arbitrary text into a slug.
     *
     * Every byte outside `a-z A-Z 0-9 - _` becomes `-`, each run of `-`/`_`
     * collapses to the last character of the run, and leading/trailing
     * `-`/`_` are trimmed. Letter case is preserved.
     *
     * @param string $slug
     * @return string
     * @throws \RuntimeException if a regular expression fails to execute
     */
    public static function normalizeSlug(string $slug) : string
    {
        $slug = self::pregReplace('~[^a-z0-9\-_]~i', '-', trim($slug));
        $slug = self::pregReplace('~([\-_])+~', '$1', $slug);

        return trim($slug, '-_');
    }

    /**
     * Normalise $slug and make it unique against a list of existing slugs
     * by appending `-1`, `-2`, ... as needed.
     *
     * @param string $slug
     * @param array $slugCollections existing slugs; only string and integer
     *                               entries are considered
     * @return string
     */
    public static function uniqueSlug(string $slug, array $slugCollections) : string
    {
        // String-keyed set: exact matching and O(1) lookups.
        $taken = [];
        foreach ($slugCollections as $existing) {
            if (is_string($existing) || is_int($existing)) {
                $taken[(string) $existing] = true;
            }
        }

        return self::uniqueSlugCallback(
            $slug,
            static function (string $candidate) use ($taken) : bool {
                return ! isset($taken[$candidate]);
            }
        );
    }

    /**
     * Normalise $slug and append `-1`, `-2`, ... to the normalised base
     * until $callback accepts the candidate.
     *
     * A candidate is rejected only when $callback returns exactly `false`.
     * A callback that always returns `false` makes this loop forever.
     *
     * @param string $slug
     * @param callable $callback receives the candidate slug
     * @return string
     */
    public static function uniqueSlugCallback(string $slug, callable $callback) : string
    {
        $base      = self::normalizeSlug($slug);
        $candidate = $base;

        for ($suffix = 1; false === $callback($candidate); $suffix++) {
            $candidate = $base . '-' . $suffix;
        }

        return $candidate;
    }

    /**
     * Re-encode a byte string from ISO-8859-1 to UTF-8.
     *
     * Each byte 0x80-0xFF becomes the two-byte UTF-8 sequence of the code
     * point with the same number; ASCII bytes pass through unchanged.
     *
     * @param string $bytes
     * @return string
     */
    private static function latin1ToUtf8(string $bytes) : string
    {
        $converted = preg_replace_callback(
            '~[\x80-\xff]~',
            static function (array $match) : string {
                $byte = ord($match[0]);

                return chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
            },
            $bytes
        );

        return null === $converted ? $bytes : $converted;
    }

    /**
     * preg_replace() for a string subject that throws instead of returning null.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $subject
     * @return string
     * @throws \RuntimeException when PCRE reports an error
     */
    private static function pregReplace(string $pattern, string $replacement, string $subject) : string
    {
        $result = preg_replace($pattern, $replacement, $subject);
        if (null === $result) {
            throw new \RuntimeException(
                sprintf('Regular expression %s failed (PCRE error %d).', $pattern, preg_last_error())
            );
        }

        return $result;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment