Skip to content

Instantly share code, notes, and snippets.

@AlekVolsk
Last active April 12, 2020 21:48
Show Gist options
  • Save AlekVolsk/71e963fadaaca16e4e3ae303a3cecabb to your computer and use it in GitHub Desktop.
Save AlekVolsk/71e963fadaaca16e4e3ae303a3cecabb to your computer and use it in GitHub Desktop.
Класс минификации html на выходе
<?php
/*
Usage
$html = Optimize::html($html);
*/
/**
 * Regex-based minifier for HTML output, with helpers for embedded CSS and JavaScript.
 *
 * All methods are static. Typical use: $html = Optimize::html($html);
 *
 * Fragments that must survive the whitespace passes untouched are swapped for
 * "\x01<index>\x02" placeholder keys while the document is processed and put
 * back at the end of html(). Every PCRE call is guarded: if the engine reports
 * a failure (for example a backtrack-limit hit), the affected step is skipped
 * and the text is passed through unchanged instead of being lost.
 */
class Optimize
{
    /** @var bool|null Whether html() was asked to minify JavaScript; read by _html_chunks(). */
    private static $_html_is_js;

    /** @var bool|null Whether html() was asked to minify inline style attributes; read by _html_chunks(). */
    private static $_html_is_css;

    /**
     * Collapses every line break, together with the horizontal whitespace before it
     * and any bytes in the \x03-\x20 range after it, into a single "\r".
     *
     * @param string $s Text to process.
     * @return string Processed text, or $s unchanged if the regex engine fails.
     */
    public static function strip_spaces($s)
    {
        return self::_replace('/ [\x20\t]*+[\r\n][\x03-\x20]*+/sxSX', "\r", $s);
    }

    /**
     * Minifies a CSS fragment: drops comments, removes whitespace around punctuation,
     * strips units from zero lengths and shortens doubled hex colours (#aabbcc -> #abc).
     *
     * @param string $s CSS source.
     * @return string Minified CSS.
     */
    public static function css($s)
    {
        if (strpos($s, '/*') !== false) {
            $s = self::_replace('~/\*.*?\*/~sSX', ' ', $s);
        }
        if (!preg_match('/[\x03-\x20]/sSX', $s)) {
            // No whitespace at all: nothing further to do.
            return $s;
        }

        // Mark ")<space>token" so the space after a closing parenthesis survives the trim below.
        $s = self::_replace('/\)[\x03-\x20]++(?=[-a-zA-Z\d])/sSX', ")\x01", $s);
        // Split on punctuation runs and trim each piece, removing whitespace around punctuation.
        $s = self::_split_map('/([{}():;,%!*=]++)/sSX', $s, 'trim');
        $s = str_replace(")\x01", ') ', $s);
        // Collapse remaining whitespace runs to one space.
        $s = self::_replace('/[\x03-\x20]++/sSX', ' ', $s);
        // "0px", "0em", ... -> "0" (not when preceded by a digit or a dot).
        $s = self::_replace('/ (?<![\d\.])0(?:em|ex|px|in|cm|mm|pt|pc|%)(?![a-zA-Z%])/sxSX', '0', $s);
        // ":#aabbcc" / ":#aabbccdd" -> ":#abc" / ":#abcd".
        $s = self::_replace('/ :\# ([\da-fA-F])\1([\da-fA-F])\2([\da-fA-F])\3(?:([\da-fA-F])\4)?+(?![\da-fA-F])/sxSX', ':#$1$2$3$4', $s);

        return $s;
    }

    /**
     * Alias of javascript().
     *
     * @param string $s             JavaScript source.
     * @param bool   $is_vacuumize  See javascript().
     * @param bool   $is_script_tag See javascript().
     * @return string
     */
    public static function js($s, $is_vacuumize = true, $is_script_tag = false)
    {
        return self::javascript($s, $is_vacuumize, $is_script_tag);
    }

    /**
     * Removes comments from JavaScript and, optionally, redundant whitespace.
     *
     * String literals and /.../ sequences are matched so that they are skipped
     * verbatim. Line comments containing "-->", "<![CDATA[" or "]]>" are kept.
     *
     * @param string $s             JavaScript source.
     * @param bool   $is_vacuumize  When true, whitespace around punctuation and between
     *                              non-identifier characters is removed as well.
     * @param bool   $is_script_tag When true (and $is_vacuumize is true), a "<!--" opener
     *                              is matched and kept on its own line.
     * @return string Processed source, or $s unchanged if the regex engine fails.
     */
    public static function javascript($s, $is_vacuumize = true, $is_script_tag = false)
    {
        if ($is_vacuumize) {
            $re_chunks = ($is_script_tag ? '|<!-- (?!\/\/-->)' : '') . '|[\x20\r\n\t]* [;{}()][;{}()\x20\r\n\t]*|[\x20\r\n\t]+(?![a-zA-Z\d\_\$])|(?<![a-zA-Z\d\_\$]|\x01@\x02)[\x20\r\n\t]+';
        } else {
            $re_chunks = '';
        }

        $result = preg_replace_callback(
            '/ \/\* .*? \*\/ | \/\/ (?>(?!\/\/) [^\r\n])* | " (?>[^"\\\\\r\n]+ |\\\\.)* " | \' (?>[^\'\\\\\r\n]+|\\\\.)* \' | \/ (?>[^\/\\\\\r\n]+|\\\\.)+ \/ | \+ [\r\n\t]++ (?=\+) | - [\r\n\t]++(?=\-)' . $re_chunks . '/sxSX',
            array(__CLASS__, '_js_vacuumize'),
            $s
        );
        if ($result === null) {
            // PCRE failure: return the source untouched rather than an empty string.
            return $s;
        }

        // Drop the marker _js_vacuumize() appends after preserved line comments.
        return str_replace("\x01@\x02", '', $result);
    }

    /**
     * Minifies an HTML document or fragment.
     *
     * Contents of <pre>, <code>, <textarea> and <nooptimize> are preserved verbatim
     * (the <nooptimize> tags themselves are removed from the result). <script> and
     * <style> bodies are processed by javascript() / css(); HTML comments are removed
     * except conditional comments and short tag-free comments; whitespace around a
     * fixed list of block-level tags is trimmed.
     *
     * @param string $s      HTML source.
     * @param bool   $is_js  Minify whitespace in scripts and on* attributes (comments in
     *                       <script> bodies are removed regardless). Processing on*
     *                       attributes relies on utf8_html_entity_decode(), loaded from
     *                       'utf8_html_entity_decode.php' when not already defined.
     * @param bool   $is_css Minify inline style="" attributes.
     * @return string Minified HTML.
     */
    public static function html($s, $is_js = false, $is_css = false)
    {
        static $re_attrs_fast_safe = '(?![a-zA-Z\d])(?>[^>"\']++|(?<=[\=\x03-\x20]|\xc2\xa0)"[^"]*+"|(?<=[\=\x03-\x20]|\xc2\xa0)\'[^\']*+\')*[^>]*+';

        // 1. Shelter the bodies of whitespace-sensitive elements behind placeholders.
        $s = self::_replace_callback('/<(pre|code|textarea|nooptimize)(' . $re_attrs_fast_safe . ')(>.*?<\/\\1)>/sxiSX', '_html_pre', $s);

        // 2. Scripts, styles, comments and (optionally) inline handlers / style attributes.
        self::$_html_is_js = $is_js;
        self::$_html_is_css = $is_css;
        $s = self::_replace_callback('/(<((?i:script|style))' . $re_attrs_fast_safe . '(?<!\/)>)((?> [^<]+|(?!<\/?+(?i:\\2)' . $re_attrs_fast_safe . '(?<!\/)>).)++)(<\/(?i:\\2)>)|(<!--\[[\x03-\x20]*+ if [^a-zA-Z] [^\]]++\]>)|<!-- .*? -->' . ($is_js || $is_css ? '|(?<=[\x20\r\n\t"\']|\xc2\xa0)(on[a-zA-Z]{3,}+|style)(?>[\x03-\x20]+|\xc2\xa0)*+\=(?>[\x03-\x20]+|\xc2\xa0)*+("[^"]*+"|\'[^\']*+\')' : '') . '/sxSX', '_html_chunks', $s);
        self::$_html_is_js = null;
        self::$_html_is_css = null;

        // 3. Trim whitespace on both sides of block-level tags and conditional-comment markers.
        $s = self::_split_map('/((?><\/?+(?:br|p|div|li|ol|ul|table|t[drh]|meta|link|h[1-6]|form|option|select|title|script|style|map|area|head|body|html)' . $re_attrs_fast_safe . '>|<!--\[if [^\]]++\]>|<!\[endif\]-->)(?:<\/?+noindex>)?+)/sxiSX', $s, 'trim');

        // 4. Whitespace right after an opening tag that is itself preceded by whitespace
        //    (except <input> / <img>) is redundant.
        $s = self::_replace('/(?<=[\x03-\x20])<[a-z][a-z\d]*+ (?<!<input|<img)' . $re_attrs_fast_safe . '>\K[\x03-\x20]++/sxiSX', '', $s);

        // 5. Whitespace right before a closing tag that is surrounded by whitespace is redundant.
        $s = self::_split_map('/(?<=[\x03-\x20])(<\/[a-zA-Z][a-zA-Z\d]*+>)(?=[\x03-\x20])/sxSX', $s, 'rtrim');

        $s = self::strip_spaces($s);
        $s = self::_html_placeholder($s, true);

        return str_replace(array('<nooptimize>', '</nooptimize>'), '', $s);
    }

    /**
     * preg_replace() that returns the subject unchanged when the engine fails.
     */
    private static function _replace($pattern, $replacement, $s)
    {
        $result = preg_replace($pattern, $replacement, $s);

        return $result === null ? $s : $result;
    }

    /**
     * preg_replace_callback() bound to a method of this class; returns the subject
     * unchanged when the engine fails.
     */
    private static function _replace_callback($pattern, $method, $s)
    {
        $result = preg_replace_callback($pattern, array(__CLASS__, $method), $s);

        return $result === null ? $s : $result;
    }

    /**
     * Splits $s on $pattern (keeping delimiters, dropping empty pieces), applies
     * $callback to every piece and glues the pieces back together. Returns $s
     * unchanged when the split fails.
     */
    private static function _split_map($pattern, $s, $callback)
    {
        $parts = preg_split($pattern, $s, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $s;
        }

        return implode('', array_map($callback, $parts));
    }

    /**
     * Callback for step 1 of html(): keeps the tag name and attributes, replaces
     * everything from the opening ">" up to the closing tag name with a placeholder.
     *
     * @param array $m Match: [1] tag name, [2] attributes, [3] ">" . body . "</tag".
     * @return string
     */
    private static function _html_pre(array $m)
    {
        return '<' . $m[1] . $m[2] . self::_html_placeholder($m[3]) . '>';
    }

    /**
     * Stores a fragment and returns its placeholder key, or (when $is_restore is
     * true) substitutes all stored fragments back into $s and clears the store.
     *
     * @param string $s          Fragment to shelter, or the document to restore into.
     * @param bool   $is_restore Switches between store and restore mode.
     * @return string Placeholder key (store mode) or restored text (restore mode).
     */
    private static function _html_placeholder($s, $is_restore = false)
    {
        static $tags = array();

        if ($is_restore) {
            $s = strtr($s, $tags);
            $tags = array();
            return $s;
        }

        // A fragment may already contain keys of earlier fragments (e.g. a <pre> that
        // sits inside a <script> string). Restoration is a single strtr() pass that
        // does not rescan inserted text, so resolve those keys now; otherwise they
        // would leak into the final output.
        if ($tags && strpos($s, "\x01") !== false) {
            $s = strtr($s, $tags);
        }

        $key = "\x01" . count($tags) . "\x02";
        $tags[$key] = $s;

        return $key;
    }

    /**
     * Callback for step 2 of html().
     *
     * Match groups: [1] opening <script>/<style> tag, [2] its name, [3] body,
     * [4] closing tag, [5] conditional-comment opener, [6] attribute name
     * (on* or style), [7] quoted attribute value. A match with none of these
     * groups set is an ordinary HTML comment.
     *
     * @param array $m Match array from preg_replace_callback().
     * @return string Replacement text.
     */
    private static function _html_chunks(array $m)
    {
        // <script>...</script> or <style>...</style>
        if (!empty($m[1])) {
            if (!$m[3]) {
                return $m[0];
            }
            if (strtolower($m[2]) === 'script') {
                $body = self::javascript($m[3], self::$_html_is_js, true);
            } else {
                $body = self::css($m[3]);
            }
            return $m[1] . self::_html_placeholder(self::strip_spaces($body)) . $m[4];
        }

        // style="..."
        if (isset($m[6]) && $m[6] === 'style') {
            $value = self::$_html_is_css ? self::css($m[7]) : $m[7];
            return self::_html_placeholder('style=' . self::strip_spaces($value));
        }

        // on*="..."
        if (!empty($m[6])) {
            if (!self::$_html_is_js) {
                return self::_html_placeholder(self::strip_spaces($m[6] . '=' . $m[7]));
            }
            // Strip the surrounding quotes, decode entities, minify, re-encode.
            $value = substr($m[7], 1, -1);
            if (!function_exists('utf8_html_entity_decode')) {
                require_once 'utf8_html_entity_decode.php';
            }
            $value = utf8_html_entity_decode($value, true);
            $value = self::strip_spaces(self::javascript($value, self::$_html_is_js, false));
            return self::_html_placeholder($m[6] . '="' . htmlspecialchars($value) . '"');
        }

        // Conditional-comment opener: keep.
        if (!empty($m[5])) {
            return $m[0];
        }

        // Ordinary comment: keep only if it is short printable ASCII (or starts with a
        // non-breaking space) and contains no tag-like markup; drop it otherwise.
        if (preg_match('/^<!--(?:[\x20-\x7e]{4,60}+$|\xc2\xa0|&nbsp;)/sSX', $m[0]) && !preg_match('/<[a-zA-Z][a-zA-Z\d]*+ [^>]*+ >/sxSX', $m[0])) {
            return $m[0];
        }

        return '';
    }

    /**
     * Callback for javascript(): decides what to do with each matched token.
     *
     * @param array $m Match array; only $m[0] is used.
     * @return string Replacement text.
     */
    private static function _js_vacuumize(array $m)
    {
        $s = $m[0];
        $token_type = substr($s, 0, 2);

        if ($token_type === '/*') {
            return '';
        }
        if ($token_type === '//') {
            // Keep line comments that carry HTML/XML comment or CDATA delimiters.
            if (strpos($s, '-->') !== false || strpos($s, '<![CDATA[') !== false || strpos($s, ']]>') !== false) {
                return $s . "\r\x01@\x02";
            }
            return '';
        }
        if ($token_type === '<!') {
            return $s . "\r";
        }
        // String literals, /.../ sequences and "+ +" / "- -" pairs are left untouched.
        if (strpos('"\'/+-', $s[0]) !== false) {
            return $s;
        }

        // Whitespace / punctuation run: remove the whitespace, then turn ";}" into "};".
        $s = str_replace(array(' ', "\r", "\n", "\t"), '', $s);

        return preg_replace('/ ;++ (\}++) $/sxSX', '$1;', $s);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment