Skip to content

Instantly share code, notes, and snippets.

@mnaberez
Created February 13, 2012 22:09
Show Gist options
  • Save mnaberez/1820897 to your computer and use it in GitHub Desktop.
Save mnaberez/1820897 to your computer and use it in GitHub Desktop.
Trim unnecessary whitespace from an HTML file or ERB template.
<?php
/**
 * Trim unnecessary whitespace from an HTML file or ERB template.
 *
 * This is taken directly from Smarty 3.1.7 (http://www.smarty.net):
 * libs/plugins/outputfilter.trimwhitespace.php
 *
 * Changes:
 * - Removed argument "Smarty_Internal_Template $smarty".
 * - Added ERB tags ("<% ... %>", "<%= ... %>") to "capture html elements not to be messed with".
 * - Protected blocks are restored by placeholder index instead of by order of
 *   appearance, so a <script> that precedes (or contains) an IE conditional
 *   comment is no longer swapped with it.
 *
 * Processing steps:
 * 1. Normalise line breaks to "\n".
 * 2. Replace IE conditional comments with numbered placeholders.
 * 3. Strip all remaining HTML comments.
 * 4. Replace ERB tags and <script>/<pre>/<textarea> elements with placeholders.
 * 5. Collapse whitespace between tags and between attributes.
 * 6. Put the protected blocks back, byte-for-byte.
 *
 * @param string $source HTML or ERB markup
 * @return string the markup with redundant whitespace and HTML comments removed
 */
function trimwhitespace($source)
{
    // Protected fragments; the array index is the number embedded in the placeholder.
    $store = array();

    // Unify Line-Breaks to \n
    $source = preg_replace("/\015\012|\015|\012/", "\n", $source);

    // capture Internet Explorer Conditional Comments
    $_offset = 0;
    if (preg_match_all('#<!--\[[^\]]+\]>.*?<!\[[^\]]+\]-->#is', $source, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        foreach ($matches as $match) {
            $_length = strlen($match[0][0]);
            $replace = '@!@SMARTY:' . count($store) . ':SMARTY@!@';
            $store[] = $match[0][0];
            // Offsets were captured against the unmodified string, so shift them
            // by the number of bytes removed by earlier replacements.
            $source = substr_replace($source, $replace, $match[0][1] - $_offset, $_length);
            $_offset += $_length - strlen($replace);
        }
    }

    // Strip all HTML-Comments
    $source = preg_replace('#<!--.*?-->#ms', '', $source);

    // capture html elements not to be messed with.
    // ERB tags are closed by "%>", not by "</%>", so they need their own
    // alternative; the \1 back-reference only applies to the HTML elements.
    $_offset = 0;
    if (preg_match_all('#<%.*?%>|<(script|pre|textarea)[^>]*>.*?</\\1>#is', $source, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        foreach ($matches as $match) {
            $_length = strlen($match[0][0]);
            $replace = '@!@SMARTY:' . count($store) . ':SMARTY@!@';
            $store[] = $match[0][0];
            $source = substr_replace($source, $replace, $match[0][1] - $_offset, $_length);
            $_offset += $_length - strlen($replace);
        }
    }

    $expressions = array(
        // replace multiple spaces between tags by a single space
        // can't remove them entirely, because that might break poorly implemented CSS display:inline-block elements
        // (the look-ahead captures nothing, so only group 1 is re-emitted)
        '#(:SMARTY@!@|>)\s+(?=@!@SMARTY:|<)#s' => '\1 ',
        // remove spaces between attributes (but not in attribute values!)
        '#(([a-z0-9]\s*=\s*(["\'])[^\3]*?\3)|<[a-z0-9_]+)\s+([a-z/>])#is' => '\1 \4',
        // note: for some very weird reason trim() seems to remove spaces inside attributes.
        // maybe a \0 byte or something is interfering?
        '#^\s+<#Ss' => '<',
        '#>\s+$#Ss' => '>',
    );
    $source = preg_replace(array_keys($expressions), array_values($expressions), $source);

    // Restore the protected fragments, newest placeholder first. A fragment
    // captured later (e.g. a <script>) may itself contain the placeholder of a
    // fragment captured earlier (an IE conditional comment), so walking the
    // indexes downwards resolves nested placeholders as well.
    for ($i = count($store) - 1; $i >= 0; $i--) {
        $source = str_replace('@!@SMARTY:' . $i . ':SMARTY@!@', $store[$i], $source);
    }

    return $source;
}
// errors as exceptions
/**
 * Error handler that converts PHP errors into ErrorException instances.
 *
 * A registered handler is invoked regardless of the error_reporting setting,
 * so the current level is checked here: errors the user has excluded are
 * handed back to PHP's standard handler instead of aborting the script.
 *
 * @param int    $errno   level of the error raised
 * @param string $errstr  error message
 * @param string $errfile file in which the error was raised
 * @param int    $errline line at which the error was raised
 * @return bool false when the error level is not part of error_reporting
 * @throws ErrorException for every error level that is being reported
 */
function exception_error_handler($errno, $errstr, $errfile, $errline)
{
    if (!(error_reporting() & $errno)) {
        // Not a reported level: let PHP's standard handling continue.
        return false;
    }
    throw new ErrorException($errstr, 0, $errno, $errfile, $errline);
}
set_error_handler("exception_error_handler");
// trim whitespace
// Command-line entry point: read <in_filename>, trim it, write <out_filename>.
// isset()/'' checks are used instead of empty() so that a file literally
// named "0" is accepted.
if (!isset($argv[1], $argv[2]) || $argv[1] === '' || $argv[2] === '') {
    // Usage errors go to stderr with a non-zero status so that calling
    // scripts can detect the failure (die() with a string exits with 0).
    fwrite(STDERR, "Usage: php trimwhitespace.php <in_filename> <out_filename>\n");
    exit(1);
}
$source = file_get_contents($argv[1]);
file_put_contents($argv[2], trimwhitespace($source));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment