Created
March 3, 2011 11:27
-
-
Save nickdunn/852641 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Puts every tag that directly follows another tag on its own line.
 *
 * The input is split on "\n"; lines that are empty or contain only
 * whitespace are dropped, and in each remaining line any run of whitespace
 * between a ">" and the next "<" is replaced by a single newline.
 *
 * @param string $fixthistext Markup to split.
 *
 * @return string The processed lines joined with "\n"; an empty string when
 *                the input has no non-blank line.
 */
function fix_newlines_for_clean_html($fixthistext)
{
    // Start from an empty array so blank input reaches implode() with a real
    // array instead of an undefined variable.
    $fixedtext_array = array();

    foreach (explode("\n", $fixthistext) as $line_key => $line) {
        // Blank lines carry no markup; leave them out of the result.
        if (preg_match('/^\s*$/', $line)) {
            continue;
        }

        // "\s" already covers tabs, and it can never consume "<", so a plain
        // greedy match finds the same spans as the former "(\s|\t)*" with /U.
        $fixedtext_array[$line_key] = preg_replace('/>\s*</', ">\n<", $line);
    }

    return implode("\n", $fixedtext_array);
}
/**
 * Re-indents markup line by line according to tag nesting.
 *
 * The input is first passed through fix_newlines_for_clean_html() so that
 * adjacent tags sit on separate lines. Each line then has its leading
 * whitespace removed and is prefixed with one copy of $indent per nesting
 * level. The level goes up after a line holding only an opening tag (or a
 * lone "{") and goes down on a line holding a closing tag (or a lone "}").
 * Self-closing tags, "<!...>" declarations, void elements and lines that
 * both open and close a tag leave the level unchanged.
 *
 * A line that is exactly "</textarea>" or "</script>" is appended to the
 * previous output line instead of being placed on a line of its own.
 *
 * @param string $uncleanhtml Markup to indent.
 * @param string $indent      String repeated once per nesting level.
 *
 * @return string The indented lines joined with "\n".
 */
function clean_html_code($uncleanhtml, $indent = " ")
{
    // Void elements never have a closing tag, so they must not open a level.
    // The lookahead keeps e.g. <colgroup> from being mistaken for <col>.
    $void_tag_pattern = '/<(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?=[\s\/>]).*>/i';

    $lines = explode("\n", fix_newlines_for_clean_html($uncleanhtml));
    $cleanhtml_array = array();
    $indentlevel = 0;

    foreach ($lines as $line_key => $line) {
        // Strip the existing indentation only; tabs inside the line may be
        // separating attributes and have to survive.
        $line = preg_replace('/^\s+/', '', $line);
        $padding = str_repeat($indent, $indentlevel);

        $has_opening_tag = preg_match('/<[^\/](.*)>/', $line) === 1;
        $has_closing_tag = preg_match('/<\/(.*)>/', $line) === 1;

        if (
            preg_match('/<(.+)\/>/', $line)             // self-closing tag
            || preg_match('/<!(.*)>/', $line)           // doctype / "<!" declaration
            || ($has_opening_tag && $has_closing_tag)   // opened and closed on one line
        ) {
            $cleanhtml_array[$line_key] = $padding . $line;
        } elseif ($has_closing_tag || preg_match('/^\s*\}\s*$/', $line)) {
            // Closing tag or closing brace: step out one level first. The
            // level is clamped so a stray closing tag cannot push it below
            // zero and under-indent everything that follows.
            $indentlevel = max(0, $indentlevel - 1);

            $is_glued_closer = ($line === '</textarea>' || $line === '</script>');
            if ($is_glued_closer && isset($cleanhtml_array[$line_key - 1])) {
                // Attach the closer to the previous output line so no
                // newline/indent is inserted in front of it.
                $cleanhtml_array[$line_key] = $cleanhtml_array[$line_key - 1] . $line;
                unset($cleanhtml_array[$line_key - 1]);
            } else {
                $cleanhtml_array[$line_key] = str_repeat($indent, $indentlevel) . $line;
            }
        } elseif (
            ($has_opening_tag && !preg_match($void_tag_pattern, $line))
            || preg_match('/^\s*\{\s*$/', $line)
        ) {
            // Opening tag or opening brace: emit at the current level, then
            // step in for the lines that follow.
            $cleanhtml_array[$line_key] = $padding . $line;
            $indentlevel++;
        } else {
            // Plain text or a void element: indentation only.
            $cleanhtml_array[$line_key] = $padding . $line;
        }
    }

    return implode("\n", $cleanhtml_array);
}
/**
 * Extension that rewrites the doctype of generated frontend output to the
 * plain HTML5 doctype.
 */
class extension_html5_doctype extends Extension
{
    /**
     * Describes this extension.
     *
     * @return array Name, description, version, release date and author.
     */
    public function about()
    {
        $author = array(
            'name' => 'Nick Dunn'
        );

        return array(
            'name' => 'HTML5 doctype',
            'description' => 'Replace any generated HTML doctype with basic HTML5 doctype',
            'version' => '1.0',
            'release-date' => '2010-07-13',
            'author' => $author
        );
    }

    /**
     * Lists the delegates this extension subscribes to.
     *
     * @return array One subscription: the 'FrontendOutputPostGenerate'
     *               delegate on '/frontend/', handled by parse_html().
     */
    public function getSubscribedDelegates()
    {
        $subscriptions = array();

        $subscriptions[] = array(
            'page' => '/frontend/',
            'delegate' => 'FrontendOutputPostGenerate',
            'callback' => 'parse_html'
        );

        return $subscriptions;
    }

    /**
     * Delegate callback: replaces every "<!DOCTYPE ...>" declaration in the
     * output with "<!DOCTYPE html>".
     *
     * Re-indenting the output through clean_html_code() is currently
     * disabled.
     *
     * @param array $context Delegate context; its 'output' entry is read and
     *                       then overwritten with the rewritten markup.
     */
    public function parse_html($context)
    {
        // NOTE(review): $context is received by value, so the write below is
        // only visible to the caller if 'output' is passed as a reference
        // inside the array -- confirm against the delegate dispatcher.
        $context['output'] = preg_replace(
            "/<!DOCTYPE [^>]+>/",
            "<!DOCTYPE html>",
            $context['output']
        );
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment