Created
May 16, 2013 21:12
-
-
Save alixaxel/5595151 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| /** | |
| * The MIT License | |
| * http://creativecommons.org/licenses/MIT/ | |
| * | |
| * Tidy Wrapper for HTML 5 Indentation | |
| * Copyright (c) 2013 Alix Axel <alix.axel@gmail.com> | |
| **/ | |
/**
 * Repairs and re-indents an HTML / HTML 5 string through the tidy extension.
 *
 * Fragments are returned body-only. Input that starts (after optional leading
 * whitespace) with "<!DOCTYPE" or "<html" is treated as a full document: the
 * whole document is emitted and prefixed with "<!DOCTYPE html>".
 *
 * @param string     $string   Markup to tidy (fragment or full document).
 * @param array|null $options  Tidy configuration overrides, merged on top of the
 *                             defaults below (caller values win). Any non-array
 *                             value is ignored and the defaults are used as-is.
 * @param string     $encoding Encoding name used for the char/input/output
 *                             encoding options and passed to tidy_repair_string().
 *
 * @return string|false The tidied markup, or false when the tidy extension is
 *                      not loaded or tidy produced no output.
 */
function Tidy5($string, $options = null, $encoding = 'utf8')
{
    if (extension_loaded('tidy') !== true)
    {
        return false;
    }

    # NOTE(review): option availability depends on the installed libtidy release;
    # confirm that every name below is still recognised by the target version.
    $default = array
    (
        'anchor-as-name' => false,
        'break-before-br' => true,
        'char-encoding' => $encoding,
        'decorate-inferred-ul' => false,
        'doctype' => 'omit', # the HTML 5 doctype is prepended manually further down
        'drop-empty-paras' => false,
        'drop-font-tags' => true,
        'drop-proprietary-attributes' => false,
        'force-output' => false, # consider true when the markup uses user defined tags
        'hide-comments' => false,
        'indent' => true,
        'indent-attributes' => false,
        'indent-spaces' => 2, # consider 0 to strip indentation (except in pre-like tags)
        'input-encoding' => $encoding,
        'join-styles' => false,
        'logical-emphasis' => false,
        'merge-divs' => false,
        'merge-spans' => false,
        # "menutidy" is the temporary stand-in for <menu>, see the rename below
        'new-blocklevel-tags' => 'article aside audio details dialog figcaption figure footer header hgroup menutidy nav section source summary track video',
        'new-empty-tags' => 'command embed keygen source track wbr',
        'new-inline-tags' => 'canvas command data datalist embed keygen mark meter output progress time wbr',
        'newline' => 0,
        'numeric-entities' => false,
        'output-bom' => false,
        'output-encoding' => $encoding,
        'output-html' => true,
        'preserve-entities' => true,
        'quiet' => true,
        'quote-ampersand' => true,
        'quote-marks' => false,
        'repeated-attributes' => 1,
        'show-body-only' => true,
        'show-warnings' => false,
        'sort-attributes' => 1,
        'tab-size' => 4,
        'tidy-mark' => false,
        'vertical-space' => true,
        'wrap' => 0,
    );

    # Normalise the options BEFORE touching individual keys, so that a non-array
    # argument (e.g. a string) can never be indexed like an array.
    $options = (is_array($options) === true) ? array_merge($default, $options) : $default;

    # Detect full documents on a left-trimmed copy: leading whitespace or newlines
    # must not demote a complete document to a body-only fragment.
    $doctype = null;
    $leading = ltrim($string);

    if ((strncasecmp($leading, '<!DOCTYPE', 9) === 0) || (strncasecmp($leading, '<html', 5) === 0))
    {
        $doctype = '<!DOCTYPE html>';
        $options['show-body-only'] = false; # always wins over a caller supplied value
    }

    # Temporarily rename <menu> / </menu> to the custom block-level "menutidy" tag
    # (presumably so tidy does not treat it as the legacy element - confirm).
    # Matching is case-insensitive so that <MENU> is covered as well.
    $menuTags = array('<menu', '</menu');
    $tidyTags = array('<menutidy', '</menutidy');
    $hasMenu = (stripos($string, '<menu') !== false);

    if ($hasMenu === true)
    {
        $string = str_ireplace($menuTags, $tidyTags, $string);
    }

    $string = tidy_repair_string($string, $options, $encoding);

    # Explicit failure check: empty() would also reject a legitimate "0" result.
    if ((is_string($string) !== true) || ($string === ''))
    {
        return false;
    }

    if ($hasMenu === true)
    {
        # Undo the temporary rename on the tidied output.
        $string = str_ireplace($tidyTags, $menuTags, $string);
    }

    if ($doctype !== null)
    {
        $string = $doctype . "\n" . $string;
    }

    return $string;
}
Author
How about this
<!DOCTYPE html>
<head>
<title>test</title>
</head>
<body>
<a href=""><div>asas</div></a>
</body>
</html>
Would result in:
<!DOCTYPE html>
<html>
<head>
<title>
test
</title>
</head>
<body>
<a href=""></a>
<div>
asas
</div>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@xeoncross:
Sorry for the delay but I didn't get any notification, don't know why!
Anyway, I don't post-process the DOM with libxml. I have a wrapper around DOMDocument and SimpleXML that acts as a convenient XPath selector, and another one that purifies HTML, but neither of them (de-)indents HTML, which is the purpose of this helper (besides being able to work with HTML5). I tried doing the same with DOMDocument, but the results were very poor; I can't remember exactly, but I think I recall problems with comments and self-closing tags. Like I said, I can't remember exactly, but there had to be a reason for me to abandon that approach. This is mostly to hide where your partials are coming from (blind de-indentation with something like preg_replace does not respect the indentation within pre tags); if you don't care about that, this is mostly useless. =)