Created
October 4, 2016 15:39
-
-
Save jtsternberg/5fdb15bf7cc5d7029b0bae8179d0388e to your computer and use it in GitHub Desktop.
DOMDocument sub-class for getting content w/o body tag, etc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use DOMDocument; | |
/**
 * DOMDocument sub-class that loads an HTML fragment as UTF-8 and can hand
 * back the parsed markup without the surrounding <html>/<body> wrapper.
 */
class MyDom extends DOMDocument {

    /**
     * Initiate the DOMDocument object, ensuring UTF-8
     *
     * @see http://stackoverflow.com/a/8218649/1883421
     *
     * @since 3.0.0
     *
     * @param string $content HTML content
     */
    public function __construct( $content ) {
        // Initialise the underlying DOMDocument before using it.
        parent::__construct( '1.0', 'UTF-8' );

        // Collect libxml parse errors internally instead of silencing every
        // diagnostic with `@`; the previous setting is restored afterwards.
        $previous = libxml_use_internal_errors( true );

        // The XML encoding declaration prefix makes the parser read the
        // markup as UTF-8 (see the Stack Overflow link above).
        $this->loadHTML( '<?xml encoding="UTF-8">' . $content );

        // Only discard the buffered errors when the caller was not already
        // collecting them, so an existing error buffer is left intact.
        if ( ! $previous ) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors( $previous );

        // Fixes data attributes like:
        // `data-gcatts="{"align":"right","linkto":"attachment-page","size":"full"}"`
        // to correct:
        // data-gcatts='{"align":"right","linkto":"attachment-page","size":"full"}'
        $this->normalizeDocument();
    }

    /**
     * Returns the normalized content.
     *
     * Serialises the children of the <body> element rather than removing
     * the literal `<body>` / `</body>` strings, so the result is correct
     * even when the body element carries attributes or classes.
     *
     * @since 3.0.0
     *
     * @return string HTML content, or an empty string when the document has no body.
     */
    public function get_content() {
        $body = $this->getElementsByTagName( 'body' )->item( 0 );

        // Without a body, saveHTML( null ) would dump the entire document.
        if ( null === $body ) {
            return '';
        }

        $html = '';
        foreach ( $body->childNodes as $child ) {
            $html .= $this->saveHTML( $child );
        }

        return $html;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This only works when there are no attributes or classes on the body.