gmazzap · February 18, 2018 20:08 · gmazzap · Feb 19, 2018
diff --git a/Brain_HtmlTokenizer.php b/Brain_HtmlTokenizer.php
 <?php
 /**
 * Brain_HtmlTokenizer class file.
 *
 * (c) Giuseppe Mazzapica
 *
 * @license http://opensource.org/licenses/MIT MIT
 * @author  Giuseppe Mazzapica <[email protected]>
 */

 /**
 * A basic HTML parser. Requires PHP 5.3+ because `new static` (late static binding) is used.
 */
 class Brain_HtmlTokenizer {

 	/**
 	 * Lower-case names of the HTML void elements.
 	 *
 	 * Void elements never get a closing tag, so they must not be tracked as parents of the
 	 * tokens that follow them.
 	 *
 	 * @var string[]
 	 */
 	private static $void_tags = array(
 		'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
 		'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
 	);

 	/**
 	 * Parsed HTML tokens.
 	 *
 	 * Each token is an array with the keys:
 	 * - 'tag':        array( tag name, tag type ), both null for text tokens.
 	 * - 'attributes': array of attributes for tags, null for text tokens.
 	 * - 'content':    the text of a text token, empty string for tags.
 	 * - 'parents':    lower-case names of the tags that were open when the token was found.
 	 *
 	 * @var array[]
 	 */
 	private $tokens = array();

 	/**
 	 * Named constructor, creates an instance from the given HTML string.
 	 *
 	 * @param string $html HTML string to create the instance from. Anything that is not a
 	 *                     non-empty string produces an instance without tokens.
 	 *
 	 * @return Brain_HtmlTokenizer New instance.
 	 */
 	public static function load_html( $html ) {

 		// Compare with '' explicitly: a loose check would also discard the valid input "0".
 		if ( ! is_string( $html ) || '' === $html ) {
 			return new static( array() );
 		}

 		$instance         = new static();
 		$instance->tokens = $instance->tokenize( $html );

 		return $instance;
 	}

 	/**
 	 * Private on purpose, use the named constructor.
 	 *
 	 * @param array[] $tokens HTML tokens.
 	 */
 	private function __construct( array $tokens = array() ) {

 		$this->tokens = $tokens;
 	}

 	/**
 	 * Return the (maybe) processed HTML string.
 	 *
 	 * @return string Serialized tokens, in their original order.
 	 */
 	public function __toString() {

 		$html = '';
 		foreach ( $this->tokens as $token ) {
 			$html .= $this->serialize_token( $token );
 		}

 		return $html;
 	}

 	/**
 	 * Strips the given HTML tags together with everything nested inside them.
 	 *
 	 * Tag names are matched case-insensitively.
 	 *
 	 * @param string[] $tags HTML tags to remove.
 	 *
 	 * @return Brain_HtmlTokenizer Instance without given tags.
 	 */
 	public function strip_tags( array $tags ) {

 		$tags = $this->normalize_names( $tags );
 		if ( ! $tags ) {
 			return $this;
 		}

 		$tokens = array();
 		foreach ( $this->tokens as $i => $token ) {

 			list( $tag ) = $token['tag'];

 			// The token is one of the tags to strip.
 			if ( null !== $tag && in_array( strtolower( $tag ), $tags, true ) ) {
 				continue;
 			}

 			// The token (tag or text) lives inside one of the tags to strip.
 			if ( array_intersect( $token['parents'], $tags ) ) {
 				continue;
 			}

 			$tokens[ $i ] = $token;
 		}

 		$this->tokens = $tokens;

 		return $this;
 	}

 	/**
 	 * Keep only the given HTML tags and remove all the others, including what they contain.
 	 *
 	 * Text that is not wrapped in any tag is always kept. Tag names are matched
 	 * case-insensitively.
 	 *
 	 * @param string[] $tags HTML tags to keep.
 	 *
 	 * @return Brain_HtmlTokenizer Instance with only given tags.
 	 */
 	public function keep_tags( array $tags ) {

 		$tags   = $this->normalize_names( $tags );
 		$tokens = array();
 		foreach ( $this->tokens as $i => $token ) {

 			// At least one ancestor is not allowed, so the whole subtree goes away.
 			if ( array_diff( $token['parents'], $tags ) ) {
 				continue;
 			}

 			list( $tag ) = $token['tag'];
 			if ( null === $tag || in_array( strtolower( $tag ), $tags, true ) ) {
 				$tokens[ $i ] = $token;
 			}
 		}

 		$this->tokens = $tokens;

 		return $this;
 	}

 	/**
 	 * Strips given attributes from given HTML tags (or all of them).
 	 *
 	 * Attribute and tag names are matched case-insensitively.
 	 *
 	 * @param array      $attributes Attributes to remove. Non-string and empty items are ignored.
 	 * @param array|null $tags       HTML tags to remove attributes from. When null (default),
 	 *                               means "all of them".
 	 *
 	 * @return Brain_HtmlTokenizer Instance without given attributes in given tags.
 	 */
 	public function strip_attributes( array $attributes, array $tags = null ) {

 		$blacklist = array_flip( $this->normalize_names( $attributes ) );
 		if ( ! $blacklist || array() === $tags ) {
 			return $this;
 		}

 		return $this->filter_attributes( $blacklist, false, $tags );
 	}

 	/**
 	 * Keep only given attributes in given HTML tags and remove all the others.
 	 *
 	 * Attribute and tag names are matched case-insensitively. When no valid attribute name is
 	 * given, every attribute of the targeted tags is removed.
 	 *
 	 * @param array      $attributes Attributes to keep. Non-string and empty items are ignored.
 	 * @param array|null $tags       HTML tags to keep attributes for. When null (default), means
 	 *                               "all of them".
 	 *
 	 * @return Brain_HtmlTokenizer Instance with only given attributes in given tags.
 	 */
 	public function keep_attributes( array $attributes, array $tags = null ) {

 		if ( array() === $tags ) {
 			return $this;
 		}

 		$whitelist = array_flip( $this->normalize_names( $attributes ) );

 		return $this->filter_attributes( $whitelist, true, $tags );
 	}

 	/**
 	 * Shared implementation of strip_attributes() and keep_attributes().
 	 *
 	 * @param array      $names Map whose keys are the lower-case attribute names to act on.
 	 * @param bool       $keep  True to keep only the listed attributes, false to remove them.
 	 * @param array|null $tags  Tags to act on, null for every tag.
 	 *
 	 * @return Brain_HtmlTokenizer The same instance, for chaining.
 	 */
 	private function filter_attributes( array $names, $keep, array $tags = null ) {

 		if ( null !== $tags ) {
 			$tags = $this->normalize_names( $tags );
 		}

 		$tokens = array();
 		foreach ( $this->tokens as $i => $token ) {

 			list( $tag ) = $token['tag'];
 			if (
 				null === $tag
 				|| ! $token['attributes']
 				|| ( null !== $tags && ! in_array( strtolower( $tag ), $tags, true ) )
 			) {
 				$tokens[ $i ] = $token;
 				continue;
 			}

 			$filtered = array();
 			foreach ( $token['attributes'] as $name => $value ) {
 				$listed = isset( $names[ strtolower( (string) $name ) ] );
 				if ( $listed === $keep ) {
 					$filtered[ $name ] = $value;
 				}
 			}

 			$token['attributes'] = $filtered;
 			$tokens[ $i ]        = $token;
 		}

 		$this->tokens = $tokens;

 		return $this;
 	}

 	/**
 	 * Keeps the non-empty strings of the given list, converted to lower case.
 	 *
 	 * @param array $names Tag or attribute names as passed by the caller.
 	 *
 	 * @return string[] Lower-case names.
 	 */
 	private function normalize_names( array $names ) {

 		$normalized = array();
 		foreach ( $names as $name ) {
 			if ( is_string( $name ) && '' !== $name ) {
 				$normalized[] = strtolower( $name );
 			}
 		}

 		return $normalized;
 	}

 	/**
 	 * Process the given HTML string and build the list of tokens.
 	 *
 	 * @param string $html HTML string to process.
 	 *
 	 * @return array[] Array of tokens, each is an array with keys: 'tag', 'attributes',
 	 *                 'content' and 'parents'.
 	 */
 	private function tokenize( $html ) {

 		$tokens  = array();
 		$parents = array();

 		// The first chunk is the text found before any "<"; every other chunk is what followed
 		// a "<" in the source.
 		$chunks       = explode( '<', $html );
 		$leading_text = array_shift( $chunks );
 		if ( '' !== $leading_text ) {
 			$tokens[] = $this->no_tag_token( $leading_text, $parents );
 		}

 		foreach ( $chunks as $chunk ) {

 			$token_parts = explode( '>', $chunk, 2 );

 			// A "<" that is not followed by the start of a tag name and by a closing ">" is
 			// literal text: put the "<" back instead of silently dropping it.
 			if ( count( $token_parts ) < 2 || ! preg_match( '~^[a-zA-Z/!?]~', $chunk ) ) {
 				$tokens[] = $this->no_tag_token( '<' . $chunk, $parents );
 				continue;
 			}

 			list( $tag_name, $tag_type, $attributes ) = $this->tokenize_tag( $token_parts[0] );
 			if ( '' === $tag_name ) {
 				$tokens[] = $this->no_tag_token( '<' . $chunk, $parents );
 				continue;
 			}

 			// Parents are tracked in lower case so that lookups are case-insensitive.
 			$scope_name = strtolower( $tag_name );

 			if ( 'close' === $tag_type ) {
 				// Close the innermost matching open tag, and with it anything left open inside.
 				// A closing tag that matches nothing is ignored.
 				$open_at = array_search( $scope_name, array_reverse( $parents, true ), true );
 				if ( false !== $open_at ) {
 					$parents = array_slice( $parents, 0, $open_at );
 				}
 			}

 			$tokens[] = array(
 				'tag'        => array( $tag_name, $tag_type ),
 				'attributes' => 'close' === $tag_type
 					? array()
 					: $this->tokenize_attributes( $attributes ),
 				'content'    => '',
 				'parents'    => $parents,
 			);

 			// Void elements, comments, doctype and processing instructions never get a closing
 			// tag: tracking them as parents would corrupt the ancestry of what follows.
 			$never_closed = '!' === $scope_name[0]
 				|| '?' === $scope_name[0]
 				|| in_array( $scope_name, self::$void_tags, true );
 			if ( 'open' === $tag_type && ! $never_closed ) {
 				$parents[] = $scope_name;
 			}

 			// Strict comparison, so that the text "0" is not lost.
 			if ( '' !== $token_parts[1] ) {
 				$tokens[] = $this->no_tag_token( $token_parts[1], $parents );
 			}
 		}

 		return $tokens;
 	}

 	/**
 	 * Creates a token for a string that does not belong to any tag.
 	 *
 	 * @param string $content HTML chunk content.
 	 * @param array  $parents Stack of the tags the chunk is nested into.
 	 *
 	 * @return array Text token.
 	 */
 	private function no_tag_token( $content, array $parents ) {

 		return array(
 			'tag'        => array( null, null ),
 			'attributes' => null,
 			'content'    => $content,
 			'parents'    => $parents,
 		);
 	}

 	/**
 	 * Process given HTML tag string and return tag name, tag type (open, close, self-close) and
 	 * the "raw" attributes.
 	 *
 	 * @param string $tag What is found between "<" and ">", e.g. 'a href="#"' or '/a'.
 	 *
 	 * @return string[] A 3 items array:
 	 *                  - Tag name, e.g. "div"
 	 *                  - Tag type, can be "open", "close", and "self-close"
 	 *                  - Not parsed attributes string
 	 */
 	private function tokenize_tag( $tag ) {

 		$self_close = '/' === substr( $tag, -1 );
 		if ( $self_close ) {
 			// The cast covers PHP versions where substr() returns false for an empty result.
 			$tag = (string) substr( $tag, 0, -1 );
 		}

 		$split      = preg_split( '~\s+~', trim( $tag ), 2 );
 		$tag_name   = $split[0];
 		$attributes = isset( $split[1] ) ? $split[1] : '';

 		if ( $self_close ) {
 			$tag_type = 'self-close';
 		} else {
 			$tag_type = '/' === substr( $tag_name, 0, 1 ) ? 'close' : 'open';
 		}

 		return array( trim( $tag_name, '/' ), $tag_type, $attributes );
 	}

 	/**
 	 * Process given HTML tag attributes string and return the attributes in array form.
 	 *
 	 * Supports double-quoted, single-quoted and unquoted values (empty values included),
 	 * optional whitespace around "=", and valueless (flag) attributes, which get the value
 	 * `true`. When an attribute is repeated, the last occurrence wins.
 	 *
 	 * @param string     $attr   HTML tag attributes string to process.
 	 * @param array|null $tokens Already parsed attributes to add to, keys are attribute names,
 	 *                           values attribute values.
 	 *
 	 * @return array Attribute names as keys; values are strings safe to be wrapped in double
 	 *               quotes, or `true` for flags.
 	 */
 	private function tokenize_attributes( $attr, array $tokens = null ) {

 		if ( null === $tokens ) {
 			$tokens = array();
 		}

 		// 1: name, 2: "=" plus value (absent for flags), 3: value with its wrapping quotes.
 		$pattern = '~([^\s=]+)(\s*=\s*("[^"]*"|\'[^\']*\'|\S*))?~';
 		if ( ! preg_match_all( $pattern, (string) $attr, $matches, PREG_SET_ORDER ) ) {
 			return $tokens;
 		}

 		foreach ( $matches as $match ) {

 			if ( ! isset( $match[2] ) || '' === $match[2] ) {
 				$tokens[ $match[1] ] = true;
 				continue;
 			}

 			$value = $match[3];
 			$quote = substr( $value, 0, 1 );
 			if (
 				strlen( $value ) > 1
 				&& ( '"' === $quote || '\'' === $quote )
 				&& substr( $value, -1 ) === $quote
 			) {
 				$value = (string) substr( $value, 1, -1 );
 			}

 			// Values are always serialized inside double quotes: a literal double quote must
 			// become an entity (a backslash does not escape anything in HTML).
 			$tokens[ $match[1] ] = str_replace( '"', '&quot;', $value );
 		}

 		return $tokens;
 	}

 	/**
 	 * Take a processed token in array form and serialize it back to string.
 	 *
 	 * @param array $token HTML token.
 	 *
 	 * @return string HTML for the token.
 	 */
 	private function serialize_token( array $token ) {

 		list( $tag, $type ) = $token['tag'];
 		$content            = $token['content'];

 		if ( null === $tag ) {
 			return $content;
 		}

 		if ( 'close' === $type ) {
 			return "</{$tag}>{$content}";
 		}

 		$string = "<{$tag}";
 		foreach ( $token['attributes'] as $name => $value ) {
 			$string .= true === $value ? " {$name}" : " {$name}=\"{$value}\"";
 		}
 		$string .= 'self-close' === $type ? '/>' : '>';

 		return $string . $content;
 	}
 }
	<?php
	/**
	* Brain_HtmlTokenizer class file.
	*
	* (c) Giuseppe Mazzapica
	*
	* @license http://opensource.org/licenses/MIT MIT
	* @author Giuseppe Mazzapica <[email protected]>
	*/

	/**
	* A basic HTML parser. Requires PHP 5.3+ because `new static` (late static binding) is used.
	*/
	class Brain_HtmlTokenizer {

		/**
		 * Lower-case names of the HTML void elements.
		 *
		 * Void elements never get a closing tag, so they must not be tracked as parents of the
		 * tokens that follow them.
		 *
		 * @var string[]
		 */
		private static $void_tags = array(
			'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
			'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
		);

		/**
		 * Parsed HTML tokens.
		 *
		 * Each token is an array with the keys:
		 * - 'tag':        array( tag name, tag type ), both null for text tokens.
		 * - 'attributes': array of attributes for tags, null for text tokens.
		 * - 'content':    the text of a text token, empty string for tags.
		 * - 'parents':    lower-case names of the tags that were open when the token was found.
		 *
		 * @var array[]
		 */
		private $tokens = array();

		/**
		 * Named constructor, creates an instance from the given HTML string.
		 *
		 * @param string $html HTML string to create the instance from. Anything that is not a
		 *                     non-empty string produces an instance without tokens.
		 *
		 * @return Brain_HtmlTokenizer New instance.
		 */
		public static function load_html( $html ) {

			// Compare with '' explicitly: a loose check would also discard the valid input "0".
			if ( ! is_string( $html ) || '' === $html ) {
				return new static( array() );
			}

			$instance         = new static();
			$instance->tokens = $instance->tokenize( $html );

			return $instance;
		}

		/**
		 * Private on purpose, use the named constructor.
		 *
		 * @param array[] $tokens HTML tokens.
		 */
		private function __construct( array $tokens = array() ) {

			$this->tokens = $tokens;
		}

		/**
		 * Return the (maybe) processed HTML string.
		 *
		 * @return string Serialized tokens, in their original order.
		 */
		public function __toString() {

			$html = '';
			foreach ( $this->tokens as $token ) {
				$html .= $this->serialize_token( $token );
			}

			return $html;
		}

		/**
		 * Strips the given HTML tags together with everything nested inside them.
		 *
		 * Tag names are matched case-insensitively.
		 *
		 * @param string[] $tags HTML tags to remove.
		 *
		 * @return Brain_HtmlTokenizer Instance without given tags.
		 */
		public function strip_tags( array $tags ) {

			$tags = $this->normalize_names( $tags );
			if ( ! $tags ) {
				return $this;
			}

			$tokens = array();
			foreach ( $this->tokens as $i => $token ) {

				list( $tag ) = $token['tag'];

				// The token is one of the tags to strip.
				if ( null !== $tag && in_array( strtolower( $tag ), $tags, true ) ) {
					continue;
				}

				// The token (tag or text) lives inside one of the tags to strip.
				if ( array_intersect( $token['parents'], $tags ) ) {
					continue;
				}

				$tokens[ $i ] = $token;
			}

			$this->tokens = $tokens;

			return $this;
		}

		/**
		 * Keep only the given HTML tags and remove all the others, including what they contain.
		 *
		 * Text that is not wrapped in any tag is always kept. Tag names are matched
		 * case-insensitively.
		 *
		 * @param string[] $tags HTML tags to keep.
		 *
		 * @return Brain_HtmlTokenizer Instance with only given tags.
		 */
		public function keep_tags( array $tags ) {

			$tags   = $this->normalize_names( $tags );
			$tokens = array();
			foreach ( $this->tokens as $i => $token ) {

				// At least one ancestor is not allowed, so the whole subtree goes away.
				if ( array_diff( $token['parents'], $tags ) ) {
					continue;
				}

				list( $tag ) = $token['tag'];
				if ( null === $tag || in_array( strtolower( $tag ), $tags, true ) ) {
					$tokens[ $i ] = $token;
				}
			}

			$this->tokens = $tokens;

			return $this;
		}

		/**
		 * Strips given attributes from given HTML tags (or all of them).
		 *
		 * Attribute and tag names are matched case-insensitively.
		 *
		 * @param array      $attributes Attributes to remove. Non-string and empty items are ignored.
		 * @param array|null $tags       HTML tags to remove attributes from. When null (default),
		 *                               means "all of them".
		 *
		 * @return Brain_HtmlTokenizer Instance without given attributes in given tags.
		 */
		public function strip_attributes( array $attributes, array $tags = null ) {

			$blacklist = array_flip( $this->normalize_names( $attributes ) );
			if ( ! $blacklist || array() === $tags ) {
				return $this;
			}

			return $this->filter_attributes( $blacklist, false, $tags );
		}

		/**
		 * Keep only given attributes in given HTML tags and remove all the others.
		 *
		 * Attribute and tag names are matched case-insensitively. When no valid attribute name is
		 * given, every attribute of the targeted tags is removed.
		 *
		 * @param array      $attributes Attributes to keep. Non-string and empty items are ignored.
		 * @param array|null $tags       HTML tags to keep attributes for. When null (default), means
		 *                               "all of them".
		 *
		 * @return Brain_HtmlTokenizer Instance with only given attributes in given tags.
		 */
		public function keep_attributes( array $attributes, array $tags = null ) {

			if ( array() === $tags ) {
				return $this;
			}

			$whitelist = array_flip( $this->normalize_names( $attributes ) );

			return $this->filter_attributes( $whitelist, true, $tags );
		}

		/**
		 * Shared implementation of strip_attributes() and keep_attributes().
		 *
		 * @param array      $names Map whose keys are the lower-case attribute names to act on.
		 * @param bool       $keep  True to keep only the listed attributes, false to remove them.
		 * @param array|null $tags  Tags to act on, null for every tag.
		 *
		 * @return Brain_HtmlTokenizer The same instance, for chaining.
		 */
		private function filter_attributes( array $names, $keep, array $tags = null ) {

			if ( null !== $tags ) {
				$tags = $this->normalize_names( $tags );
			}

			$tokens = array();
			foreach ( $this->tokens as $i => $token ) {

				list( $tag ) = $token['tag'];
				if (
					null === $tag
					|| ! $token['attributes']
					|| ( null !== $tags && ! in_array( strtolower( $tag ), $tags, true ) )
				) {
					$tokens[ $i ] = $token;
					continue;
				}

				$filtered = array();
				foreach ( $token['attributes'] as $name => $value ) {
					$listed = isset( $names[ strtolower( (string) $name ) ] );
					if ( $listed === $keep ) {
						$filtered[ $name ] = $value;
					}
				}

				$token['attributes'] = $filtered;
				$tokens[ $i ]        = $token;
			}

			$this->tokens = $tokens;

			return $this;
		}

		/**
		 * Keeps the non-empty strings of the given list, converted to lower case.
		 *
		 * @param array $names Tag or attribute names as passed by the caller.
		 *
		 * @return string[] Lower-case names.
		 */
		private function normalize_names( array $names ) {

			$normalized = array();
			foreach ( $names as $name ) {
				if ( is_string( $name ) && '' !== $name ) {
					$normalized[] = strtolower( $name );
				}
			}

			return $normalized;
		}

		/**
		 * Process the given HTML string and build the list of tokens.
		 *
		 * @param string $html HTML string to process.
		 *
		 * @return array[] Array of tokens, each is an array with keys: 'tag', 'attributes',
		 *                 'content' and 'parents'.
		 */
		private function tokenize( $html ) {

			$tokens  = array();
			$parents = array();

			// The first chunk is the text found before any "<"; every other chunk is what followed
			// a "<" in the source.
			$chunks       = explode( '<', $html );
			$leading_text = array_shift( $chunks );
			if ( '' !== $leading_text ) {
				$tokens[] = $this->no_tag_token( $leading_text, $parents );
			}

			foreach ( $chunks as $chunk ) {

				$token_parts = explode( '>', $chunk, 2 );

				// A "<" that is not followed by the start of a tag name and by a closing ">" is
				// literal text: put the "<" back instead of silently dropping it.
				if ( count( $token_parts ) < 2 || ! preg_match( '~^[a-zA-Z/!?]~', $chunk ) ) {
					$tokens[] = $this->no_tag_token( '<' . $chunk, $parents );
					continue;
				}

				list( $tag_name, $tag_type, $attributes ) = $this->tokenize_tag( $token_parts[0] );
				if ( '' === $tag_name ) {
					$tokens[] = $this->no_tag_token( '<' . $chunk, $parents );
					continue;
				}

				// Parents are tracked in lower case so that lookups are case-insensitive.
				$scope_name = strtolower( $tag_name );

				if ( 'close' === $tag_type ) {
					// Close the innermost matching open tag, and with it anything left open inside.
					// A closing tag that matches nothing is ignored.
					$open_at = array_search( $scope_name, array_reverse( $parents, true ), true );
					if ( false !== $open_at ) {
						$parents = array_slice( $parents, 0, $open_at );
					}
				}

				$tokens[] = array(
					'tag'        => array( $tag_name, $tag_type ),
					'attributes' => 'close' === $tag_type
						? array()
						: $this->tokenize_attributes( $attributes ),
					'content'    => '',
					'parents'    => $parents,
				);

				// Void elements, comments, doctype and processing instructions never get a closing
				// tag: tracking them as parents would corrupt the ancestry of what follows.
				$never_closed = '!' === $scope_name[0]
					|| '?' === $scope_name[0]
					|| in_array( $scope_name, self::$void_tags, true );
				if ( 'open' === $tag_type && ! $never_closed ) {
					$parents[] = $scope_name;
				}

				// Strict comparison, so that the text "0" is not lost.
				if ( '' !== $token_parts[1] ) {
					$tokens[] = $this->no_tag_token( $token_parts[1], $parents );
				}
			}

			return $tokens;
		}

		/**
		 * Creates a token for a string that does not belong to any tag.
		 *
		 * @param string $content HTML chunk content.
		 * @param array  $parents Stack of the tags the chunk is nested into.
		 *
		 * @return array Text token.
		 */
		private function no_tag_token( $content, array $parents ) {

			return array(
				'tag'        => array( null, null ),
				'attributes' => null,
				'content'    => $content,
				'parents'    => $parents,
			);
		}

		/**
		 * Process given HTML tag string and return tag name, tag type (open, close, self-close) and
		 * the "raw" attributes.
		 *
		 * @param string $tag What is found between "<" and ">", e.g. 'a href="#"' or '/a'.
		 *
		 * @return string[] A 3 items array:
		 *                  - Tag name, e.g. "div"
		 *                  - Tag type, can be "open", "close", and "self-close"
		 *                  - Not parsed attributes string
		 */
		private function tokenize_tag( $tag ) {

			$self_close = '/' === substr( $tag, -1 );
			if ( $self_close ) {
				// The cast covers PHP versions where substr() returns false for an empty result.
				$tag = (string) substr( $tag, 0, -1 );
			}

			$split      = preg_split( '~\s+~', trim( $tag ), 2 );
			$tag_name   = $split[0];
			$attributes = isset( $split[1] ) ? $split[1] : '';

			if ( $self_close ) {
				$tag_type = 'self-close';
			} else {
				$tag_type = '/' === substr( $tag_name, 0, 1 ) ? 'close' : 'open';
			}

			return array( trim( $tag_name, '/' ), $tag_type, $attributes );
		}

		/**
		 * Process given HTML tag attributes string and return the attributes in array form.
		 *
		 * Supports double-quoted, single-quoted and unquoted values (empty values included),
		 * optional whitespace around "=", and valueless (flag) attributes, which get the value
		 * `true`. When an attribute is repeated, the last occurrence wins.
		 *
		 * @param string     $attr   HTML tag attributes string to process.
		 * @param array|null $tokens Already parsed attributes to add to, keys are attribute names,
		 *                           values attribute values.
		 *
		 * @return array Attribute names as keys; values are strings safe to be wrapped in double
		 *               quotes, or `true` for flags.
		 */
		private function tokenize_attributes( $attr, array $tokens = null ) {

			if ( null === $tokens ) {
				$tokens = array();
			}

			// 1: name, 2: "=" plus value (absent for flags), 3: value with its wrapping quotes.
			$pattern = '~([^\s=]+)(\s*=\s*("[^"]*"|\'[^\']*\'|\S*))?~';
			if ( ! preg_match_all( $pattern, (string) $attr, $matches, PREG_SET_ORDER ) ) {
				return $tokens;
			}

			foreach ( $matches as $match ) {

				if ( ! isset( $match[2] ) || '' === $match[2] ) {
					$tokens[ $match[1] ] = true;
					continue;
				}

				$value = $match[3];
				$quote = substr( $value, 0, 1 );
				if (
					strlen( $value ) > 1
					&& ( '"' === $quote || '\'' === $quote )
					&& substr( $value, -1 ) === $quote
				) {
					$value = (string) substr( $value, 1, -1 );
				}

				// Values are always serialized inside double quotes: a literal double quote must
				// become an entity (a backslash does not escape anything in HTML).
				$tokens[ $match[1] ] = str_replace( '"', '&quot;', $value );
			}

			return $tokens;
		}

		/**
		 * Take a processed token in array form and serialize it back to string.
		 *
		 * @param array $token HTML token.
		 *
		 * @return string HTML for the token.
		 */
		private function serialize_token( array $token ) {

			list( $tag, $type ) = $token['tag'];
			$content            = $token['content'];

			if ( null === $tag ) {
				return $content;
			}

			if ( 'close' === $type ) {
				return "</{$tag}>{$content}";
			}

			$string = "<{$tag}";
			foreach ( $token['attributes'] as $name => $value ) {
				$string .= true === $value ? " {$name}" : " {$name}=\"{$value}\"";
			}
			$string .= 'self-close' === $type ? '/>' : '>';

			return $string . $content;
		}
	}