Skip to content

Instantly share code, notes, and snippets.

@westonruter
Last active October 22, 2024 00:12
Show Gist options
  • Save westonruter/c27694c39100ea4011da2e0ecd588326 to your computer and use it in GitHub Desktop.
Save westonruter/c27694c39100ea4011da2e0ecd588326 to your computer and use it in GitHub Desktop.
Warning: The next_token() method was called another time for +HTML. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<HTML xpath="/*[2][self::HTML]">
Warning: The next_token() method was called another time for +HEAD. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<HEAD xpath="/*[2][self::HTML]/*[2][self::HEAD]">
Warning: The next_token() method was called another time for +META. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<META xpath="/*[2][self::HTML]/*[2][self::HEAD]/*[3][self::META]">
Warning: The next_token() method was called another time for +TITLE. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<TITLE xpath="/*[2][self::HTML]/*[2][self::HEAD]/*[6][self::TITLE]">
Warning: The next_token() method was called another time for -HEAD. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
</HEAD>
Warning: The next_token() method was called another time for +BODY. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<BODY xpath="/*[4][self::HEAD]/*[3][self::BODY]">
Warning: The next_token() method was called another time for +H1. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<H1 xpath="/*[4][self::HEAD]/*[3][self::BODY]/*[3][self::H1]">
Warning: The next_token() method was called another time for -H1. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
</H1>
Warning: The next_token() method was called another time for +IMG. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<IMG xpath="/*[4][self::HEAD]/*[5][self::H1]/*[3][self::IMG]">
Warning: The next_token() method was called another time for +P. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
<P xpath="/*[4][self::HEAD]/*[5][self::H1]/*[6][self::P]">
Warning: The next_token() method was called another time for -P. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
</P>
<FOOTER xpath="/*[4][self::HEAD]/*[7][self::P]/*[1][self::FOOTER]">
Warning: The next_token() method was called another time for -FOOTER. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
</FOOTER>
Warning: Unable to set bookmark: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 57
Warning: The next_token() method was called another time for -BODY. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
Warning: Unable to set bookmark: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 57
Warning: The next_token() method was called another time for -BODY. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80
Warning: Unable to set bookmark: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 57
</BODY>
</HTML>
Warning: Bookmark was not set: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 154
Counts for next_token() being called for tags in the document:
Array
(
[+HTML] => 2
[+HEAD] => 2
[+META] => 3
[+TITLE] => 3
[-HEAD] => 3
[+BODY] => 3
[+H1] => 3
[-H1] => 3
[+IMG] => 3
[+P] => 3
[-P] => 3
[+FOOTER] => 1
[-FOOTER] => 3
[-BODY] => 3
[-HTML] => 1
)
<HTML xpath="/*[1][self::HTML]">
<HEAD xpath="/*[1][self::HTML]/*[1][self::HEAD]">
<META xpath="/*[1][self::HTML]/*[1][self::HEAD]/*[1][self::META]">
<TITLE xpath="/*[1][self::HTML]/*[1][self::HEAD]/*[2][self::TITLE]">
</HEAD>
<BODY xpath="/*[1][self::HTML]/*[2][self::BODY]">
<H1 xpath="/*[1][self::HTML]/*[2][self::BODY]/*[1][self::H1]">
</H1>
<IMG xpath="/*[1][self::HTML]/*[2][self::BODY]/*[2][self::IMG]">
<P xpath="/*[1][self::HTML]/*[2][self::BODY]/*[3][self::P]">
</P>
<FOOTER xpath="/*[1][self::HTML]/*[2][self::BODY]/*[4][self::FOOTER]">
</FOOTER>
</BODY>
</HTML>
Counts for next_token() being called for tags in the document:
Array
(
[+HTML] => 1
[+HEAD] => 1
[+META] => 1
[+TITLE] => 1
[-HEAD] => 1
[+BODY] => 1
[+H1] => 1
[-H1] => 1
[+IMG] => 1
[+P] => 1
[-P] => 1
[+FOOTER] => 1
[-FOOTER] => 1
[-BODY] => 1
[-HTML] => 1
)
<?php
/**
* Plugin Name: HTML Processor Debug
* Description: Demonstration of a couple issues encountered when extending HTML Processor, namely that next_token() is called repeatedly for tags and a bookmark cannot be set for the closing body tag.
* Author: Weston Ruter
* Author URI: https://weston.ruter.net/
*/
// phpcs:disable WordPress.Security.EscapeOutput.OutputNotEscaped
// phpcs:disable SlevomatCodingStandard.TypeHints.ParameterTypeHint.MissingAnyTypeHint
class Extended_HTML_Processor extends WP_HTML_Processor {

	/**
	 * How many times next_token() has landed on each tag token.
	 *
	 * Keys are the tag name prefixed with "+" (opening tag) or "-" (closing tag).
	 *
	 * @var array<string, int>
	 */
	public $tag_seen_count = array();

	/**
	 * Zero-based depth stored at the end of the preceding next_token() call.
	 *
	 * @var int|null
	 */
	private $previous_depth = null;

	/**
	 * Tag name and sibling index tracked for each zero-based depth.
	 *
	 * @var array<int, array{tag_name: string, index: int}>
	 */
	private $open_stack_indices = array();

	/**
	 * Builds an XPath expression from the tracked per-depth entries.
	 *
	 * Each entry contributes one "/*[N][self::TAG]" step, where N is the
	 * tracked index plus one.
	 *
	 * @return string XPath.
	 */
	public function get_xpath(): string {
		$steps = array();
		foreach ( $this->open_stack_indices as $entry ) {
			$steps[] = sprintf( '/*[%d][self::%s]', $entry['index'] + 1, $entry['tag_name'] );
		}
		return implode( '', $steps );
	}

	/**
	 * Sets a bookmark via the parent class, raising a warning on failure.
	 *
	 * @param string $bookmark_name Bookmark name.
	 * @return bool Whether the bookmark was successfully created.
	 */
	public function set_bookmark( $bookmark_name ): bool {
		if ( parent::set_bookmark( $bookmark_name ) ) {
			return true;
		}

		trigger_error( "Unable to set bookmark: $bookmark_name", E_USER_WARNING );
		return false;
	}

	/**
	 * Advances to the next token and updates the debugging state for tag tokens.
	 *
	 * For every tag token this counts the visit, updates the per-depth index
	 * tracking used by get_xpath(), and attempts to bookmark closing HEAD and
	 * BODY tags.
	 *
	 * @return bool Whether next token was matched.
	 */
	public function next_token(): bool {
		$matched = parent::next_token();
		$depth   = (int) $this->get_current_depth(); // @phpstan-ignore method.notFound (Not yet part of szepeviktor/phpstan-wordpress.)
		$tag     = (string) $this->get_tag();

		// Shift to a zero-based depth, because HTML starts at depth 1.
		--$depth;

		if ( '#tag' === $this->get_token_type() ) {
			$is_closer = $this->is_tag_closer();
			$prefix    = $is_closer ? '-' : '+';

			$this->record_tag_visit( $prefix . $tag );
			$this->track_stack_position( $depth, $tag, $is_closer );

			// Only the ends of the HEAD and BODY elements get bookmarked.
			$bookmarks = array(
				'HEAD' => 'end_of_head',
				'BODY' => 'end_of_body',
			);
			if ( $is_closer && isset( $bookmarks[ $tag ] ) ) {
				$this->set_bookmark( $bookmarks[ $tag ] );
			}
		}

		$this->previous_depth = $depth;
		return $matched;
	}

	/**
	 * Counts a visit to a tag token, warning whenever the token was seen before.
	 *
	 * Note: if next_token() were to return right after a repeat is detected
	 * here, the resulting XPaths would be correct.
	 *
	 * @param string $token_name Tag name prefixed with "+" or "-".
	 */
	private function record_tag_visit( string $token_name ): void {
		if ( isset( $this->tag_seen_count[ $token_name ] ) ) {
			trigger_error( "The next_token() method was called another time for $token_name.", E_USER_WARNING );
			++$this->tag_seen_count[ $token_name ];
		} else {
			$this->tag_seen_count[ $token_name ] = 1;
		}
	}

	/**
	 * Updates the per-depth tracking for the tag at the given depth.
	 *
	 * @param int    $depth     Zero-based depth of the current tag.
	 * @param string $tag_name  Name of the current tag.
	 * @param bool   $is_closer Whether the current tag is a closing tag.
	 */
	private function track_stack_position( int $depth, string $tag_name, bool $is_closer ): void {
		// Nothing tracked at this depth yet: start a new entry.
		if ( ! isset( $this->open_stack_indices[ $depth ] ) ) {
			$this->open_stack_indices[ $depth ] = array(
				'tag_name' => $tag_name,
				'index'    => 0,
			);
			return;
		}

		// Depth changed since the previous call: drop the entries from the cut-off onwards.
		if ( $this->previous_depth !== $depth ) {
			$offset = $depth;
			if ( $is_closer ) {
				++$offset;
			}
			array_splice( $this->open_stack_indices, $offset );
			return;
		}

		// Same depth as the previous call: replace the tag name and bump the index.
		$this->open_stack_indices[ $depth ]['tag_name'] = $tag_name;
		++$this->open_stack_indices[ $depth ]['index'];
	}
}
/**
 * Runs the extended processor over a sample document and prints what it finds.
 *
 * Echoes one line per visited tag (openers and closers), indented by depth,
 * with every opening tag annotated with the XPath from get_xpath(). Afterwards
 * a warning is raised for each expected bookmark that is missing, and the
 * per-tag next_token() call counts are printed.
 */
function test_extended_html_processor(): void {
	// For this test, note that each tag occurs only once.
	$processor = Extended_HTML_Processor::create_full_parser(
		'
<html>
<head>
<meta charset="utf-8">
<title>Hello World</title>
</head>
<body>
<h1>Hello World!</h1>
<img src="example.png">
<p>Each tag should occur only once in this document.<!--Closing P tag omitted intentionally.-->
<footer>The end.</footer>
</body>
</html>
'
	);

	// The factory is nullable, so bail out instead of dereferencing null below.
	if ( null === $processor ) {
		trigger_error( 'Unable to create the HTML processor.', E_USER_WARNING );
		return;
	}

	while ( $processor->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
		$is_closer = $processor->is_tag_closer();
		$depth     = $processor->get_current_depth();
		if ( $is_closer ) {
			// Indent a closing tag one level deeper than its reported depth.
			++$depth;
		}

		// Clamp at zero: str_repeat() throws a ValueError for a negative count.
		echo str_repeat( "\t", max( 0, $depth - 1 ) );
		echo '<';
		if ( $is_closer ) {
			echo '/';
		}
		echo $processor->get_tag();
		if ( ! $is_closer ) {
			printf( ' xpath="%s"', $processor->get_xpath() );
		}
		echo ">\n";
	}

	foreach ( array( 'end_of_head', 'end_of_body' ) as $expected_bookmark ) {
		if ( ! $processor->has_bookmark( $expected_bookmark ) ) {
			trigger_error( "Bookmark was not set: $expected_bookmark", E_USER_WARNING );
		}
	}

	echo "Counts for next_token() being called for tags in the document:\n";
	print_r( $processor->tag_seen_count );
}
// Register the `html-processor-debug` WP-CLI command only when the WP_CLI
// constant is defined. The command takes no arguments and simply runs
// test_extended_html_processor().
if ( defined( 'WP_CLI' ) ) {
WP_CLI::add_command(
'html-processor-debug',
static function (): void {
test_extended_html_processor();
}
);
}
@westonruter
Copy link
Author

Actually, this isn't generating the expected output. See WordPress/wordpress-develop#7607.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment