Last active
October 22, 2024 00:12
-
-
Save westonruter/c27694c39100ea4011da2e0ecd588326 to your computer and use it in GitHub Desktop.
OBSOLETE. See https://core.trac.wordpress.org/ticket/62269
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Warning: The next_token() method was called another time for +HTML. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<HTML xpath="/*[2][self::HTML]"> | |
Warning: The next_token() method was called another time for +HEAD. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<HEAD xpath="/*[2][self::HTML]/*[2][self::HEAD]"> | |
Warning: The next_token() method was called another time for +META. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<META xpath="/*[2][self::HTML]/*[2][self::HEAD]/*[3][self::META]"> | |
Warning: The next_token() method was called another time for +TITLE. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<TITLE xpath="/*[2][self::HTML]/*[2][self::HEAD]/*[6][self::TITLE]"> | |
Warning: The next_token() method was called another time for -HEAD. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
</HEAD> | |
Warning: The next_token() method was called another time for +BODY. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<BODY xpath="/*[4][self::HEAD]/*[3][self::BODY]"> | |
Warning: The next_token() method was called another time for +H1. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<H1 xpath="/*[4][self::HEAD]/*[3][self::BODY]/*[3][self::H1]"> | |
Warning: The next_token() method was called another time for -H1. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
</H1> | |
Warning: The next_token() method was called another time for +IMG. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<IMG xpath="/*[4][self::HEAD]/*[5][self::H1]/*[3][self::IMG]"> | |
Warning: The next_token() method was called another time for +P. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
<P xpath="/*[4][self::HEAD]/*[5][self::H1]/*[6][self::P]"> | |
Warning: The next_token() method was called another time for -P. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
</P> | |
<FOOTER xpath="/*[4][self::HEAD]/*[7][self::P]/*[1][self::FOOTER]"> | |
Warning: The next_token() method was called another time for -FOOTER. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
</FOOTER> | |
Warning: Unable to set bookmark: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 57 | |
Warning: The next_token() method was called another time for -BODY. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
Warning: Unable to set bookmark: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 57 | |
Warning: The next_token() method was called another time for -BODY. in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 80 | |
Warning: Unable to set bookmark: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 57 | |
</BODY> | |
</HTML> | |
Warning: Bookmark was not set: end_of_body in /var/www/html/wp-content/plugins/html-processor-debug/html-processor-debug.php on line 154 | |
Counts for next_token() being called for tags in the document: | |
Array | |
( | |
[+HTML] => 2 | |
[+HEAD] => 2 | |
[+META] => 3 | |
[+TITLE] => 3 | |
[-HEAD] => 3 | |
[+BODY] => 3 | |
[+H1] => 3 | |
[-H1] => 3 | |
[+IMG] => 3 | |
[+P] => 3 | |
[-P] => 3 | |
[+FOOTER] => 1 | |
[-FOOTER] => 3 | |
[-BODY] => 3 | |
[-HTML] => 1 | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<HTML xpath="/*[1][self::HTML]"> | |
<HEAD xpath="/*[1][self::HTML]/*[1][self::HEAD]"> | |
<META xpath="/*[1][self::HTML]/*[1][self::HEAD]/*[1][self::META]"> | |
<TITLE xpath="/*[1][self::HTML]/*[1][self::HEAD]/*[2][self::TITLE]"> | |
</HEAD> | |
<BODY xpath="/*[1][self::HTML]/*[2][self::BODY]"> | |
<H1 xpath="/*[1][self::HTML]/*[2][self::BODY]/*[1][self::H1]"> | |
</H1> | |
<IMG xpath="/*[1][self::HTML]/*[2][self::BODY]/*[2][self::IMG]"> | |
<P xpath="/*[1][self::HTML]/*[2][self::BODY]/*[3][self::P]"> | |
</P> | |
<FOOTER xpath="/*[1][self::HTML]/*[2][self::BODY]/*[4][self::FOOTER]"> | |
</FOOTER> | |
</BODY> | |
</HTML> | |
Counts for next_token() being called for tags in the document: | |
Array | |
( | |
[+HTML] => 1 | |
[+HEAD] => 1 | |
[+META] => 1 | |
[+TITLE] => 1 | |
[-HEAD] => 1 | |
[+BODY] => 1 | |
[+H1] => 1 | |
[-H1] => 1 | |
[+IMG] => 1 | |
[+P] => 1 | |
[-P] => 1 | |
[+FOOTER] => 1 | |
[-FOOTER] => 1 | |
[-BODY] => 1 | |
[-HTML] => 1 | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Plugin Name: HTML Processor Debug | |
* Description: Demonstration of a couple issues encountered when extending HTML Processor, namely that next_token() is called repeatedly for tags and a bookmark cannot be set for the closing body tag. | |
* Author: Weston Ruter | |
* Author URI: https://weston.ruter.net/ | |
*/ | |
// phpcs:disable WordPress.Security.EscapeOutput.OutputNotEscaped | |
// phpcs:disable SlevomatCodingStandard.TypeHints.ParameterTypeHint.MissingAnyTypeHint | |
/**
 * WP_HTML_Processor subclass that instruments next_token() and set_bookmark().
 *
 * Every tag token reached through next_token() is counted, a per-depth stack of
 * sibling positions is maintained so an XPath can be produced for the current
 * tag, and bookmarks are requested at the closing HEAD and BODY tags. A warning
 * is raised whenever the same tag token is seen again or a bookmark cannot be set.
 */
class Extended_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Number of times next_token() has landed on each tag token.
	 *
	 * Keys are the tag name prefixed with "+" for an opener or "-" for a closer.
	 *
	 * @var array<string, int>
	 */
	public $tag_seen_count = array();

	/**
	 * Zero-based depth recorded at the end of the previous next_token() call.
	 *
	 * @var int|null
	 */
	private $previous_depth = null;

	/**
	 * Tag name and zero-based sibling position tracked for each depth level.
	 *
	 * @since n.e.x.t
	 * @var array<int, array{tag_name: string, index: int}>
	 */
	private $open_stack_indices = array();

	/**
	 * Builds an XPath from the tracked stack.
	 *
	 * Each tracked level contributes one `/*[n][self::TAG]` step, where n is the
	 * stored index converted to a one-based position.
	 *
	 * @return string XPath.
	 */
	public function get_xpath(): string {
		$steps = array_map(
			static function ( array $level ): string {
				return sprintf( '/*[%d][self::%s]', $level['index'] + 1, $level['tag_name'] );
			},
			$this->open_stack_indices
		);

		return implode( '', $steps );
	}

	/**
	 * Sets a bookmark via the parent class, warning when it could not be created.
	 *
	 * @param string $bookmark_name Bookmark name.
	 * @return bool Whether the bookmark was successfully created.
	 */
	public function set_bookmark( $bookmark_name ): bool {
		$was_set = parent::set_bookmark( $bookmark_name );
		if ( $was_set ) {
			return $was_set;
		}

		trigger_error( "Unable to set bookmark: $bookmark_name", E_USER_WARNING );
		return $was_set;
	}

	/**
	 * Advances to the next token and updates the instrumentation for tag tokens.
	 *
	 * For a tag token this counts the visit (warning on a repeat), updates the
	 * per-depth stack used by get_xpath(), and requests the `end_of_head` /
	 * `end_of_body` bookmarks on the matching closing tags.
	 *
	 * @return bool Whether next token was matched.
	 */
	public function next_token(): bool {
		$matched = parent::next_token();

		// The reported depth is shifted down by one because HTML starts at depth 1.
		$current_depth = ( (int) $this->get_current_depth() ) - 1; // @phpstan-ignore method.notFound (Not yet part of szepeviktor/phpstan-wordpress.)
		$current_tag   = (string) $this->get_tag();

		if ( '#tag' === $this->get_token_type() ) {
			$is_closer  = $this->is_tag_closer();
			$token_name = ( $is_closer ? '-' : '+' ) . $current_tag;

			// Count the visit; anything beyond the first visit of a token is reported.
			if ( isset( $this->tag_seen_count[ $token_name ] ) ) {
				trigger_error( "The next_token() method was called another time for $token_name.", E_USER_WARNING );
				++$this->tag_seen_count[ $token_name ];
				// Note: Returning the parent's result right here, before the stack
				// bookkeeping below runs, makes the resulting XPaths correct.
			} else {
				$this->tag_seen_count[ $token_name ] = 1;
			}

			if ( ! isset( $this->open_stack_indices[ $current_depth ] ) ) {
				// First tag encountered at this depth.
				$this->open_stack_indices[ $current_depth ] = array(
					'tag_name' => $current_tag,
					'index'    => 0,
				);
			} elseif ( $this->previous_depth !== $current_depth ) {
				// Depth changed: drop the tracked levels from the cut-off point onwards.
				array_splice(
					$this->open_stack_indices,
					$current_depth + ( $is_closer ? 1 : 0 )
				);
			} else {
				// Same depth as the previous token: move on to the next sibling position.
				$this->open_stack_indices[ $current_depth ]['tag_name'] = $current_tag;
				++$this->open_stack_indices[ $current_depth ]['index'];
			}

			// Closing tags at which a bookmark is requested.
			$closer_bookmarks = array(
				'HEAD' => 'end_of_head',
				'BODY' => 'end_of_body',
			);
			if ( $is_closer && isset( $closer_bookmarks[ $current_tag ] ) ) {
				$this->set_bookmark( $closer_bookmarks[ $current_tag ] );
			}
		}

		$this->previous_depth = $current_depth;
		return $matched;
	}
}
/**
 * Runs the demonstration and prints its findings.
 *
 * Walks a small HTML document with Extended_HTML_Processor, echoing an indented
 * outline of every opening and closing tag (openers include the computed XPath),
 * warns about any expected bookmark that was not set, and finally dumps how many
 * times next_token() was reached for each tag token.
 *
 * Output is written with echo/printf/print_r and problems are reported via
 * E_USER_WARNING; nothing is returned.
 */
function test_extended_html_processor(): void {
	// For this test, note that each tag occurs only once.
	$processor = Extended_HTML_Processor::create_full_parser(
		'
<html>
<head>
<meta charset="utf-8">
<title>Hello World</title>
</head>
<body>
<h1>Hello World!</h1>
<img src="example.png">
<p>Each tag should occur only once in this document.<!--Closing P tag omitted intentionally.-->
<footer>The end.</footer>
</body>
</html>
'
	);

	// create_full_parser() is documented to return null when no processor can be created.
	if ( null === $processor ) {
		trigger_error( 'Unable to create the HTML processor.', E_USER_WARNING );
		return;
	}

	while ( $processor->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
		$is_closer = $processor->is_tag_closer();

		// The depth reported for a closer is bumped by one before indenting.
		$depth = $processor->get_current_depth();
		if ( $is_closer ) {
			++$depth;
		}

		echo str_repeat( "\t", $depth - 1 );
		echo $is_closer ? '</' : '<';
		echo $processor->get_tag();
		if ( ! $is_closer ) {
			printf( ' xpath="%s"', $processor->get_xpath() );
		}
		echo ">\n";
	}

	// Both bookmarks are requested by Extended_HTML_Processor::next_token() on the closing tags.
	foreach ( array( 'end_of_head', 'end_of_body' ) as $expected_bookmark ) {
		if ( ! $processor->has_bookmark( $expected_bookmark ) ) {
			trigger_error( "Bookmark was not set: $expected_bookmark", E_USER_WARNING );
		}
	}

	echo "Counts for next_token() being called for tags in the document:\n";
	print_r( $processor->tag_seen_count );
}
// Register the `html-processor-debug` command only when the WP_CLI constant is defined.
if ( defined( 'WP_CLI' ) ) {
	WP_CLI::add_command(
		'html-processor-debug',
		/**
		 * Runs the demonstration; any arguments supplied to the command are ignored.
		 */
		static function (): void {
			test_extended_html_processor();
		}
	);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Actually, this isn't generating the actual expected output. See WordPress/wordpress-develop#7607