Skip to content

Instantly share code, notes, and snippets.

@westonruter
Last active October 24, 2024 00:34
Show Gist options
  • Save westonruter/c1e000c900ee54a9bc9ff23b0c1170e7 to your computer and use it in GitHub Desktop.
Save westonruter/c1e000c900ee54a9bc9ff23b0c1170e7 to your computer and use it in GitHub Desktop.
<?php
/**
* This file is intended to be executed using WP-CLI's eval-file command.
*/
$bookmark_name = 'the-bookmark';
$html = <<<HTML
<html lang="en">
<head>
<meta charset="utf-8">
<title>...</title>
</head>
<body>
<div id="$bookmark_name"></div>
</body>
</html>
HTML;
$processor = WP_HTML_Processor::create_full_parser( $html );
while ( $processor->next_tag() ) {
if ( 'DIV' === $processor->get_tag() && ! $processor->set_bookmark( $bookmark_name ) ) {
throw new Exception( "Failed to set bookmark" );
}
}
if ( ! $processor->has_bookmark( $bookmark_name ) ) {
throw new Exception( "Unexpectedly has_bookmark returned false." );
}
if ( ! $processor->seek( $bookmark_name ) ) {
throw new Exception( "Failed to seek to bookmark." );
}
echo "Success!";
<?php
/**
* This code extracts optimization logic from the Embed Optimizer plugin from the WordPress Core Performance Team.
* It demonstrates an issue where seeking fails when using WP_HTML_Processor when it succeeds with WP_HTML_Tag_Processor.
*
* This file is intended to be executed using WP-CLI's eval-file command.
*
* Assuming this Gist is cloned into the plugins/optimization-detective directory of the WordPress/performance project,
* the following will re-run the script whenever the file is changed:
*
* while true; do inotifywait -e modify repro-html-processor-seek-issue.php 2> /dev/null; clear; npm run wp-env run cli wp eval-file /var/www/html/wp-content/plugins/optimization-detective/repro-html-processor-seek-issue/repro-html-processor-seek-issue.php; done
*/
namespace ReproHtmlProcessorSeekIssue;
use WP_HTML_Processor;
use WP_HTML_Tag_Processor;
use Exception;
$html = <<<HTML
<html lang="en">
<head>
<meta charset="utf-8">
<title>...</title>
</head>
<body>
<figure class="wp-block-embed is-type-video is-provider-wordpress-tv wp-block-embed-wordpress-tv wp-embed-aspect-16-9 wp-has-aspect-ratio">
<div class="wp-block-embed__wrapper">
<iframe title="VideoPress Video Player" aria-label=\'VideoPress Video Player\' width=\'750\' height=\'422\' src=\'https://video.wordpress.com/embed/vaWm9zO6?hd=1&amp;cover=1\' frameborder=\'0\' allowfullscreen allow=\'clipboard-write\'></iframe>
<script src=\'https://v0.wordpress.com/js/next/videopress-iframe.js?m=1674852142\'></script>
</div>
</figure>
</body>
</html>
HTML;
/**
* Applies changes to HTML in the supplied tag processor to lazy-load the embed.
*
* @since 0.2.0
* @link https://github.com/WordPress/performance/blob/356fa9b8b7ff86633578b9ccdf92323eb04ed24d/plugins/embed-optimizer/hooks.php#L168-L307
*
* phpcs:disable Squiz.Commenting.FunctionCommentThrowTag.Missing -- The exception is caught.
*
* @param WP_HTML_Tag_Processor $html_processor HTML Processor.
* @return bool Whether the lazy-loading script is required.
*/
function embed_optimizer_update_markup( WP_HTML_Tag_Processor $html_processor ): bool {
$bookmark_names = array(
'script' => 'embed_optimizer_script',
'iframe' => 'embed_optimizer_iframe',
);
$trigger_error = static function ( string $message ): void {
trigger_error( $message, E_USER_WARNING );
};
try {
/*
* Determine how to lazy load the embed.
*
* - If there is only one iframe, set loading="lazy".
* - Prevent making scripts lazy if there is an inline script.
* - Only make script lazy if there is a single external script (since if there are
* multiple they may not get loaded in the right order).
* - Ensure that both the iframe and the script are made lazy if both occur in the same embed.
*/
$iframe_count = 0;
$script_count = 0;
$needs_lazy_script = false;
$has_inline_script = false;
$figure_depth = 0;
// Locate the iframes and scripts.
do {
// When iterating over an embed inside a larger document, stop once we reach closing </figure> tag.
if ( 'FIGURE' === $html_processor->get_tag() ) {
if ( $html_processor->is_tag_closer() ) {
--$figure_depth;
if ( $figure_depth <= 0 ) {
// We reached the end of the embed.
break;
}
} else {
++$figure_depth;
// Move to next element to start looking for IFRAME or SCRIPT tag.
continue;
}
}
if ( 0 === $figure_depth ) {
continue;
}
if ( 'IFRAME' === $html_processor->get_tag() ) {
$loading_value = $html_processor->get_attribute( 'loading' );
// Per the HTML spec: "The attribute's missing value default and invalid value default are both the Eager state".
if ( 'lazy' !== $loading_value ) {
++$iframe_count;
if ( ! $html_processor->set_bookmark( $bookmark_names['iframe'] ) ) {
throw new Exception(
/* translators: %s is bookmark name */
sprintf( __( 'Embed Optimizer unable to set %s bookmark.', 'embed-optimizer' ), $bookmark_names['iframe'] )
);
}
}
} elseif ( 'SCRIPT' === $html_processor->get_tag() ) {
if ( null === $html_processor->get_attribute( 'src' ) ) {
$has_inline_script = true;
} else {
++$script_count;
if ( ! $html_processor->set_bookmark( $bookmark_names['script'] ) ) {
throw new Exception(
/* translators: %s is bookmark name */
sprintf( __( 'Embed Optimizer unable to set %s bookmark.', 'embed-optimizer' ), $bookmark_names['script'] )
);
}
}
}
} while ( $html_processor->next_tag() );
// If there was only one non-inline script, make it lazy.
if ( 1 === $script_count && ! $has_inline_script && $html_processor->has_bookmark( $bookmark_names['script'] ) ) {
$needs_lazy_script = true;
if ( $html_processor->seek( $bookmark_names['script'] ) ) {
if ( is_string( $html_processor->get_attribute( 'type' ) ) ) {
$html_processor->set_attribute( 'data-original-type', $html_processor->get_attribute( 'type' ) );
}
$html_processor->set_attribute( 'type', 'application/vnd.embed-optimizer.javascript' );
} else {
$trigger_error(
/* translators: %s is bookmark name */
sprintf( __( 'Embed Optimizer unable to seek to %s bookmark.', 'embed-optimizer' ), $bookmark_names['script'] )
);
}
}
// If there was only one iframe, make it lazy.
if ( 1 === $iframe_count && $html_processor->has_bookmark( $bookmark_names['iframe'] ) ) {
if ( $html_processor->seek( $bookmark_names['iframe'] ) ) {
$html_processor->set_attribute( 'loading', 'lazy' );
// For post embeds, use visibility:hidden instead of clip since browsers will consistently load the
// lazy-loaded iframe (where Chromium is unreliably with clip) while at the same time improve accessibility
// by preventing links in the hidden iframe from receiving focus.
if ( true === $html_processor->has_class( 'wp-embedded-content' ) ) {
$style = $html_processor->get_attribute( 'style' );
if ( is_string( $style ) ) {
// WordPress core injects this clip CSS property:
// <https://github.com/WordPress/wordpress-develop/blob/6974b994de5/src/wp-includes/embed.php#L968>.
$style = str_replace( 'clip: rect(1px, 1px, 1px, 1px);', 'visibility: hidden;', $style );
// Note: wp-embed.js removes the style attribute entirely when the iframe is loaded:
// <https://github.com/WordPress/wordpress-develop/blob/6974b994d/src/js/_enqueues/wp/embed.js#L60>.
$html_processor->set_attribute( 'style', $style );
}
}
} else {
$trigger_error(
/* translators: %s is bookmark name */
sprintf( __( 'Embed Optimizer unable to seek to %s bookmark.', 'embed-optimizer' ), $bookmark_names['iframe'] )
);
}
}
} catch ( Exception $exception ) {
$trigger_error( $exception->getMessage() );
$needs_lazy_script = false;
}
// Since there is a limit to the number of bookmarks we can add, make sure any new ones we add get removed.
foreach ( $bookmark_names as $bookmark_name ) {
$html_processor->release_bookmark( $bookmark_name );
}
return $needs_lazy_script;
}
/**
* Processes document.
*
* @param WP_HTML_Tag_Processor|WP_HTML_Processor $processor
* @return string
*/
function process_document( $processor ): string {
$needed_lazy_load_script = false;
while ( $processor->next_tag() ) {
if (
! $processor->is_tag_closer()
&&
'FIGURE' === $processor->get_tag()
&&
true === $processor->has_class( 'wp-block-embed' )
) {
if ( embed_optimizer_update_markup( $processor ) ) {
$needed_lazy_load_script = true;
}
}
}
$updated_html = $processor->get_updated_html();
if ( $needed_lazy_load_script ) {
$updated_html .= "\n<!--TODO: Also inject the lazy-load script at the end of the BODY.-->";
}
return $updated_html;
}
echo "Parsing with HTML Tag Processor...\n";
$html_tag_processor = new WP_HTML_Tag_Processor( $html );
$html_tag_processor_result = process_document( $html_tag_processor );
echo "Done.\n\n";
echo "Parsing with HTML Processor...\n";
$html_processor = WP_HTML_Processor::create_full_parser( $html );
$html_processor_result = process_document( $html_processor );
echo "Done.\n\n";
if ( $html_tag_processor_result !== $html_processor_result ) {
echo "FAIL. The HTML Processor and HTML Tag Processor results are different.\n\n";
echo "### HTML Tag Processor ###\n";
echo "$html_tag_processor_result\n\n\n";
echo "### HTML Processor Result ###\n";
echo "$html_processor_result\n";
exit( 1 );
} else {
echo "PASS. The HTML Processor and HTML Tag Processor results are the same:\n";
echo $html_processor_result;
exit( 0 );
}
@westonruter
Copy link
Author

westonruter commented Oct 23, 2024

Current output as of WordPress 6.8-alpha-59285:

Parsing with HTML Tag Processor...
Done.

Parsing with HTML Processor...
Notice: Function WP_HTML_Tag_Processor::seek was called <strong>incorrectly</strong>. Unknown bookmark name. Please see <a href="https://developer.wordpress.org/advanced-administration/debug/debug-wordpress/">Debugging in WordPress</a> for more information. (This message was added in version 6.2.0.) in /var/www/html/wp-includes/functions.php on line 6087
Warning: Embed Optimizer unable to seek to embed_optimizer_script bookmark. in phar:///usr/local/bin/wp/vendor/wp-cli/eval-command/src/EvalFile_Command.php(85) : eval()'d code on line 54
Notice: Function WP_HTML_Tag_Processor::seek was called <strong>incorrectly</strong>. Unknown bookmark name. Please see <a href="https://developer.wordpress.org/advanced-administration/debug/debug-wordpress/">Debugging in WordPress</a> for more information. (This message was added in version 6.2.0.) in /var/www/html/wp-includes/functions.php on line 6087
Warning: Embed Optimizer unable to seek to embed_optimizer_iframe bookmark. in phar:///usr/local/bin/wp/vendor/wp-cli/eval-command/src/EvalFile_Command.php(85) : eval()'d code on line 54
Done.

FAIL. The HTML Processor and HTML Tag Processor results are different.

### HTML Tag Processor ###
<html lang="en">
        <head>
                <meta charset="utf-8">
                <title>...</title>
        </head>
        <body>
                <figure class="wp-block-embed is-type-video is-provider-wordpress-tv wp-block-embed-wordpress-tv wp-embed-aspect-16-9 wp-has-aspect-ratio">
                        <div class="wp-block-embed__wrapper">
                                <iframe loading="lazy" title="VideoPress Video Player" aria-label=\'VideoPress Video Player\' width=\'750\' height=\'422\' src=\'https://video.wordpress.com/embed/vaWm9zO6?hd=1&amp;cover=1\' frameborder=\'0\' allowfullscreen allow=\'clipboard-write\'></iframe>
                                <script type="application/vnd.embed-optimizer.javascript" src=\'https://v0.wordpress.com/js/next/videopress-iframe.js?m=1674852142\'></script>
                        </div>
                </figure>
        </body>
</html>
<!--TODO: Also inject the lazy-load script at the end of the BODY.-->


### HTML Processor Result ###
<html lang="en">
        <head>
                <meta charset="utf-8">
                <title>...</title>
        </head>
        <body>
                <figure class="wp-block-embed is-type-video is-provider-wordpress-tv wp-block-embed-wordpress-tv wp-embed-aspect-16-9 wp-has-aspect-ratio">
                        <div class="wp-block-embed__wrapper">
                                <iframe title="VideoPress Video Player" aria-label=\'VideoPress Video Player\' width=\'750\' height=\'422\' src=\'https://video.wordpress.com/embed/vaWm9zO6?hd=1&amp;cover=1\' frameborder=\'0\' allowfullscreen allow=\'clipboard-write\'></iframe>
                                <script src=\'https://v0.wordpress.com/js/next/videopress-iframe.js?m=1674852142\'></script>
                        </div>
                </figure>
        </body>
</html>
<!--TODO: Also inject the lazy-load script at the end of the BODY.-->
✖ Command failed with exit code 1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment