Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 61 additions & 5 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,7 @@ private function skip_script_data() {
* closing `>`; these are left for other methods.
*
* @since 6.2.0
* @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
*
* @return bool Whether a tag was found before the end of the document.
*/
Expand Down Expand Up @@ -1039,13 +1040,42 @@ private function parse_next_tag() {
'-' === $html[ $at + 2 ] &&
'-' === $html[ $at + 3 ]
) {
$closer_at = strpos( $html, '-->', $at + 4 );
if ( false === $closer_at ) {
$closer_at = $at + 4;
// If it's not possible to close the comment then there is nothing more to scan.
if ( strlen( $html ) <= $closer_at ) {
return false;
}

$at = $closer_at + 3;
continue;
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
$at = $closer_at + $span_of_dashes + 1;
continue;
}

/*
* Comments may be closed by either a --> or an invalid --!>.
* The first occurrence closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
*/
$closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
while ( ++$closer_at < strlen( $html ) ) {
$closer_at = strpos( $html, '--', $closer_at );
if ( false === $closer_at ) {
return false;
}

if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
$at = $closer_at + 3;
continue 2;
}

if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
$at = $closer_at + 4;
continue 2;
}
}
}

/*
Expand Down Expand Up @@ -1104,9 +1134,19 @@ private function parse_next_tag() {
continue;
}

/*
* </> is a missing end tag name, which is ignored.
*
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
*/
if ( '>' === $html[ $at + 1 ] ) {
$at++;
continue;
}

/*
* <? transitions to a bogus comment state – skip to the nearest >
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 );
Expand All @@ -1118,6 +1158,22 @@ private function parse_next_tag() {
continue;
}

/*
* If a non-alpha starts the tag name in a tag closer it's a comment.
* Find the first `>`, which closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*/
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
if ( false === $closer_at ) {
return false;
}

$at = $closer_at + 1;
continue;
}

++$at;
}

Expand Down
134 changes: 134 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,47 @@ public function data_next_tag_ignores_script_tag_contents() {
);
}

/**
* Invalid tag names are comments on tag closers.
*
* @ticket 58007
*
* @link https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*
* @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments
Comment on lines +1673 to +1674
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
*
* @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments
*
* @covers WP_HTML_Tag_Processor::next_tag
*
* @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments

See this ticket as a reference for the test docblock order/spacing of annotations.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spaced out in 8813327

*
* @param string $html_with_markers HTML containing an invalid tag closer whose element before and
* element after contain the "start" and "end" CSS classes.
*/
public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) {
	$processor = new WP_HTML_Tag_Processor( $html_with_markers );

	// Land on the element carrying the "start" class, then advance exactly one more tag.
	$processor->next_tag( array( 'class_name' => 'start' ) );
	$processor->next_tag();

	// The tag reached after "start" must be the one marked with the "end" class.
	$class_of_following_tag = $processor->get_attribute( 'class' );
	$this->assertSame( 'end', $class_of_following_tag );
}

/**
* Data provider.
*
* @return array[]
*/
public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() {
	/*
	 * Every dataset holds one HTML string in which the tag following the
	 * element with class "start" is expected to be the one with class "end".
	 */
	$datasets = array();

	$datasets['Invalid tag openers as normal text'] = array(
		'<ul><li><div class=start>I <3 when outflow > inflow</div><img class=end></li></ul>',
	);

	$datasets['Invalid tag closers as comments'] = array(
		'<ul><li><div class=start>I </3 when <img> outflow <br class=end> inflow</div></li></ul>',
	);

	$datasets['Unexpected question mark instead of tag name'] = array(
		'<div class=start><?xml-stylesheet type="text/css" href="style.css"?><hr class=end>',
	);

	return $datasets;
}

/**
* @ticket 56299
*
Expand Down Expand Up @@ -1716,6 +1757,99 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() {
);
}

/**
* Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
*
* @ticket 58007
*
* @covers WP_HTML_Tag_Processor::next_tag
*/
public function test_allows_incorrectly_closed_comments() {
	$html      = '<img id=before><!-- <img id=inside> --!><img id=after>--><img id=final>';
	$processor = new WP_HTML_Tag_Processor( $html );

	/*
	 * Expected `id` of each successive tag, in document order, paired with
	 * the failure message to report if that tag is not the one found.
	 */
	$expected_tags = array(
		'before' => 'Did not find starting tag.',
		'after'  => 'Did not properly close improperly-closed comment.',
		'final'  => 'Did not skip over unopened comment-closer.',
	);

	foreach ( $expected_tags as $expected_id => $failure_message ) {
		$processor->next_tag();
		$this->assertSame( $expected_id, $processor->get_attribute( 'id' ), $failure_message );
	}
}

/**
* Ensures that unclosed and invalid comments don't trigger warnings or errors.
*
* @ticket 58007
*
* @covers WP_HTML_Tag_Processor::next_tag
*
* @dataProvider data_html_with_unclosed_comments
*
* @param string $html_ending_before_comment_close HTML with opened comments that aren't closed.
*/
public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) {
	$processor = new WP_HTML_Tag_Processor( $html_ending_before_comment_close );

	// Scanning a document that ends inside a comment must report that no tag was found.
	$found_tag = $processor->next_tag();

	$this->assertFalse( $found_tag );
}

/**
* Data provider.
*
* @return array[]
*/
public function data_html_with_unclosed_comments() {
	// Dataset name => document that ends before its comment is closed.
	$unclosed_documents = array(
		'Shortest open valid comment'      => '<!--',
		'Basic truncated comment'          => '<!-- this ends --',
		'Comment with closer look-alike'   => '<!-- this ends --x',
		'Comment with closer look-alike 2' => '<!-- this ends --!x',
		'Invalid tag-closer comment'       => '</(when will this madness end?)',
		'Invalid tag-closer comment 2'     => '</(when will this madness end?)--',
	);

	// Wrap each document in its own array so every dataset supplies the single test argument.
	$datasets = array();
	foreach ( $unclosed_documents as $dataset_name => $html ) {
		$datasets[ $dataset_name ] = array( $html );
	}

	return $datasets;
}

/**
* Ensures that abruptly-closed empty comments are properly closed.
*
* @ticket 58007
*
* @covers WP_HTML_Tag_Processor::next_tag
*
* @dataProvider data_abruptly_closed_empty_comments
*
* @param string $html_with_after_marker HTML to test with "id=after" on element immediately following an abruptly closed comment.
*/
public function test_closes_abrupt_closing_of_empty_comment( $html_with_after_marker ) {
	$processor = new WP_HTML_Tag_Processor( $html_with_after_marker );

	// Advance twice: once onto the first tag, once more onto the tag found after it.
	for ( $step = 0; $step < 2; $step++ ) {
		$processor->next_tag();
	}

	$id_of_second_tag = $processor->get_attribute( 'id' );
	$this->assertSame( 'after', $id_of_second_tag, 'Did not find tag after closing abruptly-closed comment' );
}

/**
* Data provider.
*
* @return array[]
*/
public function data_abruptly_closed_empty_comments() {
	/*
	 * Dataset name => document. In each document the second tag the
	 * processor should find is the one carrying `id=after`.
	 */
	$documents = array(
		'Empty comment with two dashes only'                    => '<hr><!--><hr id=after>',
		'Empty comment with two dashes only, improperly closed' => '<hr><!--!><hr id=inside>--><hr id=after>',
		'Comment with two dashes only, improperly closed twice' => '<hr><!--!><hr id=inside>--!><hr id=after>',
		'Empty comment with three dashes'                       => '<hr><!---><hr id=after>',
		'Empty comment with three dashes, improperly closed'    => '<hr><!---!><hr id=inside>--><hr id=after>',
		'Comment with three dashes, improperly closed twice'    => '<hr><!---!><hr id=inside>--!><hr id=after>',
		'Empty comment with four dashes'                        => '<hr><!----><hr id=after>',
		'Empty comment with four dashes, improperly closed'     => '<hr><!----!><hr id=after>--><hr id=final>',
		'Comment with four dashes, improperly closed twice'     => '<hr><!----!><hr id=after>--!><hr id=final>',
		'Comment with almost-closer inside'                     => '<hr><!-- ---!><hr id=after>--!><hr id=final>',
	);

	// Wrap each document in its own array so every dataset supplies the single test argument.
	$datasets = array();
	foreach ( $documents as $dataset_name => $html ) {
		$datasets[ $dataset_name ] = array( $html );
	}

	return $datasets;
}

/**
* @ticket 56299
*
Expand Down