From 5b04c8a2e447c88a4e277da283271175919fd531 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Jun 2026 17:59:34 +0200 Subject: [PATCH 1/7] Move IN BODY "any other end tag" processing to separate method --- .../html-api/class-wp-html-processor.php | 68 ++++++++++++------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 967d616129647..8b04c41f2fd16 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -3256,38 +3256,60 @@ private function step_in_body(): bool { /* * > Any other end tag */ + return $this->step_in_body_any_other_end_tag(); + } - /* - * Find the corresponding tag opener in the stack of open elements, if - * it exists before reaching a special element, which provides a kind - * of boundary in the stack. For example, a `` should not - * close anything beyond its containing `P` or `DIV` element. - */ - foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { - if ( 'html' === $node->namespace && $token_name === $node->node_name ) { - break; - } + $this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' ); + // This unnecessary return prevents tools from inaccurately reporting type errors. + return false; + } - if ( self::is_special( $node ) ) { - // This is a parse error, ignore the token. - return $this->step(); - } + /** + * Applies the "any other end tag" parsing instructions for the 'in body' insertion mode. + * + * @since 7.1.0 + * @ignore + * + * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. + * + * @see https://html.spec.whatwg.org/#parsing-main-inbody + * @see WP_HTML_Processor::step + * + * @return bool Whether an element was found. + */ + private function step_in_body_any_other_end_tag(): bool { + $token_name = $this->get_token_name(); + + /* + * Find the corresponding tag opener in the stack of open elements, if + * it exists before reaching a special element, which provides a kind + * of boundary in the stack. For example, a `` should not + * close anything beyond its containing `P` or `DIV` element. + */ + foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + if ( 'html' === $node->namespace && $token_name === $node->node_name ) { + break; } - $this->generate_implied_end_tags( $token_name ); - if ( $node !== $this->state->stack_of_open_elements->current_node() ) { - // @todo Record parse error: this error doesn't impact parsing. + if ( self::is_special( $node ) ) { + // This is a parse error, ignore the token. + return $this->step(); } + } - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - $this->state->stack_of_open_elements->pop(); - if ( $node === $item ) { - return true; - } + $this->generate_implied_end_tags( $token_name ); + if ( $node !== $this->state->stack_of_open_elements->current_node() ) { + // @todo Record parse error: this error doesn't impact parsing. + } + + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + $this->state->stack_of_open_elements->pop(); + if ( $node === $item ) { + return true; } } - $this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' ); + $this->bail( 'Should not have been able to reach end of "any other end tag" IN BODY processing. Check HTML API code.' ); // This unnecessary return prevents tools from inaccurately reporting type errors. return false; } From 6110bdaffc30ce3fb9f6e3bc585f56051e5a66fb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Jun 2026 18:32:56 +0200 Subject: [PATCH 2/7] Improve function docs --- src/wp-includes/html-api/class-wp-html-processor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8b04c41f2fd16..2fd51db80a7bc 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -3265,7 +3265,7 @@ private function step_in_body(): bool { } /** - * Applies the "any other end tag" parsing instructions for the 'in body' insertion mode. + * Applies the "any other end tag" parsing instructions for the IN BODY insertion mode. * * @since 7.1.0 * @ignore @@ -3273,7 +3273,7 @@ private function step_in_body(): bool { * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-inbody - * @see WP_HTML_Processor::step + * @see WP_HTML_Processor::step_in_body * * @return bool Whether an element was found. */ From 36838731936aa8022b187e9be828532ab891c04a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Jun 2026 19:07:16 +0200 Subject: [PATCH 3/7] Add adoption agency algorithm tests --- .../html-api/wpHtmlProcessor-serialize.php | 41 ++++++++++++ .../html-api/wpHtmlProcessorBreadcrumbs.php | 1 + .../html-api/wpHtmlProcessorSemanticRules.php | 67 +++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index 5afe37a010a41..43a54c12fb0c0 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -265,6 +265,47 @@ public function test_unexpected_closing_tags_are_removed() { ); } + /** + * Ensures that unexpected closing formatting tags are ignored. + * + * @ticket 65383 + * + * @dataProvider data_formatting_tag_names + * + * @param string $formatting_tag_name Formatting tag name with no active formatting element. + */ + public function test_unexpected_closing_formatting_tags_are_ignored( string $formatting_tag_name ) { + $this->assertSame( + 'onetwo', + WP_HTML_Processor::normalize( "onetwo" ), + "Should have ignored unexpected {$formatting_tag_name} closer." + ); + } + + /** + * Data provider. + * + * @return array[string, array{0: string}] + */ + public static function data_formatting_tag_names(): array { + return array( + 'A tag' => array( 'a' ), + 'B tag' => array( 'b' ), + 'BIG tag' => array( 'big' ), + 'CODE tag' => array( 'code' ), + 'EM tag' => array( 'em' ), + 'FONT tag' => array( 'font' ), + 'I tag' => array( 'i' ), + 'NOBR tag' => array( 'nobr' ), + 'S tag' => array( 's' ), + 'SMALL tag' => array( 'small' ), + 'STRIKE tag' => array( 'strike' ), + 'STRONG tag' => array( 'strong' ), + 'TT tag' => array( 'tt' ), + 'U tag' => array( 'u' ), + ); + } + /** * Ensures that self-closing elements in foreign content retain their self-closing flag. * diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index b54fc047ab040..8408e698cb580 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -577,6 +577,7 @@ public static function data_virtual_nodes_breadcrumbs() { 'Implied P tag opener on unmatched closer' => array( '

', 1, 'P', 'open', array( 'HTML', 'BODY', 'P' ) ), 'Implied heading tag closer on heading child' => array( '

', 2, 'H1', 'close', array( 'HTML', 'BODY' ) ), 'Implied A tag closer on A tag child' => array( '', 2, 'A', 'close', array( 'HTML', 'BODY' ) ), + 'Redundant A closer after sibling A' => array( '', 4, 'A', 'close', array( 'HTML', 'BODY' ) ), 'Implied A tag closer on A tag descendent' => array( '', 4, 'A', 'close', array( 'HTML', 'BODY' ) ), ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index da6d959eb75e0..eebb5af27fa10 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -405,6 +405,73 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should be closed and DIV should be its sibling.' ); } + /** + * Verifies that when the adoption agency algorithm finds no matching + * active formatting element, it acts like "any other end tag". + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket 65383 + * + * @dataProvider data_in_body_adoption_agency_fallback_end_tags + * + * @param string $formatting_tag_name Formatting tag name with no active formatting element. + */ + public function test_in_body_adoption_agency_fallback_ignores_unexpected_formatting_end_tag( string $formatting_tag_name ) { + $processor = WP_HTML_Processor::create_fragment( "
" ); + + $this->assertTrue( $processor->next_tag( 'SPAN' ), 'Failed to find the SPAN opener before an unexpected formatting end tag.' ); + $this->assertSame( 'SPAN', $processor->get_tag(), "Expected to start test on SPAN element but found {$processor->get_tag()} instead." ); + $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting before unexpected formatting closer.' ); + + $this->assertTrue( $processor->next_tag( 'CODE' ), "Failed to ignore unexpected {$formatting_tag_name} closer and advance to CODE opener." ); + $this->assertSame( 'CODE', $processor->get_tag(), "Expected to find CODE element, but found {$processor->get_tag()} instead." ); + $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to keep SPAN open after unexpected formatting closer.' ); + } + + /** + * Verifies that the adoption agency fallback preserves the "any other end tag" + * step result when the ignored token is followed by EOF. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket 65383 + * + * @dataProvider data_in_body_adoption_agency_fallback_end_tags + * + * @param string $tag_name Formatting tag name with no active formatting element. + */ + public function test_in_body_adoption_agency_fallback_preserves_ignored_end_tag_step_result( string $tag_name ): void { + $processor = WP_HTML_Processor::create_fragment( "" ); + $this->assertTrue( $processor->next_token(), 'Failed to find the SPAN opener before an unexpected end tag.' ); + $this->assertSame( 'SPAN', $processor->get_tag(), "Expected to start test on SPAN element but found {$processor->get_tag()} instead." ); + $this->assertFalse( $processor->next_token(), "Expected unexpected {$tag_name} end tag followed by EOF to return false." ); + } + + /** + * Data provider. + * + * @return array[string, array{0: string}] + */ + public static function data_in_body_adoption_agency_fallback_end_tags(): array { + return array( + 'A tag' => array( 'a' ), + 'B tag' => array( 'b' ), + 'BIG tag' => array( 'big' ), + 'CODE tag' => array( 'code' ), + 'EM tag' => array( 'em' ), + 'FONT tag' => array( 'font' ), + 'I tag' => array( 'i' ), + 'NOBR tag' => array( 'nobr' ), + 'S tag' => array( 's' ), + 'SMALL tag' => array( 'small' ), + 'STRIKE tag' => array( 'strike' ), + 'STRONG tag' => array( 'strong' ), + 'TT tag' => array( 'tt' ), + 'U tag' => array( 'u' ), + ); + } + /** * Ensures that closing `
` tags are appropriately treated as opening tags with no attributes. * From b1f26dfbc8ef656b236803be4e2fa5295e474fee Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Jun 2026 19:24:27 +0200 Subject: [PATCH 4/7] Update specification text See https://github.com/whatwg/html/commit/9f50934d44024efb11e276945aca31c4341f3277 --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 2fd51db80a7bc..d8c940e969335 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -6285,7 +6285,7 @@ private function run_adoption_agency_algorithm(): void { } } - // > If there is no such element, then return and instead act as described in the "any other end tag" entry above. + // > If there is no such element, then act as described in the "any other end tag" entry above and return. if ( null === $formatting_element ) { $this->bail( 'Cannot run adoption agency when "any other end tag" is required.' ); } From 93a048e667ab406d16bb3eb24a7ceadd2ddb6497 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Jun 2026 19:24:50 +0200 Subject: [PATCH 5/7] Run any other end tag in AAA --- src/wp-includes/html-api/class-wp-html-processor.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index d8c940e969335..cc2c1f0307385 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -6287,7 +6287,8 @@ private function run_adoption_agency_algorithm(): void { // > If there is no such element, then act as described in the "any other end tag" entry above and return. if ( null === $formatting_element ) { - $this->bail( 'Cannot run adoption agency when "any other end tag" is required.' ); + $this->step_in_body_any_other_end_tag(); + return; } // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. From d57246feffc45989169b071f947fd044d7e1d6df Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Jun 2026 19:38:21 +0200 Subject: [PATCH 6/7] Propagate adoption agency fallback result --- .../html-api/class-wp-html-processor.php | 20 +++++++++---------- .../html-api/wpHtmlProcessorSemanticRules.php | 6 ++---- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index cc2c1f0307385..e0dfd37e2cb34 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2920,8 +2920,7 @@ private function step_in_body(): bool { case '-STRONG': case '-TT': case '-U': - $this->run_adoption_agency_algorithm(); - return true; + return $this->run_adoption_agency_algorithm(); /* * > A start tag whose tag name is one of: "applet", "marquee", "object" @@ -6245,8 +6244,10 @@ private function reset_insertion_mode_appropriately(): void { * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#adoption-agency-algorithm + * + * @return bool Whether an element was found. */ - private function run_adoption_agency_algorithm(): void { + private function run_adoption_agency_algorithm(): bool { $budget = 1000; $subject = $this->get_tag(); $current_node = $this->state->stack_of_open_elements->current_node(); @@ -6258,13 +6259,13 @@ private function run_adoption_agency_algorithm(): void { ! $this->state->active_formatting_elements->contains_node( $current_node ) ) { $this->state->stack_of_open_elements->pop(); - return; + return true; } $outer_loop_counter = 0; while ( $budget-- > 0 ) { if ( $outer_loop_counter++ >= 8 ) { - return; + return true; } /* @@ -6287,19 +6288,18 @@ private function run_adoption_agency_algorithm(): void { // > If there is no such element, then act as described in the "any other end tag" entry above and return. if ( null === $formatting_element ) { - $this->step_in_body_any_other_end_tag(); - return; + return $this->step_in_body_any_other_end_tag(); } // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) { $this->state->active_formatting_elements->remove_node( $formatting_element ); - return; + return true; } // > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return. if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) { - return; + return true; } /* @@ -6335,7 +6335,7 @@ private function run_adoption_agency_algorithm(): void { if ( $formatting_element->bookmark_name === $item->bookmark_name ) { $this->state->active_formatting_elements->remove_node( $formatting_element ); - return; + return true; } } } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index eebb5af27fa10..6e49cbab0fccb 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -442,10 +442,8 @@ public function test_in_body_adoption_agency_fallback_ignores_unexpected_formatt * @param string $tag_name Formatting tag name with no active formatting element. */ public function test_in_body_adoption_agency_fallback_preserves_ignored_end_tag_step_result( string $tag_name ): void { - $processor = WP_HTML_Processor::create_fragment( "" ); - $this->assertTrue( $processor->next_token(), 'Failed to find the SPAN opener before an unexpected end tag.' ); - $this->assertSame( 'SPAN', $processor->get_tag(), "Expected to start test on SPAN element but found {$processor->get_tag()} instead." ); - $this->assertFalse( $processor->next_token(), "Expected unexpected {$tag_name} end tag followed by EOF to return false." ); + $processor = WP_HTML_Processor::create_fragment( "" ); + $this->assertFalse( $processor->step(), "Expected unexpected {$tag_name} end tag followed by EOF to return false." ); } /** From 3be5a0c14105a0c721eaae2c2871028e7d152c81 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Jun 2026 19:46:31 +0200 Subject: [PATCH 7/7] Cover inactive formatting element continuation --- .../html-api/class-wp-html-processor.php | 2 +- .../html-api/wpHtmlProcessorSemanticRules.php | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e0dfd37e2cb34..f88759d909c13 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -6245,7 +6245,7 @@ private function reset_insertion_mode_appropriately(): void { * * @see https://html.spec.whatwg.org/#adoption-agency-algorithm * - * @return bool Whether an element was found. + * @return bool Whether the current token was handled without exhausting input. */ private function run_adoption_agency_algorithm(): bool { $budget = 1000; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index 6e49cbab0fccb..2025646418c13 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -446,6 +446,27 @@ public function test_in_body_adoption_agency_fallback_preserves_ignored_end_tag_ $this->assertFalse( $processor->step(), "Expected unexpected {$tag_name} end tag followed by EOF to return false." ); } + /** + * Verifies that when the adoption agency algorithm returns after removing + * a formatting element from the active formatting elements list, it does + * not report the current token as EOF. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket 65383 + * + * @dataProvider data_in_body_adoption_agency_fallback_end_tags + * + * @param string $tag_name Formatting tag name with no open element. + */ + public function test_in_body_adoption_agency_removes_inactive_formatting_element_and_continues( string $tag_name ): void { + $processor = WP_HTML_Processor::create_fragment( "

<{$tag_name}>

" ); + + $this->assertTrue( $processor->next_tag( $tag_name ), "Failed to find the {$tag_name} opener before it is popped by the P closer." ); + $this->assertTrue( $processor->next_tag( 'SPAN' ), "Failed to advance past the inactive {$tag_name} closer to the following SPAN opener." ); + $this->assertSame( array( 'HTML', 'BODY', 'SPAN' ), $processor->get_breadcrumbs(), "Expected SPAN to be a BODY child after the inactive {$tag_name} closer." ); + } + /** * Data provider. *