Skip to content

Commit b317bb8

Browse files
Handle </p> and </br> in foreign contexts
The HTML spec changed so that the `</p>` and `</br>` end tags in foreign content are handled the same way as the `<p>` and `<br>` start tags. The fragment case is now handled identically to the non-fragment case.
1 parent af4e4b2 commit b317bb8

2 files changed

Lines changed: 31 additions & 13 deletions

File tree

gumbo-parser/src/parser.c

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4418,6 +4418,7 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
44184418
|| token_has_attribute(token, "size")
44194419
)
44204420
)
4421+
|| tag_in(token, kEndTag, &(const TagSet) { TAG(BR), TAG(P) })
44214422
) {
44224423
/* Parse error */
44234424
parser_add_parse_error(parser, token);
@@ -4427,20 +4428,17 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
44274428
* fragment parsing algorithm, then act as described in the "any other
44284429
* start tag" entry below.
44294430
*/
4430-
if (!is_fragment_parser(parser)) {
4431-
do {
4432-
pop_current_node(parser);
4433-
} while (
4434-
!(
4435-
is_mathml_integration_point(get_current_node(parser))
4436-
|| is_html_integration_point(get_current_node(parser))
4437-
|| get_current_node(parser)->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
4438-
)
4439-
);
4440-
parser->_parser_state->_reprocess_current_token = true;
4441-
return;
4431+
while (
4432+
!(
4433+
is_mathml_integration_point(get_current_node(parser))
4434+
|| is_html_integration_point(get_current_node(parser))
4435+
|| get_current_node(parser)->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
4436+
)
4437+
) {
4438+
pop_current_node(parser);
44424439
}
4443-
// This is a start tag so the next if's then branch will be taken.
4440+
handle_in_body(parser, token);
4441+
return;
44444442
}
44454443

44464444
if (token->type == GUMBO_TOKEN_START_TAG) {

gumbo-parser/test/parser.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2220,4 +2220,24 @@ TEST_F(GumboParserTest, FragmentWithoutForm) {
22202220
EXPECT_EQ(0, GetChildCount(span));
22212221
}
22222222

2223+
TEST_F(GumboParserTest, ForeignFragment) {
2224+
ParseFragment("</p><foo>", "svg", GUMBO_NAMESPACE_SVG);
2225+
EXPECT_EQ(1, GetChildCount(root_));
2226+
GumboNode* html = GetChild(root_, 0);
2227+
ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
2228+
EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
2229+
EXPECT_EQ(2, GetChildCount(html));
2230+
2231+
ASSERT_EQ(2, GetChildCount(html));
2232+
GumboNode* p = GetChild(html, 0);
2233+
ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type);
2234+
ASSERT_EQ(GUMBO_TAG_P, p->v.element.tag);
2235+
ASSERT_EQ(GUMBO_NAMESPACE_HTML, p->v.element.tag_namespace);
2236+
2237+
GumboNode* foo = GetChild(html, 1);
2238+
ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type);
2239+
ASSERT_EQ(std::string("foo"), foo->v.element.name);
2240+
ASSERT_EQ(GUMBO_NAMESPACE_SVG, foo->v.element.tag_namespace);
2241+
}
2242+
22232243
} // namespace

0 commit comments

Comments
 (0)