Skip to content

Commit e714ef1

Browse files
committed
Improved newline and whitespace normalization
Fixes #1787. In TextNode, if the text is blank, check whether the next Element will indent; if so, the text can be skipped. Previously this checked whether the parent element would indent, which is wrong for inline elements. Also made empty tags (like <img>) format inline rather than indent, and fixed how whitespace is normalized at the end of an element and after the body tag.
1 parent 7c21b2e commit e714ef1

File tree

13 files changed

+97
-52
lines changed

13 files changed

+97
-52
lines changed

CHANGES

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ jsoup changelog
2121
what should have been preformatted output to instead be a run of text.
2222
<https://github.com/jhy/jsoup/issues/1776>
2323

24+
* Bugfix: when pretty-print serializing HTML, newlines separating phrasing content (e.g. a <span> tag within a <p> tag)
25+
would be incorrectly skipped, instead of normalized to a space. Additionally, improved space normalization between
26+
other end-of-line occurrences, and whitespace handling after a closing </body>
27+
<https://github.com/jhy/jsoup/issues/1787>
28+
2429
*** Release 1.15.1 [2022-May-15]
2530
* Change: removed previously deprecated methods and classes (including org.jsoup.safety.Whitelist; use
2631
org.jsoup.safety.Safelist instead).

src/main/java/org/jsoup/nodes/Element.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1842,7 +1842,6 @@ private boolean isFormatAsBlock(Document.OutputSettings out) {
18421842

18431843
private boolean isInlineable(Document.OutputSettings out) {
18441844
return tag().isInline()
1845-
&& !tag().isEmpty()
18461845
&& (parent() == null || parent().isBlock())
18471846
&& previousSibling() != null
18481847
&& !out.outline();

src/main/java/org/jsoup/nodes/TextNode.java

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package org.jsoup.nodes;
22

3-
import org.jsoup.internal.StringUtil;
43
import org.jsoup.helper.Validate;
4+
import org.jsoup.internal.StringUtil;
55

66
import java.io.IOException;
77

@@ -80,17 +80,30 @@ public TextNode splitText(int offset) {
8080
return tailNode;
8181
}
8282

83-
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
83+
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
8484
final boolean prettyPrint = out.prettyPrint();
8585
final Element parent = parentNode instanceof Element ? ((Element) parentNode) : null;
86-
final boolean parentIndent = parent != null && parent.shouldIndent(out);
8786
final boolean blank = isBlank();
8887
final boolean normaliseWhite = prettyPrint && !Element.preserveWhitespace(parentNode);
8988

90-
if (normaliseWhite && parentIndent && StringUtil.startsWithNewline(coreValue()) && blank) // we are skippable whitespace
91-
return;
92-
93-
if (prettyPrint && ((siblingIndex == 0 && parent != null && parent.tag().formatAsBlock() && !blank) || (out.outline() && siblingNodes().size()>0 && !blank) ))
89+
// if this text is just whitespace, and the next node will cause an indent, skip this text:
90+
if (normaliseWhite && blank) {
91+
boolean canSkip = false;
92+
Node next = this.nextSibling();
93+
if (next instanceof Element) {
94+
Element nextEl = (Element) next;
95+
canSkip = nextEl.shouldIndent(out);
96+
} else if (next == null && parent != null) { // we are the last child, check parent
97+
canSkip = parent.shouldIndent(out);
98+
} else if (next instanceof TextNode && (((TextNode) next).isBlank())) {
99+
// sometimes get a run of textnodes from parser if nodes are re-parented
100+
canSkip = true;
101+
}
102+
if (canSkip)
103+
return;
104+
}
105+
106+
if (prettyPrint && ((siblingIndex == 0 && parent != null && parent.tag().formatAsBlock() && !blank) || (out.outline() && siblingNodes().size() > 0 && !blank)))
94107
indent(accum, depth, out);
95108

96109
final boolean stripWhite = prettyPrint && parentNode instanceof Document;

src/main/java/org/jsoup/parser/HtmlTreeBuilder.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,14 @@ boolean resetInsertionMode() {
558558
return state != origState;
559559
}
560560

561+
/** Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes */
562+
void resetBody() {
563+
if (!onStack("body")) {
564+
stack.add(doc.body());
565+
}
566+
transition(HtmlTreeBuilderState.InBody);
567+
}
568+
561569
// todo: tidy up in specific scope methods
562570
private String[] specificScopeTarget = {null};
563571

src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
718718
return false;
719719
} else {
720720
// todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
721+
anyOtherEndTag(t, tb);
721722
tb.transition(AfterBody);
722723
}
723724
break;
@@ -1597,13 +1598,14 @@ boolean process(Token t, HtmlTreeBuilder tb) {
15971598
tb.error(this);
15981599
return false;
15991600
} else {
1601+
if (tb.onStack("html")) tb.popStackToClose("html");
16001602
tb.transition(AfterAfterBody);
16011603
}
16021604
} else if (t.isEOF()) {
16031605
// chillax! we're done
16041606
} else {
16051607
tb.error(this);
1606-
tb.transition(InBody);
1608+
tb.resetBody();
16071609
return tb.process(t);
16081610
}
16091611
return true;
@@ -1688,21 +1690,12 @@ boolean process(Token t, HtmlTreeBuilder tb) {
16881690
} else if (t.isDoctype() || (t.isStartTag() && t.asStartTag().normalName().equals("html"))) {
16891691
return tb.process(t, InBody);
16901692
} else if (isWhitespace(t)) {
1691-
// allows space after </html>, and put the body back on stack to allow subsequent tags if any
1692-
// todo - might be better for </body> and </html> to close them, allow trailing space, and then reparent
1693-
// that space into body if other tags get re-added. but that's overkill for now
1694-
Element html = tb.popStackToClose("html");
16951693
tb.insert(t.asCharacter());
1696-
if (html != null) {
1697-
tb.stack.add(html);
1698-
Element body = html.selectFirst("body");
1699-
if (body != null) tb.stack.add(body);
1700-
}
17011694
}else if (t.isEOF()) {
17021695
// nice work chuck
17031696
} else {
17041697
tb.error(this);
1705-
tb.transition(InBody);
1698+
tb.resetBody();
17061699
return tb.process(t);
17071700
}
17081701
return true;

src/test/java/org/jsoup/nodes/DocumentTest.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,17 @@ public class DocumentTest {
109109
Document doc = Jsoup.parse("<title>Hello</title> <p>One<p>Two");
110110
Document clone = doc.clone();
111111

112-
assertEquals("<html><head><title>Hello</title> </head><body><p>One</p><p>Two</p></body></html>", TextUtil.stripNewlines(clone.html()));
112+
assertEquals("<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>", TextUtil.stripNewlines(clone.html()));
113113
clone.title("Hello there");
114-
clone.select("p").first().text("One more").attr("id", "1");
115-
assertEquals("<html><head><title>Hello there</title> </head><body><p id=\"1\">One more</p><p>Two</p></body></html>", TextUtil.stripNewlines(clone.html()));
116-
assertEquals("<html><head><title>Hello</title> </head><body><p>One</p><p>Two</p></body></html>", TextUtil.stripNewlines(doc.html()));
114+
clone.expectFirst("p").text("One more").attr("id", "1");
115+
assertEquals("<html><head><title>Hello there</title></head><body><p id=\"1\">One more</p><p>Two</p></body></html>", TextUtil.stripNewlines(clone.html()));
116+
assertEquals("<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>", TextUtil.stripNewlines(doc.html()));
117+
}
118+
119+
@Test void testBasicIndent() {
120+
Document doc = Jsoup.parse("<title>Hello</title> <p>One<p>Two");
121+
String expect = "<html>\n <head>\n <title>Hello</title>\n </head>\n <body>\n <p>One</p>\n <p>Two</p>\n </body>\n</html>";
122+
assertEquals(expect, doc.html());
117123
}
118124

119125
@Test public void testClonesDeclarations() {

src/test/java/org/jsoup/nodes/ElementTest.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ public void testContainerOutput() {
536536
Document doc = Jsoup.parse("<title>Hello there</title> <div><p>Hello</p><p>there</p></div> <div>Another</div>");
537537
assertEquals("<title>Hello there</title>", doc.select("title").first().outerHtml());
538538
assertEquals("<div>\n <p>Hello</p>\n <p>there</p>\n</div>", doc.select("div").first().outerHtml());
539-
assertEquals("<div>\n <p>Hello</p>\n <p>there</p>\n</div> \n<div>\n Another\n</div>", doc.select("body").first().html());
539+
assertEquals("<div>\n <p>Hello</p>\n <p>there</p>\n</div>\n<div>\n Another\n</div>", doc.select("body").first().html());
540540
}
541541

542542
@Test
@@ -2269,4 +2269,19 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
22692269
}
22702270
assertTrue(threw);
22712271
}
2272+
2273+
@Test void spanRunsMaintainSpace() {
2274+
// https://github.com/jhy/jsoup/issues/1787
2275+
Document doc = Jsoup.parse("<p><span>One</span>\n<span>Two</span>\n<span>Three</span></p>");
2276+
String text = "One Two Three";
2277+
Element body = doc.body();
2278+
assertEquals(text, body.text());
2279+
2280+
Element p = doc.expectFirst("p");
2281+
String html = p.html();
2282+
p.html(html);
2283+
assertEquals(text, body.text());
2284+
2285+
assertEquals("<p><span>One</span> <span>Two</span> <span>Three</span></p>", body.html());
2286+
}
22722287
}

src/test/java/org/jsoup/parser/HtmlParserTest.java

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,13 @@ public class HtmlParserTest {
273273

274274
@Test public void handlesNestedImplicitTable() {
275275
Document doc = Jsoup.parse("<table><td>1</td></tr> <td>2</td></tr> <td> <table><td>3</td> <td>4</td></table> <tr><td>5</table>");
276-
assertEquals("<table><tbody><tr><td>1</td></tr> <tr><td>2</td></tr> <tr><td> <table><tbody><tr><td>3</td> <td>4</td></tr></tbody></table> </td></tr><tr><td>5</td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html()));
276+
assertEquals("<table><tbody><tr><td>1</td></tr><tr><td>2</td></tr><tr><td><table><tbody><tr><td>3</td><td>4</td></tr></tbody></table></td></tr><tr><td>5</td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html()));
277277
}
278278

279279
@Test public void handlesWhatWgExpensesTableExample() {
280280
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#examples-0
281281
Document doc = Jsoup.parse("<table> <colgroup> <col> <colgroup> <col> <col> <col> <thead> <tr> <th> <th>2008 <th>2007 <th>2006 <tbody> <tr> <th scope=rowgroup> Research and development <td> $ 1,109 <td> $ 782 <td> $ 712 <tr> <th scope=row> Percentage of net sales <td> 3.4% <td> 3.3% <td> 3.7% <tbody> <tr> <th scope=rowgroup> Selling, general, and administrative <td> $ 3,761 <td> $ 2,963 <td> $ 2,433 <tr> <th scope=row> Percentage of net sales <td> 11.6% <td> 12.3% <td> 12.6% </table>");
282-
assertEquals("<table> <colgroup> <col> </colgroup><colgroup> <col> <col> <col> </colgroup><thead> <tr> <th> </th><th>2008 </th><th>2007 </th><th>2006 </th></tr></thead><tbody> <tr> <th scope=\"rowgroup\"> Research and development </th><td> $ 1,109 </td><td> $ 782 </td><td> $ 712 </td></tr><tr> <th scope=\"row\"> Percentage of net sales </th><td> 3.4% </td><td> 3.3% </td><td> 3.7% </td></tr></tbody><tbody> <tr> <th scope=\"rowgroup\"> Selling, general, and administrative </th><td> $ 3,761 </td><td> $ 2,963 </td><td> $ 2,433 </td></tr><tr> <th scope=\"row\"> Percentage of net sales </th><td> 11.6% </td><td> 12.3% </td><td> 12.6% </td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html()));
282+
assertEquals("<table><colgroup><col></colgroup><colgroup><col><col><col></colgroup><thead><tr><th></th><th>2008 </th><th>2007 </th><th>2006 </th></tr></thead><tbody><tr><th scope=\"rowgroup\"> Research and development </th><td> $ 1,109 </td><td> $ 782 </td><td> $ 712 </td></tr><tr><th scope=\"row\"> Percentage of net sales </th><td> 3.4% </td><td> 3.3% </td><td> 3.7% </td></tr></tbody><tbody><tr><th scope=\"rowgroup\"> Selling, general, and administrative </th><td> $ 3,761 </td><td> $ 2,963 </td><td> $ 2,433 </td></tr><tr><th scope=\"row\"> Percentage of net sales </th><td> 11.6% </td><td> 12.3% </td><td> 12.6% </td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html()));
283283
}
284284

285285
@Test public void handlesTbodyTable() {
@@ -294,7 +294,7 @@ public class HtmlParserTest {
294294

295295
@Test public void noTableDirectInTable() {
296296
Document doc = Jsoup.parse("<table> <td>One <td><table><td>Two</table> <table><td>Three");
297-
assertEquals("<table> <tbody><tr><td>One </td><td><table><tbody><tr><td>Two</td></tr></tbody></table> <table><tbody><tr><td>Three</td></tr></tbody></table></td></tr></tbody></table>",
297+
assertEquals("<table><tbody><tr><td>One </td><td><table><tbody><tr><td>Two</td></tr></tbody></table><table><tbody><tr><td>Three</td></tr></tbody></table></td></tr></tbody></table>",
298298
TextUtil.stripNewlines(doc.body().html()));
299299
}
300300

@@ -472,7 +472,7 @@ public class HtmlParserTest {
472472
// if a known tag, allow self closing outside of spec, but force an end tag. unknown tags can be self closing.
473473
String h = "<div id='1' /><script src='/foo' /><div id=2><img /><img></div><a id=3 /><i /><foo /><foo>One</foo> <hr /> hr text <hr> hr text two";
474474
Document doc = Jsoup.parse(h);
475-
assertEquals("<div id=\"1\"></div><script src=\"/foo\"></script><div id=\"2\"><img><img></div><a id=\"3\"></a><i></i><foo /><foo>One</foo> <hr> hr text <hr> hr text two", TextUtil.stripNewlines(doc.body().html()));
475+
assertEquals("<div id=\"1\"></div><script src=\"/foo\"></script><div id=\"2\"><img><img></div><a id=\"3\"></a><i></i><foo /><foo>One</foo><hr> hr text <hr> hr text two", TextUtil.stripNewlines(doc.body().html()));
476476
}
477477

478478
@Test public void handlesKnownEmptyNoFrames() {
@@ -599,7 +599,7 @@ public class HtmlParserTest {
599599
@Test public void testHgroup() {
600600
// jsoup used to not allow hgroup in h{n}, but that's not in spec, and browsers are OK
601601
Document doc = Jsoup.parse("<h1>Hello <h2>There <hgroup><h1>Another<h2>headline</hgroup> <hgroup><h1>More</h1><p>stuff</p></hgroup>");
602-
assertEquals("<h1>Hello </h1><h2>There <hgroup><h1>Another</h1><h2>headline</h2></hgroup> <hgroup><h1>More</h1><p>stuff</p></hgroup></h2>", TextUtil.stripNewlines(doc.body().html()));
602+
assertEquals("<h1>Hello </h1><h2>There <hgroup><h1>Another</h1><h2>headline</h2></hgroup><hgroup><h1>More</h1><p>stuff</p></hgroup></h2>", TextUtil.stripNewlines(doc.body().html()));
603603
}
604604

605605
@Test public void testRelaxedTags() {
@@ -611,7 +611,7 @@ public class HtmlParserTest {
611611
// h* tags (h1 .. h9) in browsers can handle any internal content other than other h*. which is not per any
612612
// spec, which defines them as containing phrasing content only. so, reality over theory.
613613
Document doc = Jsoup.parse("<h1>Hello <div>There</div> now</h1> <h2>More <h3>Content</h3></h2>");
614-
assertEquals("<h1>Hello <div>There</div> now</h1> <h2>More </h2><h3>Content</h3>", TextUtil.stripNewlines(doc.body().html()));
614+
assertEquals("<h1>Hello <div>There</div> now</h1><h2>More </h2><h3>Content</h3>", TextUtil.stripNewlines(doc.body().html()));
615615
}
616616

617617
@Test public void testSpanContents() {
@@ -720,7 +720,7 @@ public class HtmlParserTest {
720720
// and the <i> inside the table and does not leak out.
721721
String h = "<p><b>One</p> <table><tr><td><p><i>Three<p>Four</i></td></tr></table> <p>Five</p>";
722722
Document doc = Jsoup.parse(h);
723-
String want = "<p><b>One</b></p><b> \n" +
723+
String want = "<p><b>One</b></p><b>\n" +
724724
" <table>\n" +
725725
" <tbody>\n" +
726726
" <tr>\n" +
@@ -1246,7 +1246,7 @@ public void testInvalidTableContents() throws IOException {
12461246
File in = ParseTest.getFile("/htmltests/comments.html");
12471247
Document doc = Jsoup.parse(in, "UTF-8");
12481248

1249-
assertEquals("<!--?xml version=\"1.0\" encoding=\"utf-8\"?--><!-- so --><!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><!-- what --> <html xml:lang=\"en\" lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\"><!-- now --> <head><!-- then --> <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\"> <title>A Certain Kind of Test</title> </head> <body> <h1>Hello</h1>h1&gt; (There is a UTF8 hidden BOM at the top of this file.) </body> </html>",
1249+
assertEquals("<!--?xml version=\"1.0\" encoding=\"utf-8\"?--><!-- so --><!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><!-- what --> <html xml:lang=\"en\" lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\"> <!-- now --> <head> <!-- then --> <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\"> <title>A Certain Kind of Test</title> </head> <body> <h1>Hello</h1>h1&gt; (There is a UTF8 hidden BOM at the top of this file.) </body> </html>",
12501250
StringUtil.normaliseWhitespace(doc.html()));
12511251

12521252
assertEquals("A Certain Kind of Test", doc.head().select("title").text());
@@ -1399,15 +1399,14 @@ public void testUNewlines() {
13991399
String html = "\n<!doctype html>\n<html>\n<head>\n<title>Hello</title>\n</head>\n<body>\n<p>One</p>\n</body>\n</html>\n";
14001400
Document doc = Jsoup.parse(html);
14011401
doc.outputSettings().prettyPrint(false);
1402-
assertEquals("<!doctype html>\n<html>\n<head>\n<title>Hello</title>\n</head>\n<body>\n<p>One</p>\n\n</body></html>\n", doc.outerHtml());
1402+
assertEquals("<!doctype html>\n<html>\n<head>\n<title>Hello</title>\n</head>\n<body>\n<p>One</p>\n</body>\n</html>\n", doc.outerHtml());
14031403
}
14041404

14051405
@Test public void handleContentAfterBody() {
14061406
String html = "<body>One</body> <p>Hello!</p></html> <p>There</p>";
1407-
// todo - ideally would move that space afer /html to the body when the There <p> is seen
14081407
Document doc = Jsoup.parse(html);
14091408
doc.outputSettings().prettyPrint(false);
1410-
assertEquals("<html><head></head><body>One <p>Hello!</p><p>There</p></body></html> ", doc.outerHtml());
1409+
assertEquals("<html><head></head><body>One<p>Hello!</p><p>There</p></body> </html> ", doc.outerHtml());
14111410
}
14121411

14131412
@Test public void preservesTabs() {
@@ -1487,7 +1486,7 @@ private boolean didAddElements(String input) {
14871486
String html = "<a>\n<b>\n<div>\n<a>test</a>\n</div>\n</b>\n</a>";
14881487
Document doc = Jsoup.parse(html);
14891488
assertNotNull(doc);
1490-
assertEquals("<a><b> </b></a><b><div><a></a><a>test</a></div> </b>", TextUtil.stripNewlines(doc.body().html()));
1489+
assertEquals("<a> <b> </b></a><b><div><a></a><a>test</a></div> </b>", TextUtil.stripNewlines(doc.body().html()));
14911490
}
14921491

14931492
@Test public void tagsMustStartWithAscii() {

src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public void nestedAnchorElements01() {
7575
String s = Jsoup.parse(html).toString();
7676
assertEquals("<html>\n" +
7777
" <head></head>\n" +
78-
" <body><a href=\"#1\"> </a>\n" +
78+
" <body> <a href=\"#1\"> </a>\n" +
7979
" <div>\n" +
8080
" <a href=\"#1\"></a><a href=\"#2\">child</a>\n" +
8181
" </div>\n" +
@@ -99,7 +99,7 @@ public void nestedAnchorElements02() {
9999
String s = Jsoup.parse(html).toString();
100100
assertEquals("<html>\n" +
101101
" <head></head>\n" +
102-
" <body><a href=\"#1\"> </a>\n" +
102+
" <body> <a href=\"#1\"> </a>\n" +
103103
" <div>\n" +
104104
" <a href=\"#1\"></a>\n" +
105105
" <div>\n" +

src/test/java/org/jsoup/parser/XmlTreeBuilderTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ public void testSupplyParserToDataStream() throws IOException, URISyntaxExceptio
9494
public void testDoesNotForceSelfClosingKnownTags() {
9595
// html will force "<br>one</br>" to logically "<br />One<br />". XML should stay "<br>one</br>" -- don't recognise tag.
9696
Document htmlDoc = Jsoup.parse("<br>one</br>");
97-
assertEquals("<br>one\n<br>", htmlDoc.body().html());
97+
assertEquals("<br>one<br>", htmlDoc.body().html());
9898

9999
Document xmlDoc = Jsoup.parse("<br>one</br>", "", Parser.xmlParser());
100100
assertEquals("<br>one</br>", xmlDoc.html());

0 commit comments

Comments
 (0)