Skip to content

Commit ed0ca22

Browse files
author
Damjan Jovanovic
committed
Add support for trailing text after the closing quote, for Excel compatibility.
1 parent 86b2bfa commit ed0ca22

3 files changed

Lines changed: 64 additions & 11 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ public static Builder create(final CSVFormat csvFormat) {
208208

209209
private boolean allowMissingColumnNames;
210210

211+
private boolean allowTrailingText;
212+
211213
private boolean autoFlush;
212214

213215
private Character commentMarker;
@@ -264,6 +266,7 @@ private Builder(final CSVFormat csvFormat) {
264266
this.autoFlush = csvFormat.autoFlush;
265267
this.quotedNullString = csvFormat.quotedNullString;
266268
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
269+
this.allowTrailingText = csvFormat.allowTrailingText;
267270
}
268271

269272
/**
@@ -301,6 +304,20 @@ public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames)
301304
return this;
302305
}
303306

307+
/**
308+
* Sets whether to allow trailing text in a quoted field, after the closing quote.
309+
*
310+
* @param allowTrailingText the trailing text behavior, {@code true} to append that text to the field contents, {@code false} to throw
311+
* an {@link IOException}.
312+
*
313+
* @return This instance.
314+
* @since 1.10.0
315+
*/
316+
public Builder setAllowTrailingText(final boolean allowTrailingText) {
317+
this.allowTrailingText = allowTrailingText;
318+
return this;
319+
}
320+
304321
/**
305322
* Sets whether to flush on close.
306323
*
@@ -810,7 +827,7 @@ public CSVFormat getFormat() {
810827
* @see Predefined#Default
811828
*/
812829
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
813-
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
830+
false, false, false, DuplicateHeaderMode.ALLOW_ALL, false);
814831

815832
/**
816833
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
@@ -834,6 +851,7 @@ public CSVFormat getFormat() {
834851
* <li>{@code setIgnoreEmptyLines(false)}</li>
835852
* <li>{@code setAllowMissingColumnNames(true)}</li>
836853
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
854+
* <li>{@code setAllowTrailingText(true)}</li>
837855
* </ul>
838856
* <p>
839857
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
@@ -846,6 +864,7 @@ public CSVFormat getFormat() {
846864
public static final CSVFormat EXCEL = DEFAULT.builder()
847865
.setIgnoreEmptyLines(false)
848866
.setAllowMissingColumnNames(true)
867+
.setAllowTrailingText(true)
849868
.build();
850869
// @formatter:on
851870

@@ -1268,7 +1287,7 @@ private static boolean isTrimChar(final CharSequence charSequence, final int pos
12681287
*/
12691288
public static CSVFormat newFormat(final char delimiter) {
12701289
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
1271-
DuplicateHeaderMode.ALLOW_ALL);
1290+
DuplicateHeaderMode.ALLOW_ALL, false);
12721291
}
12731292

12741293
static String[] toStringArray(final Object[] values) {
@@ -1312,6 +1331,8 @@ public static CSVFormat valueOf(final String format) {
13121331

13131332
private final boolean allowMissingColumnNames;
13141333

1334+
private final boolean allowTrailingText;
1335+
13151336
private final boolean autoFlush;
13161337

13171338
private final Character commentMarker; // null if commenting is disabled
@@ -1366,6 +1387,7 @@ private CSVFormat(final Builder builder) {
13661387
this.autoFlush = builder.autoFlush;
13671388
this.quotedNullString = builder.quotedNullString;
13681389
this.duplicateHeaderMode = builder.duplicateHeaderMode;
1390+
this.allowTrailingText = builder.allowTrailingText;
13691391
validate();
13701392
}
13711393

@@ -1396,7 +1418,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
13961418
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
13971419
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
13981420
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
1399-
final DuplicateHeaderMode duplicateHeaderMode) {
1421+
final DuplicateHeaderMode duplicateHeaderMode, final boolean allowTrailingText) {
14001422
this.delimiter = delimiter;
14011423
this.quoteCharacter = quoteChar;
14021424
this.quoteMode = quoteMode;
@@ -1416,6 +1438,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
14161438
this.autoFlush = autoFlush;
14171439
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
14181440
this.duplicateHeaderMode = duplicateHeaderMode;
1441+
this.allowTrailingText = allowTrailingText;
14191442
validate();
14201443
}
14211444

@@ -1469,7 +1492,8 @@ public boolean equals(final Object obj) {
14691492
ignoreHeaderCase == other.ignoreHeaderCase && ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
14701493
Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
14711494
Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
1472-
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
1495+
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim &&
1496+
allowTrailingText == other.allowTrailingText;
14731497
}
14741498

14751499
/**
@@ -1512,6 +1536,16 @@ public boolean getAllowMissingColumnNames() {
15121536
return allowMissingColumnNames;
15131537
}
15141538

1539+
/**
1540+
* Gets whether quoted fields allow trailing text after the closing quote.
1541+
*
1542+
* @return {@code true} if trailing text is appended to the field contents, {@code false} if non-whitespace trailing text makes parsing throw an {@link IOException}.
1543+
* @since 1.10.0
1544+
*/
1545+
public boolean getAllowTrailingText() {
1546+
return allowTrailingText;
1547+
}
1548+
15151549
/**
15161550
* Gets whether to flush on close.
15171551
*
@@ -1692,9 +1726,9 @@ public int hashCode() {
16921726
int result = 1;
16931727
result = prime * result + Arrays.hashCode(headers);
16941728
result = prime * result + Arrays.hashCode(headerComments);
1695-
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
1696-
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
1697-
skipHeaderRecord, trailingDelimiter, trim);
1729+
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, allowTrailingText, autoFlush, commentMarker, delimiter,
1730+
escapeCharacter, ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString,
1731+
recordSeparator, skipHeaderRecord, trailingDelimiter, trim);
16981732
}
16991733

17001734
/**

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ final class Lexer implements Closeable {
5757

5858
private final boolean ignoreSurroundingSpaces;
5959
private final boolean ignoreEmptyLines;
60+
private final boolean allowTrailingText;
6061

6162
/** The input stream */
6263
private final ExtendedBufferedReader reader;
@@ -72,6 +73,7 @@ final class Lexer implements Closeable {
7273
this.commentStart = mapNullToDisabled(format.getCommentMarker());
7374
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
7475
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
76+
this.allowTrailingText = format.getAllowTrailingText();
7577
this.delimiterBuf = new char[delimiter.length - 1];
7678
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
7779
}
@@ -364,10 +366,14 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
364366
token.type = EORECORD;
365367
return token;
366368
}
367-
if (!Character.isWhitespace((char)c)) {
368-
// error invalid char between token and next delimiter
369-
throw new IOException("(line " + getCurrentLineNumber() +
370-
") invalid char between encapsulated token and delimiter");
369+
if (allowTrailingText) {
370+
token.content.append((char) c);
371+
} else {
372+
if (!Character.isWhitespace((char)c)) {
373+
// error invalid char between token and next delimiter
374+
throw new IOException("(line " + getCurrentLineNumber() +
375+
") invalid char between encapsulated token and delimiter");
376+
}
371377
}
372378
}
373379
}

src/test/java/org/apache/commons/csv/LexerTest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,4 +431,17 @@ public void testTrimTrailingSpacesZeroLength() throws Exception {
431431
lexer.trimTrailingSpaces(buffer);
432432
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
433433
}
434+
435+
@Test
436+
public void testTrailingTextAfterQuote() throws Exception {
437+
final String code = "\"a\" b,\"a\" \" b,\"a\" b \"\"";
438+
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowTrailingText(true).build())) {
439+
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a b"));
440+
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a \" b"));
441+
assertThat(parser.nextToken(new Token()), matches(EOF, "a b \"\""));
442+
}
443+
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowTrailingText(false).build())) {
444+
assertThrows(IOException.class, () -> parser.nextToken(new Token())); // use the lexer opened by this try-with-resources
445+
}
446+
}
434447
}

0 commit comments

Comments
 (0)