Skip to content

Commit c6ae6a7

Browse files
Merge pull request #10418 from ClickHouse/fix_logical_error_in_diagnostic_info
Better diagnostic info in input formats
2 parents 4c17542 + 53dcce5 commit c6ae6a7

13 files changed — lines changed: 102 additions & 43 deletions

src/IO/readDecimalText.h

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -33,17 +33,14 @@ inline bool readDigits(ReadBuffer & buf, T & x, unsigned int & digits, int & exp
3333
return false;
3434
}
3535

36-
if (!buf.eof())
36+
switch (*buf.position())
3737
{
38-
switch (*buf.position())
39-
{
40-
case '-':
41-
sign = -1;
42-
[[fallthrough]];
43-
case '+':
44-
++buf.position();
45-
break;
46-
}
38+
case '-':
39+
sign = -1;
40+
[[fallthrough]];
41+
case '+':
42+
++buf.position();
43+
break;
4744
}
4845

4946
bool stop = false;

src/Processors/Formats/IRowInputFormat.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@ namespace ErrorCodes
1818
extern const int CANNOT_PARSE_UUID;
1919
extern const int TOO_LARGE_STRING_SIZE;
2020
extern const int INCORRECT_NUMBER_OF_COLUMNS;
21+
extern const int ARGUMENT_OUT_OF_BOUND;
22+
extern const int INCORRECT_DATA;
2123
}
2224

2325

@@ -30,7 +32,9 @@ bool isParseError(int code)
3032
|| code == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT
3133
|| code == ErrorCodes::CANNOT_PARSE_NUMBER
3234
|| code == ErrorCodes::CANNOT_PARSE_UUID
33-
|| code == ErrorCodes::TOO_LARGE_STRING_SIZE;
35+
|| code == ErrorCodes::TOO_LARGE_STRING_SIZE
36+
|| code == ErrorCodes::ARGUMENT_OUT_OF_BOUND /// For Decimals
37+
|| code == ErrorCodes::INCORRECT_DATA; /// For some ReadHelpers
3438
}
3539

3640

@@ -128,6 +132,10 @@ Chunk IRowInputFormat::generate()
128132
{
129133
verbose_diagnostic = getDiagnosticInfo();
130134
}
135+
catch (const Exception & exception)
136+
{
137+
verbose_diagnostic = "Cannot get verbose diagnostic: " + exception.message();
138+
}
131139
catch (...)
132140
{
133141
/// Error while trying to obtain verbose diagnostic. Ok to ignore.

src/Processors/Formats/Impl/CSVRowInputFormat.cpp

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -273,6 +273,7 @@ bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
273273
return false;
274274
}
275275

276+
skipWhitespacesAndTabs(in);
276277
if (column_indexes_for_input_fields[file_column].has_value())
277278
{
278279
const auto & header = getPort().getHeader();
@@ -289,6 +290,7 @@ bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
289290
if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column))
290291
return false;
291292
}
293+
skipWhitespacesAndTabs(in);
292294

293295
/// Delimiters
294296
if (file_column + 1 == column_indexes_for_input_fields.size())
@@ -351,12 +353,8 @@ void CSVRowInputFormat::syncAfterError()
351353
skipToNextLineOrEOF(in);
352354
}
353355

354-
void CSVRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column,
355-
ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos)
356+
void CSVRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
356357
{
357-
skipWhitespacesAndTabs(in);
358-
prev_pos = in.position();
359-
360358
if (column_indexes_for_input_fields[file_column])
361359
{
362360
const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
@@ -367,9 +365,6 @@ void CSVRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, IColumn &
367365
String tmp;
368366
readCSVString(tmp, in, format_settings.csv);
369367
}
370-
371-
curr_pos = in.position();
372-
skipWhitespacesAndTabs(in);
373368
}
374369

375370
bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column)

src/Processors/Formats/Impl/CSVRowInputFormat.h

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -55,11 +55,10 @@ class CSVRowInputFormat : public RowInputFormatWithDiagnosticInfo
5555
void addInputColumn(const String & column_name);
5656

5757
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
58-
void tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column,
59-
ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override;
58+
void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
6059
bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override
6160
{
62-
return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter;
61+
return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter && *pos != ' ' && *pos != '\t';
6362
}
6463

6564
bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);

src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -318,10 +318,8 @@ bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns &
318318
return true;
319319
}
320320

321-
void TabSeparatedRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column,
322-
ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos)
321+
void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
323322
{
324-
prev_pos = in.position();
325323
if (column_indexes_for_input_fields[file_column])
326324
{
327325
const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
@@ -332,7 +330,6 @@ void TabSeparatedRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, I
332330
NullSink null_sink;
333331
readEscapedStringInto(null_sink, in);
334332
}
335-
curr_pos = in.position();
336333
}
337334

338335
void TabSeparatedRowInputFormat::syncAfterError()

src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,7 @@ class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo
5050
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension);
5151

5252
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
53-
void tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column,
54-
ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override;
53+
void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
5554
bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; }
5655
};
5756

src/Processors/Formats/Impl/TemplateRowInputFormat.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -410,15 +410,12 @@ void TemplateRowInputFormat::writeErrorStringForWrongDelimiter(WriteBuffer & out
410410
out << '\n';
411411
}
412412

413-
void TemplateRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column,
414-
ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos)
413+
void TemplateRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
415414
{
416-
prev_pos = buf.position();
417415
if (row_format.format_idx_to_column_idx[file_column])
418416
deserializeField(type, column, file_column);
419417
else
420418
skipField(row_format.formats[file_column]);
421-
curr_pos = buf.position();
422419
}
423420

424421
bool TemplateRowInputFormat::isGarbageAfterField(size_t, ReadBuffer::Position)

src/Processors/Formats/Impl/TemplateRowInputFormat.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,7 @@ class TemplateRowInputFormat : public RowInputFormatWithDiagnosticInfo
4242
[[noreturn]] void throwUnexpectedEof();
4343

4444
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
45-
void tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column, ReadBuffer::Position & prev_pos,
46-
ReadBuffer::Position & curr_pos) override;
45+
void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
4746
bool isGarbageAfterField(size_t after_col_idx, ReadBuffer::Position pos) override;
4847
void writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim);
4948

src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,10 @@ namespace DB
2020
namespace ErrorCodes
2121
{
2222
extern const int LOGICAL_ERROR;
23-
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
2423
extern const int SYNTAX_ERROR;
2524
extern const int TYPE_MISMATCH;
2625
extern const int SUPPORT_IS_DISABLED;
26+
extern const int ARGUMENT_OUT_OF_BOUND;
2727
}
2828

2929

@@ -167,7 +167,9 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx)
167167
}
168168
catch (const Exception & e)
169169
{
170-
if (!isParseError(e.code()) && e.code() != ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED)
170+
/// Do not consider decimal overflow as parse error to avoid attempts to parse it as expression with float literal
171+
bool decimal_overflow = e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND;
172+
if (!isParseError(e.code()) || decimal_overflow)
171173
throw;
172174
if (rollback_on_exception)
173175
column.popBack(1);
@@ -226,7 +228,8 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
226228
}
227229
catch (const Exception & e)
228230
{
229-
if (!isParseError(e.code()))
231+
bool decimal_overflow = e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND;
232+
if (!isParseError(e.code()) || decimal_overflow)
230233
throw;
231234
}
232235
if (ok)

src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,8 @@ void RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo()
3737

3838
String RowInputFormatWithDiagnosticInfo::getDiagnosticInfo()
3939
{
40-
if (in.eof()) /// Buffer has gone, cannot extract information about what has been parsed.
41-
return {};
40+
if (in.eof())
41+
return "Buffer has gone, cannot extract information about what has been parsed.";
4242

4343
WriteBufferFromOwnString out;
4444

@@ -102,17 +102,17 @@ bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(co
102102
<< "type: " << alignedName(type->getName(), max_length_of_data_type_name);
103103

104104
auto * prev_position = in.position();
105-
auto * curr_position = in.position();
106105
std::exception_ptr exception;
107106

108107
try
109108
{
110-
tryDeserializeFiled(type, column, file_column, prev_position, curr_position);
109+
tryDeserializeField(type, column, file_column);
111110
}
112111
catch (...)
113112
{
114113
exception = std::current_exception();
115114
}
115+
auto * curr_position = in.position();
116116

117117
if (curr_position < prev_position)
118118
throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);

0 commit comments

Comments
 (0)