Skip to content

Commit fb4d272

Browse files
Merge pull request ClickHouse#11258 from vitlibar/protobuf-fix-handling-bad-data
Fix handling bad data while reading in Protobuf format.
2 parents 7eb2e4f + 99626a5 commit fb4d272

File tree

4 files changed

+34
-15
lines changed

4 files changed

+34
-15
lines changed

src/Formats/ProtobufReader.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,12 @@ namespace
3232
BITS32 = 5,
3333
};
3434

35-
// The following condition must always be true:
36-
// any_cursor_position < min(END_OF_VARINT, END_OF_GROUP)
37-
// This inequation helps to check conditions in SimpleReader.
38-
constexpr UInt64 END_OF_VARINT = static_cast<UInt64>(-1);
39-
constexpr UInt64 END_OF_GROUP = static_cast<UInt64>(-2);
35+
// The following conditions must always be true:
36+
// any_cursor_position > END_OF_VARINT
37+
// any_cursor_position > END_OF_GROUP
38+
// Those inequations helps checking conditions in ProtobufReader::SimpleReader.
39+
constexpr Int64 END_OF_VARINT = -1;
40+
constexpr Int64 END_OF_GROUP = -2;
4041

4142
Int64 decodeZigZag(UInt64 n) { return static_cast<Int64>((n >> 1) ^ (~(n & 1) + 1)); }
4243

@@ -77,7 +78,7 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
7778
if (!current_message_level)
7879
return;
7980

80-
UInt64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front();
81+
Int64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front();
8182
if (cursor != root_message_end)
8283
{
8384
if (cursor < root_message_end)
@@ -95,6 +96,9 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
9596
void ProtobufReader::SimpleReader::startNestedMessage()
9697
{
9798
assert(current_message_level >= 1);
99+
if ((cursor > field_end) && (field_end != END_OF_GROUP))
100+
throwUnknownFormat();
101+
98102
// Start reading a nested message which is located inside a length-delimited field
99103
// of another message.
100104
parent_message_ends.emplace_back(current_message_end);
@@ -146,7 +150,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
146150
throwUnknownFormat();
147151
}
148152

149-
if (cursor >= current_message_end)
153+
if ((cursor >= current_message_end) && (current_message_end != END_OF_GROUP))
150154
return false;
151155

152156
UInt64 varint = readVarint();
@@ -196,11 +200,17 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
196200

197201
bool ProtobufReader::SimpleReader::readUInt(UInt64 & value)
198202
{
203+
if (field_end == END_OF_VARINT)
204+
{
205+
value = readVarint();
206+
field_end = cursor;
207+
return true;
208+
}
209+
199210
if (unlikely(cursor >= field_end))
200211
return false;
212+
201213
value = readVarint();
202-
if (field_end == END_OF_VARINT)
203-
field_end = cursor;
204214
return true;
205215
}
206216

@@ -227,6 +237,7 @@ bool ProtobufReader::SimpleReader::readFixed(T & value)
227237
{
228238
if (unlikely(cursor >= field_end))
229239
return false;
240+
230241
readBinary(&value, sizeof(T));
231242
return true;
232243
}

src/Formats/ProtobufReader.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ class ProtobufReader : private boost::noncopyable
124124
void ignoreGroup();
125125

126126
ReadBuffer & in;
127-
UInt64 cursor;
127+
Int64 cursor;
128128
size_t current_message_level;
129-
UInt64 current_message_end;
130-
std::vector<UInt64> parent_message_ends;
131-
UInt64 field_end;
132-
UInt64 last_string_pos;
129+
Int64 current_message_end;
130+
std::vector<Int64> parent_message_ends;
131+
Int64 field_end;
132+
Int64 last_string_pos;
133133
};
134134

135135
class IConverter

tests/queries/0_stateless/00825_protobuf_format_input.reference

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +7495123456
88
0 0
99
2 4
1010
3 9
11+
ok

tests/queries/0_stateless/00825_protobuf_format_input.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
44
. $CURDIR/../shell_config.sh
55

6-
set -e -o pipefail
6+
set -eo pipefail
77

88
# Run the client.
99
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
@@ -48,5 +48,12 @@ source $CURDIR/00825_protobuf_format_input.insh
4848
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;"
4949
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;"
5050

51+
# Try to input malformed data.
52+
set +eo pipefail
53+
echo -ne '\xe0\x80\x3f\x0b' \
54+
| $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \
55+
| grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail"
56+
set -eo pipefail
57+
5158
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;"
5259
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;"

0 commit comments

Comments
 (0)