Skip to content

Commit 488c141

Browse files
Backport #85440 to 25.5: Fix possible UB in case of MEMORY_LIMIT_EXCEEDED during String deserialization
1 parent c6854c8 commit 488c141

File tree

3 files changed

+103
-3
lines changed

3 files changed

+103
-3
lines changed

src/DataTypes/Serializations/SerializationString.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ void SerializationString::serializeBinaryBulk(const IColumn & column, WriteBuffe
149149

150150
template <int UNROLL_TIMES>
151151
static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
152+
try
152153
{
153154
size_t offset = data.size();
154155
/// Avoiding calling resize in a loop improves the performance.
@@ -171,8 +172,6 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
171172
max_string_size);
172173

173174
offset += size + 1;
174-
offsets.push_back(offset);
175-
176175
if (unlikely(offset > data.size()))
177176
data.resize_exact(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2)));
178177

@@ -205,10 +204,18 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
205204
}
206205

207206
data[offset - 1] = 0;
207+
208+
offsets.push_back(offset);
208209
}
209210

210211
data.resize_exact(offset);
211212
}
213+
catch (...)
214+
{
215+
/// We are doing resize_exact() of bigger values than we have, let's make sure that it will be correct (even in case of exceptions)
216+
data.resize_exact(offsets.back());
217+
throw;
218+
}
212219

213220

214221
void SerializationString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t rows_offset, size_t limit, double avg_value_size_hint) const
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#include <Columns/ColumnString.h>
2+
#include <DataTypes/DataTypeFactory.h>
3+
#include <DataTypes/Serializations/SerializationString.h>
4+
#include <IO/ReadBufferFromString.h>
5+
#include <Common/MemoryTracker.h>
6+
#include <Common/ThreadStatus.h>
7+
8+
#include <gtest/gtest.h>
9+
10+
namespace DB
11+
{
12+
namespace ErrorCodes
13+
{
14+
extern const int MEMORY_LIMIT_EXCEEDED;
15+
}
16+
}
17+
18+
using namespace DB;
19+
20+
TEST(StringSerialization, IncorrectStateAfterMemoryLimitExceeded)
21+
{
22+
MainThreadStatus::getInstance();
23+
24+
constexpr size_t rows = 1'000'000;
25+
26+
WriteBufferFromOwnString out;
27+
28+
auto src_column = ColumnString::create();
29+
src_column->insertMany("foobar", rows);
30+
31+
{
32+
auto serialization = std::make_shared<SerializationString>();
33+
ISerialization::SerializeBinaryBulkSettings settings;
34+
ISerialization::SerializeBinaryBulkStatePtr state;
35+
settings.position_independent_encoding = false;
36+
settings.getter = [&out](const auto &) { return &out; };
37+
serialization->serializeBinaryBulkWithMultipleStreams(*src_column, 0, src_column->size(), settings, state);
38+
}
39+
40+
size_t memory_limit_exceeded_errors = 0;
41+
auto run_with_memory_failures = [&](auto cb)
42+
{
43+
total_memory_tracker.setFaultProbability(0.2);
44+
try
45+
{
46+
cb();
47+
}
48+
catch (Exception & e)
49+
{
50+
if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
51+
throw;
52+
53+
++memory_limit_exceeded_errors;
54+
total_memory_tracker.setFaultProbability(0);
55+
}
56+
total_memory_tracker.setFaultProbability(0);
57+
};
58+
59+
auto type_string = std::make_shared<DataTypeString>();
60+
size_t non_empty_result = 0;
61+
while (memory_limit_exceeded_errors < 10 || non_empty_result < 10)
62+
{
63+
ColumnPtr result_column = type_string->createColumn();
64+
ReadBufferFromOwnString in(out.str());
65+
66+
auto serialization = type_string->getDefaultSerialization();
67+
ISerialization::DeserializeBinaryBulkSettings settings;
68+
ISerialization::DeserializeBinaryBulkStatePtr state;
69+
settings.position_independent_encoding = false;
70+
settings.getter = [&in](const auto &) { return &in; };
71+
72+
run_with_memory_failures([&]() { serialization->deserializeBinaryBulkWithMultipleStreams(result_column, 0, src_column->size(), settings, state, nullptr); });
73+
74+
auto & result = assert_cast<ColumnString &>(*result_column->assumeMutable());
75+
if (!result.empty())
76+
{
77+
++non_empty_result;
78+
ASSERT_EQ(result.getDataAt(0), "foobar");
79+
ASSERT_EQ(result.getDataAt(result.size() - 1), "foobar");
80+
}
81+
}
82+
}

src/Interpreters/tests/gtest_filecache.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <thread>
1010

1111
#include <Core/ServerUUID.h>
12+
#include <Common/ThreadStatus.h>
1213
#include <Common/iota.h>
1314
#include <Common/randomSeed.h>
1415
#include <DataTypes/DataTypesNumber.h>
@@ -304,7 +305,11 @@ void increasePriority(const HolderPtr & holder, size_t pos)
304305
class FileCacheTest : public ::testing::Test
305306
{
306307
public:
307-
FileCacheTest() {
308+
FileCacheTest()
309+
{
310+
/// Reset current_thread to avoid conflicts of ThreadStatus with MainThreadStatus
311+
current_thread = nullptr;
312+
308313
/// Context has to be created before calling cache.initialize();
309314
/// Otherwise the tests which run before FileCacheTest.get are failed
310315
/// It is logical to call destroyContext() at destructor.
@@ -313,6 +318,12 @@ class FileCacheTest : public ::testing::Test
313318
getContext();
314319
}
315320

321+
~FileCacheTest() override
322+
{
323+
/// Reset current_thread back
324+
current_thread = MainThreadStatus::get();
325+
}
326+
316327
static void setupLogs(const std::string & level)
317328
{
318329
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));

0 commit comments

Comments
 (0)