Skip to content

Commit 349c461

Browse files
committed
Keep track of the number of async tables loading jobs. If there are some running jobs, do not update tail_ptr in TransactionLog::removeOldEntries
1 parent df8d10b commit 349c461

File tree

3 files changed

+40
-5
lines changed

3 files changed

+40
-5
lines changed

src/Databases/DatabaseOrdinary.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <Common/logger_useful.h>
3737
#include <Common/quoteString.h>
3838
#include <Common/typeid_cast.h>
39+
#include <Interpreters/TransactionLog.h>
3940

4041
#include <boost/algorithm/string/replace.hpp>
4142

@@ -409,13 +410,15 @@ LoadTaskPtr DatabaseOrdinary::loadTableFromMetadataAsync(
409410
const ASTPtr & ast,
410411
LoadingStrictnessLevel mode)
411412
{
413+
TransactionLog::increaseAsyncTablesLoadingJobNumber();
412414
std::scoped_lock lock(mutex);
413415
auto job = makeLoadJob(
414416
std::move(load_after),
415417
TablesLoaderBackgroundLoadPoolId,
416418
fmt::format("load table {}", name.getFullName()),
417-
[this, local_context, file_path, name, ast, mode] (AsyncLoader &, const LoadJobPtr &)
419+
[this, local_context, file_path, name, ast, mode](AsyncLoader &, const LoadJobPtr &)
418420
{
421+
SCOPE_EXIT(TransactionLog::decreaseAsyncTablesLoadingJobNumber(););
419422
loadTableFromMetadata(local_context, file_path, name, ast, mode);
420423
});
421424

src/Interpreters/TransactionLog.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <atomic>
12
#include <Core/ServerUUID.h>
23
#include <IO/ReadBufferFromString.h>
34
#include <IO/ReadHelpers.h>
@@ -275,6 +276,17 @@ void TransactionLog::removeOldEntries()
275276
if (!global_context->isServerCompletelyStarted())
276277
return;
277278

279+
/// Because `loadTableFromMetadataAsync` is running asynchronously, it is possible that the outdated parts are loading while the `tail_ptr` is updated here.
280+
/// It might trigger assertion when the part `create_csn` is lower than `tail_ptr`. Refer: https://github.com/ClickHouse/ClickHouse/issues/60406
281+
/// We keep track of `asyncTablesLoadingJobNumber`, and not update `tail_ptr` if there are running jobs.
282+
if (!updated_tail_ptr.load(std::memory_order_relaxed) && asyncTablesLoadingJobNumber() != 0)
283+
{
284+
LOG_TRACE(log, "There are running async tables loading jobs, skip updating tail_ptr");
285+
return;
286+
}
287+
288+
updated_tail_ptr.store(true, std::memory_order_relaxed);
289+
278290
/// Also similar problem is possible if some table was not attached during startup (for example, if table is detached permanently).
279291
/// Also we write CSNs into data parts without fsync, so it's theoretically possible that we wrote CSN, finished transaction,
280292
/// removed its entry from the log, but after that server restarts and CSN is not actually saved to metadata on disk.
@@ -638,4 +650,16 @@ void TransactionLog::sync() const
638650
waitForCSNLoaded(newest_csn);
639651
}
640652

653+
void TransactionLog::increaseAsyncTablesLoadingJobNumber()
654+
{
655+
async_tables_loading_job_number.fetch_add(1);
656+
}
657+
void TransactionLog::decreaseAsyncTablesLoadingJobNumber()
658+
{
659+
async_tables_loading_job_number.fetch_sub(1);
660+
}
661+
Int64 TransactionLog::asyncTablesLoadingJobNumber()
662+
{
663+
return async_tables_loading_job_number.load();
664+
}
641665
}

src/Interpreters/TransactionLog.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
#pragma once
2+
#include <mutex>
3+
#include <unordered_map>
24
#include <Interpreters/MergeTreeTransaction.h>
35
#include <Interpreters/MergeTreeTransactionHolder.h>
4-
#include <Common/ZooKeeper/ZooKeeper.h>
5-
#include <Common/ThreadPool.h>
6+
#include <base/types.h>
67
#include <boost/noncopyable.hpp>
7-
#include <mutex>
8-
#include <unordered_map>
8+
#include <Common/ThreadPool.h>
9+
#include <Common/ZooKeeper/ZooKeeper.h>
910

1011
namespace DB
1112
{
@@ -131,6 +132,10 @@ class TransactionLog final : public SingletonHelper<TransactionLog>
131132

132133
void sync() const;
133134

135+
static void increaseAsyncTablesLoadingJobNumber();
136+
static void decreaseAsyncTablesLoadingJobNumber();
137+
static Int64 asyncTablesLoadingJobNumber();
138+
134139
private:
135140
void loadLogFromZooKeeper() TSA_REQUIRES(mutex);
136141
void runUpdatingThread();
@@ -148,6 +153,8 @@ class TransactionLog final : public SingletonHelper<TransactionLog>
148153
static TransactionID deserializeTID(const String & csn_node_content);
149154
static String serializeTID(const TransactionID & tid);
150155

156+
inline static std::atomic<Int64> async_tables_loading_job_number{0};
157+
151158
ZooKeeperPtr getZooKeeper() const;
152159

153160
/// Some time a transaction could be committed concurrently, in order to resolve it provide failback_with_strict_load_csn
@@ -191,6 +198,7 @@ class TransactionLog final : public SingletonHelper<TransactionLog>
191198
String last_loaded_entry TSA_GUARDED_BY(mutex);
192199
/// The oldest CSN such that we store in log entries with TransactionIDs containing this CSN.
193200
std::atomic<CSN> tail_ptr = Tx::UnknownCSN;
201+
std::atomic<bool> updated_tail_ptr{false};
194202

195203
zkutil::EventPtr log_updated_event = std::make_shared<Poco::Event>();
196204

0 commit comments

Comments
 (0)