Skip to content

Commit a0ef3e7

Browse files
Backport #93779 to 25.8: Fix attaching Replicated DBs when the interserver host changed after restarting
1 parent 44d69c8 commit a0ef3e7

File tree

1 file changed

+51
-4
lines changed

1 file changed

+51
-4
lines changed

src/Databases/DatabaseReplicated.cpp

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
#include <Core/UUID.h>
12
#include <DataTypes/DataTypeString.h>
23

34
#include <atomic>
5+
#include <tuple>
46
#include <utility>
57

68
#include <Backups/IRestoreCoordination.h>
@@ -104,6 +106,7 @@ namespace ErrorCodes
104106
extern const int QUERY_IS_PROHIBITED;
105107
extern const int SUPPORT_IS_DISABLED;
106108
extern const int ASYNC_LOAD_CANCELED;
109+
extern const int SYNTAX_ERROR;
107110
}
108111

109112
namespace FailPoints
@@ -126,6 +129,21 @@ static inline String getHostID(ContextPtr global_context, const UUID & db_uuid,
126129
return Cluster::Address::toString(getFQDNOrHostName(), port) + ':' + toString(db_uuid);
127130
}
128131

132+
// Return <address, port, uuid>
133+
static inline std::tuple<String, UInt16, UUID> parseHostID(const String & content)
134+
{
135+
auto pos = content.find_last_of(':');
136+
if (pos == std::string::npos || pos + 1 >= content.size())
137+
throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid host ID '{}'", content);
138+
139+
auto [address, port] = Cluster::Address::fromString(content.substr(0, pos));
140+
UUID db_uuid;
141+
if (!tryParse(db_uuid, content.substr(pos + 1)))
142+
throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid host ID '{}'", content);
143+
144+
return {address, port, db_uuid};
145+
}
146+
129147
static inline UInt64 getMetadataHash(const String & table_name, const String & metadata)
130148
{
131149
SipHash hash;
@@ -511,10 +529,39 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL
511529

512530
if (replica_host_id != host_id && replica_host_id != host_id_default)
513531
{
514-
throw Exception(
515-
ErrorCodes::REPLICA_ALREADY_EXISTS,
516-
"Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
517-
replica_name, shard_name, zookeeper_path, replica_host_id, host_id);
532+
UUID uuid_in_keeper = UUIDHelpers::Nil;
533+
try
534+
{
535+
uuid_in_keeper = std::get<2>(parseHostID(replica_host_id));
536+
}
537+
catch (const Exception & e)
538+
{
539+
LOG_WARNING(log, "Failed to parse host_id {} in zookeeper, error {}", replica_host_id, e.what());
540+
}
541+
542+
if (uuid_in_keeper != db_uuid)
543+
throw Exception(
544+
ErrorCodes::REPLICA_ALREADY_EXISTS,
545+
"Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
546+
replica_name,
547+
shard_name,
548+
zookeeper_path,
549+
replica_host_id,
550+
host_id);
551+
552+
// After a restart, the interserver IO address might change (e.g. the config was updated, or `getFQDNOrHostName` returns a different name).
553+
// If the UUID stored in Keeper is the same as this database's UUID, we update the host_id in Keeper to the current one.
554+
LOG_INFO(
555+
log,
556+
"Replicated database replica: {}, shard {}, zk_path: {} already exists with the same UUID, replica host ID: '{}', "
557+
"current host ID: '{}', will set the host_id to the current host ID",
558+
replica_name,
559+
shard_name,
560+
zookeeper_path,
561+
replica_host_id,
562+
host_id);
563+
current_zookeeper->set(replica_path, host_id, -1);
564+
createEmptyLogEntry(current_zookeeper);
518565
}
519566

520567
/// Before 24.6 we always created host_id with insecure port, even if cluster_auth_info.cluster_secure_connection was true.

0 commit comments

Comments
 (0)