Skip to content

Commit dc47bdf

Browse files
author
Yao Xiao
committed
compaction knobs
1 parent 4b02729 commit dc47bdf

File tree

4 files changed

+64
-4
lines changed

4 files changed

+64
-4
lines changed

fdbclient/ServerKnobs.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
428428
init( ROCKSDB_MAX_SUBCOMPACTIONS, 0 );
429429
init( ROCKSDB_SOFT_PENDING_COMPACT_BYTES_LIMIT, 64000000000 ); // 64GB, Rocksdb option, Writes will slow down.
430430
init( ROCKSDB_HARD_PENDING_COMPACT_BYTES_LIMIT, 100000000000 ); // 100GB, Rocksdb option, Writes will stall.
431+
init( SHARD_SOFT_PENDING_COMPACT_BYTES_LIMIT, 0 );
432+
init( SHARD_HARD_PENDING_COMPACT_BYTES_LIMIT, 0 );
431433
init( ROCKSDB_CAN_COMMIT_COMPACT_BYTES_LIMIT, 50000000000 ); // 50GB, Commit waits.
432434
// Enable this knob only for experimental purposes, never enable this in production.
433435
// If enabled, all the committed in-memory memtable writes are lost on a crash.
@@ -461,6 +463,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
461463
init( ROCKSDB_DELETE_OBSOLETE_FILE_PERIOD, 21600 ); // 6h, RocksDB default.
462464
init( ROCKSDB_PHYSICAL_SHARD_CLEAN_UP_DELAY, isSimulated ? 10.0 : 300.0 ); // Delays shard clean up, must be larger than ROCKSDB_READ_VALUE_TIMEOUT to prevent reading deleted shard.
463465
init( ROCKSDB_RETURN_OVERLOADED_ON_TIMEOUT, false ); if ( randomize && BUGGIFY ) ROCKSDB_RETURN_OVERLOADED_ON_TIMEOUT = true;
466+
init( ROCKSDB_COMPACTION_PRI, 0x3); // kMinOverlappingRatio, RocksDB default.
464467

465468
// Leader election
466469
bool longLeaderElection = randomize && BUGGIFY;

fdbclient/include/fdbclient/ServerKnobs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,8 @@ class ServerKnobs : public KnobsImpl<ServerKnobs> {
348348
int ROCKSDB_MAX_SUBCOMPACTIONS;
349349
int64_t ROCKSDB_SOFT_PENDING_COMPACT_BYTES_LIMIT;
350350
int64_t ROCKSDB_HARD_PENDING_COMPACT_BYTES_LIMIT;
351+
int64_t SHARD_SOFT_PENDING_COMPACT_BYTES_LIMIT;
352+
int64_t SHARD_HARD_PENDING_COMPACT_BYTES_LIMIT;
351353
int64_t ROCKSDB_CAN_COMMIT_COMPACT_BYTES_LIMIT;
352354
int ROCKSDB_CAN_COMMIT_DELAY_ON_OVERLOAD;
353355
int ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD;
@@ -370,6 +372,7 @@ class ServerKnobs : public KnobsImpl<ServerKnobs> {
370372
int64_t ROCKSDB_DELETE_OBSOLETE_FILE_PERIOD;
371373
double ROCKSDB_PHYSICAL_SHARD_CLEAN_UP_DELAY;
372374
bool ROCKSDB_RETURN_OVERLOADED_ON_TIMEOUT;
375+
int ROCKSDB_COMPACTION_PRI;
373376

374377
// Leader election
375378
int MAX_NOTIFICATIONS;

fdbserver/KeyValueStoreShardedRocksDB.actor.cpp

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <rocksdb/listener.h>
1111
#include <rocksdb/metadata.h>
1212
#include <rocksdb/options.h>
13+
#include <rocksdb/advanced_options.h>
1314
#include <rocksdb/slice_transform.h>
1415
#include <rocksdb/statistics.h>
1516
#include <rocksdb/table.h>
@@ -230,16 +231,63 @@ Error statusToError(const rocksdb::Status& s) {
230231
}
231232
}
232233

234+
// Maps the integer server knob ROCKSDB_COMPACTION_PRI onto the corresponding
// rocksdb::CompactionPri enumerator:
//   0 -> kByCompensatedSize
//   1 -> kOldestLargestSeqFirst
//   2 -> kOldestSmallestSeqFirst
//   3 -> kMinOverlappingRatio
//   4 -> kRoundRobin
// Any other knob value is reported through a SevWarn trace event and the
// function falls back to kMinOverlappingRatio.
rocksdb::CompactionPri getCompactionPriority() {
	const int knobValue = SERVER_KNOBS->ROCKSDB_COMPACTION_PRI;

	if (knobValue == 0) {
		return rocksdb::CompactionPri::kByCompensatedSize;
	}
	if (knobValue == 1) {
		return rocksdb::CompactionPri::kOldestLargestSeqFirst;
	}
	if (knobValue == 2) {
		return rocksdb::CompactionPri::kOldestSmallestSeqFirst;
	}
	if (knobValue == 3) {
		return rocksdb::CompactionPri::kMinOverlappingRatio;
	}
	if (knobValue == 4) {
		return rocksdb::CompactionPri::kRoundRobin;
	}

	// Unrecognized knob value: record it and use the fallback priority.
	TraceEvent(SevWarn, "InvalidCompactionPriority").detail("KnobValue", knobValue);
	return rocksdb::CompactionPri::kMinOverlappingRatio;
}
251+
233252
rocksdb::ColumnFamilyOptions getCFOptions() {
234253
rocksdb::ColumnFamilyOptions options;
235254
options.level_compaction_dynamic_level_bytes = SERVER_KNOBS->ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES;
236255
options.OptimizeLevelStyleCompaction(SERVER_KNOBS->ROCKSDB_MEMTABLE_BYTES);
237256
if (SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS > 0) {
238257
options.periodic_compaction_seconds = SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS;
239258
}
259+
260+
options.disable_auto_compactions = SERVER_KNOBS->ROCKSDB_DISABLE_AUTO_COMPACTIONS;
261+
if (SERVER_KNOBS->SHARD_SOFT_PENDING_COMPACT_BYTES_LIMIT > 0) {
262+
options.soft_pending_compaction_bytes_limit = SERVER_KNOBS->SHARD_SOFT_PENDING_COMPACT_BYTES_LIMIT;
263+
}
264+
if (SERVER_KNOBS->SHARD_HARD_PENDING_COMPACT_BYTES_LIMIT > 0) {
265+
options.hard_pending_compaction_bytes_limit = SERVER_KNOBS->SHARD_HARD_PENDING_COMPACT_BYTES_LIMIT;
266+
}
267+
240268
// Compact sstables when there's too much deleted stuff.
241269
options.table_properties_collector_factories = { rocksdb::NewCompactOnDeletionCollectorFactory(128, 1) };
242270

271+
// Compact sstables when there's too much deleted stuff.
272+
if (SERVER_KNOBS->ROCKSDB_ENABLE_COMPACT_ON_DELETION) {
273+
// Creates a factory of a table property collector that marks a SST
274+
// file as need-compaction when it observes at least "D" deletion
275+
// entries in any "N" consecutive entries, or the ratio of tombstone
276+
// entries >= deletion_ratio.
277+
278+
// @param sliding_window_size "N". Note that this number will be
279+
// rounded up to the smallest multiple of 128 that is no less
280+
// than the specified size.
281+
// @param deletion_trigger "D". Note that even when "N" is changed,
282+
// the specified number for "D" will not be changed.
283+
// @param deletion_ratio, if <= 0 or > 1, disable triggering compaction
284+
// based on deletion ratio. Disabled by default.
285+
options.table_properties_collector_factories = { rocksdb::NewCompactOnDeletionCollectorFactory(
286+
SERVER_KNOBS->ROCKSDB_CDCF_SLIDING_WINDOW_SIZE,
287+
SERVER_KNOBS->ROCKSDB_CDCF_DELETION_TRIGGER,
288+
SERVER_KNOBS->ROCKSDB_CDCF_DELETION_RATIO) };
289+
}
290+
243291
rocksdb::BlockBasedTableOptions bbOpts;
244292
// TODO: Add a knob for the block cache size. (Default is 8 MB)
245293
if (SERVER_KNOBS->ROCKSDB_PREFIX_LEN > 0) {
@@ -273,6 +321,8 @@ rocksdb::ColumnFamilyOptions getCFOptions() {
273321

274322
options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbOpts));
275323

324+
options.compaction_pri = getCompactionPriority();
325+
276326
return options;
277327
}
278328

@@ -638,6 +688,7 @@ class ShardManager {
638688
}
639689

640690
rocksdb::Status init() {
691+
double start = now();
641692
// Open instance.
642693
TraceEvent(SevInfo, "ShardedRocksShardManagerInitBegin", this->logId).detail("DataPath", path);
643694
if (SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC > 0) {
@@ -801,7 +852,9 @@ class ShardManager {
801852
if (SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC > 0) {
802853
dbOptions.rate_limiter->SetBytesPerSecond(SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC);
803854
}
804-
TraceEvent(SevInfo, "ShardedRocksShardManagerInitEnd", this->logId).detail("DataPath", path);
855+
TraceEvent(SevInfo, "ShardedRocksShardManagerInitEnd", this->logId)
856+
.detail("DataPath", path)
857+
.detail("Duration", now() - start);
805858
return status;
806859
}
807860

@@ -1003,6 +1056,8 @@ class ShardManager {
10031056
TraceEvent(SevDebug, "ShardedRocksDB").detail("ClearNonExistentRange", it.range());
10041057
continue;
10051058
}
1059+
1060+
// Do not compact clear range.
10061061
writeBatch->DeleteRange(it.value()->physicalShard->cf, toSlice(range.begin), toSlice(range.end));
10071062
dirtyShards->insert(it.value()->physicalShard);
10081063
}

packaging/docker/build-images.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,6 @@ function build_and_push_images () {
194194
docker build \
195195
--label "org.foundationdb.version=${fdb_version}" \
196196
--label "org.foundationdb.build_date=${build_date}" \
197-
--label "org.foundationdb.commit=${commit_sha}" \
198197
--progress plain \
199198
--build-arg FDB_VERSION="${fdb_version}" \
200199
--build-arg FDB_LIBRARY_VERSIONS="${fdb_library_versions[*]}" \
@@ -245,7 +244,7 @@ aws_account_id=$(aws --output text sts get-caller-identity --query 'Account')
245244
build_date=$(date +"%Y-%m-%dT%H:%M:%S%z")
246245
build_output_directory="${script_dir}/../../"
247246
source_code_diretory=$(awk -F= '/foundationdb_SOURCE_DIR:STATIC/{print $2}' "${build_output_directory}/CMakeCache.txt")
248-
commit_sha=$(cd "${source_code_diretory}" && git rev-parse --verify HEAD --short=10)
247+
# commit_sha=$(cd "${source_code_diretory}" && git rev-parse --verify HEAD --short=10)
249248
fdb_version=$(cat "${build_output_directory}/version.txt")
250249
fdb_library_versions=( '5.1.7' '6.1.13' '6.2.30' '6.3.18' "${fdb_version}" )
251250
fdb_website="https://github.com/apple/foundationdb/releases/download"
@@ -256,7 +255,7 @@ image_list=(
256255
'foundationdb'
257256
# 'foundationdb-kubernetes-monitor'
258257
'foundationdb-kubernetes-sidecar'
259-
'ycsb'
258+
# 'ycsb'
260259
)
261260
registry=""
262261
tag_base="foundationdb/"

0 commit comments

Comments
 (0)