Skip to content

Conversation

@sears
Copy link
Contributor

@sears sears commented Dec 9, 2020

This merge was reasonably clean.

diff --cc fdbserver/Knobs.cpp
index ff96cadc3,79b4986dc..000000000
--- a/fdbserver/Knobs.cpp
+++ b/fdbserver/Knobs.cpp
@@@ -376,11 -340,11 +376,15 @@@ void ServerKnobs::initialize(bool rando
        init( REQUIRED_MIN_RECOVERY_DURATION,                       0.080 ); if( shortRecoveryDuration ) REQUIRED_MIN_RECOVERY_DURATION = 0.01;
        init( ALWAYS_CAUSAL_READ_RISKY,                             false );
        init( MAX_COMMIT_UPDATES,                                    2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
 -      init( MIN_PROXY_COMPUTE,                                    0.001 );
        init( MAX_PROXY_COMPUTE,                                      2.0 );
 +      init( MAX_COMPUTE_PER_OPERATION,                              0.1 );
        init( PROXY_COMPUTE_BUCKETS,                                20000 );
        init( PROXY_COMPUTE_GROWTH_RATE,                             0.01 );
++<<<<<<< HEAD
 +      init( TXN_STATE_SEND_AMOUNT,                                    4 );
++=======
+       init( PROXY_REJECT_BATCH_QUEUED_TOO_LONG,                    true );
++>>>>>>> upstream/release-6.2
  
        init( RESET_MASTER_BATCHES,                                   200 );
        init( RESET_RESOLVER_BATCHES,                                 200 );
diff --cc fdbserver/Knobs.h
index 2789ce8cc,2047a0439..000000000
--- a/fdbserver/Knobs.h
+++ b/fdbserver/Knobs.h
@@@ -306,11 -285,11 +306,15 @@@ public
        double REQUIRED_MIN_RECOVERY_DURATION;
        bool ALWAYS_CAUSAL_READ_RISKY;
        int MAX_COMMIT_UPDATES;
 -      double MIN_PROXY_COMPUTE;
        double MAX_PROXY_COMPUTE;
 +      double MAX_COMPUTE_PER_OPERATION;
        int PROXY_COMPUTE_BUCKETS;
        double PROXY_COMPUTE_GROWTH_RATE;
++<<<<<<< HEAD
 +      int TXN_STATE_SEND_AMOUNT;
++=======
+       bool PROXY_REJECT_BATCH_QUEUED_TOO_LONG;
++>>>>>>> upstream/release-6.2
  
        int RESET_MASTER_BATCHES;
        int RESET_RESOLVER_BATCHES;
diff --cc fdbserver/MasterProxyServer.actor.cpp
index e8e81c1a4,cc5f9a2e8..000000000
--- a/fdbserver/MasterProxyServer.actor.cpp
+++ b/fdbserver/MasterProxyServer.actor.cpp
@@@ -854,7 -567,20 +854,24 @@@ ACTOR Future<Void> releaseResolvingAfte
        return Void();
  }
  
++<<<<<<< HEAD
 +// Commit one batch of transactions trs
++=======
+ // Try to identify recovery transaction and backup's apply mutations (blind writes).
+ // Both cannot be rejected and are approximated by looking at first mutation
+ // starting with 0xff.
+ bool canReject(const std::vector<CommitTransactionRequest>& trs) {
+       for (const auto& tr : trs) {
+               if (tr.transaction.mutations.empty()) continue;
+               if (tr.transaction.mutations[0].param1.startsWith(LiteralStringRef("\xff")) ||
+                   tr.transaction.read_conflict_ranges.empty()) {
+                       return false;
+               }
+       }
+       return true;
+ }
+ 
++>>>>>>> upstream/release-6.2
  ACTOR Future<Void> commitBatch(
        ProxyCommitData* self,
        vector<CommitTransactionRequest> trs,
@@@ -909,10 -638,34 +927,36 @@@
        }
  
        /////// Phase 1: Pre-resolution processing (CPU bound except waiting for a version # which is separately pipelined and *should* be available by now (unless empty commit); ordered; currently atomic but could yield)
 -      TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber-1); // Queuing pre-resolution commit processing 
 +
 +      // Queuing pre-resolution commit processing
 +      TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber - 1);
        wait(self->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber-1));
+       double queuingDelay = g_network->now() - timeStart;
+       if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
+            (g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01))) &&
+           SERVER_KNOBS->PROXY_REJECT_BATCH_QUEUED_TOO_LONG && canReject(trs)) {
+               // Disabled for the recovery transaction. otherwise, recovery can't finish and keeps doing more recoveries.
+               TEST(true); // Reject transactions in the batch
+               TraceEvent(SevWarnAlways, "ProxyReject", self->dbgid)
+                   .suppressFor(0.1)
+                   .detail("QDelay", queuingDelay)
+                   .detail("Transactions", trs.size())
+                   .detail("BatchNumber", localBatchNumber);
+               ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber - 1);
+               self->latestLocalCommitBatchResolving.set(localBatchNumber);
+ 
+               wait(self->latestLocalCommitBatchLogging.whenAtLeast(localBatchNumber-1));
+               ASSERT(self->latestLocalCommitBatchLogging.get() == localBatchNumber - 1);
+               self->latestLocalCommitBatchLogging.set(localBatchNumber);
+               for (const auto& tr : trs) {
+                       tr.reply.sendError(transaction_too_old());
+               }
+               ++self->stats.commitBatchOut;
+               self->stats.txnCommitOut += trs.size();
+               self->stats.txnConflicts += trs.size();
+               return Void();
+       }
+ 
        state Future<Void> releaseDelay = delay(std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE, batchOperations*self->commitComputePerOperation[latencyBucket]), TaskPriority::ProxyMasterVersionReply);
  
        if (debugID.present())
diff --cc flow/error_definitions.h
index 2041ce957,17abec2a8..000000000
--- a/flow/error_definitions.h
+++ b/flow/error_definitions.h
@@@ -90,8 -86,6 +90,11 @@@ ERROR( please_reboot_delete, 1208, "Reb
  ERROR( master_proxy_failed, 1209, "Master terminating because a Proxy failed" )
  ERROR( master_resolver_failed, 1210, "Master terminating because a Resolver failed" )
  ERROR( server_overloaded, 1211, "Server is under too much load and cannot respond" )
++<<<<<<< HEAD
 +ERROR( master_backup_worker_failed, 1212, "Master terminating because a backup worker failed")
 +ERROR( tag_throttled, 1213, "Transaction tag is being throttled" )
++=======
++>>>>>>> upstream/release-6.2
  ERROR( dd_tracker_cancelled, 1215, "The data distribution tracker has been cancelled" )
  
  // 15xx Platform errors

sfc-gh-anoyes and others added 24 commits November 24, 2020 16:27
This is safe because it's not been in a release, and it doesn't get
serialized.
…ode-conflict

Change dd_tracker_cancelled to 1215 to avoid conflict
Otherwise, the recovery just keeps repeating.
Disable the proxy rejection feature for backup workload, because of the
ApplyMutationsError.
These mutations should always commit.
Updated docker repositories to Vault repos
…g-of-client-range-reads

Fix double counting of range reads in TransactionMetrics
… CCACHE flags from Dockerfile.devel to Dockerfile
Proxy rejects long enqueued transactions
@jzhou77 jzhou77 merged commit 5f06c20 into apple:release-6.3 Dec 10, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants