Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,6 @@ BITCOIN_CORE_H = \
script/sign.h \
script/signingprovider.h \
script/solver.h \
shutdown.h \
signet.h \
streams.h \
support/allocators/pool.h \
Expand Down Expand Up @@ -459,7 +458,6 @@ libbitcoin_node_a_SOURCES = \
rpc/signmessage.cpp \
rpc/txoutproof.cpp \
script/sigcache.cpp \
shutdown.cpp \
signet.cpp \
timedata.cpp \
torcontrol.cpp \
Expand Down
4 changes: 2 additions & 2 deletions src/bitcoin-chainstate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ int main(int argc, char* argv[])
.blocks_dir = abs_datadir / "blocks",
.notifications = chainman_opts.notifications,
};
ChainstateManager chainman{kernel_context.interrupt, chainman_opts, blockman_opts};
util::SignalInterrupt interrupt;
ChainstateManager chainman{interrupt, chainman_opts, blockman_opts};

node::CacheSizes cache_sizes;
cache_sizes.block_tree_db = 2 << 20;
cache_sizes.coins_db = 2 << 22;
cache_sizes.coins = (450 << 20) - (2 << 20) - (2 << 22);
node::ChainstateLoadOptions options;
options.check_interrupt = [] { return false; };
auto [status, error] = node::LoadChainstate(chainman, cache_sizes, options);
if (status != node::ChainstateLoadStatus::SUCCESS) {
std::cerr << "Failed to load Chain state from your datadir." << std::endl;
Expand Down
5 changes: 1 addition & 4 deletions src/bitcoind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <node/context.h>
#include <node/interface_ui.h>
#include <noui.h>
#include <shutdown.h>
#include <util/check.h>
#include <util/exception.h>
#include <util/strencodings.h>
Expand Down Expand Up @@ -272,9 +271,7 @@ MAIN_FUNCTION
if (ProcessInitCommands(args)) return EXIT_SUCCESS;

// Start application
if (AppInit(node)) {
WaitForShutdown();
} else {
if (!AppInit(node) || !Assert(node.shutdown)->wait()) {
node.exit_status = EXIT_FAILURE;
}
Interrupt(node);
Expand Down
13 changes: 7 additions & 6 deletions src/httpserver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
#include <netbase.h>
#include <node/interface_ui.h>
#include <rpc/protocol.h> // For HTTP status codes
#include <shutdown.h>
#include <sync.h>
#include <util/check.h>
#include <util/signalinterrupt.h>
#include <util/strencodings.h>
#include <util/threadnames.h>
#include <util/translation.h>
Expand Down Expand Up @@ -284,7 +284,7 @@ static void http_request_cb(struct evhttp_request* req, void* arg)
}
}
}
std::unique_ptr<HTTPRequest> hreq(new HTTPRequest(req));
auto hreq{std::make_unique<HTTPRequest>(req, *static_cast<const util::SignalInterrupt*>(arg))};

// Early address-based allow check
if (!ClientAllowed(hreq->GetPeer())) {
Expand Down Expand Up @@ -425,7 +425,7 @@ static void libevent_log_cb(int severity, const char *msg)
LogPrintLevel(BCLog::LIBEVENT, level, "%s\n", msg);
}

bool InitHTTPServer()
bool InitHTTPServer(const util::SignalInterrupt& interrupt)
{
if (!InitHTTPAllowList())
return false;
Expand Down Expand Up @@ -454,7 +454,7 @@ bool InitHTTPServer()
evhttp_set_timeout(http, gArgs.GetIntArg("-rpcservertimeout", DEFAULT_HTTP_SERVER_TIMEOUT));
evhttp_set_max_headers_size(http, MAX_HEADERS_SIZE);
evhttp_set_max_body_size(http, MAX_SIZE);
evhttp_set_gencb(http, http_request_cb, nullptr);
evhttp_set_gencb(http, http_request_cb, (void*)&interrupt);

if (!HTTPBindAddresses(http)) {
LogPrintf("Unable to bind any endpoint for RPC server\n");
Expand Down Expand Up @@ -579,7 +579,8 @@ void HTTPEvent::trigger(struct timeval* tv)
else
evtimer_add(ev, tv); // trigger after timeval passed
}
HTTPRequest::HTTPRequest(struct evhttp_request* _req, bool _replySent) : req(_req), replySent(_replySent)
HTTPRequest::HTTPRequest(struct evhttp_request* _req, const util::SignalInterrupt& interrupt, bool _replySent)
: req(_req), m_interrupt(interrupt), replySent(_replySent)
{
}

Expand Down Expand Up @@ -639,7 +640,7 @@ void HTTPRequest::WriteHeader(const std::string& hdr, const std::string& value)
void HTTPRequest::WriteReply(int nStatus, const std::string& strReply)
{
assert(!replySent && req);
if (ShutdownRequested()) {
if (m_interrupt) {
WriteHeader("Connection", "close");
}
// Send event to main http thread to send reply message
Expand Down
9 changes: 7 additions & 2 deletions src/httpserver.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#include <optional>
#include <string>

namespace util {
class SignalInterrupt;
} // namespace util

static const int DEFAULT_HTTP_THREADS=4;
static const int DEFAULT_HTTP_WORKQUEUE=16;
static const int DEFAULT_HTTP_SERVER_TIMEOUT=30;
Expand All @@ -21,7 +25,7 @@ class HTTPRequest;
/** Initialize HTTP server.
* Call this before RegisterHTTPHandler or EventBase().
*/
bool InitHTTPServer();
bool InitHTTPServer(const util::SignalInterrupt& interrupt);
/** Start HTTP server.
* This is separate from InitHTTPServer to give users race-condition-free time
* to register their handlers between InitHTTPServer and StartHTTPServer.
Expand Down Expand Up @@ -57,10 +61,11 @@ class HTTPRequest
{
private:
struct evhttp_request* req;
const util::SignalInterrupt& m_interrupt;
bool replySent;

public:
explicit HTTPRequest(struct evhttp_request* req, bool replySent = false);
explicit HTTPRequest(struct evhttp_request* req, const util::SignalInterrupt& interrupt, bool replySent = false);
~HTTPRequest();

enum RequestMethod {
Expand Down
3 changes: 1 addition & 2 deletions src/index/base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include <node/context.h>
#include <node/database_args.h>
#include <node/interface_ui.h>
#include <shutdown.h>
#include <tinyformat.h>
#include <util/thread.h>
#include <util/translation.h>
Expand All @@ -32,7 +31,7 @@ template <typename... Args>
void BaseIndex::FatalErrorf(const char* fmt, const Args&... args)
{
auto message = tfm::format(fmt, args...);
node::AbortNode(m_chain->context()->exit_status, message);
node::AbortNode(m_chain->context()->shutdown, m_chain->context()->exit_status, message);
}

CBlockLocator GetLocator(interfaces::Chain& chain, const uint256& block_hash)
Expand Down
67 changes: 45 additions & 22 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
#include <rpc/util.h>
#include <scheduler.h>
#include <script/sigcache.h>
#include <shutdown.h>
#include <sync.h>
#include <timedata.h>
#include <torcontrol.h>
Expand Down Expand Up @@ -192,6 +191,16 @@ static void RemovePidFile(const ArgsManager& args)
}
}

static std::optional<util::SignalInterrupt> g_shutdown;

void InitContext(NodeContext& node)
{
assert(!g_shutdown);
g_shutdown.emplace();

node.args = &gArgs;
node.shutdown = &*g_shutdown;
}

//////////////////////////////////////////////////////////////////////////////
//
Expand All @@ -204,11 +213,9 @@ static void RemovePidFile(const ArgsManager& args)
// The network-processing threads are all part of a thread group
// created by AppInit() or the Qt main() function.
//
// A clean exit happens when StartShutdown() or the SIGTERM
// signal handler sets ShutdownRequested(), which makes main thread's
// WaitForShutdown() interrupts the thread group.
// And then, WaitForShutdown() makes all other on-going threads
// in the thread group join the main thread.
// A clean exit happens when the SignalInterrupt object is triggered, which
// makes the main thread's SignalInterrupt::wait() call return, and join all
// other ongoing threads in the thread group to the main thread.
// Shutdown() is then called to clean up database connections, and stop other
// threads that should only be stopped after the main network-processing
// threads have exited.
Expand All @@ -218,6 +225,11 @@ static void RemovePidFile(const ArgsManager& args)
// shutdown thing.
//

bool ShutdownRequested(node::NodeContext& node)
{
return bool{*Assert(node.shutdown)};
}

#if HAVE_SYSTEM
static void ShutdownNotify(const ArgsManager& args)
{
Expand Down Expand Up @@ -382,7 +394,9 @@ void Shutdown(NodeContext& node)
#ifndef WIN32
static void HandleSIGTERM(int)
{
StartShutdown();
// Return value is intentionally ignored because there is not a better way
// of handling this failure in a signal handler.
(void)(*Assert(g_shutdown))();
}

static void HandleSIGHUP(int)
Expand All @@ -392,7 +406,10 @@ static void HandleSIGHUP(int)
#else
static BOOL WINAPI consoleCtrlHandler(DWORD dwCtrlType)
{
StartShutdown();
if (!(*Assert(g_shutdown))()) {
LogPrintf("Error: failed to send shutdown signal on Ctrl-C\n");
return false;
}
Sleep(INFINITE);
return true;
}
Expand Down Expand Up @@ -690,8 +707,9 @@ static bool AppInitServers(NodeContext& node)
const ArgsManager& args = *Assert(node.args);
RPCServer::OnStarted(&OnRPCStarted);
RPCServer::OnStopped(&OnRPCStopped);
if (!InitHTTPServer())
if (!InitHTTPServer(*Assert(node.shutdown))) {
return false;
}
StartRPC();
node.rpc_interruption_point = RpcInterruptionPoint;
if (!StartHTTPRPC(&node))
Expand Down Expand Up @@ -1140,11 +1158,13 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
}, std::chrono::minutes{1});

// Check disk space every 5 minutes to avoid db corruption.
node.scheduler->scheduleEvery([&args]{
node.scheduler->scheduleEvery([&args, &node]{
constexpr uint64_t min_disk_space = 50 << 20; // 50 MB
if (!CheckDiskSpace(args.GetBlocksDirPath(), min_disk_space)) {
LogPrintf("Shutting down due to lack of disk space!\n");
StartShutdown();
if (!(*Assert(node.shutdown))()) {
LogPrintf("Error: failed to send shutdown signal after disk space check\n");
}
Comment on lines +1165 to +1167
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This interface of dealing with shutdown failures feels a bit awkward to me. It's verbose and quite difficult to parse. These shutdown failures aren't really related to the callsite, and it looks like mostly we just log and then ignore them anyway.

What do you think about adding failure callbacks to SignalInterrupt? It still allows the caller to individually handle shutdown failures when it makes sense (still returning bools), but in most cases the generic callback will probably suffice?

437a0c2...stickies-v:bitcoin:pr/28051-use-cbs

I think the first commit can be used ~as is, the second would probably have to be split across multiple existing commits. I think this PR could be an appropriate place to implement it, but otherwise (if you think the approach makes sense) I'm also happy to open a follow-up pull.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: #28051 (comment)

This interface of dealing with shutdown failures feels a bit awkward to me. It's verbose and quite difficult to parse. These shutdown failures aren't really related to the callsite, and it looks like mostly we just log and then ignore them anyway.

This is intentional. The problem is that we have low-level code scattered in the codebase that is able to send shutdown signals, and this is not a good design. Lower level code should just be returning errors and notifications, and high level code should be using that information to decide whether to shut down, not reacting to shutdowns that could be triggered anywhere. The fact that bad code sending shutdown signals is more verbose now does not seem a problem, since code like this shoudl be discouraged and cleaned up anyway. This PR takes a step in that direction by removing the global StartShutdown function and discouraging more code like this from being written in the future.

Another thing this PR is trying to do is add appropriate error handling when sending shutdown signals fails, instead of crashing or ignoring the failures. The right way to handle these errors depends on where the errors happen. If an error happens in the stop RPC, an error should be returned to the RPC caller. If the error happens in a windows ctrl-c handler, the handler should log the failure and return false. If the error happens in a unix signal handler, the only thing that can be done without triggering undefined behavior is to ignore it. The problem with the suggested use-cbs branch is that it doesn't treat different errors differently. I think also it complicates control flow without a good justification. It is true that [[nodiscard]] bool error handling is a little verbose, but it's also more straightforward than std::function callbacks. And the verbosity can go away if more code changed to send status information instead of shutdown signals, which is something that should happen anyway.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lower level code should just be returning errors and notifications, and high level code should be using that information to decide whether to shut down

That does seem like a better design, indeed. I agree then that having the bad approach be more verbose is not something to worry about, and my suggestion should be discarded. Thank you for providing the rationale, I think this might be useful to add to the PR description?

Copy link
Contributor Author

@ryanofsky ryanofsky Dec 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: #28051 (comment)

Thank you for providing the rationale, I think this might be useful to add to the PR description?

Definitely, and the PR description was pretty bad so I rewrote it. This is mentioned now at the end.

}
}, std::chrono::minutes{5});

Expand Down Expand Up @@ -1431,7 +1451,7 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)

// ********************************************************* Step 7: load block chain

node.notifications = std::make_unique<KernelNotifications>(node.exit_status);
node.notifications = std::make_unique<KernelNotifications>(*Assert(node.shutdown), node.exit_status);
ReadNotificationArgs(args, *node.notifications);
fReindex = args.GetBoolArg("-reindex", false);
bool fReindexChainState = args.GetBoolArg("-reindex-chainstate", false);
Expand Down Expand Up @@ -1482,10 +1502,10 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
}
LogPrintf("* Using %.1f MiB for in-memory UTXO set (plus up to %.1f MiB of unused mempool space)\n", cache_sizes.coins * (1.0 / 1024 / 1024), mempool_opts.max_size_bytes * (1.0 / 1024 / 1024));

for (bool fLoaded = false; !fLoaded && !ShutdownRequested();) {
for (bool fLoaded = false; !fLoaded && !ShutdownRequested(node);) {
node.mempool = std::make_unique<CTxMemPool>(mempool_opts);

node.chainman = std::make_unique<ChainstateManager>(node.kernel->interrupt, chainman_opts, blockman_opts);
node.chainman = std::make_unique<ChainstateManager>(*Assert(node.shutdown), chainman_opts, blockman_opts);
ChainstateManager& chainman = *node.chainman;

// This is defined and set here instead of inline in validation.h to avoid a hard
Expand Down Expand Up @@ -1515,7 +1535,6 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
options.check_blocks = args.GetIntArg("-checkblocks", DEFAULT_CHECKBLOCKS);
options.check_level = args.GetIntArg("-checklevel", DEFAULT_CHECKLEVEL);
options.require_full_verification = args.IsArgSet("-checkblocks") || args.IsArgSet("-checklevel");
options.check_interrupt = ShutdownRequested;
options.coins_error_cb = [] {
uiInterface.ThreadSafeMessageBox(
_("Error reading from database, shutting down."),
Expand Down Expand Up @@ -1550,7 +1569,7 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
return InitError(error);
}

if (!fLoaded && !ShutdownRequested()) {
if (!fLoaded && !ShutdownRequested(node)) {
// first suggest a reindex
if (!options.reindex) {
bool fRet = uiInterface.ThreadSafeQuestion(
Expand All @@ -1559,7 +1578,9 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
"", CClientUIInterface::MSG_ERROR | CClientUIInterface::BTN_ABORT);
if (fRet) {
fReindex = true;
AbortShutdown();
if (!Assert(node.shutdown)->reset()) {
LogPrintf("Internal error: failed to reset shutdown signal.\n");
}
} else {
LogPrintf("Aborted block database rebuild. Exiting.\n");
return false;
Expand All @@ -1573,7 +1594,7 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
// As LoadBlockIndex can take several minutes, it's possible the user
// requested to kill the GUI during the last operation. If so, exit.
// As the program has not fully started yet, Shutdown() is possibly overkill.
if (ShutdownRequested()) {
if (ShutdownRequested(node)) {
LogPrintf("Shutdown requested. Exiting.\n");
return false;
}
Expand Down Expand Up @@ -1694,7 +1715,9 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
ImportBlocks(chainman, vImportFiles);
if (args.GetBoolArg("-stopafterblockimport", DEFAULT_STOPAFTERBLOCKIMPORT)) {
LogPrintf("Stopping after block import\n");
StartShutdown();
if (!(*Assert(node.shutdown))()) {
LogPrintf("Error: failed to send shutdown signal after finishing block import\n");
}
return;
}

Expand All @@ -1714,16 +1737,16 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
// Wait for genesis block to be processed
{
WAIT_LOCK(g_genesis_wait_mutex, lock);
// We previously could hang here if StartShutdown() is called prior to
// We previously could hang here if shutdown was requested prior to
// ImportBlocks getting started, so instead we just wait on a timer to
// check ShutdownRequested() regularly.
while (!fHaveGenesis && !ShutdownRequested()) {
while (!fHaveGenesis && !ShutdownRequested(node)) {
g_genesis_wait_cv.wait_for(lock, std::chrono::milliseconds(500));
}
block_notify_genesis_wait_connection.disconnect();
}

if (ShutdownRequested()) {
if (ShutdownRequested(node)) {
return false;
}

Expand Down
5 changes: 5 additions & 0 deletions src/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ namespace node {
struct NodeContext;
} // namespace node

/** Initialize node context shutdown and args variables. */
void InitContext(node::NodeContext& node);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit in the last commit: Shouldn't new node code be in the node namespace? Otherwise it will have to be touched again when it is moved into the node namespace later?

Copy link
Contributor Author

@ryanofsky ryanofsky Dec 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: #28051 (comment)

Nit in the last commit: Shouldn't new node code be in the node namespace? Otherwise it will have to be touched again when it is moved into the node namespace later?

This is true, but it is not different from other init.h methods. Logically init.h and init.cpp belong in node subdirectory and should use the node namespace, so I didn't want to make this one function stand out. The function also does need to be located in this file because it is accesses a static variable. So just keeping the Init functions in the same namespace seemed like the most straightforward approach for now.

/** Return whether node shutdown was requested. */
bool ShutdownRequested(node::NodeContext& node);

/** Interrupt threads */
void Interrupt(node::NodeContext& node);
void Shutdown(node::NodeContext& node);
Expand Down
4 changes: 2 additions & 2 deletions src/init/bitcoin-gui.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <common/args.h>
#include <init.h>
#include <interfaces/chain.h>
#include <interfaces/echo.h>
#include <interfaces/init.h>
Expand All @@ -23,7 +23,7 @@ class BitcoinGuiInit : public interfaces::Init
public:
BitcoinGuiInit(const char* arg0) : m_ipc(interfaces::MakeIpc(EXE_NAME, arg0, *this))
{
m_node.args = &gArgs;
InitContext(m_node);
m_node.init = this;
}
std::unique_ptr<interfaces::Node> makeNode() override { return interfaces::MakeNode(m_node); }
Expand Down
4 changes: 2 additions & 2 deletions src/init/bitcoin-node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <common/args.h>
#include <init.h>
#include <interfaces/chain.h>
#include <interfaces/echo.h>
#include <interfaces/init.h>
Expand All @@ -25,7 +25,7 @@ class BitcoinNodeInit : public interfaces::Init
: m_node(node),
m_ipc(interfaces::MakeIpc(EXE_NAME, arg0, *this))
{
m_node.args = &gArgs;
InitContext(m_node);
m_node.init = this;
}
std::unique_ptr<interfaces::Node> makeNode() override { return interfaces::MakeNode(m_node); }
Expand Down
Loading