Skip to content

Commit 66b8d3c

Browse files
committed
reindex, log, test: fixes #21379
This fixes a blk file size calculation made during reindex that results in increased blk file malformedness. The fix is to avoid double counting the size of the serialization header during reindex. This adds a unit test to reproduce the bug before the fix and to ensure that it does not recur. These changes include a log message change also so as to not be as alarming. This is a common and recoverable data corruption. These messages can now be filtered by the debug log reindex category.
1 parent e14f0fa commit 66b8d3c

File tree

4 files changed

+66
-6
lines changed

4 files changed

+66
-6
lines changed

src/Makefile.test.include

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ BITCOIN_TESTS =\
7878
test/blockencodings_tests.cpp \
7979
test/blockfilter_index_tests.cpp \
8080
test/blockfilter_tests.cpp \
81+
test/blockmanager_tests.cpp \
8182
test/bloom_tests.cpp \
8283
test/bswap_tests.cpp \
8384
test/checkqueue_tests.cpp \

src/node/blockstorage.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -785,19 +785,27 @@ bool ReadRawBlockFromDisk(std::vector<uint8_t>& block, const FlatFilePos& pos, c
785785
return true;
786786
}
787787

788-
/** Store block on disk. If dbp is non-nullptr, the file is known to already reside on disk */
789788
FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp)
790789
{
791790
unsigned int nBlockSize = ::GetSerializeSize(block, CLIENT_VERSION);
792791
FlatFilePos blockPos;
793-
if (dbp != nullptr) {
792+
const auto position_known {dbp != nullptr}; // if dbp is not nullptr, then the file and block within it are known to already reside on disk
793+
if (position_known) {
794794
blockPos = *dbp;
795-
}
796-
if (!FindBlockPos(blockPos, nBlockSize + 8, nHeight, active_chain, block.GetBlockTime(), dbp != nullptr)) {
795+
} else {
796+
// when known, blockPos.nPos points at the offset of the block data in the blk file. that already accounts for
797+
// the serialization header present in the file (the 4 magic message start bytes + the 4 length bytes = 8 bytes).
798+
// we add 8 only for new blocks since they will have the serialization header added when written to disk.
799+
// this avoids over accounting for the serialization header on existing blocks. such over accounting was a long
800+
// standing bug that added undesirable 8 byte gaps into blk data files following a -reindex operation.
801+
// for more info, see https://github.com/bitcoin/bitcoin/issues/21379
802+
nBlockSize += 8U;
803+
}
804+
if (!FindBlockPos(blockPos, nBlockSize, nHeight, active_chain, block.GetBlockTime(), position_known)) {
797805
error("%s: FindBlockPos failed", __func__);
798806
return FlatFilePos();
799807
}
800-
if (dbp == nullptr) {
808+
if (!position_known) {
801809
if (!WriteBlockToDisk(block, blockPos, chainparams.MessageStart())) {
802810
AbortNode("Failed to write block");
803811
return FlatFilePos();

src/test/blockmanager_tests.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Copyright (c) 2022 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <chainparams.h>
6+
#include <node/blockstorage.h>
7+
#include <node/context.h>
8+
#include <validation.h>
9+
10+
#include <boost/test/unit_test.hpp>
11+
#include <test/util/setup_common.h>
12+
13+
using node::BlockManager;
14+
15+
// use BasicTestingSetup here for the data directory configuration, setup, and cleanup
16+
BOOST_FIXTURE_TEST_SUITE(blockmanager_tests, BasicTestingSetup)
17+
18+
BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
19+
{
20+
const auto params {CreateChainParams(ArgsManager{}, CBaseChainParams::MAIN)};
21+
BlockManager blockman {};
22+
CChain chain {};
23+
// simulate adding a genesis block normally
24+
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, nullptr).nPos, 8U);
25+
// simulate what happens during reindex
26+
// simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file
27+
// the block is found at offset 8 because there is an 8 byte serialization header
28+
// consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file.
29+
FlatFilePos pos{0, 8};
30+
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, &pos).nPos, 8U);
31+
// now simulate what happens after reindex for the first new block processed
32+
// the actual block contents don't matter, just that it's a block.
33+
// verify that the write position is at offset 0x12d.
34+
// this is a check to make sure that https://github.com/bitcoin/bitcoin/issues/21379 does not recur
35+
// 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293
36+
// add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301
37+
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 1, chain, *params, nullptr).nPos, 301U);
38+
}
39+
40+
BOOST_AUTO_TEST_SUITE_END()

src/validation.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4356,7 +4356,18 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
43564356
}
43574357
}
43584358
} catch (const std::exception& e) {
4359-
LogPrintf("%s: Deserialize or I/O error - %s\n", __func__, e.what());
4359+
// historical bugs added extra data to the block files that does not deserialize cleanly.
4360+
// commonly this data is between readable blocks, but it does not really matter. such data is not fatal to the import process.
4361+
// the code that reads the block files deals with invalid data by simply ignoring it.
4362+
// it continues to search for the next {4 byte magic message start bytes + 4 byte length + block} that does deserialize cleanly
4363+
// and passes all of the other block validation checks dealing with POW and the merkle root, etc...
4364+
// we merely note with this informational log message when unexpected data is encountered.
4365+
// we could also be experiencing a storage system read error, or a read of a previous bad write. these are possible, but
4366+
// less likely scenarios. we don't have enough information to tell a difference here.
4367+
// the reindex process is not the place to attempt to clean and/or compact the block files. if so desired, a studious node operator
4368+
// may use knowledge of the fact that the block files are not entirely pristine in order to prepare a set of pristine, and
4369+
// perhaps ordered, block files for later reindexing.
4370+
LogPrint(BCLog::REINDEX, "%s: unexpected data at file offset 0x%x - %s. continuing\n", __func__, (nRewind - 1), e.what());
43604371
}
43614372
}
43624373
} catch (const std::runtime_error& e) {

0 commit comments

Comments
 (0)