Skip to content

Commit 17b556a

Browse files
UdjinM6 and claude committed
feat: add evodb verify and repair RPC commands
Add two new RPC commands for verifying and repairing corrupted evodb diff records between snapshots, which are stored every 576 blocks.

evodb verify (read-only):
- Verifies that applying the stored diffs between snapshots produces correct results
- Reports verification errors without modifying the database
- Defaults to the full range from DIP0003 activation to the chain tip

evodb repair (writes to database):
- First runs verification on all snapshot pairs
- For failed pairs, recalculates diffs from actual blockchain data
- Writes repaired diffs to the database using efficient batching (16MB chunks)
- Clears both diff and list caches to prevent serving stale data
- Only commits repairs if recalculation verification passes

Key implementation details:
- Uses the BuildNewListFromBlock overload with a dummy coins view to avoid UTXO lookups
- Breaks the circular dependency by rebuilding from trusted snapshots, not corrupted diffs
- Handles a missing initial snapshot at DIP0003 (creates an empty list)
- Treats any other missing snapshot as a critical error requiring reindex
- Logs progress every 100 snapshot pairs to avoid spam
- Separate error reporting for verification vs repair phases

New public types:
- CDeterministicMNManager::RecalcDiffsResult - result struct with detailed stats
- Forward declarations for ChainstateManager and CSpecialTxProcessor

Co-authored-by: Claude (Anthropic AI) <[email protected]>
1 parent 5379fce commit 17b556a

File tree

3 files changed

+555
-0
lines changed

3 files changed

+555
-0
lines changed

src/evo/deterministicmns.cpp

Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <util/irange.h>
2626
#include <util/pointer.h>
2727

28+
#include <functional>
2829
#include <optional>
2930
#include <memory>
3031

@@ -1578,3 +1579,346 @@ bool CDeterministicMNManager::MigrateLegacyDiffs(const CBlockIndex* const tip_in
15781579

15791580
return true;
15801581
}
1582+
1583+
/**
 * Verify the stored per-block masternode-list diffs between consecutive on-disk
 * snapshots and, when requested, rebuild the diffs of every pair that fails.
 *
 * For each pair of neighbouring snapshots returned by CollectSnapshotBlocks()
 * the stored diffs are replayed on top of the earlier snapshot and compared
 * with the later one (VerifySnapshotPair). If `repair` is set and a pair fails,
 * its diffs are recalculated from block data (RepairSnapshotPair) and collected;
 * all collected diffs are written in one go at the end (WriteRepairedDiffs).
 *
 * @param start_index     First block of the range. Heights below the DIP0003
 *                        activation height are clamped up to it. Must not be null.
 * @param stop_index      Last block of the range. Must not be null.
 * @param chainman        Not referenced by this function body.
 * @param build_list_func Callback used to build the list for a block during repair.
 * @param repair          When false only verification is performed and nothing
 *                        is written.
 * @return Statistics and error lists. The function returns early (without
 *         writing anything) on a missing snapshot or on a failed recalculation.
 *
 * The caller must hold ::cs_main; `cs` is taken internally where needed.
 */
CDeterministicMNManager::RecalcDiffsResult CDeterministicMNManager::RecalculateAndRepairDiffs(
    const CBlockIndex* start_index, const CBlockIndex* stop_index, ChainstateManager& chainman,
    BuildListFromBlockFunc build_list_func, bool repair)
{
    AssertLockHeld(::cs_main);

    RecalcDiffsResult result;
    result.start_height = start_index->nHeight;
    result.stop_height = stop_index->nHeight;

    const auto& consensus_params = Params().GetConsensus();

    // Clamp start height to DIP0003 activation (no snapshots/diffs exist before this)
    if (start_index->nHeight < consensus_params.DIP0003Height) {
        start_index = stop_index->GetAncestor(consensus_params.DIP0003Height);
        if (!start_index) {
            result.verification_errors.push_back(strprintf("Stop height %d is below DIP0003 activation height %d",
                                                           stop_index->nHeight, consensus_params.DIP0003Height));
            return result;
        }
        LogPrintf("CDeterministicMNManager::%s -- Clamped start height from %d to DIP0003 activation height %d\n",
                  __func__, result.start_height, consensus_params.DIP0003Height);
        // Update result to reflect the clamped start height
        result.start_height = start_index->nHeight;
    }

    // Collect all snapshot blocks in the range
    std::vector<const CBlockIndex*> snapshot_blocks = CollectSnapshotBlocks(start_index, stop_index, consensus_params);

    if (snapshot_blocks.empty()) {
        result.verification_errors.push_back("Could not find starting snapshot");
        return result;
    }

    if (snapshot_blocks.size() < 2) {
        result.verification_errors.push_back(strprintf("Need at least 2 snapshots, found %d", snapshot_blocks.size()));
        return result;
    }

    LogPrintf("CDeterministicMNManager::%s -- Processing %d snapshot pairs between heights %d and %d\n", __func__,
              snapshot_blocks.size() - 1, result.start_height, result.stop_height);

    // Storage for recalculated diffs if we plan to repair
    std::vector<std::pair<uint256, CDeterministicMNListDiff>> recalculated_diffs;

    // Process each pair of consecutive snapshots
    for (size_t i = 0; i < snapshot_blocks.size() - 1; ++i) {
        const CBlockIndex* from_index = snapshot_blocks[i];
        const CBlockIndex* to_index = snapshot_blocks[i + 1];

        // Load the snapshots from disk
        CDeterministicMNList from_snapshot;
        CDeterministicMNList to_snapshot;

        bool has_from_snapshot;
        bool has_to_snapshot;
        {
            LOCK(cs);
            has_from_snapshot = m_evoDb.Read(std::make_pair(DB_LIST_SNAPSHOT, from_index->GetBlockHash()), from_snapshot);
            has_to_snapshot = m_evoDb.Read(std::make_pair(DB_LIST_SNAPSHOT, to_index->GetBlockHash()), to_snapshot);
        }

        // Handle missing snapshots
        if (!has_from_snapshot) {
            // The initial snapshot at DIP0003 activation might not exist in the database on nodes
            // that synced before the fix to explicitly write it. This is the only acceptable case.
            if (from_index->nHeight == consensus_params.DIP0003Height) {
                // Create an empty initial snapshot (matching what GetListForBlockInternal does)
                from_snapshot = CDeterministicMNList(from_index->GetBlockHash(), from_index->nHeight, 0);
                LogPrintf("CDeterministicMNManager::%s -- Using empty initial snapshot at DIP0003 height %d\n",
                          __func__, from_index->nHeight);
            } else {
                // Any other missing snapshot is critical corruption beyond our repair capability
                result.verification_errors.push_back(strprintf("CRITICAL: Snapshot missing at height %d. "
                    "This cannot be repaired by this tool - full reindex required.", from_index->nHeight));
                return result;
            }
        }

        if (!has_to_snapshot) {
            // Missing target snapshot is always critical - we cannot repair snapshots, only diffs
            result.verification_errors.push_back(strprintf("CRITICAL: Snapshot missing at height %d. "
                "This cannot be repaired by this tool - full reindex required.", to_index->nHeight));
            return result;
        }

        // Verify this snapshot pair
        bool is_snapshot_pair_valid;
        {
            LOCK(cs);
            is_snapshot_pair_valid = VerifySnapshotPair(from_index, to_index, from_snapshot, to_snapshot, result, i,
                                                        snapshot_blocks.size() - 1);
        }

        // If repair mode is enabled and verification failed, recalculate diffs from blockchain
        if (repair && !is_snapshot_pair_valid) {
            auto temp_diffs = RepairSnapshotPair(from_index, to_index, from_snapshot, to_snapshot, build_list_func, result);
            if (temp_diffs.empty()) {
                // RepairSnapshotPair failed - this is a critical error, cannot continue
                return result;
            }
            // Only commit diffs if recalculation verification passed.
            // temp_diffs is discarded right after this, so hand its elements over
            // by move instead of deep-copying every per-block diff.
            result.diffs_recalculated += temp_diffs.size();
            for (auto& recalculated : temp_diffs) {
                recalculated_diffs.push_back(std::move(recalculated));
            }
        }
    }

    // Write repaired diffs to database
    if (repair) {
        LOCK(cs);
        WriteRepairedDiffs(recalculated_diffs, result);
    }

    return result;
}
1697+
1698+
std::vector<const CBlockIndex*> CDeterministicMNManager::CollectSnapshotBlocks(
1699+
const CBlockIndex* start_index, const CBlockIndex* stop_index, const Consensus::Params& consensus_params)
1700+
{
1701+
AssertLockHeld(::cs_main);
1702+
1703+
std::vector<const CBlockIndex*> snapshot_blocks;
1704+
1705+
// Add the starting snapshot (find the snapshot at or before start)
1706+
// Walk backwards to find a snapshot block (divisible by DISK_SNAPSHOT_PERIOD)
1707+
// or the initial snapshot at DIP0003 activation height
1708+
const CBlockIndex* snapshot_start_index = start_index;
1709+
while (snapshot_start_index && snapshot_start_index->nHeight > consensus_params.DIP0003Height &&
1710+
(snapshot_start_index->nHeight % DISK_SNAPSHOT_PERIOD) != 0) {
1711+
snapshot_start_index = snapshot_start_index->pprev;
1712+
}
1713+
1714+
if (!snapshot_start_index) {
1715+
return snapshot_blocks; // Empty vector indicates error
1716+
}
1717+
1718+
// Collect all snapshot blocks up to and including the stop block
1719+
snapshot_blocks.push_back(snapshot_start_index);
1720+
1721+
// Find all subsequent snapshot heights
1722+
int current_snapshot_height = snapshot_start_index->nHeight;
1723+
while (true) {
1724+
// Calculate next snapshot height
1725+
int next_snapshot_height;
1726+
if (current_snapshot_height == consensus_params.DIP0003Height) {
1727+
// If we're at DIP0003 activation (initial snapshot), next is at first regular interval
1728+
next_snapshot_height = ((consensus_params.DIP0003Height / DISK_SNAPSHOT_PERIOD) + 1) * DISK_SNAPSHOT_PERIOD;
1729+
} else {
1730+
// Otherwise, add DISK_SNAPSHOT_PERIOD
1731+
next_snapshot_height = current_snapshot_height + DISK_SNAPSHOT_PERIOD;
1732+
}
1733+
1734+
if (next_snapshot_height > stop_index->nHeight) {
1735+
break;
1736+
}
1737+
1738+
const CBlockIndex* next_snapshot_index = stop_index->GetAncestor(next_snapshot_height);
1739+
if (!next_snapshot_index) {
1740+
break;
1741+
}
1742+
1743+
snapshot_blocks.push_back(next_snapshot_index);
1744+
current_snapshot_height = next_snapshot_height;
1745+
}
1746+
1747+
return snapshot_blocks;
1748+
}
1749+
1750+
bool CDeterministicMNManager::VerifySnapshotPair(
1751+
const CBlockIndex* from_index, const CBlockIndex* to_index, const CDeterministicMNList& from_snapshot,
1752+
const CDeterministicMNList& to_snapshot, RecalcDiffsResult& result, size_t pair_index, size_t total_pairs)
1753+
{
1754+
AssertLockHeld(cs);
1755+
AssertLockHeld(::cs_main);
1756+
1757+
// Log progress periodically (every 100 snapshot pairs) to avoid spam
1758+
if (pair_index % 100 == 0) {
1759+
LogPrintf("CDeterministicMNManager::%s -- Progress: verifying snapshot pair %d/%d (heights %d-%d)\n",
1760+
__func__, pair_index + 1, total_pairs, from_index->nHeight, to_index->nHeight);
1761+
}
1762+
1763+
// Verify this snapshot pair by applying all stored diffs sequentially
1764+
CDeterministicMNList test_list = from_snapshot;
1765+
1766+
try {
1767+
for (int nHeight = from_index->nHeight + 1; nHeight <= to_index->nHeight; ++nHeight) {
1768+
const CBlockIndex* pIndex = to_index->GetAncestor(nHeight);
1769+
if (!pIndex) {
1770+
result.verification_errors.push_back(strprintf("Failed to get ancestor at height %d", nHeight));
1771+
return false;
1772+
}
1773+
1774+
CDeterministicMNListDiff diff;
1775+
if (!m_evoDb.Read(std::make_pair(DB_LIST_DIFF, pIndex->GetBlockHash()), diff)) {
1776+
result.verification_errors.push_back(strprintf("Failed to read diff at height %d", nHeight));
1777+
return false;
1778+
}
1779+
1780+
diff.nHeight = nHeight;
1781+
test_list.ApplyDiff(pIndex, diff);
1782+
}
1783+
} catch (const std::exception& e) {
1784+
result.verification_errors.push_back(strprintf("Exception during verification: %s", e.what()));
1785+
return false;
1786+
}
1787+
1788+
// Verify that applying all diffs results in the target snapshot
1789+
bool is_snapshot_pair_valid = test_list.IsEqual(to_snapshot);
1790+
1791+
if (is_snapshot_pair_valid) {
1792+
result.snapshots_verified++;
1793+
} else {
1794+
result.verification_errors.push_back(
1795+
strprintf("Verification failed between snapshots at heights %d and %d: "
1796+
"Applied diffs do not match target snapshot",
1797+
from_index->nHeight, to_index->nHeight));
1798+
}
1799+
1800+
return is_snapshot_pair_valid;
1801+
}
1802+
1803+
std::vector<std::pair<uint256, CDeterministicMNListDiff>> CDeterministicMNManager::RepairSnapshotPair(
1804+
const CBlockIndex* from_index, const CBlockIndex* to_index, const CDeterministicMNList& from_snapshot,
1805+
const CDeterministicMNList& to_snapshot, BuildListFromBlockFunc build_list_func, RecalcDiffsResult& result)
1806+
{
1807+
AssertLockHeld(::cs_main);
1808+
1809+
CDeterministicMNList current_list = from_snapshot;
1810+
// Temporary storage for recalculated diffs (one per block in this snapshot interval)
1811+
std::vector<std::pair<uint256, CDeterministicMNListDiff>> temp_diffs;
1812+
temp_diffs.reserve(to_index->nHeight - from_index->nHeight);
1813+
1814+
LogPrintf("CDeterministicMNManager::%s -- Repairing: recalculating diffs between snapshots at heights %d and %d\n",
1815+
__func__, from_index->nHeight, to_index->nHeight);
1816+
1817+
try {
1818+
for (int nHeight = from_index->nHeight + 1; nHeight <= to_index->nHeight; ++nHeight) {
1819+
const CBlockIndex* pIndex = to_index->GetAncestor(nHeight);
1820+
1821+
// Read the actual block from disk
1822+
CBlock block;
1823+
if (!node::ReadBlockFromDisk(block, pIndex, Params().GetConsensus())) {
1824+
result.repair_errors.push_back(strprintf("CRITICAL: Failed to read block at height %d. "
1825+
"Cannot repair - full reindex required.", nHeight));
1826+
return {}; // Critical error - cannot continue repair
1827+
}
1828+
1829+
// Use a dummy coins view to avoid UTXO lookups. At chain tip, coins from
1830+
// historical blocks may already be spent. Since these blocks were fully
1831+
// validated when originally connected, we don't need to re-verify coin
1832+
// availability - we only need to extract special transactions.
1833+
CCoinsView view_dummy;
1834+
CCoinsViewCache view(&view_dummy);
1835+
1836+
// Build the new list by processing this block's special transactions
1837+
// Starting from current_list (our trusted state), not from corrupted diffs
1838+
CDeterministicMNList next_list;
1839+
BlockValidationState state;
1840+
if (!build_list_func(block, pIndex->pprev, current_list, view, false, state, next_list)) {
1841+
result.repair_errors.push_back(
1842+
strprintf("CRITICAL: Failed to build list for block at height %d: %s. "
1843+
"Cannot repair - full reindex required.", nHeight, state.ToString()));
1844+
return {}; // Critical error - cannot continue repair
1845+
}
1846+
1847+
// Set the correct block hash
1848+
next_list.SetBlockHash(pIndex->GetBlockHash());
1849+
1850+
// Calculate the diff between current and next
1851+
CDeterministicMNListDiff recalc_diff = current_list.BuildDiff(next_list);
1852+
recalc_diff.nHeight = nHeight;
1853+
// Store in temporary vector for this snapshot pair
1854+
temp_diffs.emplace_back(pIndex->GetBlockHash(), recalc_diff);
1855+
1856+
// Move forward
1857+
current_list = std::move(next_list);
1858+
}
1859+
1860+
// Verify that applying all diffs results in the target snapshot
1861+
if (current_list.IsEqual(to_snapshot)) {
1862+
LogPrintf("CDeterministicMNManager::%s -- Successfully recalculated %d diffs between heights %d and %d\n",
1863+
__func__, temp_diffs.size(), from_index->nHeight, to_index->nHeight);
1864+
return temp_diffs; // Success - return recalculated diffs
1865+
} else {
1866+
result.repair_errors.push_back(
1867+
strprintf("CRITICAL: Recalculation failed between snapshots at heights %d and %d: "
1868+
"Applied diffs do not match target snapshot. Cannot repair - full reindex required.",
1869+
from_index->nHeight, to_index->nHeight));
1870+
return {}; // Failed verification - return empty vector
1871+
}
1872+
} catch (const std::exception& e) {
1873+
result.repair_errors.push_back(strprintf("CRITICAL: Exception during recalculation: %s. "
1874+
"Cannot repair - full reindex required.", e.what()));
1875+
return {}; // Exception - return empty vector
1876+
}
1877+
}
1878+
1879+
void CDeterministicMNManager::WriteRepairedDiffs(
1880+
const std::vector<std::pair<uint256, CDeterministicMNListDiff>>& recalculated_diffs, RecalcDiffsResult& result)
1881+
{
1882+
AssertLockHeld(cs);
1883+
1884+
if (recalculated_diffs.empty()) {
1885+
return;
1886+
}
1887+
1888+
CDBBatch batch(m_evoDb.GetRawDB());
1889+
const size_t BATCH_SIZE_THRESHOLD = 1 << 24; // 16MB
1890+
size_t diffs_written = 0;
1891+
1892+
LogPrintf("CDeterministicMNManager::%s -- Writing %d repaired diffs to database...\n",
1893+
__func__, recalculated_diffs.size());
1894+
1895+
for (const auto& [block_hash, diff] : recalculated_diffs) {
1896+
batch.Write(std::make_pair(DB_LIST_DIFF, block_hash), diff);
1897+
diffs_written++;
1898+
1899+
// Write batch when it gets too large
1900+
if (batch.SizeEstimate() >= BATCH_SIZE_THRESHOLD) {
1901+
LogPrintf("CDeterministicMNManager::%s -- Flushing batch (%d diffs written so far)...\n",
1902+
__func__, diffs_written);
1903+
m_evoDb.GetRawDB().WriteBatch(batch);
1904+
batch.Clear();
1905+
}
1906+
}
1907+
1908+
// Write any remaining diffs in the batch
1909+
if (batch.SizeEstimate() > 0) {
1910+
LogPrintf("CDeterministicMNManager::%s -- Writing final batch...\n", __func__);
1911+
m_evoDb.GetRawDB().WriteBatch(batch);
1912+
batch.Clear();
1913+
}
1914+
1915+
// Clear caches for repaired diffs so next read gets fresh data from disk
1916+
// Must clear both diff cache and list cache since lists were built from old diffs
1917+
for (const auto& [block_hash, diff] : recalculated_diffs) {
1918+
mnListDiffsCache.erase(block_hash);
1919+
mnListsCache.erase(block_hash);
1920+
}
1921+
1922+
LogPrintf("CDeterministicMNManager::%s -- Successfully repaired %d diffs (caches cleared)\n", __func__,
1923+
recalculated_diffs.size());
1924+
}

0 commit comments

Comments
 (0)