Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 25 additions & 17 deletions src/crypto/muhash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ namespace {
using limb_t = Num3072::limb_t;
using double_limb_t = Num3072::double_limb_t;
constexpr int LIMB_SIZE = Num3072::LIMB_SIZE;
constexpr int LIMBS = Num3072::LIMBS;
/** 2^3072 - 1103717, the largest 3072-bit safe prime number, is used as the modulus. */
constexpr limb_t MAX_PRIME_DIFF = 1103717;

Expand Down Expand Up @@ -123,7 +122,7 @@ inline void square_n_mul(Num3072& in_out, const int sq, const Num3072& mul)

} // namespace

/** Indicates wether d is larger than the modulus. */
/** Indicates whether d is larger than the modulus. */
bool Num3072::IsOverflow() const
{
if (this->limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF) return false;
Expand Down Expand Up @@ -276,18 +275,33 @@ void Num3072::Divide(const Num3072& a)
if (this->IsOverflow()) this->FullReduce();
}

Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
Num3072 out{};
uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
unsigned char tmp[BYTE_SIZE];
ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, BYTE_SIZE);
Num3072::Num3072(const unsigned char (&data)[BYTE_SIZE]) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In commit "refactor: Improve encapsulation between MuHash3072 and Num3072" (a1fccea)

Note in case it helps other reviewers: I found this commit easier to review rearranging the method order to ToNum3072 Num3072 ToBytes instead of Num3072 ToBytes ToNum3072 so diff was smaller with code moving around less.

for (int i = 0; i < LIMBS; ++i) {
if (sizeof(limb_t) == 4) {
this->limbs[i] = ReadLE32(data + 4 * i);
} else if (sizeof(limb_t) == 8) {
this->limbs[i] = ReadLE64(data + 8 * i);
}
}
}

void Num3072::ToBytes(unsigned char (&out)[BYTE_SIZE]) {
for (int i = 0; i < LIMBS; ++i) {
if (sizeof(limb_t) == 4) {
out.limbs[i] = ReadLE32(tmp + 4 * i);
WriteLE32(out + i * 4, this->limbs[i]);
} else if (sizeof(limb_t) == 8) {
out.limbs[i] = ReadLE64(tmp + 8 * i);
WriteLE64(out + i * 8, this->limbs[i]);
}
}
}

Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
unsigned char tmp[Num3072::BYTE_SIZE];

uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, Num3072::BYTE_SIZE);
Num3072 out{tmp};

return out;
}

Expand All @@ -301,14 +315,8 @@ void MuHash3072::Finalize(uint256& out) noexcept
m_numerator.Divide(m_denominator);
m_denominator.SetToOne(); // Needed to keep the MuHash object valid

unsigned char data[384];
for (int i = 0; i < LIMBS; ++i) {
if (sizeof(limb_t) == 4) {
WriteLE32(data + i * 4, m_numerator.limbs[i]);
} else if (sizeof(limb_t) == 8) {
WriteLE64(data + i * 8, m_numerator.limbs[i]);
}
}
unsigned char data[Num3072::BYTE_SIZE];
m_numerator.ToBytes(data);

out = (CHashWriter(SER_DISK, 0) << data).GetSHA256();
}
Expand Down
7 changes: 4 additions & 3 deletions src/crypto/muhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class Num3072
Num3072 GetInverse() const;

public:
static constexpr size_t BYTE_SIZE = 384;

#ifdef HAVE___INT128
typedef unsigned __int128 double_limb_t;
Expand All @@ -48,8 +49,10 @@ class Num3072
void Divide(const Num3072& a);
void SetToOne();
void Square();
void ToBytes(unsigned char (&out)[BYTE_SIZE]);

Num3072() { this->SetToOne(); };
Num3072(const unsigned char (&data)[BYTE_SIZE]);

SERIALIZE_METHODS(Num3072, obj)
{
Expand Down Expand Up @@ -78,7 +81,7 @@ class Num3072
* arbitrary subset of the update operations, allowing them to be
* efficiently combined later.
*
* Muhash does not support checking if an element is already part of the
* MuHash does not support checking if an element is already part of the
* set. That is why this class does not enforce the use of a set as the
* data it represents because there is no efficient way to do so.
* It is possible to add elements more than once and also to remove
Expand All @@ -91,8 +94,6 @@ class Num3072
class MuHash3072
{
private:
static constexpr size_t BYTE_SIZE = 384;

Num3072 m_numerator;
Num3072 m_denominator;

Expand Down
63 changes: 46 additions & 17 deletions src/node/coinstats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <node/coinstats.h>

#include <coins.h>
#include <crypto/muhash.h>
#include <hash.h>
#include <serialize.h>
#include <uint256.h>
Expand All @@ -24,31 +25,47 @@ static uint64_t GetBogoSize(const CScript& scriptPubKey)
scriptPubKey.size() /* scriptPubKey */;
}

static void ApplyStats(CCoinsStats& stats, CHashWriter& ss, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
static void ApplyHash(CCoinsStats& stats, CHashWriter& ss, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In commit "refactor: Separate hash and stats calculation in coinstats" (2474645)

stats arguments in unused in all three overloads and could be dropped.

Also, passing iterators around like this seems unnecessarily elaborate. It makes sense to separate stats and hash computations, but is there a reason both computations need to share the a single for loop over the output map? I would think you could make all the functions separate like:

static void ApplyHash(CHashWriter& ss, const uint256& hash, const std::map<uint32_t, Coin>& outputs);
static void ApplyHash(MuHash3072& muhash, const uint256& hash, const std::map<uint32_t, Coin>& outputs);
static void ApplyHash(std::nullptr_t, const uint256& hash, const std::map<uint32_t, Coin>& outputs);
static void ApplyStats(CCoinsStats& stats, const uint256& hash, const std::map<uint32_t, Coin>& outputs);

{
assert(!outputs.empty());
ss << hash;
ss << VARINT(outputs.begin()->second.nHeight * 2 + outputs.begin()->second.fCoinBase ? 1u : 0u);
stats.nTransactions++;
for (const auto& output : outputs) {
ss << VARINT(output.first + 1);
ss << output.second.out.scriptPubKey;
ss << VARINT_MODE(output.second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED);
stats.nTransactionOutputs++;
stats.nTotalAmount += output.second.out.nValue;
stats.nBogoSize += GetBogoSize(output.second.out.scriptPubKey);
if (it == outputs.begin()) {
ss << hash;
ss << VARINT(it->second.nHeight * 2 + it->second.fCoinBase ? 1u : 0u);
}

ss << VARINT(it->first + 1);
ss << it->second.out.scriptPubKey;
ss << VARINT_MODE(it->second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED);

if (it == std::prev(outputs.end())) {
ss << VARINT(0u);
}
ss << VARINT(0u);
}

static void ApplyStats(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
static void ApplyHash(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it) {}

static void ApplyHash(CCoinsStats& stats, MuHash3072& muhash, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it)
{
COutPoint outpoint = COutPoint(hash, it->first);
Coin coin = it->second;

CDataStream ss(SER_DISK, PROTOCOL_VERSION);
ss << outpoint;
ss << static_cast<uint32_t>(coin.nHeight * 2 + coin.fCoinBase);
ss << coin.out;
muhash.Insert(MakeUCharSpan(ss));
}

template <typename T>
static void ApplyStats(CCoinsStats& stats, T& hash_obj, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
{
assert(!outputs.empty());
stats.nTransactions++;
for (const auto& output : outputs) {
for (auto it = outputs.begin(); it != outputs.end(); ++it) {
ApplyHash(stats, hash_obj, hash, outputs, it);

stats.nTransactionOutputs++;
stats.nTotalAmount += output.second.out.nValue;
stats.nBogoSize += GetBogoSize(output.second.out.scriptPubKey);
stats.nTotalAmount += it->second.out.nValue;
stats.nBogoSize += GetBogoSize(it->second.out.scriptPubKey);
}
}

Expand Down Expand Up @@ -104,6 +121,10 @@ bool GetUTXOStats(CCoinsView* view, CCoinsStats& stats, CoinStatsHashType hash_t
CHashWriter ss(SER_GETHASH, PROTOCOL_VERSION);
return GetUTXOStats(view, stats, ss, interruption_point);
}
case(CoinStatsHashType::MUHASH): {
MuHash3072 muhash;
return GetUTXOStats(view, stats, muhash, interruption_point);
}
case(CoinStatsHashType::NONE): {
return GetUTXOStats(view, stats, nullptr, interruption_point);
}
Expand All @@ -116,10 +137,18 @@ static void PrepareHash(CHashWriter& ss, const CCoinsStats& stats)
{
ss << stats.hashBlock;
}
// MuHash does not need the prepare step
static void PrepareHash(MuHash3072& muhash, CCoinsStats& stats) {}
static void PrepareHash(std::nullptr_t, CCoinsStats& stats) {}

static void FinalizeHash(CHashWriter& ss, CCoinsStats& stats)
{
stats.hashSerialized = ss.GetHash();
}
static void FinalizeHash(MuHash3072& muhash, CCoinsStats& stats)
{
uint256 out;
muhash.Finalize(out);
stats.hashSerialized = out;
}
static void FinalizeHash(std::nullptr_t, CCoinsStats& stats) {}
1 change: 1 addition & 0 deletions src/node/coinstats.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class CCoinsView;

enum class CoinStatsHashType {
HASH_SERIALIZED,
MUHASH,
NONE,
};

Expand Down
23 changes: 20 additions & 3 deletions src/rpc/blockchain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1047,13 +1047,26 @@ static RPCHelpMan pruneblockchain()
};
}

CoinStatsHashType ParseHashType(const std::string& hash_type_input)
{
if (hash_type_input == "hash_serialized_2") {
return CoinStatsHashType::HASH_SERIALIZED;
} else if (hash_type_input == "muhash") {
return CoinStatsHashType::MUHASH;
} else if (hash_type_input == "none") {
return CoinStatsHashType::NONE;
} else {
throw JSONRPCError(RPC_INVALID_PARAMETER, strprintf("%s is not a valid hash_type", hash_type_input));
}
}

static RPCHelpMan gettxoutsetinfo()
{
return RPCHelpMan{"gettxoutsetinfo",
"\nReturns statistics about the unspent transaction output set.\n"
"Note this call may take some time.\n",
{
{"hash_type", RPCArg::Type::STR, /* default */ "hash_serialized_2", "Which UTXO set hash should be calculated. Options: 'hash_serialized_2' (the legacy algorithm), 'none'."},
{"hash_type", RPCArg::Type::STR, /* default */ "hash_serialized_2", "Which UTXO set hash should be calculated. Options: 'hash_serialized_2' (the legacy algorithm), 'muhash', 'none'."},
},
RPCResult{
RPCResult::Type::OBJ, "", "",
Expand All @@ -1063,7 +1076,8 @@ static RPCHelpMan gettxoutsetinfo()
{RPCResult::Type::NUM, "transactions", "The number of transactions with unspent outputs"},
{RPCResult::Type::NUM, "txouts", "The number of unspent transaction outputs"},
{RPCResult::Type::NUM, "bogosize", "A meaningless metric for UTXO set size"},
{RPCResult::Type::STR_HEX, "hash_serialized_2", "The serialized hash (only present if 'hash_serialized_2' hash_type is chosen)"},
{RPCResult::Type::STR_HEX, "hash_serialized_2", /* optional */ true, "The serialized hash (only present if 'hash_serialized_2' hash_type is chosen)"},
{RPCResult::Type::STR_HEX, "muhash", /* optional */ true, "The serialized hash (only present if 'muhash' hash_type is chosen)"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit, in these two lines, could replace the single quotes with double ones for consistency, e.g. s/'/\"/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, will do if I have to retouch! Thanks for re-reviewing!

{RPCResult::Type::NUM, "disk_size", "The estimated size of the chainstate on disk"},
{RPCResult::Type::STR_AMOUNT, "total_amount", "The total amount"},
}},
Expand All @@ -1078,7 +1092,7 @@ static RPCHelpMan gettxoutsetinfo()
CCoinsStats stats;
::ChainstateActive().ForceFlushStateToDisk();

const CoinStatsHashType hash_type = ParseHashType(request.params[0], CoinStatsHashType::HASH_SERIALIZED);
const CoinStatsHashType hash_type{request.params[0].isNull() ? CoinStatsHashType::HASH_SERIALIZED : ParseHashType(request.params[0].get_str())};

CCoinsView* coins_view = WITH_LOCK(cs_main, return &ChainstateActive().CoinsDB());
NodeContext& node = EnsureNodeContext(request.context);
Expand All @@ -1091,6 +1105,9 @@ static RPCHelpMan gettxoutsetinfo()
if (hash_type == CoinStatsHashType::HASH_SERIALIZED) {
ret.pushKV("hash_serialized_2", stats.hashSerialized.GetHex());
}
if (hash_type == CoinStatsHashType::MUHASH) {
ret.pushKV("muhash", stats.hashSerialized.GetHex());
}
ret.pushKV("disk_size", stats.nDiskSize);
ret.pushKV("total_amount", ValueFromAmount(stats.nTotalAmount));
} else {
Expand Down
17 changes: 0 additions & 17 deletions src/rpc/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,23 +113,6 @@ std::vector<unsigned char> ParseHexO(const UniValue& o, std::string strKey)
return ParseHexV(find_value(o, strKey), strKey);
}

CoinStatsHashType ParseHashType(const UniValue& param, const CoinStatsHashType default_type)
{
if (param.isNull()) {
return default_type;
} else {
std::string hash_type_input = param.get_str();

if (hash_type_input == "hash_serialized_2") {
return CoinStatsHashType::HASH_SERIALIZED;
} else if (hash_type_input == "none") {
return CoinStatsHashType::NONE;
} else {
throw JSONRPCError(RPC_INVALID_PARAMETER, strprintf("%d is not a valid hash_type", hash_type_input));
}
}
}

std::string HelpExampleCli(const std::string& methodname, const std::string& args)
{
return "> bitcoin-cli " + methodname + " " + args + "\n";
Expand Down
2 changes: 0 additions & 2 deletions src/rpc/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ extern uint256 ParseHashO(const UniValue& o, std::string strKey);
extern std::vector<unsigned char> ParseHexV(const UniValue& v, std::string strName);
extern std::vector<unsigned char> ParseHexO(const UniValue& o, std::string strKey);

CoinStatsHashType ParseHashType(const UniValue& param, const CoinStatsHashType default_type);

extern CAmount AmountFromValue(const UniValue& value);
extern std::string HelpExampleCli(const std::string& methodname, const std::string& args);
extern std::string HelpExampleRpc(const std::string& methodname, const std::string& args);
Expand Down
86 changes: 86 additions & 0 deletions test/functional/feature_utxo_set_hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# Copyright (c) 2020-2021 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test UTXO set hash value calculation in gettxoutsetinfo."""

import struct

from test_framework.blocktools import create_transaction
from test_framework.messages import (
CBlock,
COutPoint,
FromHex,
)
from test_framework.muhash import MuHash3072
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import assert_equal

class UTXOSetHashTest(BitcoinTestFramework):
def set_test_params(self):
self.num_nodes = 1
self.setup_clean_chain = True

def skip_test_if_missing_module(self):
self.skip_if_no_wallet()

def test_deterministic_hash_results(self):
self.log.info("Test deterministic UTXO set hash results")

# These depend on the setup_clean_chain option, the chain loaded from the cache
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is no loaded chain and the utxo set is empty. Which is also useful to check, but doesn't check if the hash is deterministic?

Did you forget to load the chain from ./test/functional/data/rpc_getblockstats.json?

assert_equal(self.nodes[0].gettxoutsetinfo()['hash_serialized_2'], "b32ec1dda5a53cd025b95387aad344a801825fe46a60ff952ce26528f01d3be8")
assert_equal(self.nodes[0].gettxoutsetinfo("muhash")['muhash'], "dd5ad2a105c2d29495f577245c357409002329b9f4d6182c0af3dc2f462555c8")

def test_muhash_implementation(self):
self.log.info("Test MuHash implementation consistency")

node = self.nodes[0]

# Generate 100 blocks and remove the first since we plan to spend its
# coinbase
block_hashes = node.generate(100)
blocks = list(map(lambda block: FromHex(CBlock(), node.getblock(block, False)), block_hashes))
spending = blocks.pop(0)

# Create a spending transaction and mine a block which includes it
tx = create_transaction(node, spending.vtx[0].rehash(), node.getnewaddress(), amount=49)
txid = node.sendrawtransaction(hexstring=tx.serialize_with_witness().hex(), maxfeerate=0)

tx_block = node.generateblock(output=node.getnewaddress(), transactions=[txid])
blocks.append(FromHex(CBlock(), node.getblock(tx_block['hash'], False)))

# Serialize the outputs that should be in the UTXO set and add them to
# a MuHash object
muhash = MuHash3072()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this test be a little better if instead of forming the utxo set at the end, you maintained a running sum and added as blocks were mined, and then added/subtracted when UTXOs were spent and created in tx1, tx2?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so because we need to somehow keep track of what is really in the UTXO set and I think this would make test harder to follow if I did it this way. And I feel like this is more of the standard way of how we structure our tests.


for height, block in enumerate(blocks):
# The Genesis block coinbase is not part of the UTXO set and we
# spent the first mined block
height += 2

for tx in block.vtx:
for n, tx_out in enumerate(tx.vout):
coinbase = 1 if not tx.vin[0].prevout.hash else 0

# Skip witness commitment
if (coinbase and n > 0):
continue

data = COutPoint(int(tx.rehash(), 16), n).serialize()
data += struct.pack("<i", height * 2 + coinbase)
data += tx_out.serialize()

muhash.insert(data)

finalized = muhash.digest()
node_muhash = node.gettxoutsetinfo("muhash")['muhash']

assert_equal(finalized[::-1].hex(), node_muhash)

def run_test(self):
self.test_deterministic_hash_results()
self.test_muhash_implementation()


if __name__ == '__main__':
UTXOSetHashTest().main()
Loading