Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

index: Verify the block filter hash when reading the filter from disk. #24832

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 62 additions & 17 deletions src/bench/gcs_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,84 @@
#include <bench/bench.h>
#include <blockfilter.h>

static void ConstructGCSFilter(benchmark::Bench& bench)
static const GCSFilter::ElementSet GenerateGCSTestElements()
{
GCSFilter::ElementSet elements;
for (int i = 0; i < 10000; ++i) {

// Testing the benchmarks with different number of elements show that a filter
// with at least 100,000 elements results in benchmarks that have the same
// ns/op. This makes it easy to reason about how long (in nanoseconds) a single
// filter element takes to process.
for (int i = 0; i < 100000; ++i) {
GCSFilter::Element element(32);
element[0] = static_cast<unsigned char>(i);
element[1] = static_cast<unsigned char>(i >> 8);
elements.insert(std::move(element));
}

return elements;
}

static void GCSBlockFilterGetHash(benchmark::Bench& bench)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could name it GCSFilterGetHash instead of GCSBlockFilterGetHash for consistency with the other benchmarks.

{
auto elements = GenerateGCSTestElements();

GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
BlockFilter block_filter(BlockFilterType::BASIC, {}, filter.GetEncoded(), /*skip_decode_check=*/false);

bench.run([&] {
block_filter.GetHash();
});
}

static void GCSFilterConstruct(benchmark::Bench& bench)
{
auto elements = GenerateGCSTestElements();

uint64_t siphash_k0 = 0;
bench.batch(elements.size()).unit("elem").run([&] {
GCSFilter filter({siphash_k0, 0, 20, 1 << 20}, elements);
bench.run([&]{
GCSFilter filter({siphash_k0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);

siphash_k0++;
});
}

static void MatchGCSFilter(benchmark::Bench& bench)
static void GCSFilterDecode(benchmark::Bench& bench)
{
GCSFilter::ElementSet elements;
for (int i = 0; i < 10000; ++i) {
GCSFilter::Element element(32);
element[0] = static_cast<unsigned char>(i);
element[1] = static_cast<unsigned char>(i >> 8);
elements.insert(std::move(element));
}
GCSFilter filter({0, 0, 20, 1 << 20}, elements);
auto elements = GenerateGCSTestElements();

bench.unit("elem").run([&] {
filter.Match(GCSFilter::Element());
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
auto encoded = filter.GetEncoded();

bench.run([&] {
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/false);
});
}

BENCHMARK(ConstructGCSFilter);
BENCHMARK(MatchGCSFilter);
static void GCSFilterDecodeSkipCheck(benchmark::Bench& bench)
{
auto elements = GenerateGCSTestElements();

GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
auto encoded = filter.GetEncoded();

bench.run([&] {
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/true);
});
}

static void GCSFilterMatch(benchmark::Bench& bench)
{
auto elements = GenerateGCSTestElements();

GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);

bench.run([&] {
filter.Match(GCSFilter::Element());
});
}
BENCHMARK(GCSBlockFilterGetHash);
BENCHMARK(GCSFilterConstruct);
BENCHMARK(GCSFilterDecode);
BENCHMARK(GCSFilterDecodeSkipCheck);
BENCHMARK(GCSFilterMatch);
8 changes: 5 additions & 3 deletions src/blockfilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ GCSFilter::GCSFilter(const Params& params)
: m_params(params), m_N(0), m_F(0), m_encoded{0}
{}

GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter)
GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter, bool skip_decode_check)
: m_params(params), m_encoded(std::move(encoded_filter))
{
SpanReader stream{GCS_SER_TYPE, GCS_SER_VERSION, m_encoded};
Expand All @@ -59,6 +59,8 @@ GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_fi
}
m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_params.m_M);

if (skip_decode_check) return;

// Verify that the encoded filter contains exactly N elements. If it has too much or too little
// data, a std::ios_base::failure exception will be raised.
BitStreamReader<SpanReader> bitreader{stream};
Expand Down Expand Up @@ -219,14 +221,14 @@ static GCSFilter::ElementSet BasicFilterElements(const CBlock& block,
}

BlockFilter::BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
std::vector<unsigned char> filter)
std::vector<unsigned char> filter, bool skip_decode_check)
: m_filter_type(filter_type), m_block_hash(block_hash)
{
GCSFilter::Params params;
if (!BuildParams(params)) {
throw std::invalid_argument("unknown filter_type");
}
m_filter = GCSFilter(params, std::move(filter));
m_filter = GCSFilter(params, std::move(filter), skip_decode_check);
}

BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo)
Expand Down
6 changes: 3 additions & 3 deletions src/blockfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class GCSFilter
explicit GCSFilter(const Params& params = Params());

/** Reconstructs an already-created filter from an encoding. */
GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter);
GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter, bool skip_decode_check);

/** Builds a new filter from the params and set of elements. */
GCSFilter(const Params& params, const ElementSet& elements);
Expand Down Expand Up @@ -122,7 +122,7 @@ class BlockFilter

//! Reconstruct a BlockFilter from parts.
BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
std::vector<unsigned char> filter);
std::vector<unsigned char> filter, bool skip_decode_check);

//! Construct a new BlockFilter of the specified type from a block.
BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo);
Expand Down Expand Up @@ -164,7 +164,7 @@ class BlockFilter
if (!BuildParams(params)) {
throw std::ios_base::failure("unknown filter_type");
}
m_filter = GCSFilter(params, std::move(encoded_filter));
m_filter = GCSFilter(params, std::move(encoded_filter), /*skip_decode_check=*/false);
}
};

Expand Down
13 changes: 9 additions & 4 deletions src/index/blockfilterindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <map>

#include <dbwrapper.h>
#include <hash.h>
#include <index/blockfilterindex.h>
#include <node/blockstorage.h>
#include <util/system.h>
Expand Down Expand Up @@ -143,18 +144,22 @@ bool BlockFilterIndex::CommitInternal(CDBBatch& batch)
return BaseIndex::CommitInternal(batch);
}

bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Since this function also has a block_hash variable, maybe filter_hash would be a clearer disambiguation?

Suggested change
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& filter_hash, BlockFilter& filter) const

{
CAutoFile filein(m_filter_fileseq->Open(pos, true), SER_DISK, CLIENT_VERSION);
if (filein.IsNull()) {
return false;
}

// Check that the hash of the encoded_filter matches the one stored in the db.
uint256 block_hash;
std::vector<uint8_t> encoded_filter;
try {
filein >> block_hash >> encoded_filter;
filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter));
uint256 result;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

calculated_hash might be more intuitive?

Suggested change
uint256 result;
uint256 calculated_hash;

CHash256().Write(encoded_filter).Finalize(result);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think {} list initialization is generally preferred? Here, and in some other places in the PR.

Suggested change
CHash256().Write(encoded_filter).Finalize(result);
CHash256{}.Write(encoded_filter).Finalize(result);

if (result != hash) return error("Checksum mismatch in filter decode.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be helpful to include pos in the error message? Also, not sure if "filter decode" is the most accurate description? What about e.g. "Checksum mismatch for filter loaded from <pos>"?

filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true);
}
Comment on lines +154 to 163
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you consider moving the hash checking into a separate function for readability and maintainability?

catch (const std::exception& e) {
return error("%s: Failed to deserialize block filter from disk: %s", __func__, e.what());
Expand Down Expand Up @@ -381,7 +386,7 @@ bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter&
return false;
}

return ReadFilterFromDisk(entry.pos, filter_out);
return ReadFilterFromDisk(entry.pos, entry.hash, filter_out);
}

bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
Expand Down Expand Up @@ -425,7 +430,7 @@ bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* st
filters_out.resize(entries.size());
auto filter_pos_it = filters_out.begin();
for (const auto& entry : entries) {
if (!ReadFilterFromDisk(entry.pos, *filter_pos_it)) {
if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) {
return false;
}
++filter_pos_it;
Expand Down
2 changes: 1 addition & 1 deletion src/index/blockfilterindex.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class BlockFilterIndex final : public BaseIndex
FlatFilePos m_next_filter_pos;
std::unique_ptr<FlatFileSeq> m_filter_fileseq;

bool ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const;
bool ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const;
size_t WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter);

Mutex m_cs_headers_cache;
Expand Down