From b0a53d50d9142bed51a8372eeb848816bfa94da8 Mon Sep 17 00:00:00 2001 From: Patrick Strateman Date: Sun, 14 Jun 2020 20:14:34 -0400 Subject: [PATCH 1/4] Make sanity check in GCSFilter constructor optional BlockFilterIndex will perform the cheaper check of verifying the filter hash when reading the filter from disk. --- src/blockfilter.cpp | 8 +++++--- src/blockfilter.h | 6 +++--- src/index/blockfilterindex.cpp | 13 +++++++++---- src/index/blockfilterindex.h | 2 +- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/blockfilter.cpp b/src/blockfilter.cpp index 63a9ba498f522..1ad687214358a 100644 --- a/src/blockfilter.cpp +++ b/src/blockfilter.cpp @@ -47,7 +47,7 @@ GCSFilter::GCSFilter(const Params& params) : m_params(params), m_N(0), m_F(0), m_encoded{0} {} -GCSFilter::GCSFilter(const Params& params, std::vector encoded_filter) +GCSFilter::GCSFilter(const Params& params, std::vector encoded_filter, bool skip_decode_check) : m_params(params), m_encoded(std::move(encoded_filter)) { SpanReader stream{GCS_SER_TYPE, GCS_SER_VERSION, m_encoded}; @@ -59,6 +59,8 @@ GCSFilter::GCSFilter(const Params& params, std::vector encoded_fi } m_F = static_cast(m_N) * static_cast(m_params.m_M); + if (skip_decode_check) return; + // Verify that the encoded filter contains exactly N elements. If it has too much or too little // data, a std::ios_base::failure exception will be raised. BitStreamReader bitreader{stream}; @@ -219,14 +221,14 @@ static GCSFilter::ElementSet BasicFilterElements(const CBlock& block, } BlockFilter::BlockFilter(BlockFilterType filter_type, const uint256& block_hash, - std::vector filter) + std::vector filter, bool skip_decode_check) : m_filter_type(filter_type), m_block_hash(block_hash) { GCSFilter::Params params; if (!BuildParams(params)) { throw std::invalid_argument("unknown filter_type"); } - m_filter = GCSFilter(params, std::move(filter)); + m_filter = GCSFilter(params, std::move(filter), skip_decode_check); } BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo) diff --git a/src/blockfilter.h b/src/blockfilter.h index 96cefbf3b2f9f..d6a51e95c24e2 100644 --- a/src/blockfilter.h +++ b/src/blockfilter.h @@ -59,7 +59,7 @@ class GCSFilter explicit GCSFilter(const Params& params = Params()); /** Reconstructs an already-created filter from an encoding. */ - GCSFilter(const Params& params, std::vector encoded_filter); + GCSFilter(const Params& params, std::vector encoded_filter, bool skip_decode_check); /** Builds a new filter from the params and set of elements. */ GCSFilter(const Params& params, const ElementSet& elements); @@ -122,7 +122,7 @@ class BlockFilter //! Reconstruct a BlockFilter from parts. BlockFilter(BlockFilterType filter_type, const uint256& block_hash, - std::vector filter); + std::vector filter, bool skip_decode_check); //! Construct a new BlockFilter of the specified type from a block. BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo); @@ -164,7 +164,7 @@ class BlockFilter if (!BuildParams(params)) { throw std::ios_base::failure("unknown filter_type"); } - m_filter = GCSFilter(params, std::move(encoded_filter)); + m_filter = GCSFilter(params, std::move(encoded_filter), /*skip_decode_check=*/false); } }; diff --git a/src/index/blockfilterindex.cpp b/src/index/blockfilterindex.cpp index 4f99eddfd77af..a8e1860481249 100644 --- a/src/index/blockfilterindex.cpp +++ b/src/index/blockfilterindex.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -143,18 +144,22 @@ bool BlockFilterIndex::CommitInternal(CDBBatch& batch) return BaseIndex::CommitInternal(batch); } -bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const +bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const { CAutoFile filein(m_filter_fileseq->Open(pos, true), SER_DISK, CLIENT_VERSION); if (filein.IsNull()) { return false; } + // Check that the hash of the encoded_filter matches the one stored in the db. uint256 block_hash; std::vector encoded_filter; try { filein >> block_hash >> encoded_filter; - filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter)); + uint256 result; + CHash256().Write(encoded_filter).Finalize(result); + if (result != hash) return error("Checksum mismatch in filter decode."); + filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true); } catch (const std::exception& e) { return error("%s: Failed to deserialize block filter from disk: %s", __func__, e.what()); @@ -381,7 +386,7 @@ bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& return false; } - return ReadFilterFromDisk(entry.pos, filter_out); + return ReadFilterFromDisk(entry.pos, entry.hash, filter_out); } bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out) @@ -425,7 +430,7 @@ bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* st filters_out.resize(entries.size()); auto filter_pos_it = filters_out.begin(); for (const auto& entry : entries) { - if (!ReadFilterFromDisk(entry.pos, *filter_pos_it)) { + if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) { return false; } ++filter_pos_it; diff --git a/src/index/blockfilterindex.h b/src/index/blockfilterindex.h index a049019c020fe..09c3c78c6310c 100644 --- a/src/index/blockfilterindex.h +++ b/src/index/blockfilterindex.h @@ -31,7 +31,7 @@ class BlockFilterIndex final : public BaseIndex FlatFilePos m_next_filter_pos; std::unique_ptr m_filter_fileseq; - bool ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const; + bool ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const; size_t WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter); Mutex m_cs_headers_cache; From 299023c1d9962628d158fac0306f8531506a0123 Mon Sep 17 00:00:00 2001 From: Patrick Strateman Date: Mon, 15 Jun 2020 12:30:22 -0400 Subject: [PATCH 2/4] Add GCSFilterDecode and GCSBlockFilterGetHash benchmarks. All of the benchmarks are standardized on the BASIC filter parameters so we can compare between all the benchmarks. All the GCS benchmarks are renamed to have "GCS" as the prefix. --- src/bench/gcs_filter.cpp | 57 +++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/src/bench/gcs_filter.cpp b/src/bench/gcs_filter.cpp index 607e4392b7a1a..c96e9b7f31162 100644 --- a/src/bench/gcs_filter.cpp +++ b/src/bench/gcs_filter.cpp @@ -5,7 +5,7 @@ #include #include -static void ConstructGCSFilter(benchmark::Bench& bench) +static const GCSFilter::ElementSet GenerateGCSTestElements() { GCSFilter::ElementSet elements; for (int i = 0; i < 10000; ++i) { @@ -15,29 +15,56 @@ static void ConstructGCSFilter(benchmark::Bench& bench) elements.insert(std::move(element)); } + return elements; +} + +static void GCSBlockFilterGetHash(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + BlockFilter block_filter(BlockFilterType::BASIC, {}, filter.GetEncoded(), /*skip_decode_check=*/false); + + bench.run([&] { + block_filter.GetHash(); + }); +} + +static void GCSFilterConstruct(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + uint64_t siphash_k0 = 0; bench.batch(elements.size()).unit("elem").run([&] { - GCSFilter filter({siphash_k0, 0, 20, 1 << 20}, elements); + GCSFilter filter({siphash_k0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); siphash_k0++; }); } -static void MatchGCSFilter(benchmark::Bench& bench) +static void GCSFilterDecode(benchmark::Bench& bench) { - GCSFilter::ElementSet elements; - for (int i = 0; i < 10000; ++i) { - GCSFilter::Element element(32); - element[0] = static_cast(i); - element[1] = static_cast(i >> 8); - elements.insert(std::move(element)); - } - GCSFilter filter({0, 0, 20, 1 << 20}, elements); + auto elements = GenerateGCSTestElements(); - bench.unit("elem").run([&] { - filter.Match(GCSFilter::Element()); + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + auto encoded = filter.GetEncoded(); + + bench.run([&] { + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/false); }); } -BENCHMARK(ConstructGCSFilter); -BENCHMARK(MatchGCSFilter); +static void GCSFilterMatch(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + + bench.run([&] { + filter.Match(GCSFilter::Element()); + }); +} +BENCHMARK(GCSBlockFilterGetHash); +BENCHMARK(GCSFilterConstruct); +BENCHMARK(GCSFilterDecode); +BENCHMARK(GCSFilterMatch); From aee9a8140b3a58b744766f9e89572f1d953a808b Mon Sep 17 00:00:00 2001 From: Patrick Strateman Date: Mon, 15 Jun 2020 19:41:18 -0400 Subject: [PATCH 3/4] Add GCSFilterDecodeSkipCheck benchmark This benchmark allows us to compare the differences between doing the sanity check for corruption via GolombRiceDecode() vs checking the hash of the encoded block filter. --- src/bench/gcs_filter.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/bench/gcs_filter.cpp b/src/bench/gcs_filter.cpp index c96e9b7f31162..0b43652453035 100644 --- a/src/bench/gcs_filter.cpp +++ b/src/bench/gcs_filter.cpp @@ -54,6 +54,18 @@ static void GCSFilterDecode(benchmark::Bench& bench) }); } +static void GCSFilterDecodeSkipCheck(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + auto encoded = filter.GetEncoded(); + + bench.run([&] { + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/true); + }); +} + static void GCSFilterMatch(benchmark::Bench& bench) { auto elements = GenerateGCSTestElements(); @@ -67,4 +79,5 @@ static void GCSFilterMatch(benchmark::Bench& bench) BENCHMARK(GCSBlockFilterGetHash); BENCHMARK(GCSFilterConstruct); BENCHMARK(GCSFilterDecode); +BENCHMARK(GCSFilterDecodeSkipCheck); BENCHMARK(GCSFilterMatch); From e734228d8585c0870c71ce8ba8c037f8cf8b249a Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Sun, 22 May 2022 14:17:15 +0900 Subject: [PATCH 4/4] Update GCSFilter benchmarks Element count used in the GCSFilter benchmarks are increased to 100,000 from 10,000. Testing the benchmarks with different element counts showed that a filter with 100,000 elements resulted in the same ns/op. This this a desirable thing to have as it allows us to reason about how long a single filter element takes to process, letting us easily calculate how long a filter with N elements (where N > 100,000) would take to process. GCSFilterConstruct benchmark is now called without batch. This makes intra-bench results more intuitive as all benchmarks are in ns/op instead of a custom unit. There are no downsides to this change as testing showed that there is no observable difference in error rates in the benchmarks when calling without batch. --- src/bench/gcs_filter.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/bench/gcs_filter.cpp b/src/bench/gcs_filter.cpp index 0b43652453035..80babb213b3e0 100644 --- a/src/bench/gcs_filter.cpp +++ b/src/bench/gcs_filter.cpp @@ -8,7 +8,12 @@ static const GCSFilter::ElementSet GenerateGCSTestElements() { GCSFilter::ElementSet elements; - for (int i = 0; i < 10000; ++i) { + + // Testing the benchmarks with different number of elements show that a filter + // with at least 100,000 elements results in benchmarks that have the same + // ns/op. This makes it easy to reason about how long (in nanoseconds) a single + // filter element takes to process. + for (int i = 0; i < 100000; ++i) { GCSFilter::Element element(32); element[0] = static_cast(i); element[1] = static_cast(i >> 8); @@ -35,7 +40,7 @@ static void GCSFilterConstruct(benchmark::Bench& bench) auto elements = GenerateGCSTestElements(); uint64_t siphash_k0 = 0; - bench.batch(elements.size()).unit("elem").run([&] { + bench.run([&]{ GCSFilter filter({siphash_k0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); siphash_k0++;