Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
crypto: Add MuHash3072 implementation
Browse files Browse the repository at this point in the history
Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com>
fjahr and sipa committed Dec 21, 2020
1 parent 0b4d290 commit adc708c
Showing 3 changed files with 131 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
@@ -406,6 +406,8 @@ crypto_libbitcoin_crypto_base_a_SOURCES = \
crypto/hmac_sha512.h \
crypto/poly1305.h \
crypto/poly1305.cpp \
crypto/muhash.h \
crypto/muhash.cpp \
crypto/ripemd160.cpp \
crypto/ripemd160.h \
crypto/sha1.cpp \
61 changes: 61 additions & 0 deletions src/crypto/muhash.cpp
Original file line number Diff line number Diff line change
@@ -275,3 +275,64 @@ void Num3072::Divide(const Num3072& a)
this->Multiply(inv);
if (this->IsOverflow()) this->FullReduce();
}

/**
 * Map an arbitrary byte string to a Num3072.
 *
 * The input is serialized into a CHashWriter and reduced to a SHA256 digest.
 * That digest is handed to ChaCha20, from which BYTE_SIZE bytes of keystream
 * are drawn. The keystream is then read limb by limb (via ReadLE32 or
 * ReadLE64, depending on the limb width) into the result.
 *
 * @param[in] in  Bytes of the set element.
 * @return The Num3072 filled from the keystream.
 */
Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
    uint256 seed = (CHashWriter(SER_DISK, 0) << in).GetSHA256();

    unsigned char keystream[BYTE_SIZE];
    ChaCha20(seed.data(), seed.size()).Keystream(keystream, BYTE_SIZE);

    Num3072 result{};
    for (int limb = 0; limb < LIMBS; ++limb) {
        // Exactly one branch applies for a given build; both must compile.
        if (sizeof(limb_t) == 8) {
            result.limbs[limb] = ReadLE64(keystream + 8 * limb);
        } else if (sizeof(limb_t) == 4) {
            result.limbs[limb] = ReadLE32(keystream + 4 * limb);
        }
    }
    return result;
}

// Singleton set: the numerator is initialized directly from the element's
// Num3072 image, while the denominator is left default-constructed.
// ToNum3072 does not read any member, so calling it from the initializer
// list is safe.
MuHash3072::MuHash3072(Span<const unsigned char> in) noexcept
    : m_numerator(ToNum3072(in))
{
}

/**
 * Produce the SHA256 digest of the current running value.
 *
 * The fraction is first collapsed into the numerator (numerator divided by
 * denominator, denominator reset to one), then the numerator's limbs are
 * written out with WriteLE32/WriteLE64 and hashed.
 *
 * @param[out] out  Receives the resulting hash.
 */
void MuHash3072::Finalize(uint256& out) noexcept
{
    m_numerator.Divide(m_denominator);
    m_denominator.SetToOne(); // Needed to keep the MuHash object valid

    unsigned char serialized[BYTE_SIZE];
    for (int limb = 0; limb < LIMBS; ++limb) {
        // Exactly one branch applies for a given build; both must compile.
        if (sizeof(limb_t) == 8) {
            WriteLE64(serialized + limb * 8, m_numerator.limbs[limb]);
        } else if (sizeof(limb_t) == 4) {
            WriteLE32(serialized + limb * 4, m_numerator.limbs[limb]);
        }
    }

    CHashWriter hasher(SER_DISK, 0);
    hasher << serialized;
    out = hasher.GetSHA256();
}

// (a/b) * (c/d) == (a*c) / (b*d): each component is multiplied by the
// matching component of `mul`. The two updates touch disjoint members, so
// their relative order does not matter.
MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept
{
    this->m_denominator.Multiply(mul.m_denominator);
    this->m_numerator.Multiply(mul.m_numerator);
    return *this;
}

// (a/b) / (c/d) == (a*d) / (b*c): the numerator is multiplied by the
// divisor's denominator and the denominator by the divisor's numerator, so
// no modular inverse is needed here.
MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept
{
    // Snapshot the divisor's numerator before mutating *this. When `div`
    // aliases *this (x /= x), the first Multiply below would otherwise
    // overwrite the value the second Multiply is supposed to read, leaving
    // 1/d instead of the expected value one.
    const Num3072 div_numerator = div.m_numerator;

    m_numerator.Multiply(div.m_denominator);
    m_denominator.Multiply(div_numerator);
    return *this;
}

// Add one element to the set by multiplying its Num3072 image into the
// numerator. Returns *this so calls can be chained.
MuHash3072& MuHash3072::Insert(Span<const unsigned char> in) noexcept {
    const Num3072 element = ToNum3072(in);
    m_numerator.Multiply(element);
    return *this;
}

// Remove one element from the set. Returns *this so calls can be chained.
//
// The element's Num3072 image is multiplied into the denominator rather than
// divided out of the numerator: Num3072::Divide has to compute an inverse,
// which is expensive, whereas a multiplication is cheap. The single inversion
// is deferred to Finalize(), which divides the numerator by the accumulated
// denominator. The finalized hash is the same either way.
MuHash3072& MuHash3072::Remove(Span<const unsigned char> in) noexcept {
    m_denominator.Multiply(ToNum3072(in));
    return *this;
}
68 changes: 68 additions & 0 deletions src/crypto/muhash.h
Original file line number Diff line number Diff line change
@@ -59,4 +59,72 @@ class Num3072
}
};

/** A class representing MuHash sets
*
* MuHash is a hashing algorithm that supports adding set elements in any
* order but also deleting in any order. As a result, it can maintain a
* running sum for a set of data as a whole, and add/remove when data
* is added to or removed from it. A downside of MuHash is that computing
* an inverse is relatively expensive. This is solved by representing
* the running value as a fraction, and multiplying added elements into
* the numerator and removed elements into the denominator. Only when the
* final hash is desired, a single modular inverse and multiplication is
* needed to combine the two. The combination is also run on serialization
* to allow for space-efficient storage on disk.
*
* As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
* in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
* all of this is perfectly parallelizable: each thread can process an
* arbitrary subset of the update operations, allowing them to be
* efficiently combined later.
*
* MuHash does not support checking if an element is already part of the
* set. That is why this class does not enforce the use of a set as the
* data it represents because there is no efficient way to do so.
* It is possible to add elements more than once and also to remove
* elements that have not been added before. However, this implementation
* is intended to represent a set of elements.
*
* See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
* https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
*/
class MuHash3072
{
private:
    /** Number of bytes used to fill or serialize one Num3072 (384 * 8 = 3072 bits). */
    static constexpr size_t BYTE_SIZE = 384;

    /** Running value is kept as the fraction m_numerator / m_denominator. */
    Num3072 m_numerator;
    Num3072 m_denominator;

    /** Convert the bytes of a set element into a Num3072. */
    Num3072 ToNum3072(Span<const unsigned char> in);

public:
    /** The empty set: both members are default-constructed. */
    MuHash3072() noexcept {}

    /** A singleton with variable sized data in it. */
    explicit MuHash3072(Span<const unsigned char> in) noexcept;

    /** Insert a single piece of data into the set. Returns *this. */
    MuHash3072& Insert(Span<const unsigned char> in) noexcept;

    /** Remove a single piece of data from the set. Returns *this. */
    MuHash3072& Remove(Span<const unsigned char> in) noexcept;

    /** Multiply (resulting in a hash for the union of the sets). */
    MuHash3072& operator*=(const MuHash3072& mul) noexcept;

    /** Divide (resulting in a hash for the difference of the sets). */
    MuHash3072& operator/=(const MuHash3072& div) noexcept;

    /** Finalize into a 32-byte hash. Does not change this object's value,
     *  although the stored fraction is normalized (the denominator is
     *  folded into the numerator and reset to one), hence non-const. */
    void Finalize(uint256& out) noexcept;

    /** Serializes the numerator followed by the denominator, as stored. */
    SERIALIZE_METHODS(MuHash3072, obj)
    {
        READWRITE(obj.m_numerator);
        READWRITE(obj.m_denominator);
    }
};

#endif // BITCOIN_CRYPTO_MUHASH_H

0 comments on commit adc708c

Please sign in to comment.