Skip to content

Commit

Permalink
Faster implementation using a lot of inlining
Browse files Browse the repository at this point in the history
  • Loading branch information
alainesp committed May 16, 2022
1 parent 3e136f3 commit 1b8f55e
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 102 deletions.
11 changes: 9 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@ elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)# 32 bits
message(WARNING "32 bits detected -> Non-optimal performance")
endif()

option(BUILD_EXAMPLE "Build the example" OFF)
option(WY_BUILD_EXAMPLE "Build the example" OFF)
option(WY_BUILD_PERFORMANCE "Build the performance testing" OFF)

###############################################################################################################
# Library configuration
###############################################################################################################
add_library(wy STATIC "wy.cpp")
target_include_directories(wy PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

if(BUILD_EXAMPLE)
if(WY_BUILD_EXAMPLE)
add_executable(wyexamples "example.cpp")
target_link_libraries(wyexamples PRIVATE wy)
endif()

if(WY_BUILD_PERFORMANCE)
add_executable(wyperformance "performance.cpp")
set_property(TARGET wyperformance PROPERTY CXX_STANDARD 20) # C++ language to use
target_link_libraries(wyperformance PRIVATE wy)
endif()
182 changes: 182 additions & 0 deletions performance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/////////////////////////////////////////////////////////////////////////////////
// This file is a C++ wrapper around wyhash:
// https://github.com/wangyi-fudan/wyhash
//
// Copyright (c) 2022 by Alain Espinosa.
/////////////////////////////////////////////////////////////////////////////////

// include the required header
#include <wy.hpp>
#include <chrono>
#include <iostream>
#include <format>

void main_rand()
{
constexpr size_t numIter = 1'000'000'000;
constexpr size_t numIterStream = 10'000'000;
// Create a pseudo-random generator
wy::rand r;
uint64_t no_op = 0;// variable to restrict compiler optimizations
double no_opd = 0;

std::cout << "--------------------------------------------------" << std::endl;
std::cout << "Random Performance" << std::endl;
std::cout << "--------------------------------------------------" << std::endl;

//////////////////////////////////////////////////////////////////////////////////////
// Common operation
//////////////////////////////////////////////////////////////////////////////////////
auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_op ^= r();// Generate a random number
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Random : {}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

std::cout << "--------------------------------------------------" << std::endl;
//////////////////////////////////////////////////////////////////////////////////////
// Uniform operation
//////////////////////////////////////////////////////////////////////////////////////
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_opd += r.uniform_dist();// Generate a random number
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Uniform [0, 1) : {}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

//////////////////////////////////////////////////////////////////////////////////////
// Uniform operation
//////////////////////////////////////////////////////////////////////////////////////
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_opd += r.uniform_dist(5.6, 11.7);// Generate a random number
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Uniform [min, max) : {}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

//////////////////////////////////////////////////////////////////////////////////////
// Uniform operation
//////////////////////////////////////////////////////////////////////////////////////
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_op ^= r.uniform_dist(5000);// Generate a random number
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Uniform [0, k) : {}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

std::cout << "--------------------------------------------------" << std::endl;

//////////////////////////////////////////////////////////////////////////////////////
// Gaussian operation
//////////////////////////////////////////////////////////////////////////////////////
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_opd += r.gaussian_dist();// Generate a random number
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Gaussian [0, 1] : {}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

//////////////////////////////////////////////////////////////////////////////////////
// Gaussian operation
//////////////////////////////////////////////////////////////////////////////////////
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_opd += r.gaussian_dist(1.2, 2.5);// Generate a random number
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Gaussian [mean, std]: {}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

std::cout << "--------------------------------------------------" << std::endl;

//////////////////////////////////////////////////////////////////////////////////////
// Stream operation
//////////////////////////////////////////////////////////////////////////////////////
std::vector<uint8_t> vec(4096, 0);
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIterStream; i++)
{
r.generate_stream(vec, 1024);// Generate a stream of random numbers
no_op ^= vec[0];
}
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Stream [1024] : {:.1f} GB/sec\n", (double)numIterStream / duration.count());// Show performance on millions operations per second

//////////////////////////////////////////////////////////////////////////////////////
// Stream operation
//////////////////////////////////////////////////////////////////////////////////////
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIterStream; i++)
{
r.generate_stream(vec, 4096);// Generate a stream of random numbers
no_op ^= vec[0];
}
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("Stream [4096] : {:.1f} GB/sec\n", (double)numIterStream * 4 / duration.count());// Show performance on millions operations per second

std::cout << "--------------------------------------------------" << std::endl;

// Ensures no optimization
std::cout << std::format("{}", (no_op + no_opd) ? "" : "Bad luck!");
}

void main_hash()
{
constexpr size_t numIter = 100'000'000;
uint64_t no_op = 0;// variable to restrict compiler optimizations

std::cout << "--------------------------------------------------" << std::endl;
std::cout << "Hashing Performance" << std::endl;
std::cout << "--------------------------------------------------" << std::endl;

//////////////////////////////////////////////////////////////////////////////////////
// uint64_t operation
//////////////////////////////////////////////////////////////////////////////////////
wy::hash<uint64_t> hasherUint64;
auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_op ^= hasherUint64(i);// Hash
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("uint64_t : {:4}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

//////////////////////////////////////////////////////////////////////////////////////
// String operation
//////////////////////////////////////////////////////////////////////////////////////
wy::hash<std::string> hasherStr;
std::string small("01234567890123");
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_op ^= hasherStr(small);// Hash
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("std::string(14) : {:4}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

small += "01234567890123";
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_op ^= hasherStr(small);// Hash
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("std::string(28) : {:4}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

small += small; small += small;
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_op ^= hasherStr(small);// Hash
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("std::string(112) : {:4}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

small += small; small += small;
start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < numIter; i++) no_op ^= hasherStr(small);// Hash
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << std::format("std::string(448) : {:4}M op/sec\n", numIter / duration.count());// Show performance on millions operations per second

std::cout << "--------------------------------------------------" << std::endl;

// Ensures no optimization
std::cout << std::format("{}", no_op ? "" : "Bad luck!");
}

int main()
{
main_rand();
main_hash();

return 0;
}
81 changes: 13 additions & 68 deletions wy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
// Copyright (c) 2022 by Alain Espinosa.
/////////////////////////////////////////////////////////////////////////////////

#include <cassert>
#include <random>
#include <memory>
#include "wyhash.h"
#include "wy.hpp"

// Define 'byteswap64(uint64_t)' needed for 'rand::generate_stream(size_t)'
Expand All @@ -34,77 +30,26 @@

namespace wy {

rand::rand(uint64_t seed) noexcept : state(seed)
{}
rand::rand() noexcept
{
// Seed with a real random value, if available
std::random_device rd;
state = static_cast<uint64_t>(rd()) | static_cast<uint64_t>(rd()) << 32;
}
uint64_t rand::operator()() noexcept
{
return wyrand(&state);
}
double rand::uniform_dist() noexcept
{
return wy2u01(operator()());
}
double rand::uniform_dist(double min_value, double max_value) noexcept
{
assert(max_value > min_value);

return uniform_dist() * (max_value - min_value) + min_value;
}
uint64_t rand::uniform_dist(uint64_t max_value) noexcept
{
return wy2u0k(operator()(), max_value);
}
double rand::gaussian_dist() noexcept
{
return wy2gau(operator()());
}
double rand::gaussian_dist(double mean, double std) noexcept
{
assert(std > 0);

return gaussian_dist() * std + mean;
}
std::vector<uint8_t> rand::generate_stream(size_t size) noexcept
void rand::generate_stream(std::vector<uint8_t>& vec, size_t size) noexcept
{
size_t sizeOf64 = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); // The number of 64-bits numbers to generate
std::unique_ptr<uint64_t[]> buffer = std::make_unique<uint64_t[]>(sizeOf64);

// Create the memory on the vector
vec.resize(sizeOf64 * sizeof(uint64_t), 0);
uint8_t* dataPtr = vec.data();

// Generate random values
for (size_t i = 0; i < sizeOf64; i++)
{
#if WYHASH_LITTLE_ENDIAN
buffer[i] = operator()();
uint64_t val = operator()();
#else
buffer[i] = byteswap64(operator()());
uint64_t val = byteswap64(operator()());
#endif
// Convert to bytes
return std::vector<uint8_t>(reinterpret_cast<uint8_t*>(buffer.get()), reinterpret_cast<uint8_t*>(buffer.get()) + size);
}

namespace internal {
// Hash
hash_imp::hash_imp() noexcept : secret{ 0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull }
{}
hash_imp::hash_imp(uint64_t seed) noexcept
{
make_secret(seed, secret);
}
hash_imp::hash_imp(const uint64_t psecret[4]) noexcept
{
memcpy(secret, psecret, sizeof(secret));
}
uint64_t hash_imp::wyhash(const uint8_t* data, size_t len) const noexcept
{
return ::wyhash(data, len, 0, secret);
memcpy(dataPtr + i * sizeof(uint64_t), &val, sizeof(uint64_t));
}
uint64_t hash_imp::wyhash(uint64_t data) const noexcept
{
return ::wyhash64(data, secret[0]);
}
};

// Final size
vec.resize(size);
}
};
Loading

0 comments on commit 1b8f55e

Please sign in to comment.