Skip to content

Commit

Permalink
Adds elias-fano encoder, see #13
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-j-h committed Jul 31, 2021
1 parent b213c96 commit 84c6479
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 24 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.o
*.d
*.so
*.png
26 changes: 23 additions & 3 deletions tinygraph-bits.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ uint32_t tinygraph_bits_select(uint64_t v, uint32_t n) {
TINYGRAPH_STATIC_ASSERT(sizeof(uint64_t) == sizeof(unsigned long long));
TINYGRAPH_ASSERT(n < tinygraph_bits_count(v));

return tinygraph_bits_trailing0(_pdep_u64(UINT64_C(1) << n, v));
return tinygraph_bits_trailing0_u64(_pdep_u64(UINT64_C(1) << n, v));
}

#else // TINYGRAPH_HAS_BMI2
Expand All @@ -55,7 +55,17 @@ uint32_t tinygraph_bits_select(uint64_t v, uint32_t n) {
#endif // TINYGRAPH_HAS_BMI2


uint32_t tinygraph_bits_leading0(uint64_t v) {
uint32_t tinygraph_bits_leading0_u32(uint32_t v) {
TINYGRAPH_STATIC_ASSERT(sizeof(uint32_t) == sizeof(unsigned int));

if (v == 0) {
return 32;
}

return __builtin_clz(v);
}

uint32_t tinygraph_bits_leading0_u64(uint64_t v) {
TINYGRAPH_STATIC_ASSERT(sizeof(uint64_t) == sizeof(unsigned long long));

if (v == 0) {
Expand All @@ -65,7 +75,17 @@ uint32_t tinygraph_bits_leading0(uint64_t v) {
return __builtin_clzll(v);
}

uint32_t tinygraph_bits_trailing0(uint64_t v) {
uint32_t tinygraph_bits_trailing0_u32(uint32_t v) {
TINYGRAPH_STATIC_ASSERT(sizeof(uint32_t) == sizeof(unsigned int));

if (v == 0) {
return 32;
}

return __builtin_ctz(v);
}

uint32_t tinygraph_bits_trailing0_u64(uint64_t v) {
TINYGRAPH_STATIC_ASSERT(sizeof(uint64_t) == sizeof(unsigned long long));

if (v == 0) {
Expand Down
10 changes: 8 additions & 2 deletions tinygraph-bits.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ TINYGRAPH_WARN_UNUSED
uint32_t tinygraph_bits_select(uint64_t v, uint32_t n);

TINYGRAPH_WARN_UNUSED
uint32_t tinygraph_bits_leading0(uint64_t v);
uint32_t tinygraph_bits_leading0_u32(uint32_t v);

TINYGRAPH_WARN_UNUSED
uint32_t tinygraph_bits_trailing0(uint64_t v);
uint32_t tinygraph_bits_leading0_u64(uint64_t v);

TINYGRAPH_WARN_UNUSED
uint32_t tinygraph_bits_trailing0_u32(uint32_t v);

TINYGRAPH_WARN_UNUSED
uint32_t tinygraph_bits_trailing0_u64(uint64_t v);


#endif
35 changes: 21 additions & 14 deletions tinygraph-bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@

typedef struct tinygraph_bitset {
uint64_t *blocks;
uint32_t blocks_len;
uint64_t blocks_len;
uint64_t size;
} tinygraph_bitset;


tinygraph_bitset* tinygraph_bitset_construct(uint32_t size) {
tinygraph_bitset* tinygraph_bitset_construct(uint64_t size) {
tinygraph_bitset *out = malloc(sizeof(tinygraph_bitset));

if (!out) {
Expand All @@ -23,13 +24,14 @@ tinygraph_bitset* tinygraph_bitset_construct(uint32_t size) {
*out = (tinygraph_bitset){
.blocks = NULL,
.blocks_len = 0,
.size = size,
};

if (size == 0) {
return out;
}

uint32_t num_blocks = ceil(size / 64.f);
uint64_t num_blocks = ceil(size / 64.f);

uint64_t *blocks = calloc(num_blocks, sizeof(uint64_t));

Expand Down Expand Up @@ -63,7 +65,7 @@ tinygraph_bitset* tinygraph_bitset_copy(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(copy->blocks);
TINYGRAPH_ASSERT(bitset->blocks);

memcpy(copy->blocks, bitset->blocks, bitset->blocks_len * sizeof(uint32_t));
memcpy(copy->blocks, bitset->blocks, bitset->blocks_len * sizeof(uint64_t));
}

return copy;
Expand All @@ -86,21 +88,26 @@ void tinygraph_bitset_destruct(tinygraph_bitset *bitset) {
}


void tinygraph_bitset_set_at(tinygraph_bitset *bitset, uint32_t i) {
void tinygraph_bitset_set_at(tinygraph_bitset *bitset, uint64_t i) {
TINYGRAPH_ASSERT(bitset);
TINYGRAPH_ASSERT(bitset->blocks_len > 0);
TINYGRAPH_ASSERT((i >> 6) < bitset->blocks_len);

bitset->blocks[i >> 6] |= (UINT64_C(1) << (i & UINT32_C(63)));
bitset->blocks[i >> 6] |= (UINT64_C(1) << (i & UINT64_C(63)));
}


bool tinygraph_bitset_get_at(tinygraph_bitset *bitset, uint32_t i) {
bool tinygraph_bitset_get_at(tinygraph_bitset *bitset, uint64_t i) {
TINYGRAPH_ASSERT(bitset);
TINYGRAPH_ASSERT(bitset->blocks_len > 0);
TINYGRAPH_ASSERT((i >> 6) < bitset->blocks_len);

return (bitset->blocks[i >> 6] & (UINT64_C(1) << (i & UINT32_C(63)))) != 0;
return (bitset->blocks[i >> 6] & (UINT64_C(1) << (i & UINT64_C(63)))) != 0;
}


uint64_t tinygraph_bitset_get_size(tinygraph_bitset_s bitset) {
return bitset->size;
}


Expand All @@ -111,14 +118,14 @@ void tinygraph_bitset_clear(tinygraph_bitset_s bitset) {
return;
}

memset(bitset->blocks, 0, bitset->blocks_len * sizeof(uint32_t));
memset(bitset->blocks, 0, bitset->blocks_len * sizeof(uint64_t));
}


void tinygraph_bitset_not(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

for (uint32_t i = 0; i < bitset->blocks_len; ++i) {
for (uint64_t i = 0; i < bitset->blocks_len; ++i) {
bitset->blocks[i] = ~bitset->blocks[i];
}
}
Expand All @@ -128,7 +135,7 @@ void tinygraph_bitset_and(tinygraph_bitset_s bitset, const tinygraph_bitset_s ot
TINYGRAPH_ASSERT(bitset);
TINYGRAPH_ASSERT(bitset->blocks_len == other->blocks_len);

for (uint32_t i = 0; i < bitset->blocks_len; ++i) {
for (uint64_t i = 0; i < bitset->blocks_len; ++i) {
bitset->blocks[i] = bitset->blocks[i] & other->blocks[i];
}
}
Expand All @@ -138,7 +145,7 @@ void tinygraph_bitset_or(tinygraph_bitset_s bitset, const tinygraph_bitset_s oth
TINYGRAPH_ASSERT(bitset);
TINYGRAPH_ASSERT(bitset->blocks_len == other->blocks_len);

for (uint32_t i = 0; i < bitset->blocks_len; ++i) {
for (uint64_t i = 0; i < bitset->blocks_len; ++i) {
bitset->blocks[i] = bitset->blocks[i] | other->blocks[i];
}
}
Expand All @@ -148,7 +155,7 @@ void tinygraph_bitset_xor(tinygraph_bitset_s bitset, const tinygraph_bitset_s ot
TINYGRAPH_ASSERT(bitset);
TINYGRAPH_ASSERT(bitset->blocks_len == other->blocks_len);

for (uint32_t i = 0; i < bitset->blocks_len; ++i) {
for (uint64_t i = 0; i < bitset->blocks_len; ++i) {
bitset->blocks[i] = bitset->blocks[i] ^ other->blocks[i];
}
}
Expand All @@ -159,7 +166,7 @@ void tinygraph_bitset_print_internal(tinygraph_bitset *bitset) {

fprintf(stderr, "bitset internals\n");

for (uint32_t i = 0; i < bitset->blocks_len * 64; ++i) {
for (uint64_t i = 0; i < tinygraph_bitset_get_size(bitset); ++i) {
fprintf(stderr, "%ju ", (uintmax_t)tinygraph_bitset_get_at(bitset, i));
}

Expand Down
9 changes: 6 additions & 3 deletions tinygraph-bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@ typedef struct tinygraph_bitset* tinygraph_bitset_s;


TINYGRAPH_WARN_UNUSED
tinygraph_bitset_s tinygraph_bitset_construct(uint32_t size);
tinygraph_bitset_s tinygraph_bitset_construct(uint64_t size);

TINYGRAPH_WARN_UNUSED
tinygraph_bitset_s tinygraph_bitset_copy(tinygraph_bitset_s bitset);

void tinygraph_bitset_destruct(tinygraph_bitset_s bitset);

void tinygraph_bitset_set_at(tinygraph_bitset_s bitset, uint32_t i);
void tinygraph_bitset_set_at(tinygraph_bitset_s bitset, uint64_t i);

TINYGRAPH_WARN_UNUSED
bool tinygraph_bitset_get_at(tinygraph_bitset_s bitset, uint32_t i);
bool tinygraph_bitset_get_at(tinygraph_bitset_s bitset, uint64_t i);

TINYGRAPH_WARN_UNUSED
uint64_t tinygraph_bitset_get_size(tinygraph_bitset_s bitset);

void tinygraph_bitset_clear(tinygraph_bitset_s bitset);

Expand Down
95 changes: 95 additions & 0 deletions tinygraph-eliasfano.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#include <math.h>
#include <stdio.h>

#include "tinygraph-bits.h"
#include "tinygraph-eliasfano.h"


/*
* Gamma coding
*
* Write floor(log2(x)) zeros, followed by bin(x), e.g.
* gamma(13) = 000 1101. Decoding works by counting n
* leading zeros, if n==0 return 1. Otherwise we read
* n + 1 bits which is the decoded binary value.
*
* Delta coding
*
* Write floor(log2(x)) + 1 as gamma code, followed by
* by bin(x), but skip the first 1 in bin(x) because it
* is always 1 (by design), e.g. delta(13) = 00100 101.
* Decoding works by decoding the gamma coded length,
* then reading length bits, pre-pending a one.
*
* See
* - https://www.lx.it.pt/~mtf/Elias.pdf
*/


TINYGRAPH_WARN_UNUSED
static uint32_t tinygraph_eliasfano_floorlog2_u32(uint32_t value) {
return 31 - tinygraph_bits_leading0_u32(value);
}

TINYGRAPH_WARN_UNUSED
static uint32_t tinygraph_eliasfano_gamma_num_bits(uint32_t value) {
return 2 * tinygraph_eliasfano_floorlog2_u32(value) + 1;
}

TINYGRAPH_WARN_UNUSED
static uint32_t tinygraph_eliasfano_delta_num_bits(uint32_t value) {
const uint32_t n = tinygraph_eliasfano_floorlog2_u32(value);
return tinygraph_eliasfano_gamma_num_bits(n + 1) + (n + 1) - 1;
}


tinygraph_bitset_s tinygraph_eliasfano_encode(const uint32_t *data, uint32_t n) {
uint64_t nbits = 0;

for (uint32_t i = 0; i < n; ++i) {
nbits += tinygraph_eliasfano_delta_num_bits(data[i]);
}

tinygraph_bitset_s out = tinygraph_bitset_construct(nbits);

uint64_t it = 0;

for (uint32_t i = 0; i < n; ++i) {
const uint32_t value = data[i];

TINYGRAPH_ASSERT(value > 0);

const uint32_t len = tinygraph_eliasfano_floorlog2_u32(value) + 1;

// Write number of digits in the binary value (len) as gamma code

const uint32_t lenlen = tinygraph_eliasfano_floorlog2_u32(len);

fprintf(stderr, "EF len=%ju\n", (uintmax_t)len);
fprintf(stderr, "EF lenlen=%ju\n", (uintmax_t)lenlen);

it += lenlen;

for (uint32_t j = 0; j < (lenlen + 1); ++j) {
if ((len & (UINT32_C(1) << (lenlen + 1 - j - 1))) != 0) {
tinygraph_bitset_set_at(out, it);
}

it += 1;
}

// Write binary value itself, but discard first bit (always 1)

for (uint32_t k = 0; k < (len - 1); ++k) {
if ((value & (UINT32_C(1) << (len - 1 - k - 1))) != 0) {
tinygraph_bitset_set_at(out, it);
}

it += 1;
}
}

TINYGRAPH_ASSERT(it == nbits);

return out;
}
18 changes: 18 additions & 0 deletions tinygraph-eliasfano.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef TINYGRAPH_ELIASFANO_H
#define TINYGRAPH_ELIASFANO_H

#include <stdint.h>

#include "tinygraph-bitset.h"
#include "tinygraph-utils.h"

/*
* Quasi succinct Elias-Fano coding.
*/


TINYGRAPH_WARN_UNUSED
tinygraph_bitset_s tinygraph_eliasfano_encode(const uint32_t *data, uint32_t n);


#endif
1 change: 1 addition & 0 deletions tinygraph-queue.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ typedef struct tinygraph_queue {
} tinygraph_queue;


TINYGRAPH_WARN_UNUSED
static bool tinygraph_queue_refill(tinygraph_queue *queue) {
TINYGRAPH_ASSERT(queue);
TINYGRAPH_ASSERT(queue->lhs);
Expand Down
17 changes: 15 additions & 2 deletions tinygraph-tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "tinygraph-zigzag.h"
#include "tinygraph-zorder.h"
#include "tinygraph-bits.h"
#include "tinygraph-eliasfano.h"


void test1() {
Expand Down Expand Up @@ -552,8 +553,8 @@ void test17() {
assert(tinygraph_bits_select(UINT64_C(0xd84c8a0), 3) == 14);
assert(tinygraph_bits_select(UINT64_C(0xd84c8a0), 9) == 27);

assert(tinygraph_bits_leading0(UINT64_C(0xd84c8a0)) == 36);
assert(tinygraph_bits_trailing0(UINT64_C(0xd84c8a0)) == 5);
assert(tinygraph_bits_leading0_u64(UINT64_C(0xd84c8a0)) == 36);
assert(tinygraph_bits_trailing0_u64(UINT64_C(0xd84c8a0)) == 5);

assert(tinygraph_bits_rank(UINT64_C(0xd84c8a0), 0) == 0);
assert(tinygraph_bits_rank(UINT64_C(0xd84c8a0), 64)
Expand All @@ -563,6 +564,17 @@ void test17() {
}


void test18() {
uint32_t data[] = {1, 2, 10, 19, 147};

tinygraph_bitset_s bits = tinygraph_eliasfano_encode(data, 5);

tinygraph_bitset_print_internal(bits);

tinygraph_bitset_destruct(bits);
}


int main() {
test1();
test2();
Expand All @@ -581,4 +593,5 @@ int main() {
test15();
test16();
test17();
test18();
}
Loading

0 comments on commit 84c6479

Please sign in to comment.