Skip to content

Commit

Permalink
Adds rank/select poppy-based implementation, closes #14
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-j-h committed Aug 28, 2022
1 parent c5604bd commit 48e4e96
Show file tree
Hide file tree
Showing 7 changed files with 246 additions and 2 deletions.
18 changes: 18 additions & 0 deletions tinygraph/tinygraph-align.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>


void* tinygraph_align_malloc(size_t alignment, size_t size) {
void *p;

if (posix_memalign(&p, alignment, size) != 0) {
return NULL;
}

return p;
}


void tinygraph_align_free(void *p) {
free(p);
}
17 changes: 17 additions & 0 deletions tinygraph/tinygraph-align.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef TINYGRAPH_ALIGN_H
#define TINYGRAPH_ALIGN_H

#include <stdint.h>

#include "tinygraph-utils.h"


#define TINYGRAPH_ALIGN(x) __attribute__((aligned(x)))

TINYGRAPH_WARN_UNUSED
void* tinygraph_align_malloc(size_t alignment, size_t size);

void tinygraph_align_free(void *p);


#endif
9 changes: 9 additions & 0 deletions tinygraph/tinygraph-bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,19 @@ bool tinygraph_bitset_get_at(tinygraph_bitset *bitset, uint64_t i) {


uint64_t tinygraph_bitset_get_size(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

return bitset->size;
}


uint64_t* tinygraph_bitset_get_data(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

return bitset->blocks;
}


void tinygraph_bitset_clear(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

Expand Down
3 changes: 3 additions & 0 deletions tinygraph/tinygraph-bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ bool tinygraph_bitset_get_at(tinygraph_bitset_s bitset, uint64_t i);
TINYGRAPH_WARN_UNUSED
uint64_t tinygraph_bitset_get_size(tinygraph_bitset_s bitset);

TINYGRAPH_WARN_UNUSED
uint64_t* tinygraph_bitset_get_data(tinygraph_bitset_s bitset);

void tinygraph_bitset_clear(tinygraph_bitset_s bitset);

void tinygraph_bitset_not(tinygraph_bitset_s bitset);
Expand Down
138 changes: 138 additions & 0 deletions tinygraph/tinygraph-poppy.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#include <math.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#include "tinygraph-bits.h"
#include "tinygraph-align.h"
#include "tinygraph-poppy.h"


typedef struct tinygraph_poppy_rank {
uint64_t *level0;
uint64_t level0_len;

tinygraph_bitset_s bitset_view;
} tinygraph_poppy_rank;


tinygraph_poppy_rank* tinygraph_poppy_rank_construct(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

tinygraph_poppy_rank *out = malloc(sizeof(tinygraph_poppy_rank));

if (!out) {
return NULL;
}

// It's important that we align the index structures at
// the cache line boundary, which is 64 bytes nowadays
const uint32_t cacheline = 64;

const uint64_t *bits = tinygraph_bitset_get_data(bitset);
const uint64_t nbits = tinygraph_bitset_get_size(bitset);

// 512 bits basic blocks for now
const uint64_t level0_len = nbits / 512;

if (level0_len == 0) {
*out = (tinygraph_poppy_rank){
.level0 = NULL,
.level0_len = 0,
.bitset_view = bitset,
};

return out;
}

// TODO: implement optimizations to get 12.5% down to 3.2%
// At the moment we only implement Fig. 3 from the paper

uint64_t *level0 = tinygraph_align_malloc(cacheline, level0_len);

if (!level0) {
free(out);

return NULL;
}

memset(level0, 0, level0_len);

uint64_t csum = 0;

for (uint64_t i = 0; i < level0_len; ++i) {
csum += tinygraph_bits_count(bits[i * 8 + 0]);
csum += tinygraph_bits_count(bits[i * 8 + 1]);
csum += tinygraph_bits_count(bits[i * 8 + 2]);
csum += tinygraph_bits_count(bits[i * 8 + 3]);
csum += tinygraph_bits_count(bits[i * 8 + 4]);
csum += tinygraph_bits_count(bits[i * 8 + 5]);
csum += tinygraph_bits_count(bits[i * 8 + 6]);
csum += tinygraph_bits_count(bits[i * 8 + 7]);

level0[i] = csum;
}

*out = (tinygraph_poppy_rank){
.level0 = level0,
.level0_len = level0_len,
.bitset_view = bitset,
};

return out;
}


void tinygraph_poppy_rank_destruct(tinygraph_poppy_rank *rank) {
if (!rank) {
return;
}

TINYGRAPH_ASSERT(rank->level0);

tinygraph_align_free(rank->level0);

rank->level0 = NULL;

free(rank);
}


uint64_t tinygraph_poppy_rank_get(tinygraph_poppy_rank_s rank, uint64_t n) {
TINYGRAPH_ASSERT(rank);
TINYGRAPH_ASSERT(n < tinygraph_bitset_get_size(rank->bitset_view));

const uint64_t block = n / 512;

TINYGRAPH_ASSERT(block <= rank->level0_len);

// pre-computed cumulative sums from index
const uint64_t leftof = block > 0 ? rank->level0[block - 1] : 0;

// bits within last block we need to count
const uint64_t *bits = tinygraph_bitset_get_data(rank->bitset_view);

uint64_t count = 0;

for (uint64_t i = block * 8; i < block * 8 + ((n % 512) / 64); ++i) {
count += tinygraph_bits_count(bits[i]);
}

count += tinygraph_bits_rank(bits[block * 8 + ((n % 512) / 64)], (n % 512) % 64);

return leftof + count;
}


void tinygraph_poppy_rank_print_internal(tinygraph_poppy_rank_s rank) {
TINYGRAPH_ASSERT(rank);

fprintf(stderr, "poppy rank internals\n");

for (uint64_t i = 0; i < rank->level0_len; ++i) {
fprintf(stderr, " %ju", (uintmax_t)rank->level0[i]);
}

fprintf(stderr, "\n");
}
35 changes: 35 additions & 0 deletions tinygraph/tinygraph-poppy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef TINYGRAPH_POPPY_H
#define TINYGRAPH_POPPY_H

#include "tinygraph-bitset.h"
#include "tinygraph-utils.h"

/*
* The Poppy Rank/Select data structures from the paper
*
* "Space-Efficient, High-Performance Rank & Select
* Structures on Uncompressed Bit Sequences"
*
* with space-overhead 3.2% (rank) and 0.39% (select)
*
* See
* - https://www.cs.cmu.edu/~dga/papers/zhou-sea2013.pdf
*/


typedef struct tinygraph_poppy_rank* tinygraph_poppy_rank_s;


TINYGRAPH_WARN_UNUSED
tinygraph_poppy_rank_s tinygraph_poppy_rank_construct(tinygraph_bitset_s bitset);

void tinygraph_poppy_rank_destruct(tinygraph_poppy_rank_s rank);

TINYGRAPH_WARN_UNUSED
uint64_t tinygraph_poppy_rank_get(tinygraph_poppy_rank_s rank, uint64_t n);

void tinygraph_poppy_rank_print_internal(tinygraph_poppy_rank_s rank);



#endif
28 changes: 26 additions & 2 deletions tinygraph/tinygraph-tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "tinygraph-zigzag.h"
#include "tinygraph-zorder.h"
#include "tinygraph-bits.h"
#include "tinygraph-poppy.h"
#include "tinygraph-eliasfano.h"


Expand Down Expand Up @@ -569,8 +570,6 @@ void test18() {

tinygraph_bitset_s bits = tinygraph_eliasfano_encode(data, 5);

tinygraph_bitset_print_internal(bits);

tinygraph_bitset_destruct(bits);
}

Expand Down Expand Up @@ -647,6 +646,30 @@ void test21() {
}


void test22() {
tinygraph_bitset_s bits = tinygraph_bitset_construct(1098);

tinygraph_bitset_set_at(bits, 0);
tinygraph_bitset_set_at(bits, 512);
tinygraph_bitset_set_at(bits, 1097);

tinygraph_bitset_print_internal(bits);

tinygraph_poppy_rank_s rank = tinygraph_poppy_rank_construct(bits);

tinygraph_poppy_rank_print_internal(rank);

fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 0));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 511));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 512));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 513));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1095));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1096));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1097));
}


int main() {
test1();
test2();
Expand All @@ -669,4 +692,5 @@ int main() {
test19();
test20();
test21();
test22();
}

0 comments on commit 48e4e96

Please sign in to comment.