Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds rank/select poppy-based implementation, closes #14 #34

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions tinygraph/tinygraph-align.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>


void* tinygraph_align_malloc(size_t alignment, size_t size) {
void *p;

if (posix_memalign(&p, alignment, size) != 0) {
return NULL;
}

return p;
}


void tinygraph_align_free(void *p) {
free(p);
}
17 changes: 17 additions & 0 deletions tinygraph/tinygraph-align.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef TINYGRAPH_ALIGN_H
#define TINYGRAPH_ALIGN_H

#include <stdint.h>

#include "tinygraph-utils.h"


#define TINYGRAPH_ALIGN(x) __attribute__((aligned(x)))

TINYGRAPH_WARN_UNUSED
void* tinygraph_align_malloc(size_t alignment, size_t size);

void tinygraph_align_free(void *p);


#endif
9 changes: 9 additions & 0 deletions tinygraph/tinygraph-bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,19 @@ bool tinygraph_bitset_get_at(tinygraph_bitset *bitset, uint64_t i) {


uint64_t tinygraph_bitset_get_size(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

return bitset->size;
}


uint64_t* tinygraph_bitset_get_data(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

return bitset->blocks;
}


void tinygraph_bitset_clear(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

Expand Down
3 changes: 3 additions & 0 deletions tinygraph/tinygraph-bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ bool tinygraph_bitset_get_at(tinygraph_bitset_s bitset, uint64_t i);
TINYGRAPH_WARN_UNUSED
uint64_t tinygraph_bitset_get_size(tinygraph_bitset_s bitset);

TINYGRAPH_WARN_UNUSED
uint64_t* tinygraph_bitset_get_data(tinygraph_bitset_s bitset);

void tinygraph_bitset_clear(tinygraph_bitset_s bitset);

void tinygraph_bitset_not(tinygraph_bitset_s bitset);
Expand Down
138 changes: 138 additions & 0 deletions tinygraph/tinygraph-poppy.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#include <math.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#include "tinygraph-bits.h"
#include "tinygraph-align.h"
#include "tinygraph-poppy.h"


typedef struct tinygraph_poppy_rank {
uint64_t *level0;
uint64_t level0_len;

tinygraph_bitset_s bitset_view;
} tinygraph_poppy_rank;


tinygraph_poppy_rank* tinygraph_poppy_rank_construct(tinygraph_bitset_s bitset) {
TINYGRAPH_ASSERT(bitset);

tinygraph_poppy_rank *out = malloc(sizeof(tinygraph_poppy_rank));

if (!out) {
return NULL;
}

// It's important that we align the index structures at
// the cache line boundary, which is 64 bytes nowadays
const uint32_t cacheline = 64;

const uint64_t *bits = tinygraph_bitset_get_data(bitset);
const uint64_t nbits = tinygraph_bitset_get_size(bitset);

// 512 bits basic blocks for now
const uint64_t level0_len = nbits / 512;

if (level0_len == 0) {
*out = (tinygraph_poppy_rank){
.level0 = NULL,
.level0_len = 0,
.bitset_view = bitset,
};

return out;
}

// TODO: implement optimizations to get 12.5% down to 3.2%
// At the moment we only implement Fig. 3 from the paper

uint64_t *level0 = tinygraph_align_malloc(cacheline, level0_len);

if (!level0) {
free(out);

return NULL;
}

memset(level0, 0, level0_len);

uint64_t csum = 0;

for (uint64_t i = 0; i < level0_len; ++i) {
csum += tinygraph_bits_count(bits[i * 8 + 0]);
csum += tinygraph_bits_count(bits[i * 8 + 1]);
csum += tinygraph_bits_count(bits[i * 8 + 2]);
csum += tinygraph_bits_count(bits[i * 8 + 3]);
csum += tinygraph_bits_count(bits[i * 8 + 4]);
csum += tinygraph_bits_count(bits[i * 8 + 5]);
csum += tinygraph_bits_count(bits[i * 8 + 6]);
csum += tinygraph_bits_count(bits[i * 8 + 7]);

level0[i] = csum;
}

*out = (tinygraph_poppy_rank){
.level0 = level0,
.level0_len = level0_len,
.bitset_view = bitset,
};

return out;
}


void tinygraph_poppy_rank_destruct(tinygraph_poppy_rank *rank) {
if (!rank) {
return;
}

TINYGRAPH_ASSERT(rank->level0);

tinygraph_align_free(rank->level0);

rank->level0 = NULL;

free(rank);
}


uint64_t tinygraph_poppy_rank_get(tinygraph_poppy_rank_s rank, uint64_t n) {
TINYGRAPH_ASSERT(rank);
TINYGRAPH_ASSERT(n < tinygraph_bitset_get_size(rank->bitset_view));

const uint64_t block = n / 512;

TINYGRAPH_ASSERT(block <= rank->level0_len);

// pre-computed cumulative sums from index
const uint64_t leftof = block > 0 ? rank->level0[block - 1] : 0;

// bits within last block we need to count
const uint64_t *bits = tinygraph_bitset_get_data(rank->bitset_view);

uint64_t count = 0;

for (uint64_t i = block * 8; i < block * 8 + ((n % 512) / 64); ++i) {
count += tinygraph_bits_count(bits[i]);
}

count += tinygraph_bits_rank(bits[block * 8 + ((n % 512) / 64)], (n % 512) % 64);

return leftof + count;
}


void tinygraph_poppy_rank_print_internal(tinygraph_poppy_rank_s rank) {
TINYGRAPH_ASSERT(rank);

fprintf(stderr, "poppy rank internals\n");

for (uint64_t i = 0; i < rank->level0_len; ++i) {
fprintf(stderr, " %ju", (uintmax_t)rank->level0[i]);
}

fprintf(stderr, "\n");
}
35 changes: 35 additions & 0 deletions tinygraph/tinygraph-poppy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef TINYGRAPH_POPPY_H
#define TINYGRAPH_POPPY_H

#include "tinygraph-bitset.h"
#include "tinygraph-utils.h"

/*
* The Poppy Rank/Select data structures from the paper
*
* "Space-Efficient, High-Performance Rank & Select
* Structures on Uncompressed Bit Sequences"
*
* with space-overhead 3.2% (rank) and 0.39% (select)
*
* See
* - https://www.cs.cmu.edu/~dga/papers/zhou-sea2013.pdf
*/


typedef struct tinygraph_poppy_rank* tinygraph_poppy_rank_s;


TINYGRAPH_WARN_UNUSED
tinygraph_poppy_rank_s tinygraph_poppy_rank_construct(tinygraph_bitset_s bitset);

void tinygraph_poppy_rank_destruct(tinygraph_poppy_rank_s rank);

TINYGRAPH_WARN_UNUSED
uint64_t tinygraph_poppy_rank_get(tinygraph_poppy_rank_s rank, uint64_t n);

void tinygraph_poppy_rank_print_internal(tinygraph_poppy_rank_s rank);



#endif
28 changes: 26 additions & 2 deletions tinygraph/tinygraph-tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "tinygraph-zigzag.h"
#include "tinygraph-zorder.h"
#include "tinygraph-bits.h"
#include "tinygraph-poppy.h"
#include "tinygraph-eliasfano.h"


Expand Down Expand Up @@ -569,8 +570,6 @@ void test18() {

tinygraph_bitset_s bits = tinygraph_eliasfano_encode(data, 5);

tinygraph_bitset_print_internal(bits);

tinygraph_bitset_destruct(bits);
}

Expand Down Expand Up @@ -647,6 +646,30 @@ void test21() {
}


void test22() {
tinygraph_bitset_s bits = tinygraph_bitset_construct(1098);

tinygraph_bitset_set_at(bits, 0);
tinygraph_bitset_set_at(bits, 512);
tinygraph_bitset_set_at(bits, 1097);

tinygraph_bitset_print_internal(bits);

tinygraph_poppy_rank_s rank = tinygraph_poppy_rank_construct(bits);

tinygraph_poppy_rank_print_internal(rank);

fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 0));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 511));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 512));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 513));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1095));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1096));
fprintf(stderr, "%ju\n", tinygraph_poppy_rank_get(rank, 1097));
}


int main() {
test1();
test2();
Expand All @@ -669,4 +692,5 @@ int main() {
test19();
test20();
test21();
test22();
}