-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds rank/select poppy-based implementation, closes #14
- Loading branch information
1 parent
c5604bd
commit 48e4e96
Showing
7 changed files
with
246 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#define _POSIX_C_SOURCE 200809L | ||
#include <stdlib.h> | ||
|
||
|
||
void* tinygraph_align_malloc(size_t alignment, size_t size) { | ||
void *p; | ||
|
||
if (posix_memalign(&p, alignment, size) != 0) { | ||
return NULL; | ||
} | ||
|
||
return p; | ||
} | ||
|
||
|
||
void tinygraph_align_free(void *p) { | ||
free(p); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#ifndef TINYGRAPH_ALIGN_H | ||
#define TINYGRAPH_ALIGN_H | ||
|
||
#include <stdint.h> | ||
|
||
#include "tinygraph-utils.h" | ||
|
||
|
||
#define TINYGRAPH_ALIGN(x) __attribute__((aligned(x))) | ||
|
||
TINYGRAPH_WARN_UNUSED | ||
void* tinygraph_align_malloc(size_t alignment, size_t size); | ||
|
||
void tinygraph_align_free(void *p); | ||
|
||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
#include <math.h> | ||
#include <string.h> | ||
#include <stdint.h> | ||
#include <stdlib.h> | ||
#include <stdio.h> | ||
|
||
#include "tinygraph-bits.h" | ||
#include "tinygraph-align.h" | ||
#include "tinygraph-poppy.h" | ||
|
||
|
||
typedef struct tinygraph_poppy_rank { | ||
uint64_t *level0; | ||
uint64_t level0_len; | ||
|
||
tinygraph_bitset_s bitset_view; | ||
} tinygraph_poppy_rank; | ||
|
||
|
||
tinygraph_poppy_rank* tinygraph_poppy_rank_construct(tinygraph_bitset_s bitset) { | ||
TINYGRAPH_ASSERT(bitset); | ||
|
||
tinygraph_poppy_rank *out = malloc(sizeof(tinygraph_poppy_rank)); | ||
|
||
if (!out) { | ||
return NULL; | ||
} | ||
|
||
// It's important that we align the index structures at | ||
// the cache line boundary, which is 64 bytes nowadays | ||
const uint32_t cacheline = 64; | ||
|
||
const uint64_t *bits = tinygraph_bitset_get_data(bitset); | ||
const uint64_t nbits = tinygraph_bitset_get_size(bitset); | ||
|
||
// 512 bits basic blocks for now | ||
const uint64_t level0_len = nbits / 512; | ||
|
||
if (level0_len == 0) { | ||
*out = (tinygraph_poppy_rank){ | ||
.level0 = NULL, | ||
.level0_len = 0, | ||
.bitset_view = bitset, | ||
}; | ||
|
||
return out; | ||
} | ||
|
||
// TODO: implement optimizations to get 12.5% down to 3.2% | ||
// At the moment we only implement Fig. 3 from the paper | ||
|
||
uint64_t *level0 = tinygraph_align_malloc(cacheline, level0_len); | ||
|
||
if (!level0) { | ||
free(out); | ||
|
||
return NULL; | ||
} | ||
|
||
memset(level0, 0, level0_len); | ||
|
||
uint64_t csum = 0; | ||
|
||
for (uint64_t i = 0; i < level0_len; ++i) { | ||
csum += tinygraph_bits_count(bits[i * 8 + 0]); | ||
csum += tinygraph_bits_count(bits[i * 8 + 1]); | ||
csum += tinygraph_bits_count(bits[i * 8 + 2]); | ||
csum += tinygraph_bits_count(bits[i * 8 + 3]); | ||
csum += tinygraph_bits_count(bits[i * 8 + 4]); | ||
csum += tinygraph_bits_count(bits[i * 8 + 5]); | ||
csum += tinygraph_bits_count(bits[i * 8 + 6]); | ||
csum += tinygraph_bits_count(bits[i * 8 + 7]); | ||
|
||
level0[i] = csum; | ||
} | ||
|
||
*out = (tinygraph_poppy_rank){ | ||
.level0 = level0, | ||
.level0_len = level0_len, | ||
.bitset_view = bitset, | ||
}; | ||
|
||
return out; | ||
} | ||
|
||
|
||
void tinygraph_poppy_rank_destruct(tinygraph_poppy_rank *rank) { | ||
if (!rank) { | ||
return; | ||
} | ||
|
||
TINYGRAPH_ASSERT(rank->level0); | ||
|
||
tinygraph_align_free(rank->level0); | ||
|
||
rank->level0 = NULL; | ||
|
||
free(rank); | ||
} | ||
|
||
|
||
uint64_t tinygraph_poppy_rank_get(tinygraph_poppy_rank_s rank, uint64_t n) { | ||
TINYGRAPH_ASSERT(rank); | ||
TINYGRAPH_ASSERT(n < tinygraph_bitset_get_size(rank->bitset_view)); | ||
|
||
const uint64_t block = n / 512; | ||
|
||
TINYGRAPH_ASSERT(block <= rank->level0_len); | ||
|
||
// pre-computed cumulative sums from index | ||
const uint64_t leftof = block > 0 ? rank->level0[block - 1] : 0; | ||
|
||
// bits within last block we need to count | ||
const uint64_t *bits = tinygraph_bitset_get_data(rank->bitset_view); | ||
|
||
uint64_t count = 0; | ||
|
||
for (uint64_t i = block * 8; i < block * 8 + ((n % 512) / 64); ++i) { | ||
count += tinygraph_bits_count(bits[i]); | ||
} | ||
|
||
count += tinygraph_bits_rank(bits[block * 8 + ((n % 512) / 64)], (n % 512) % 64); | ||
|
||
return leftof + count; | ||
} | ||
|
||
|
||
void tinygraph_poppy_rank_print_internal(tinygraph_poppy_rank_s rank) { | ||
TINYGRAPH_ASSERT(rank); | ||
|
||
fprintf(stderr, "poppy rank internals\n"); | ||
|
||
for (uint64_t i = 0; i < rank->level0_len; ++i) { | ||
fprintf(stderr, " %ju", (uintmax_t)rank->level0[i]); | ||
} | ||
|
||
fprintf(stderr, "\n"); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#ifndef TINYGRAPH_POPPY_H | ||
#define TINYGRAPH_POPPY_H | ||
|
||
#include "tinygraph-bitset.h" | ||
#include "tinygraph-utils.h" | ||
|
||
/* | ||
* The Poppy Rank/Select data structures from the paper | ||
* | ||
* "Space-Efficient, High-Performance Rank & Select | ||
* Structures on Uncompressed Bit Sequences" | ||
* | ||
* with space-overhead 3.2% (rank) and 0.39% (select) | ||
* | ||
* See | ||
* - https://www.cs.cmu.edu/~dga/papers/zhou-sea2013.pdf | ||
*/ | ||
|
||
|
||
typedef struct tinygraph_poppy_rank* tinygraph_poppy_rank_s; | ||
|
||
|
||
TINYGRAPH_WARN_UNUSED | ||
tinygraph_poppy_rank_s tinygraph_poppy_rank_construct(tinygraph_bitset_s bitset); | ||
|
||
void tinygraph_poppy_rank_destruct(tinygraph_poppy_rank_s rank); | ||
|
||
TINYGRAPH_WARN_UNUSED | ||
uint64_t tinygraph_poppy_rank_get(tinygraph_poppy_rank_s rank, uint64_t n); | ||
|
||
void tinygraph_poppy_rank_print_internal(tinygraph_poppy_rank_s rank); | ||
|
||
|
||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters