forked from rust-phf/rust-phf
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move generation logic to its own crate
- Loading branch information
Showing
5 changed files
with
160 additions
and
132 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[package] | ||
name = "phf_generator" | ||
authors = ["Steven Fackler <sfackler@gmail.com>"] | ||
version = "0.6.9" | ||
license = "MIT" | ||
description = "PHF generation logic" | ||
repository = "https://github.com/sfackler/rust-phf" | ||
documentation = "https://sfackler.github.io/rust-phf/doc/phf_generator" | ||
|
||
[dependencies] | ||
rand = "0.1" | ||
|
||
[dependencies.phf_shared] | ||
path = "../phf_shared" | ||
version = "=0.6.9" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
extern crate phf_shared; | ||
extern crate rand; | ||
|
||
use phf_shared::PhfHash; | ||
use rand::{SeedableRng, XorShiftRng, Rng}; | ||
|
||
const DEFAULT_LAMBDA: usize = 5; | ||
|
||
const FIXED_SEED: [u32; 4] = [3141592653, 589793238, 462643383, 2795028841]; | ||
|
||
pub struct HashState { | ||
pub key: u64, | ||
pub disps: Vec<(u32, u32)>, | ||
pub map: Vec<usize>, | ||
} | ||
|
||
pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState { | ||
let mut rng: XorShiftRng = SeedableRng::from_seed(FIXED_SEED); | ||
loop { | ||
if let Some(s) = try_generate_hash(entries, &mut rng) { | ||
return s; | ||
} | ||
} | ||
} | ||
|
||
fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut XorShiftRng) -> Option<HashState> { | ||
struct Bucket { | ||
idx: usize, | ||
keys: Vec<usize>, | ||
} | ||
|
||
struct Hashes { | ||
g: u32, | ||
f1: u32, | ||
f2: u32, | ||
} | ||
|
||
let key = rng.gen(); | ||
|
||
let hashes: Vec<_> = entries.iter().map(|entry| { | ||
let (g, f1, f2) = entry.phf_hash(key); | ||
Hashes { | ||
g: g, | ||
f1: f1, | ||
f2: f2 | ||
} | ||
}).collect(); | ||
|
||
let buckets_len = (entries.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; | ||
let mut buckets = (0..buckets_len) | ||
.map(|i| Bucket { idx: i, keys: vec![] }) | ||
.collect::<Vec<_>>(); | ||
|
||
for (i, hash) in hashes.iter().enumerate() { | ||
buckets[(hash.g % (buckets_len as u32)) as usize].keys.push(i); | ||
} | ||
|
||
// Sort descending | ||
buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); | ||
|
||
let table_len = entries.len(); | ||
let mut map = vec![None; table_len]; | ||
let mut disps = vec![(0u32, 0u32); buckets_len]; | ||
|
||
// store whether an element from the bucket being placed is | ||
// located at a certain position, to allow for efficient overlap | ||
// checks. It works by storing the generation in each cell and | ||
// each new placement-attempt is a new generation, so you can tell | ||
// if this is legitimately full by checking that the generations | ||
// are equal. (A u64 is far too large to overflow in a reasonable | ||
// time for current hardware.) | ||
let mut try_map = vec![0u64; table_len]; | ||
let mut generation = 0u64; | ||
|
||
// the actual values corresponding to the markers above, as | ||
// (index, key) pairs, for adding to the main map once we've | ||
// chosen the right disps. | ||
let mut values_to_add = vec![]; | ||
|
||
'buckets: for bucket in buckets.iter() { | ||
for d1 in 0..(table_len as u32) { | ||
'disps: for d2 in 0..(table_len as u32) { | ||
values_to_add.clear(); | ||
generation += 1; | ||
|
||
for &key in bucket.keys.iter() { | ||
let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2) | ||
% (table_len as u32)) as usize; | ||
if map[idx].is_some() || try_map[idx] == generation { | ||
continue 'disps; | ||
} | ||
try_map[idx] = generation; | ||
values_to_add.push((idx, key)); | ||
} | ||
|
||
// We've picked a good set of disps | ||
disps[bucket.idx] = (d1, d2); | ||
for &(idx, key) in values_to_add.iter() { | ||
map[idx] = Some(key); | ||
} | ||
continue 'buckets; | ||
} | ||
} | ||
|
||
// Unable to find displacements for a bucket | ||
return None; | ||
} | ||
|
||
Some(HashState { | ||
key: key, | ||
disps: disps, | ||
map: map.into_iter().map(|i| i.unwrap()).collect(), | ||
}) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters