Move generation logic to its own crate
sfackler committed Feb 22, 2015
1 parent 40dbc32 commit cfeee87
Showing 5 changed files with 160 additions and 132 deletions.
15 changes: 15 additions & 0 deletions phf_generator/Cargo.toml
@@ -0,0 +1,15 @@
[package]
name = "phf_generator"
authors = ["Steven Fackler <sfackler@gmail.com>"]
version = "0.6.9"
license = "MIT"
description = "PHF generation logic"
repository = "https://github.com/sfackler/rust-phf"
documentation = "https://sfackler.github.io/rust-phf/doc/phf_generator"

[dependencies]
rand = "0.1"

[dependencies.phf_shared]
path = "../phf_shared"
version = "=0.6.9"
115 changes: 115 additions & 0 deletions phf_generator/src/lib.rs
@@ -0,0 +1,115 @@
extern crate phf_shared;
extern crate rand;

use phf_shared::PhfHash;
use rand::{SeedableRng, XorShiftRng, Rng};

const DEFAULT_LAMBDA: usize = 5;

const FIXED_SEED: [u32; 4] = [3141592653, 589793238, 462643383, 2795028841];

pub struct HashState {
    pub key: u64,
    pub disps: Vec<(u32, u32)>,
    pub map: Vec<usize>,
}

pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
    let mut rng: XorShiftRng = SeedableRng::from_seed(FIXED_SEED);
    loop {
        if let Some(s) = try_generate_hash(entries, &mut rng) {
            return s;
        }
    }
}

fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut XorShiftRng) -> Option<HashState> {
    struct Bucket {
        idx: usize,
        keys: Vec<usize>,
    }

    struct Hashes {
        g: u32,
        f1: u32,
        f2: u32,
    }

    let key = rng.gen();

    let hashes: Vec<_> = entries.iter().map(|entry| {
        let (g, f1, f2) = entry.phf_hash(key);
        Hashes {
            g: g,
            f1: f1,
            f2: f2
        }
    }).collect();

    let buckets_len = (entries.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
    let mut buckets = (0..buckets_len)
        .map(|i| Bucket { idx: i, keys: vec![] })
        .collect::<Vec<_>>();

    for (i, hash) in hashes.iter().enumerate() {
        buckets[(hash.g % (buckets_len as u32)) as usize].keys.push(i);
    }

    // Sort descending
    buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());

    let table_len = entries.len();
    let mut map = vec![None; table_len];
    let mut disps = vec![(0u32, 0u32); buckets_len];

    // store whether an element from the bucket being placed is
    // located at a certain position, to allow for efficient overlap
    // checks. It works by storing the generation in each cell and
    // each new placement-attempt is a new generation, so you can tell
    // if this is legitimately full by checking that the generations
    // are equal. (A u64 is far too large to overflow in a reasonable
    // time for current hardware.)
    let mut try_map = vec![0u64; table_len];
    let mut generation = 0u64;

    // the actual values corresponding to the markers above, as
    // (index, key) pairs, for adding to the main map once we've
    // chosen the right disps.
    let mut values_to_add = vec![];

    'buckets: for bucket in buckets.iter() {
        for d1 in 0..(table_len as u32) {
            'disps: for d2 in 0..(table_len as u32) {
                values_to_add.clear();
                generation += 1;

                for &key in bucket.keys.iter() {
                    let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2)
                               % (table_len as u32)) as usize;
                    if map[idx].is_some() || try_map[idx] == generation {
                        continue 'disps;
                    }
                    try_map[idx] = generation;
                    values_to_add.push((idx, key));
                }

                // We've picked a good set of disps
                disps[bucket.idx] = (d1, d2);
                for &(idx, key) in values_to_add.iter() {
                    map[idx] = Some(key);
                }
                continue 'buckets;
            }
        }

        // Unable to find displacements for a bucket
        return None;
    }

    Some(HashState {
        key: key,
        disps: disps,
        map: map.into_iter().map(|i| i.unwrap()).collect(),
    })
}
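The new crate's public surface is just `generate_hash` and the `HashState` it returns. Here is a minimal sketch of how an out-of-tree caller might drive it: the `Key` wrapper, its SipHash-based toy `phf_hash` impl, and the `main` harness are illustrative assumptions rather than part of this commit (the real macro relies on the `PhfHash` impls shipped in `phf_shared`), and `phf_shared::displace` is called with the same signature the generator uses above.

// Hypothetical consumer of the new crate -- not part of this commit.
extern crate phf_generator;
extern crate phf_shared;

use std::hash::{Hash, Hasher, SipHasher};

use phf_shared::PhfHash;

// Toy key type standing in for the macro's parsed map keys.
struct Key(&'static str);

impl PhfHash for Key {
    // Illustrative only: derive the (g, f1, f2) triple from two seeded
    // SipHash runs over the string.
    fn phf_hash(&self, key: u64) -> (u32, u32, u32) {
        let mut a = SipHasher::new_with_keys(key, 0);
        self.0.hash(&mut a);
        let mut b = SipHasher::new_with_keys(key, 1);
        self.0.hash(&mut b);
        let (a, b) = (a.finish(), b.finish());
        (a as u32, (a >> 32) as u32, b as u32)
    }
}

fn main() {
    let keys = [Key("loop"), Key("match"), Key("struct"), Key("enum")];
    let state = phf_generator::generate_hash(&keys);

    // `key` seeds the hashes, `disps` holds one (d1, d2) pair per bucket,
    // and `map` sends each table slot back to an input index.
    println!("key = {}, disps = {:?}, map = {:?}", state.key, state.disps, state.map);

    // Check the invariant established by try_generate_hash: every entry's
    // displaced hash picks a distinct slot, and that slot maps back to it.
    for (i, k) in keys.iter().enumerate() {
        let (g, f1, f2) = k.phf_hash(state.key);
        let bucket = (g % (state.disps.len() as u32)) as usize;
        let (d1, d2) = state.disps[bucket];
        let slot = (phf_shared::displace(f1, f2, d1, d2) % (state.map.len() as u32)) as usize;
        assert_eq!(state.map[slot], i);
    }
}

phf_macros now goes through exactly this path, with `Entry` standing in for `Key` via the `impl PhfHash for Entry` added to util.rs below.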

5 changes: 3 additions & 2 deletions phf_macros/Cargo.toml
@@ -15,8 +15,9 @@ test = false
[features]
stats = ["time"]

[dependencies]
rand = "0.1"
[dependencies.phf_generator]
path = "../phf_generator"
version = "=0.6.9"

[dependencies.phf_shared]
path = "../phf_shared"
25 changes: 22 additions & 3 deletions phf_macros/src/lib.rs
@@ -2,14 +2,14 @@
//!
//! See the documentation for the `phf` crate for more details.
#![doc(html_root_url="http://sfackler.github.io/rust-phf/doc")]
#![feature(plugin_registrar, quote, rustc_private, core, env, std_misc)]
#![feature(plugin_registrar, quote, rustc_private, env, std_misc)]

extern crate rand;
extern crate syntax;
#[cfg(feature = "stats")]
extern crate time;
extern crate rustc;
extern crate phf_shared;
extern crate phf_generator;

use std::collections::HashMap;
use std::collections::hash_map::Entry::{Occupied, Vacant};
@@ -23,9 +23,11 @@ use syntax::parse;
use syntax::parse::token::{InternedString, Comma, Eof, FatArrow};
use syntax::print::pprust;
use rustc::plugin::Registry;
use phf_generator::HashState;
use std::env;

use util::{Entry, Key};
use util::{generate_hash, create_map, create_set, create_ordered_map, create_ordered_set};
use util::{create_map, create_set, create_ordered_map, create_ordered_set};

pub mod util;

Expand All @@ -38,6 +40,23 @@ pub fn macro_registrar(reg: &mut Registry) {
reg.register_macro("phf_ordered_set", expand_phf_ordered_set);
}

fn generate_hash(cx: &mut ExtCtxt, sp: Span, entries: &[Entry]) -> HashState {
    #[cfg(feature = "stats")]
    use time::precise_time_s;
    #[cfg(not(feature = "stats"))]
    fn precise_time_s() -> f64 { 0. }

    let start = precise_time_s();
    let state = phf_generator::generate_hash(entries);
    let time = precise_time_s() - start;

    if cfg!(feature = "stats") && env::var_os("PHF_STATS").is_some() {
        cx.span_note(sp, &format!("PHF generation took {} seconds", time));
    }

    state
}

fn expand_phf_map(cx: &mut ExtCtxt, sp: Span, tts: &[TokenTree]) -> Box<MacResult+'static> {
    let entries = match parse_map(cx, tts) {
        Some(entries) => entries,
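With phf_macros built with its `stats` feature and the `PHF_STATS` environment variable set, the retained `generate_hash` wrapper above reports how long `phf_generator::generate_hash` took as a note on the macro invocation; without the feature, the `precise_time_s` stub keeps the timing a no-op.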
132 changes: 5 additions & 127 deletions phf_macros/src/util.rs
@@ -1,21 +1,15 @@
use std::env;
use std::rc::Rc;
use std::hash::{Hash, Hasher};
use std::iter::repeat;

use syntax::ast::Expr;
use syntax::codemap::Span;
use syntax::ext::base::{ExtCtxt, MacResult, MacExpr};
use syntax::ext::build::AstBuilder;
use syntax::parse::token::InternedString;
use syntax::ptr::P;
use rand::{Rng, SeedableRng, XorShiftRng};

use phf_shared::{self, PhfHash};

const DEFAULT_LAMBDA: usize = 5;

const FIXED_SEED: [u32; 4] = [3141592653, 589793238, 462643383, 2795028841];
use phf_shared::PhfHash;
use phf_generator::HashState;

#[derive(PartialEq, Eq, Clone)]
pub enum Key {
@@ -77,126 +71,10 @@ pub struct Entry {
    pub value: P<Expr>
}

pub struct HashState {
    key: u64,
    disps: Vec<(u32, u32)>,
    map: Vec<usize>,
}

pub fn generate_hash(cx: &mut ExtCtxt, sp: Span, entries: &[Entry]) -> HashState {
    #[cfg(feature = "stats")]
    use time::precise_time_s;
    #[cfg(not(feature = "stats"))]
    fn precise_time_s() -> f64 { 0.0 }

    let mut rng: XorShiftRng = SeedableRng::from_seed(FIXED_SEED);
    let start = precise_time_s();
    let state;
    loop {
        match try_generate_hash(entries, &mut rng) {
            Some(s) => {
                state = s;
                break;
            }
            None => {}
        }
    }
    let time = precise_time_s() - start;
    if cfg!(feature = "stats") && env::var_os("PHF_STATS").is_some() {
        cx.span_note(sp, &*format!("PHF generation took {} seconds", time));
    }

    state
}

pub fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) -> Option<HashState> {
    struct Bucket {
        idx: usize,
        keys: Vec<usize>,
    }

    struct Hashes {
        g: u32,
        f1: u32,
        f2: u32,
    }

    let key = rng.gen();

    let hashes: Vec<_> = entries.iter().map(|entry| {
        let (g, f1, f2) = entry.key_contents.phf_hash(key);
        Hashes {
            g: g,
            f1: f1,
            f2: f2
        }
    }).collect();

    let buckets_len = (entries.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
    let mut buckets = range(0, buckets_len)
        .map(|i| Bucket { idx: i, keys: vec![] })
        .collect::<Vec<_>>();

    for (i, hash) in hashes.iter().enumerate() {
        buckets[(hash.g % (buckets_len as u32)) as usize].keys.push(i);
    }

    // Sort descending
    buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());

    let table_len = entries.len();
    let mut map = repeat(None).take(table_len).collect::<Vec<_>>();
    let mut disps = repeat((0u32, 0u32)).take(buckets_len).collect::<Vec<_>>();

    // store whether an element from the bucket being placed is
    // located at a certain position, to allow for efficient overlap
    // checks. It works by storing the generation in each cell and
    // each new placement-attempt is a new generation, so you can tell
    // if this is legitimately full by checking that the generations
    // are equal. (A u64 is far too large to overflow in a reasonable
    // time for current hardware.)
    let mut try_map = repeat(0u64).take(table_len).collect::<Vec<_>>();
    let mut generation = 0u64;

    // the actual values corresponding to the markers above, as
    // (index, key) pairs, for adding to the main map once we've
    // chosen the right disps.
    let mut values_to_add = vec![];

    'buckets: for bucket in buckets.iter() {
        for d1 in range(0, table_len as u32) {
            'disps: for d2 in range(0, table_len as u32) {
                values_to_add.clear();
                generation += 1;

                for &key in bucket.keys.iter() {
                    let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2)
                               % (table_len as u32)) as usize;
                    if map[idx].is_some() || try_map[idx] == generation {
                        continue 'disps;
                    }
                    try_map[idx] = generation;
                    values_to_add.push((idx, key));
                }

                // We've picked a good set of disps
                disps[bucket.idx] = (d1, d2);
                for &(idx, key) in values_to_add.iter() {
                    map[idx] = Some(key);
                }
                continue 'buckets;
            }
        }

        // Unable to find displacements for a bucket
        return None;
impl PhfHash for Entry {
    fn phf_hash(&self, key: u64) -> (u32, u32, u32) {
        self.key_contents.phf_hash(key)
    }

    Some(HashState {
        key: key,
        disps: disps,
        map: map.into_iter().map(|i| i.unwrap()).collect(),
    })
}

pub fn create_map(cx: &mut ExtCtxt, sp: Span, entries: Vec<Entry>, state: HashState)
