From f000b15b62a4b9cd3d5d0178dd9cdf09496c0005 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Sun, 28 Mar 2021 23:22:18 +0800 Subject: [PATCH] * added initial implementation of mutable Hash table * added option to rebuild apps from scratch (discard pre-generated .c, .o files) * issue: for some reason, (...) and {...} do not alway work well --- compiler/Compiler.fx | 3 +- compiler/Options.fx | 4 + lib/Builtins.fx | 51 +++++++++ lib/Hashmap.fx | 236 ++++++++++++++++++++++++++++++++++++++ runtime/ficus/ficus.h | 24 ++-- test/test_ds.fx | 259 +++++++++++++++++++++++------------------- 6 files changed, 447 insertions(+), 130 deletions(-) create mode 100644 lib/Hashmap.fx diff --git a/compiler/Compiler.fx b/compiler/Compiler.fx index ab0fc41..e4a3e50 100644 --- a/compiler/Compiler.fx +++ b/compiler/Compiler.fx @@ -226,12 +226,13 @@ fun emit_c_files(fname0: string, cmods: C_form.cmodule_t list) val (new_cmod, ok) = if ok { val str_new = C_pp.pprint_top_to_string(cmod_ccode) - val str_old = + val str_old = if Options.opt.force_rebuild {""} else { try File.read_utf8(output_fname) catch { | IOError | FileOpenError => "" } + } val (recompile, ok) = if str_new == str_old { (false, ok) diff --git a/compiler/Options.fx b/compiler/Options.fx index cb27f16..b0dea65 100644 --- a/compiler/Options.fx +++ b/compiler/Options.fx @@ -9,6 +9,7 @@ type options_t = app_args: string list = []; app_filename: string = ""; arch64: bool = true; + force_rebuild: bool = false; build_dir: string = ""; build_rootdir: string = ""; cflags: string = ""; @@ -56,6 +57,7 @@ Run '{fxname} -h' to get more detailed help") Usage: {fxname} [options ...] [-- ] where options can be some of: + -rebuild Ignore cached files; rebuild everything from scratch -pr-tokens Print all the tokens in parsed files -pr-ast0 Print AST right after parsing -pr-ast Print typechecked AST of the parsed files @@ -119,6 +121,8 @@ fun parse_options(): bool { var ok = true while args != [] { args = match args { + | "-rebuild" :: next => + opt.force_rebuild = true; next | "-pr-tokens" :: next => opt.print_tokens = true; next | "-pr-ast0" :: next => diff --git a/lib/Builtins.fx b/lib/Builtins.fx index e21451d..4e75736 100644 --- a/lib/Builtins.fx +++ b/lib/Builtins.fx @@ -840,3 +840,54 @@ fun new_uniform_rng(seed: uint64) { (x :> int) } } + +@ccode { + +typedef uint64_t hash_t; +#define FNV_1A_PRIME 1099511628211ULL +#define FNV_1A_OFFSET 14695981039346656037ULL + +} + +type hash_t = uint64 +val FNV_1A_PRIME: hash_t = 1099511628211UL +val FNV_1A_OFFSET: hash_t = 14695981039346656037UL + +fun hash(x: 't): hash_t = uint64(x) +fun hash(x: int) = uint64(x) ^ FNV_1A_OFFSET +fun hash(x: int32) = uint64(x) ^ FNV_1A_OFFSET +fun hash(x: uint32) = uint64(x) ^ FNV_1A_OFFSET +fun hash(x: int8) = uint64(x) ^ FNV_1A_OFFSET +fun hash(x: uint8) = uint64(x) ^ FNV_1A_OFFSET +fun hash(x: int16) = uint64(x) ^ FNV_1A_OFFSET +fun hash(x: uint16) = uint64(x) ^ FNV_1A_OFFSET +fun hash(x: bool) = uint64(x) ^ FNV_1A_OFFSET +@pure @nothrow fun hash(x: float): hash_t = @ccode { + fx_bits32_t u; u.f = x; return u.u ^ 14695981039346656037ULL; +} +@pure @nothrow fun hash(x: double): hash_t = @ccode { + fx_bits64_t u; u.f = x; + return u.u ^ 14695981039346656037ULL; +} +@pure @nothrow fun hash(x: string): hash_t = @ccode { + uint64_t hash = FNV_1A_OFFSET; + int_ i, len = x->length; + char_* data = x->data; + for(i = 0; i < len; i++) { + hash ^= data[i]; + hash *= FNV_1A_PRIME; + } + return hash; +} + +fun hash(x: (...)): hash_t = + fold h = FNV_1A_OFFSET for xj <- x { + val h = h ^ hash(xj) + h * FNV_1A_PRIME + } + +fun hash(x: {...}): hash_t = + fold h = FNV_1A_OFFSET for (_, xj) <- x { + val h = h ^ hash(xj) + h * FNV_1A_PRIME + } diff --git a/lib/Hashmap.fx b/lib/Hashmap.fx new file mode 100644 index 0000000..19c41f7 --- /dev/null +++ b/lib/Hashmap.fx @@ -0,0 +1,236 @@ +/* + This file is a part of ficus language project. + See ficus/LICENSE for the licensing terms +*/ + +/* mutable hash table + + the implementation is influenced by CPython's dictobject implementation. + See https://github.com/python/cpython/blob/master/Objects/dictobject.c +*/ + +val HASH_EMPTY: hash_t = 0UL +val HASH_DELETED: hash_t = 1UL +val PERTURB_SHIFT = 5 + +object type ('k, 'd) t = +{ + hash_f: ('k -> hash_t)? + k0: 'k + d0: 'd + _state: ('k, 'd) hash_state_t ref +} + +type ('k, 'd) hash_state_t = +{ + nelems: int + nremoved: int + table: (hash_t, 'k, 'd) [] +} + +fun empty(size0: int, k0: 'k, d0: 'd, f: ('k->hash_t)? ): ('k, 'd) Hashmap.t +{ + var size = 16 + while size < size0 { size *= 2 } + val state = hash_state_t { + nelems=0, nremoved=0, table=array(size, (HASH_EMPTY, k0, d0)) } + Hashmap.t { hash_f=f, k0=k0, d0=d0, _state=ref state } +} + +fun empty(ht: ('k, 'd) Hashmap.t): bool = ht._state->nkeys == 0 +fun size(ht: ('k, 'd) Hashmap.t) = ht._state->nelems + +@private fun add_(ht: ('k, 'd) Hashmap.t, ht_table: (hash_t, 'k, 'd) [], (hv: hash_t, k: 'k, d: 'd)): (int, int) { + val tabsz = size(ht_table) + var perturb = hv, delta_nelems = -1, delta_nremoved = 0 + var j = int(hv) & (tabsz - 1), insert_pos = -1 + + for i <- 0:tabsz+14 { + val (hvj, kj, _) = ht_table[j] + if hvj == hv { + if kj == k { + if insert_pos >= 0 { + ht_table[insert_pos] = (hv, k, d) + ht_table[j] = (HASH_EMPTY, ht.k0, ht.d0) + } else { + ht_table[j].2 = d + } + delta_nelems = 0 + break + } + } else if hvj == HASH_EMPTY { + ht_table[(if insert_pos >= 0 {insert_pos} else {j})] = (hv, k, d) + delta_nelems = 1 + break + } else if hvj == HASH_DELETED && insert_pos < 0 { + insert_pos = j + delta_nremoved = -1 + } + perturb >>= PERTURB_SHIFT + j = int(uint64(j*5 + 1) + perturb) & (tabsz - 1) + } + if delta_nelems < 0 { + if insert_pos >= 0 { + ht_table[insert_pos] = (hv, k, d) + delta_nelems = 1 + } else { + throw Fail("can-not insert element into half-empty Hashtable (?!)") + } + } + (delta_nelems, delta_nremoved) +} + +@private fun grow(ht: ('k, 'd) Hashmap.t, new_size: int): void +{ + val ht_table = ht._state->table + val curr_size = size(ht_table) + val new_ht_table = array(new_size, (HASH_EMPTY, ht.k0, ht.d0)) + for j <- 0:curr_size { + if ht_table[j].0 > HASH_DELETED { + ignore(add_(ht, new_ht_table, ht_table[j])) + } + } + ht._state->table = new_ht_table + ht._state->nremoved = 0 +} + +fun find_idx(ht: ('k, 'd) Hashmap.t, k: 'k): int +{ + var hv = match ht.hash_f { + | Some(f) => f(k) + | _ => hash(k) + } + if hv <= HASH_DELETED { hv ^= FNV_1A_OFFSET } + val tabsz = size(ht._state->table) + var perturb = hv, found = -1 + var j = int(hv) & (tabsz - 1) + for i <- 0:tabsz+14 { + val (hvj, kj, _) = ht._state->table[j] + if hvj == hv { + if kj == k { + found = j + break + } + } else if hvj == HASH_EMPTY { break } + perturb >>= PERTURB_SHIFT + j = int(uint64(j*5 + 1) + perturb) & (tabsz - 1) + } + found +} + +fun mem(ht: ('k, 'd) Hashmap.t, k: 'k): bool = find_idx(ht, k) >= 0 +fun find_opt(ht: ('k, 'd) Hashmap.t, k: 'k): 'd? +{ + val j = find_idx(ht, k) + if j >= 0 { Some(ht._state->table[j].2) } else { None } +} + +fun find_idx_or_insert(ht: ('k, 'd) Hashmap.t, k: 'k): int +{ + var hv = match ht.hash_f { + | Some(f) => f(k) + | _ => hash(k) + } + if hv <= HASH_DELETED { hv ^= FNV_1A_OFFSET } + var tabsz = size(ht._state->table) + + if ht._state->nelems + ht._state->nremoved >= (tabsz >> 1) { + while tabsz <= (ht._state->nelems + ht._state->nremoved)*2 { tabsz *= 2 } + grow(ht, tabsz) + } + + var perturb = hv, found = -1, insert_pos = -1 + var j = int(hv) & (tabsz - 1) + for i <- 0:tabsz+14 { + val (hvj, kj, _) = ht._state->table[j] + if hvj == hv { + if kj == k { + found = j + break + } + } else if hvj == HASH_EMPTY { + if insert_pos < 0 { insert_pos = j } + break + } else if hvj == HASH_DELETED && insert_pos < 0 { + insert_pos = j + ht._state->nremoved -= 1 + } + perturb >>= PERTURB_SHIFT + j = int(uint64(j*5 + 1) + perturb) & (tabsz - 1) + } + if found >= 0 { + if insert_pos < 0 { + found + } else { + ht._state->table[insert_pos] = ht._state->table[found] + ht._state->table[found] = (HASH_EMPTY, ht.k0, ht.d0) + insert_pos + } + } + else if insert_pos >= 0 { + ht._state->table[insert_pos] = (hv, k, ht.d0) + ht._state->nelems += 1 + insert_pos + } else { + throw Fail("can-not insert element into half-empty Hashtable (?!)") + } +} + +fun add(ht: ('k, 'd) Hashmap.t, k: 'k, d: 'd) { + val idx = find_idx_or_insert(ht, k) + ht._state->table[idx].2 = d +} + +fun remove(ht: ('k, 'd) Hashmap.t, k: 'k) { + val idx = find_idx(ht, k) + if idx >= 0 { + ht._state->table[idx] = (HASH_DELETED, ht.k0, ht.d0) + ht._state->nelems -= 1 + ht._state->nremoved += 1 + } +} + +fun list(ht: ('k, 'd) Hashmap.t): ('k, 'd) list = + [: for j <- 0:size(ht._state->table) { + if ht._state->table[j].0 <= HASH_DELETED { continue } + val (_, kj, dj) = ht._state->table[j] + (kj, dj) + } :] + +fun add_list(ht: ('k, 'd) Hashmap.t, data: ('k, 'd) list) +{ + var datasz = ht._state->nelems + ht._state->nremoved + data.length() + var curr_size = size(ht._state->table), new_size = curr_size + while new_size <= datasz*2 { new_size *= 2 } + if new_size > curr_size { grow(ht, new_size) } + for (k, d) <- data { add(ht, k, d) } +} + +fun from_list(k0: 'k, d0: 'd, hash_f: ('k->hash_t)?, data: ('k, 'd) list): ('k, 'd) Hashmap.t +{ + val ht = empty(data.length()*2, k0, d0, hash_f) + add_list(ht, data) + ht +} + +fun app(ht: ('k, 'd) Hashmap.t, f: ('k, 'd)->void) { + val table = ht._state->table + for j <- 0:size(table) { + if table[j].0 > HASH_DELETED { + val (_, kj, dj) = table[j] + f(kj, dj) + } + } +} + +fun foldl(ht: ('k, 'd) Hashmap.t, f: ('k, 'd, 'r)->'r, res0: 'r): 'r { + val table = ht._state->table + var res = res0 + for j <- 0:size(table) { + if table[j].0 > HASH_DELETED { + val (_, kj, dj) = table[j] + res = f(kj, dj, res) + } + } + res +} diff --git a/runtime/ficus/ficus.h b/runtime/ficus/ficus.h index 14dc207..a48f782 100644 --- a/runtime/ficus/ficus.h +++ b/runtime/ficus/ficus.h @@ -182,8 +182,8 @@ void fx_copy_ptr(const void* src, void* pdst); ///////////////////////////// Numbers //////////////////////////// -typedef union fx_round64_t {double d; int64_t i;} fx_round64_t; -typedef union fx_round32_t {float f; int i;} fx_round32_t; +typedef union fx_bits64_t {double f; int64_t i; uint64_t u;} fx_bits64_t; +typedef union fx_bits32_t {float f; int i; unsigned u;} fx_bits32_t; #if FX_ARCH_64 @@ -193,27 +193,27 @@ typedef union fx_round32_t {float f; int i;} fx_round32_t; // instead, they do "reinterpret_cast(x+magic_number)" right in the registers. // in the end we propagate the sign bit of the 51-bit result. FX_INLINE int_ fx_roundf2I(float x) { - fx_round64_t u; - u.d = x + 6755399441055744.0; + fx_bits64_t u; + u.f = x + 6755399441055744.0; return (u.i << 13) >> 13; } FX_INLINE int_ fx_round2I(double x) { - fx_round64_t u; - u.d = x + 6755399441055744.0; + fx_bits64_t u; + u.f = x + 6755399441055744.0; return (u.i << 13) >> 13; } #else // on 32-bit machines we need just lower 32 bits of the result. FX_INLINE int_ fx_roundf2I(float x) { fx_round64_t u; - u.d = x + 6755399441055744.0; + u.f = x + 6755399441055744.0; return (int_)(int)u.i; } FX_INLINE int_ fx_round2I(double x) { - fx_round64_t u; - u.d = x + 6755399441055744.0; + fx_bits64_t u; + u.f = x + 6755399441055744.0; return (int_)(int)u.i; } #endif @@ -223,14 +223,14 @@ FX_INLINE int fx_roundf2i(float x) { // fast vectorized version inside vectorized loops. // the result is limited to [-2**22,2**22) value range, // it's still useful for float -> [u]int16 or [u]int8 conversion - fx_round32_t u; + fx_bits32_t u; u.f = x + 12582912.0f; return (u.i << 10) >> 10; } FX_INLINE int fx_round2i(double x) { - fx_round64_t u; - u.d = x + 6755399441055744.0; + fx_bits64_t u; + u.f = x + 6755399441055744.0; return (int)u.i; } diff --git a/test/test_ds.fx b/test/test_ds.fx index a0151d7..1c6292f 100644 --- a/test/test_ds.fx +++ b/test/test_ds.fx @@ -1,6 +1,5 @@ from UTest import * -import Set -import Map +import Set, Map, Hashmap TEST("ds.set", fun() { @@ -44,6 +43,121 @@ TEST("ds.set", fun() EXPECT_EQ(s1.list(), [: "a", "an", "binary", "for", "implementation", "is", "of", "set", "simple", "test", "this", "very" :]) }) +val poem = +"The first day of Christmas, +My true love sent to me +A partridge in a pear tree. + +The second day of Christmas, +My true love sent to me +Two turtle doves, and +A partridge in a pear tree. + +The third day of Christmas, +My true love sent to me +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The fourth day of Christmas, +My true love sent to me +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The fifth day of Christmas, +My true love sent to me +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The sixth day of Christmas, +My true love sent to me +Six geese a-laying, +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The seventh day of Christmas, +My true love sent to me +Seven swans a-swimming, +Six geese a-laying, +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The eighth day of Christmas, +My true love sent to me +Eight maids a-milking, +Seven swans a-swimming, +Six geese a-laying, +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The ninth day of Christmas, +My true love sent to me +Nine drummers drumming, +Eight maids a-milking, +Seven swans a-swimming, +Six geese a-laying, +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The tenth day of Christmas, +My true love sent to me +Ten pipers piping, +Nine drummers drumming, +Eight maids a-milking, +Seven swans a-swimming, +Six geese a-laying, +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The eleventh day of Christmas +My true love sent to me +Eleven ladies dancing, +Ten pipers piping, +Nine drummers drumming, +Eight maids a-milking, +Seven swans a-swimming, +Six geese a-laying, +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree. + +The twelfth day of Christmas +My true love sent to me +Twelve fiddlers fiddling, +Eleven ladies dancing, +Ten pipers piping, +Nine drummers drumming, +Eight maids a-milking, +Seven swans a-swimming, +Six geese a-laying, +Five gold rings, +Four colly birds, +Three French hens, +Two turtle doves, and +A partridge in a pear tree." + TEST("ds.map", fun() { fun cmp(a: 't, b: 't) = a <=> b @@ -51,121 +165,6 @@ TEST("ds.map", fun() type si_map = (string, int) Map.t - val poem = - "The first day of Christmas, - My true love sent to me - A partridge in a pear tree. - - The second day of Christmas, - My true love sent to me - Two turtle doves, and - A partridge in a pear tree. - - The third day of Christmas, - My true love sent to me - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The fourth day of Christmas, - My true love sent to me - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The fifth day of Christmas, - My true love sent to me - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The sixth day of Christmas, - My true love sent to me - Six geese a-laying, - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The seventh day of Christmas, - My true love sent to me - Seven swans a-swimming, - Six geese a-laying, - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The eighth day of Christmas, - My true love sent to me - Eight maids a-milking, - Seven swans a-swimming, - Six geese a-laying, - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The ninth day of Christmas, - My true love sent to me - Nine drummers drumming, - Eight maids a-milking, - Seven swans a-swimming, - Six geese a-laying, - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The tenth day of Christmas, - My true love sent to me - Ten pipers piping, - Nine drummers drumming, - Eight maids a-milking, - Seven swans a-swimming, - Six geese a-laying, - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The eleventh day of Christmas - My true love sent to me - Eleven ladies dancing, - Ten pipers piping, - Nine drummers drumming, - Eight maids a-milking, - Seven swans a-swimming, - Six geese a-laying, - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree. - - The twelfth day of Christmas - My true love sent to me - Twelve fiddlers fiddling, - Eleven ladies dancing, - Ten pipers piping, - Nine drummers drumming, - Eight maids a-milking, - Seven swans a-swimming, - Six geese a-laying, - Five gold rings, - Four colly birds, - Three French hens, - Two turtle doves, and - A partridge in a pear tree." - val words = poem.tokens(fun (c) {c.isspace() || c == '.' || c == ','}) val fold wcounter = (Map.empty(scmp) : si_map) for w <- words { wcounter.add(w, wcounter.find(w, 0)+1) @@ -211,3 +210,29 @@ TEST("ds.map", fun() EXPECT_EQ(wcounter_odd.list(), ll_odd.rev()) }) + +TEST("ds.hashmap", fun() { + type si_hash = (string, int) Hashmap.t + val words = poem.tokens(fun (c) {c.isspace() || c == '.' || c == ','}) + val wcounter = Hashmap.empty(16, "", 0, Some(hash : string->hash_t)) + for w <- words { + val idx = wcounter.find_idx_or_insert(w) + wcounter._state->table[idx].2 += 1 + } + + val ll = [: ("A", 12), ("Christmas", 12), ("Eight", 5), ("Eleven", 2), + ("Five", 8), ("Four", 9), ("French", 10), ("My", 12), ("Nine", 4), + ("Seven", 6), ("Six", 7), ("Ten", 3), ("The", 12), ("Three", 10), + ("Twelve", 1), ("Two", 11), ("a", 12), ("a-laying", 7), ("a-milking", 5), + ("a-swimming", 6), ("and", 11), ("birds", 9), ("colly", 9), ("dancing", 2), + ("day", 12), ("doves", 11), ("drummers", 4), ("drumming", 4), ("eighth", 1), + ("eleventh", 1), ("fiddlers", 1), ("fiddling", 1), ("fifth", 1), ("first", 1), + ("fourth", 1), ("geese", 7), ("gold", 8), ("hens", 10), ("in", 12), ("ladies", 2), + ("love", 12), ("maids", 5), ("me", 12), ("ninth", 1), ("of", 12), ("partridge", 12), + ("pear", 12), ("pipers", 3), ("piping", 3), ("rings", 8), ("second", 1), ("sent", 12), + ("seventh", 1), ("sixth", 1), ("swans", 6), ("tenth", 1), ("third", 1), ("to", 12), + ("tree", 12), ("true", 12), ("turtle", 11), ("twelfth", 1) :] + + val ll_fh = wcounter.list().sort((<)) + EXPECT_EQ(ll_fh, ll) +})