Skip to content

Commit

Permalink
* added initial implementation of mutable Hash table
Browse files Browse the repository at this point in the history
* added option to rebuild apps from scratch (discard pre-generated .c, .o files)
* issue: for some reason, (...) and {...} do not alway work well
  • Loading branch information
vpisarev committed Mar 28, 2021
1 parent 6d1ed6a commit f000b15
Show file tree
Hide file tree
Showing 6 changed files with 447 additions and 130 deletions.
3 changes: 2 additions & 1 deletion compiler/Compiler.fx
Original file line number Diff line number Diff line change
Expand Up @@ -226,12 +226,13 @@ fun emit_c_files(fname0: string, cmods: C_form.cmodule_t list)
val (new_cmod, ok) =
if ok {
val str_new = C_pp.pprint_top_to_string(cmod_ccode)
val str_old =
val str_old = if Options.opt.force_rebuild {""} else {
try
File.read_utf8(output_fname)
catch {
| IOError | FileOpenError => ""
}
}
val (recompile, ok) =
if str_new == str_old {
(false, ok)
Expand Down
4 changes: 4 additions & 0 deletions compiler/Options.fx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ type options_t =
app_args: string list = [];
app_filename: string = "";
arch64: bool = true;
force_rebuild: bool = false;
build_dir: string = "";
build_rootdir: string = "";
cflags: string = "";
Expand Down Expand Up @@ -56,6 +57,7 @@ Run '{fxname} -h' to get more detailed help")
Usage: {fxname} [options ...] <input_file.fx> [-- <app_args ...>]

where options can be some of:
-rebuild Ignore cached files; rebuild everything from scratch
-pr-tokens Print all the tokens in parsed files
-pr-ast0 Print AST right after parsing
-pr-ast Print typechecked AST of the parsed files
Expand Down Expand Up @@ -119,6 +121,8 @@ fun parse_options(): bool {
var ok = true
while args != [] {
args = match args {
| "-rebuild" :: next =>
opt.force_rebuild = true; next
| "-pr-tokens" :: next =>
opt.print_tokens = true; next
| "-pr-ast0" :: next =>
Expand Down
51 changes: 51 additions & 0 deletions lib/Builtins.fx
Original file line number Diff line number Diff line change
Expand Up @@ -840,3 +840,54 @@ fun new_uniform_rng(seed: uint64) {
(x :> int)
}
}

@ccode {

typedef uint64_t hash_t;
#define FNV_1A_PRIME 1099511628211ULL
#define FNV_1A_OFFSET 14695981039346656037ULL

}

type hash_t = uint64
val FNV_1A_PRIME: hash_t = 1099511628211UL
val FNV_1A_OFFSET: hash_t = 14695981039346656037UL

fun hash(x: 't): hash_t = uint64(x)
fun hash(x: int) = uint64(x) ^ FNV_1A_OFFSET
fun hash(x: int32) = uint64(x) ^ FNV_1A_OFFSET
fun hash(x: uint32) = uint64(x) ^ FNV_1A_OFFSET
fun hash(x: int8) = uint64(x) ^ FNV_1A_OFFSET
fun hash(x: uint8) = uint64(x) ^ FNV_1A_OFFSET
fun hash(x: int16) = uint64(x) ^ FNV_1A_OFFSET
fun hash(x: uint16) = uint64(x) ^ FNV_1A_OFFSET
fun hash(x: bool) = uint64(x) ^ FNV_1A_OFFSET
@pure @nothrow fun hash(x: float): hash_t = @ccode {
fx_bits32_t u; u.f = x; return u.u ^ 14695981039346656037ULL;
}
@pure @nothrow fun hash(x: double): hash_t = @ccode {
fx_bits64_t u; u.f = x;
return u.u ^ 14695981039346656037ULL;
}
@pure @nothrow fun hash(x: string): hash_t = @ccode {
uint64_t hash = FNV_1A_OFFSET;
int_ i, len = x->length;
char_* data = x->data;
for(i = 0; i < len; i++) {
hash ^= data[i];
hash *= FNV_1A_PRIME;
}
return hash;
}

fun hash(x: (...)): hash_t =
fold h = FNV_1A_OFFSET for xj <- x {
val h = h ^ hash(xj)
h * FNV_1A_PRIME
}

fun hash(x: {...}): hash_t =
fold h = FNV_1A_OFFSET for (_, xj) <- x {
val h = h ^ hash(xj)
h * FNV_1A_PRIME
}
236 changes: 236 additions & 0 deletions lib/Hashmap.fx
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
/*
This file is a part of ficus language project.
See ficus/LICENSE for the licensing terms
*/

/* mutable hash table

the implementation is influenced by CPython's dictobject implementation.
See https://github.com/python/cpython/blob/master/Objects/dictobject.c
*/

val HASH_EMPTY: hash_t = 0UL
val HASH_DELETED: hash_t = 1UL
val PERTURB_SHIFT = 5

object type ('k, 'd) t =
{
hash_f: ('k -> hash_t)?
k0: 'k
d0: 'd
_state: ('k, 'd) hash_state_t ref
}

type ('k, 'd) hash_state_t =
{
nelems: int
nremoved: int
table: (hash_t, 'k, 'd) []
}

fun empty(size0: int, k0: 'k, d0: 'd, f: ('k->hash_t)? ): ('k, 'd) Hashmap.t
{
var size = 16
while size < size0 { size *= 2 }
val state = hash_state_t {
nelems=0, nremoved=0, table=array(size, (HASH_EMPTY, k0, d0)) }
Hashmap.t { hash_f=f, k0=k0, d0=d0, _state=ref state }
}

fun empty(ht: ('k, 'd) Hashmap.t): bool = ht._state->nkeys == 0
fun size(ht: ('k, 'd) Hashmap.t) = ht._state->nelems

@private fun add_(ht: ('k, 'd) Hashmap.t, ht_table: (hash_t, 'k, 'd) [], (hv: hash_t, k: 'k, d: 'd)): (int, int) {
val tabsz = size(ht_table)
var perturb = hv, delta_nelems = -1, delta_nremoved = 0
var j = int(hv) & (tabsz - 1), insert_pos = -1

for i <- 0:tabsz+14 {
val (hvj, kj, _) = ht_table[j]
if hvj == hv {
if kj == k {
if insert_pos >= 0 {
ht_table[insert_pos] = (hv, k, d)
ht_table[j] = (HASH_EMPTY, ht.k0, ht.d0)
} else {
ht_table[j].2 = d
}
delta_nelems = 0
break
}
} else if hvj == HASH_EMPTY {
ht_table[(if insert_pos >= 0 {insert_pos} else {j})] = (hv, k, d)
delta_nelems = 1
break
} else if hvj == HASH_DELETED && insert_pos < 0 {
insert_pos = j
delta_nremoved = -1
}
perturb >>= PERTURB_SHIFT
j = int(uint64(j*5 + 1) + perturb) & (tabsz - 1)
}
if delta_nelems < 0 {
if insert_pos >= 0 {
ht_table[insert_pos] = (hv, k, d)
delta_nelems = 1
} else {
throw Fail("can-not insert element into half-empty Hashtable (?!)")
}
}
(delta_nelems, delta_nremoved)
}

@private fun grow(ht: ('k, 'd) Hashmap.t, new_size: int): void
{
val ht_table = ht._state->table
val curr_size = size(ht_table)
val new_ht_table = array(new_size, (HASH_EMPTY, ht.k0, ht.d0))
for j <- 0:curr_size {
if ht_table[j].0 > HASH_DELETED {
ignore(add_(ht, new_ht_table, ht_table[j]))
}
}
ht._state->table = new_ht_table
ht._state->nremoved = 0
}

fun find_idx(ht: ('k, 'd) Hashmap.t, k: 'k): int
{
var hv = match ht.hash_f {
| Some(f) => f(k)
| _ => hash(k)
}
if hv <= HASH_DELETED { hv ^= FNV_1A_OFFSET }
val tabsz = size(ht._state->table)
var perturb = hv, found = -1
var j = int(hv) & (tabsz - 1)
for i <- 0:tabsz+14 {
val (hvj, kj, _) = ht._state->table[j]
if hvj == hv {
if kj == k {
found = j
break
}
} else if hvj == HASH_EMPTY { break }
perturb >>= PERTURB_SHIFT
j = int(uint64(j*5 + 1) + perturb) & (tabsz - 1)
}
found
}

fun mem(ht: ('k, 'd) Hashmap.t, k: 'k): bool = find_idx(ht, k) >= 0
fun find_opt(ht: ('k, 'd) Hashmap.t, k: 'k): 'd?
{
val j = find_idx(ht, k)
if j >= 0 { Some(ht._state->table[j].2) } else { None }
}

fun find_idx_or_insert(ht: ('k, 'd) Hashmap.t, k: 'k): int
{
var hv = match ht.hash_f {
| Some(f) => f(k)
| _ => hash(k)
}
if hv <= HASH_DELETED { hv ^= FNV_1A_OFFSET }
var tabsz = size(ht._state->table)

if ht._state->nelems + ht._state->nremoved >= (tabsz >> 1) {
while tabsz <= (ht._state->nelems + ht._state->nremoved)*2 { tabsz *= 2 }
grow(ht, tabsz)
}

var perturb = hv, found = -1, insert_pos = -1
var j = int(hv) & (tabsz - 1)
for i <- 0:tabsz+14 {
val (hvj, kj, _) = ht._state->table[j]
if hvj == hv {
if kj == k {
found = j
break
}
} else if hvj == HASH_EMPTY {
if insert_pos < 0 { insert_pos = j }
break
} else if hvj == HASH_DELETED && insert_pos < 0 {
insert_pos = j
ht._state->nremoved -= 1
}
perturb >>= PERTURB_SHIFT
j = int(uint64(j*5 + 1) + perturb) & (tabsz - 1)
}
if found >= 0 {
if insert_pos < 0 {
found
} else {
ht._state->table[insert_pos] = ht._state->table[found]
ht._state->table[found] = (HASH_EMPTY, ht.k0, ht.d0)
insert_pos
}
}
else if insert_pos >= 0 {
ht._state->table[insert_pos] = (hv, k, ht.d0)
ht._state->nelems += 1
insert_pos
} else {
throw Fail("can-not insert element into half-empty Hashtable (?!)")
}
}

fun add(ht: ('k, 'd) Hashmap.t, k: 'k, d: 'd) {
val idx = find_idx_or_insert(ht, k)
ht._state->table[idx].2 = d
}

fun remove(ht: ('k, 'd) Hashmap.t, k: 'k) {
val idx = find_idx(ht, k)
if idx >= 0 {
ht._state->table[idx] = (HASH_DELETED, ht.k0, ht.d0)
ht._state->nelems -= 1
ht._state->nremoved += 1
}
}

fun list(ht: ('k, 'd) Hashmap.t): ('k, 'd) list =
[: for j <- 0:size(ht._state->table) {
if ht._state->table[j].0 <= HASH_DELETED { continue }
val (_, kj, dj) = ht._state->table[j]
(kj, dj)
} :]

fun add_list(ht: ('k, 'd) Hashmap.t, data: ('k, 'd) list)
{
var datasz = ht._state->nelems + ht._state->nremoved + data.length()
var curr_size = size(ht._state->table), new_size = curr_size
while new_size <= datasz*2 { new_size *= 2 }
if new_size > curr_size { grow(ht, new_size) }
for (k, d) <- data { add(ht, k, d) }
}

fun from_list(k0: 'k, d0: 'd, hash_f: ('k->hash_t)?, data: ('k, 'd) list): ('k, 'd) Hashmap.t
{
val ht = empty(data.length()*2, k0, d0, hash_f)
add_list(ht, data)
ht
}

fun app(ht: ('k, 'd) Hashmap.t, f: ('k, 'd)->void) {
val table = ht._state->table
for j <- 0:size(table) {
if table[j].0 > HASH_DELETED {
val (_, kj, dj) = table[j]
f(kj, dj)
}
}
}

fun foldl(ht: ('k, 'd) Hashmap.t, f: ('k, 'd, 'r)->'r, res0: 'r): 'r {
val table = ht._state->table
var res = res0
for j <- 0:size(table) {
if table[j].0 > HASH_DELETED {
val (_, kj, dj) = table[j]
res = f(kj, dj, res)
}
}
res
}
24 changes: 12 additions & 12 deletions runtime/ficus/ficus.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ void fx_copy_ptr(const void* src, void* pdst);

///////////////////////////// Numbers ////////////////////////////

typedef union fx_round64_t {double d; int64_t i;} fx_round64_t;
typedef union fx_round32_t {float f; int i;} fx_round32_t;
typedef union fx_bits64_t {double f; int64_t i; uint64_t u;} fx_bits64_t;
typedef union fx_bits32_t {float f; int i; unsigned u;} fx_bits32_t;

#if FX_ARCH_64

Expand All @@ -193,27 +193,27 @@ typedef union fx_round32_t {float f; int i;} fx_round32_t;
// instead, they do "reinterpret_cast<int64_t>(x+magic_number)" right in the registers.
// in the end we propagate the sign bit of the 51-bit result.
FX_INLINE int_ fx_roundf2I(float x) {
fx_round64_t u;
u.d = x + 6755399441055744.0;
fx_bits64_t u;
u.f = x + 6755399441055744.0;
return (u.i << 13) >> 13;
}

FX_INLINE int_ fx_round2I(double x) {
fx_round64_t u;
u.d = x + 6755399441055744.0;
fx_bits64_t u;
u.f = x + 6755399441055744.0;
return (u.i << 13) >> 13;
}
#else
// on 32-bit machines we need just lower 32 bits of the result.
FX_INLINE int_ fx_roundf2I(float x) {
fx_round64_t u;
u.d = x + 6755399441055744.0;
u.f = x + 6755399441055744.0;
return (int_)(int)u.i;
}

FX_INLINE int_ fx_round2I(double x) {
fx_round64_t u;
u.d = x + 6755399441055744.0;
fx_bits64_t u;
u.f = x + 6755399441055744.0;
return (int_)(int)u.i;
}
#endif
Expand All @@ -223,14 +223,14 @@ FX_INLINE int fx_roundf2i(float x) {
// fast vectorized version inside vectorized loops.
// the result is limited to [-2**22,2**22) value range,
// it's still useful for float -> [u]int16 or [u]int8 conversion
fx_round32_t u;
fx_bits32_t u;
u.f = x + 12582912.0f;
return (u.i << 10) >> 10;
}

FX_INLINE int fx_round2i(double x) {
fx_round64_t u;
u.d = x + 6755399441055744.0;
fx_bits64_t u;
u.f = x + 6755399441055744.0;
return (int)u.i;
}

Expand Down
Loading

0 comments on commit f000b15

Please sign in to comment.