-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
optimized (and ordered) IdSet code (#52114)
We have had this smallintset code around for a while for internal purposes, though it was not quite fully general purpose (it didn't have pop). We also have around this tiny global_roots_table (about 1500 entries after building the sysig). This saves about 50% of the space for storing that table. It also optimizes all other IdSet code to not be an inefficient mutable wrapper around an IdDict, but instead be an ordered (by first insertion) set type of its own.
- Loading branch information
Showing
17 changed files
with
355 additions
and
135 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,91 @@ | ||
# This file is a part of Julia. License is MIT: https://julialang.org/license | ||
|
||
# Like Set, but using IdDict | ||
mutable struct IdSet{T} <: AbstractSet{T} | ||
dict::IdDict{T,Nothing} | ||
|
||
IdSet{T}() where {T} = new(IdDict{T,Nothing}()) | ||
IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.dict)) | ||
mutable struct IdSet{K} <: AbstractSet{K} | ||
list::Memory{Any} | ||
idxs::Union{Memory{UInt8}, Memory{UInt16}, Memory{UInt32}} | ||
count::Int | ||
max::Int # n.b. always <= length(list) | ||
IdSet{T}() where {T} = new(Memory{Any}(undef, 0), Memory{UInt8}(undef, 0), 0, 0) | ||
IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.list), copy(s.idxs), s.count, s.max) | ||
end | ||
|
||
IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr) | ||
IdSet() = IdSet{Any}() | ||
|
||
copymutable(s::IdSet) = typeof(s)(s) | ||
emptymutable(s::IdSet{T}, ::Type{U}=T) where {T,U} = IdSet{U}() | ||
copy(s::IdSet) = typeof(s)(s) | ||
|
||
isempty(s::IdSet) = isempty(s.dict) | ||
length(s::IdSet) = length(s.dict) | ||
in(@nospecialize(x), s::IdSet) = haskey(s.dict, x) | ||
push!(s::IdSet, @nospecialize(x)) = (s.dict[x] = nothing; s) | ||
pop!(s::IdSet, @nospecialize(x)) = (pop!(s.dict, x); x) | ||
pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = (x in s ? pop!(s, x) : default) | ||
delete!(s::IdSet, @nospecialize(x)) = (delete!(s.dict, x); s) | ||
haskey(s::IdSet, @nospecialize(key)) = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, key) != -1 | ||
isempty(s::IdSet) = s.count == 0 | ||
length(s::IdSet) = s.count | ||
in(@nospecialize(x), s::IdSet) = haskey(s, x) | ||
function push!(s::IdSet, @nospecialize(x)) | ||
idx = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, x) | ||
if idx >= 0 | ||
s.list[idx + 1] = x | ||
else | ||
if s.max < length(s.list) | ||
idx = s.max | ||
@assert !isassigned(s.list, idx + 1) | ||
s.list[idx + 1] = x | ||
s.max = idx + 1 | ||
else | ||
newidx = RefValue{Int}(0) | ||
setfield!(s, :list, ccall(:jl_idset_put_key, Any, (Any, Any, Ptr{Int}), s.list, x, newidx)) | ||
idx = newidx[] | ||
s.max = idx < 0 ? -idx : idx + 1 | ||
end | ||
@assert s.list[s.max] === x | ||
setfield!(s, :idxs, ccall(:jl_idset_put_idx, Any, (Any, Any, Int), s.list, s.idxs, idx)) | ||
s.count += 1 | ||
end | ||
s | ||
end | ||
function _pop!(s::IdSet, @nospecialize(x)) | ||
removed = ccall(:jl_idset_pop, Int, (Any, Any, Any), s.list, s.idxs, x) | ||
if removed != -1 | ||
s.count -= 1 | ||
while s.max > 0 && !isassigned(s.list, s.max) | ||
s.max -= 1 | ||
end | ||
end | ||
removed | ||
end | ||
pop!(s::IdSet, @nospecialize(x)) = _pop!(s, x) == -1 ? throw(KeyError(x)) : x | ||
pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = _pop!(s, x) == -1 ? default : x | ||
delete!(s::IdSet, @nospecialize(x)) = (_pop!(s, x); s) | ||
|
||
sizehint!(s::IdSet, newsz) = (sizehint!(s.dict, newsz); s) | ||
empty!(s::IdSet) = (empty!(s.dict); s) | ||
function sizehint!(s::IdSet, newsz) | ||
# TODO: grow/compact list and perform rehash, if profitable? | ||
# TODO: shrink? | ||
# s.list = resize(s.list, newsz) | ||
# newsz = _tablesz(newsz) | ||
# oldsz = length(s.idxs) | ||
# #grow at least 25% | ||
# if newsz < (oldsz*5)>>2 | ||
# return s | ||
# end | ||
# rehash!(s, newsz) | ||
nothing | ||
end | ||
|
||
function empty!(s::IdSet) | ||
fill!(s.idxs, 0x00) | ||
list = s.list | ||
for i = 1:s.max | ||
_unsetindex!(list, i) | ||
end | ||
s.count = 0 | ||
s.max = 0 | ||
s | ||
end | ||
|
||
filter!(f, d::IdSet) = unsafe_filter!(f, d) | ||
|
||
function iterate(s::IdSet, state...) | ||
y = iterate(s.dict, state...) | ||
y === nothing && return nothing | ||
((k, _), i) = y | ||
return (k, i) | ||
function iterate(s::IdSet{S}, state=0) where {S} | ||
while true | ||
state += 1 | ||
state > s.max && return nothing | ||
isassigned(s.list, state) && return s.list[state]::S, state | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -194,3 +194,4 @@ size_t jl_eqtable_nextind(jl_genericmemory_t *t, size_t i) | |
|
||
#undef hash_size | ||
#undef max_probe | ||
#undef h2index |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
// This file is a part of Julia. License is MIT: https://julialang.org/license | ||
|
||
|
||
static uint_t idset_hash(size_t idx, jl_value_t *data) | ||
{ | ||
jl_value_t *x = jl_genericmemory_ptr_ref(data, idx); | ||
// x should not be NULL, unless there was concurrent corruption | ||
return x == NULL ? 0 : jl_object_id(x); | ||
} | ||
|
||
static int idset_eq(size_t idx, const void *y, jl_value_t *data, uint_t hv) | ||
{ | ||
jl_value_t *x = jl_genericmemory_ptr_ref(data, idx); | ||
// x should not be NULL, unless there was concurrent corruption | ||
return x == NULL ? 0 : jl_egal(x, (jl_value_t*)y); | ||
} | ||
|
||
jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz) | ||
{ | ||
if (newsz == 0) | ||
return idxs; | ||
newsz = next_power_of_two(newsz); | ||
//if (idxs->length == newsz) | ||
// jl_idset_put_idx(keys, idxs, -newsz+1); | ||
//else | ||
return smallintset_rehash(idxs, idset_hash, (jl_value_t*)keys, newsz, 0); | ||
} | ||
|
||
// Return idx if key is in hash, otherwise -1 | ||
// Note: lookup in the IdSet is permitted concurrently, if you avoid deletions, | ||
// and assuming you do use an external lock around all insertions | ||
ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT | ||
{ | ||
uintptr_t hv = jl_object_id(key); | ||
return jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 0); | ||
} | ||
|
||
jl_value_t *jl_idset_get(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT | ||
{ | ||
ssize_t idx = jl_idset_peek_bp(keys, idxs, key); | ||
if (idx == -1) | ||
return NULL; | ||
return jl_genericmemory_ptr_ref(keys, idx); | ||
} | ||
|
||
|
||
static ssize_t idset_compact(jl_genericmemory_t *keys) | ||
{ | ||
// compact keys before rehashing idxs | ||
ssize_t i, j; | ||
ssize_t rehash = 0; | ||
for (i = j = 0; i < keys->length; i++) { | ||
jl_value_t *k = jl_genericmemory_ptr_ref(keys, i); | ||
if (k != NULL) { | ||
if (i != j) { | ||
rehash = 1; | ||
jl_genericmemory_ptr_set(keys, j, k); | ||
jl_genericmemory_ptr_set(keys, i, NULL); | ||
} | ||
j++; | ||
} | ||
} | ||
return rehash ? -j : j; | ||
} | ||
|
||
jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx) | ||
{ | ||
ssize_t l = keys->length; | ||
ssize_t i = l; | ||
while (i > 0 && jl_genericmemory_ptr_ref(keys, i - 1) == NULL) | ||
i--; | ||
// i points to the place to insert | ||
*newidx = i; | ||
if (i == l) { | ||
i = idset_compact(keys); | ||
if (i < 0) { | ||
*newidx = i - 1; | ||
i = -i; | ||
} | ||
if (i >= l / 3 * 2) { | ||
size_t nl = l < 4 ? 4 : (l * 3) >> 1; // grow space by 50% if less than 33% free after compacting | ||
jl_genericmemory_t *nk = jl_alloc_genericmemory(jl_memory_any_type, nl); | ||
if (i > 0) | ||
memcpy(nk->ptr, keys->ptr, sizeof(void*) * i); | ||
keys = nk; | ||
} | ||
} | ||
assert(jl_genericmemory_ptr_ref(keys, i) == NULL); | ||
jl_genericmemory_ptr_set(keys, i, key); | ||
return keys; | ||
} | ||
|
||
jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx) | ||
{ | ||
_Atomic(jl_genericmemory_t*) newidxs = idxs; | ||
JL_GC_PUSH1(&newidxs); | ||
if (idx < 0) { // full rehash | ||
smallintset_empty(idxs); | ||
for (ssize_t i = 0; i < -idx; i++) | ||
if (jl_genericmemory_ptr_ref(keys, i) != NULL) | ||
jl_smallintset_insert(&newidxs, NULL, idset_hash, i, (jl_value_t*)keys); | ||
} | ||
else { | ||
jl_smallintset_insert(&newidxs, NULL, idset_hash, idx, (jl_value_t*)keys); | ||
} | ||
JL_GC_POP(); | ||
return jl_atomic_load_relaxed(&newidxs); | ||
} | ||
|
||
/* returns idx if key is in hash, otherwise -1 */ | ||
ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT | ||
{ | ||
uintptr_t hv = jl_object_id(key); | ||
ssize_t idx = jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 1); | ||
if (idx != -1) | ||
jl_genericmemory_ptr_set(keys, idx, NULL); | ||
return idx; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.