From d3c0d28e605dc302562146406eef71e41c4cbd68 Mon Sep 17 00:00:00 2001 From: kpamnany Date: Tue, 27 Jun 2017 17:53:49 +0530 Subject: [PATCH 1/4] threading: Integrating partr (almost done!) Added partr code. Abstracted interface to threading infrastructure. --- Make.inc | 11 + base/Makefile | 5 + base/boot.jl | 12 + base/channels.jl | 200 +++- base/event.jl | 255 ++-- base/stream.jl | 8 +- base/summarysize.jl | 12 + base/task.jl | 53 +- base/threadingconstructs.jl | 6 +- contrib/julia-config.jl | 3 + doc/src/manual/faq.md | 3 +- src/Makefile | 4 +- src/atomics.h | 6 + src/builtins.c | 3 + src/dump.c | 8 +- src/forkjoin-ti.c | 356 ++++++ src/forkjoin-ti.h | 21 + src/gc-debug.c | 10 +- src/gc.c | 24 +- src/init.c | 5 +- src/julia.h | 111 +- src/julia_internal.h | 17 +- src/julia_threads.h | 6 +- src/locks.h | 16 + src/options.h | 6 + src/partr.c | 1154 +++++++++++++++++++ src/partr.h | 46 + src/staticdata.c | 3 + src/task.c | 169 ++- src/threadgroup.c | 206 ---- src/threadgroup.h | 44 - src/threading.c | 376 +----- src/threading.h | 70 +- stdlib/Distributed/test/distributed_exec.jl | 4 +- stdlib/FileWatching/src/FileWatching.jl | 6 +- stdlib/FileWatching/test/runtests.jl | 6 +- stdlib/Sockets/src/addrinfo.jl | 8 +- stdlib/Sockets/test/runtests.jl | 6 +- test/channels.jl | 40 +- test/file.jl | 2 +- test/misc.jl | 1 + test/read.jl | 3 +- test/spawn.jl | 6 +- test/threads.jl | 2 +- 44 files changed, 2459 insertions(+), 854 deletions(-) create mode 100644 src/forkjoin-ti.c create mode 100644 src/forkjoin-ti.h create mode 100644 src/partr.c create mode 100644 src/partr.h delete mode 100644 src/threadgroup.c delete mode 100644 src/threadgroup.h diff --git a/Make.inc b/Make.inc index 81ff923156ed8..19e6265b65b3e 100644 --- a/Make.inc +++ b/Make.inc @@ -68,6 +68,12 @@ USEIFC ?= 0 # Enable threading with one thread JULIA_THREADS := 1 +# Enable the parallel task runtime +JULIA_PARTR ?= 0 +ifeq ($(JULIA_THREADS), 0) +JULIA_PARTR := 0 +endif + ifeq ($(USE_MKL), 1) $(warning "The julia make variable USE_MKL has been renamed to USE_INTEL_MKL") USE_INTEL_MKL := 1 @@ -1060,6 +1066,11 @@ ifneq ($(JULIA_THREADS), 0) JCPPFLAGS += -DJULIA_ENABLE_THREADING -DJULIA_NUM_THREADS=$(JULIA_THREADS) endif +# Parallel task runtime +ifneq ($(JULIA_PARTR), 0) +JCPPFLAGS += -DJULIA_ENABLE_PARTR +endif + # Intel VTune Amplifier ifeq ($(USE_INTEL_JITEVENTS), 1) JCPPFLAGS += -DJL_USE_INTEL_JITEVENTS diff --git a/base/Makefile b/base/Makefile index b569ed5227ffc..2d0612368c17c 100644 --- a/base/Makefile +++ b/base/Makefile @@ -71,6 +71,11 @@ else @echo "const PRIVATE_LIBDIR = \"$(private_libdir_rel)\"" >> $@ @echo "const INCLUDEDIR = \"$(includedir_rel)\"" >> $@ endif +ifneq ($(JULIA_PARTR), 0) + @echo "const JULIA_PARTR = true" >> $@ +else + @echo "const JULIA_PARTR = false" >> $@ +endif @# This to ensure that we always rebuild this file, but only when it is modified do we touch build_h.jl, @# ensuring we rebuild the system image as infrequently as possible diff --git a/base/boot.jl b/base/boot.jl index 03b389b0f7e1f..67ad236105eaf 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -128,6 +128,17 @@ # name::Symbol #end +#if JULIA_PARTR +#mutable struct Task +# storage::Any +# state::Symbol +# result::Any +# exception::Any +# backtrace::Any +# logstate::Any +# code::Any +#end +#else #mutable struct Task # parent::Task # storage::Any @@ -139,6 +150,7 @@ # logstate::Any # code::Any #end +#end export # key types diff --git a/base/channels.jl b/base/channels.jl index 090fad3ad877f..6fc56ded3ee64 100644 --- a/base/channels.jl +++ 
b/base/channels.jl @@ -2,6 +2,10 @@ abstract type AbstractChannel{T} end +if JULIA_PARTR + +using Base.Threads + """ Channel{T}(sz::Int) @@ -21,7 +25,54 @@ mutable struct Channel{T} <: AbstractChannel{T} cond_take::Condition # waiting for data to become available cond_put::Condition # waiting for a writeable slot state::Symbol - excp::Union{Exception, Nothing} # exception to be thrown when state != :open + excp::Union{Exception,Nothing} # exception to be thrown when state != :open + + data::Vector{T} + sz_max::Int # maximum size of channel + lock::SpinLock + + # The following fields synchronize tasks that use unbuffered channels + # (sz_max == 0). + nwaiters::Atomic{Int} + takers::Vector{Task} + putters::Vector{Task} + + function Channel{T}(sz::Float64) where T + Channel{T}(sz == Inf ? typemax(Int) : convert(Int, sz)) + end + function Channel{T}(sz::Integer) where T + sz < 0 && throw(ArgumentError("Channel size must be 0, a positive integer, or Inf")) + ch = new(Condition(), Condition(), :open, nothing, Vector{T}(), sz, SpinLock(), Atomic()) + if sz == 0 + ch.takers = Vector{Task}() + ch.putters = Vector{Task}() + end + return ch + end +end + +else # !JULIA_PARTR + +""" + Channel{T}(sz::Int) + +Constructs a `Channel` with an internal buffer that can hold a maximum of `sz` objects +of type `T`. +[`put!`](@ref) calls on a full channel block until an object is removed with [`take!`](@ref). + +`Channel(0)` constructs an unbuffered channel. `put!` blocks until a matching `take!` is called. +And vice-versa. + +Other constructors: + +* `Channel(Inf)`: equivalent to `Channel{Any}(typemax(Int))` +* `Channel(sz)`: equivalent to `Channel{Any}(sz)` +""" +mutable struct Channel{T} <: AbstractChannel{T} + cond_take::Condition # waiting for data to become available + cond_put::Condition # waiting for a writeable slot + state::Symbol + excp::Union{Exception, Nothing} # exception to be thrown when state != :open data::Vector{T} sz_max::Int # maximum size of channel @@ -51,6 +102,8 @@ mutable struct Channel{T} <: AbstractChannel{T} end end +end # !JULIA_PARTR + Channel(sz) = Channel{Any}(sz) # special constructors @@ -88,13 +141,13 @@ Referencing the created task: ```jldoctest julia> taskref = Ref{Task}(); -julia> chnl = Channel(c->(@show take!(c)); taskref=taskref); +julia> chnl = Channel(c->println(take!(c)); taskref=taskref); julia> istaskdone(taskref[]) false julia> put!(chnl, "Hello"); -take!(c) = "Hello" +Hello julia> istaskdone(taskref[]) true @@ -110,7 +163,6 @@ function Channel(func::Function; ctype=Any, csize=0, taskref=nothing) return chnl end - closed_exception() = InvalidStateException("Channel is closed.", :closed) isbuffered(c::Channel) = c.sz_max==0 ? false : true @@ -121,6 +173,7 @@ function check_channel_state(c::Channel) throw(closed_exception()) end end + """ close(c::Channel) @@ -255,6 +308,25 @@ function put!(c::Channel{T}, v) where T isbuffered(c) ? 
put_buffered(c,v) : put_unbuffered(c,v) end +if JULIA_PARTR + +function put_buffered(c::Channel, v) + while true + lock(c.lock) + if length(c.data) == c.sz_max + unlock(c.lock) + wait(c.cond_put) + else + push!(c.data, v) + notify(c.cond_take, nothing, true, false) + unlock(c.lock) + return v + end + end +end + +else # !JULIA_PARTR + function put_buffered(c::Channel, v) while length(c.data) == c.sz_max wait(c.cond_put) @@ -266,6 +338,28 @@ function put_buffered(c::Channel, v) v end +end # !JULIA_PARTR + +if JULIA_PARTR + +function put_unbuffered(c::Channel, v) + while true + lock(c.lock) + if length(c.takers) > 0 + taker = popfirst!(c.takers) + unlock(c.lock) + yield(taker, v) + return v + else + unlock(c.lock) + c.nwaiters[] > 0 && notify(c.cond_take, nothing, false, false) + wait(c.cond_put) + end + end +end + +else # !JULIA_PARTR + function put_unbuffered(c::Channel, v) if length(c.takers) == 0 push!(c.putters, current_task()) @@ -283,8 +377,37 @@ function put_unbuffered(c::Channel, v) return v end +end # !JULIA_PARTR + push!(c::Channel, v) = put!(c, v) +if JULIA_PARTR + +""" + fetch(c::Channel) + +Wait for and get the first available item from the channel. Does not +remove the item. `fetch` is unsupported on an unbuffered (0-size) channel. +""" +function fetch(c::Channel) + c.sz_max == 0 && throw(ErrorException("`fetch` is not supported on an unbuffered Channel")) + while true + check_channel_state(c) + lock(c.lock) + if length(c.data) < 1 + unlock(c.lock) + # TODO: fix the race here + wait(c.cond_take) + else + v = c.data[1] + unlock(c.lock) + return v + end + end +end + +else # !JULIA_PARTR + """ fetch(c::Channel) @@ -298,6 +421,7 @@ function fetch_buffered(c::Channel) end fetch_unbuffered(c::Channel) = throw(ErrorException("`fetch` is not supported on an unbuffered Channel.")) +end # !JULIA_PARTR """ take!(c::Channel) @@ -308,6 +432,26 @@ For unbuffered channels, blocks until a [`put!`](@ref) is performed by a differe task. """ take!(c::Channel) = isbuffered(c) ? take_buffered(c) : take_unbuffered(c) + +if JULIA_PARTR + +function take_buffered(c::Channel) + while true + lock(c.lock) + if length(c.data) > 0 + v = popfirst!(c.data) + unlock(c.lock) + notify(c.cond_put, nothing, false, false) + return v + end + unlock(c.lock) + check_channel_state(c) + wait(c.cond_take) + end +end + +else # !JULIA_PARTR + function take_buffered(c::Channel) wait(c) v = popfirst!(c.data) @@ -315,7 +459,29 @@ function take_buffered(c::Channel) v end -popfirst!(c::Channel) = take!(c) +end # !JULIA_PARTR + +if JULIA_PARTR + +function take_unbuffered(c::Channel{T}) where T + check_channel_state(c) + lock(c.lock) + push!(c.takers, current_task()) + unlock(c.lock) + notify(c.cond_put, nothing, false, false) + try + # We wait here for a putter which will reschedule us with the + # value it is putting (which is returned by this wait call). + return wait()::T + catch ex + lock(c.lock) + filter!(x->x!=current_task(), c.takers) + unlock(c.lock) + rethrow(ex) + end +end + +else # !JULIA_PARTR # 0-size channel function take_unbuffered(c::Channel{T}) where T @@ -338,6 +504,10 @@ function take_unbuffered(c::Channel{T}) where T end end +end # !JULIA_PARTR + +popfirst!(c::Channel) = take!(c) + """ isready(c::Channel) @@ -348,7 +518,14 @@ For unbuffered channels returns `true` if there are tasks waiting on a [`put!`](@ref). """ isready(c::Channel) = n_avail(c) > 0 + +if JULIA_PARTR +n_avail(c::Channel) = lock(c.lock) do + isbuffered(c) ? length(c.data) : isempty(c.cond_put) ? 
0 : 1 +end +else # !JULIA_PARTR n_avail(c::Channel) = isbuffered(c) ? length(c.data) : length(c.putters) +end # !JULIA_PARTR wait(c::Channel) = isbuffered(c) ? wait_impl(c) : wait_unbuffered(c) function wait_impl(c::Channel) @@ -359,6 +536,17 @@ function wait_impl(c::Channel) nothing end +if JULIA_PARTR +function wait_unbuffered(c::Channel) + atomic_add!(c.nwaiters, 1) + try + wait_impl(c) + finally + atomic_sub!(c.nwaiters, 1) + end + nothing +end +else # !JULIA_PARTR function wait_unbuffered(c::Channel) c.waiters += 1 try @@ -368,6 +556,7 @@ function wait_unbuffered(c::Channel) end nothing end +end # !JULIA_PARTR function notify_error(c::Channel, err) notify_error(c.cond_take, err) @@ -379,6 +568,7 @@ function notify_error(c::Channel, err) foreach(t->schedule(t, err; error=true), waiters) end end + notify_error(c::Channel) = notify_error(c, c.excp) eltype(::Type{Channel{T}}) where {T} = T diff --git a/base/event.jl b/base/event.jl index cf5e93cc25934..c0a28b23edbce 100644 --- a/base/event.jl +++ b/base/event.jl @@ -2,41 +2,45 @@ ## condition variables -""" - Condition() -Create an edge-triggered event source that tasks can wait for. Tasks that call [`wait`](@ref) on a -`Condition` are suspended and queued. Tasks are woken up when [`notify`](@ref) is later called on -the `Condition`. Edge triggering means that only tasks waiting at the time [`notify`](@ref) is -called can be woken up. For level-triggered notifications, you must keep extra state to keep -track of whether a notification has happened. The [`Channel`](@ref) type does -this, and so can be used for level-triggered events. -""" -mutable struct Condition - waitq::Vector{Any} +if JULIA_PARTR - Condition() = new([]) -end +import Core.Condition -""" - wait([x]) +Condition() = ccall(:jl_condition_new, Ref{Condition}, ()) -Block the current task until some event occurs, depending on the type of the argument: +wait(c::Condition) = ccall(:jl_task_wait, Any, (Ref{Condition},), c) -* [`Channel`](@ref): Wait for a value to be appended to the channel. -* [`Condition`](@ref): Wait for [`notify`](@ref) on a condition. -* `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process - can be used to determine success or failure. -* [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, the - exception is propagated (re-thrown in the task that called `wait`). -* [`RawFD`](@ref): Wait for changes on a file descriptor (see the `FileWatching` package). +notify(c::Condition, arg, all, error) = ccall(:jl_task_notify, Cvoid, (Ref{Condition},Any,Int8,Int8), c, arg, all, error) +notify(c::Condition, @nospecialize(arg = nothing); all=true, error=false) = notify(c, arg, all, error) +notify_error(c::Condition, err) = notify(c, err, true, true) -If no argument is passed, the task blocks for an undefined period. A task can only be -restarted by an explicit call to [`schedule`](@ref) or [`yieldto`](@ref). 
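For illustration (not part of the patch), a minimal usage sketch of these primitives; the behavior is the same whether `wait`/`notify` dispatch to the partr `ccall` bindings above or to the pure-Julia fallback below:

```julia
# Edge-triggered wakeup: only tasks already waiting when notify() runs are woken.
cond = Condition()
waiter = @async wait(cond)   # suspends inside wait() until notified
yield()                      # let the waiter block first
notify(cond, 42)             # wake it, passing 42 as wait()'s return value
fetch(waiter)                # == 42
```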
+isempty(c::Condition) = ccall(:jl_condition_isempty, Cint, (Ref{Condition},), c) == 1 + +schedule(t::Task, @nospecialize(arg = nothing); error=false) = + ccall(:jl_task_spawn, Ref{Task}, (Ref{Task},Any,Int8,Int8,Int8), + t, arg, error, true, true) + +fetch(t::Task) = ccall(:jl_task_sync, Any, (Ref{Task},), t) + +yield() = ccall(:jl_task_yield, Any, (Cint,), 1) +yield(t::Task, @nospecialize x = nothing) = (schedule(t, x); yield()) +yieldto(t::Task, @nospecialize x = nothing) = (schedule(t, x); wait()) +try_yieldto(undo, reftask::Ref{Task}) = (schedule(reftask[]); wait()) +throwto(t::Task, @nospecialize exc) = (schedule(t, exc, error=true); wait()) + +wait() = ccall(:jl_task_yield, Any, (Cint,), 0) + + +else # !JULIA_PARTR + + +mutable struct Condition + waitq::Vector{Any} + + Condition() = new([]) +end -Often `wait` is called within a `while` loop to ensure a waited-for condition is met before -proceeding. -""" function wait(c::Condition) ct = current_task() @@ -50,15 +54,6 @@ function wait(c::Condition) end end -""" - notify(condition, val=nothing; all=true, error=false) - -Wake up tasks waiting for a condition, passing them `val`. If `all` is `true` (the default), -all waiting tasks are woken, otherwise only one is. If `error` is `true`, the passed value -is raised as an exception in the woken tasks. - -Return the count of tasks woken up. Return 0 if no tasks are waiting on `condition`. -""" notify(c::Condition, @nospecialize(arg = nothing); all=true, error=false) = notify(c, arg, all, error) function notify(c::Condition, arg, all, error) cnt = 0 @@ -78,6 +73,8 @@ end notify_error(c::Condition, err) = notify(c, err, true, true) +isempty(c::Condition) = isempty(c.waitq) + n_waiters(c::Condition) = length(c.waitq) ## scheduler and work queue @@ -94,36 +91,6 @@ end schedule(t::Task) = enq_work(t) -""" - schedule(t::Task, [val]; error=false) - -Add a [`Task`](@ref) to the scheduler's queue. This causes the task to run constantly when the system -is otherwise idle, unless the task performs a blocking operation such as [`wait`](@ref). - -If a second argument `val` is provided, it will be passed to the task (via the return value of -[`yieldto`](@ref)) when it runs again. If `error` is `true`, the value is raised as an exception in -the woken task. - -# Examples -```jldoctest -julia> a5() = sum(i for i in 1:1000); - -julia> b = Task(a5); - -julia> istaskstarted(b) -false - -julia> schedule(b); - -julia> yield(); - -julia> istaskstarted(b) -true - -julia> istaskdone(b) -true -``` -""" function schedule(t::Task, arg; error=false) # schedule a task to be (re)started with the given value or exception if error @@ -134,34 +101,8 @@ function schedule(t::Task, arg; error=false) return enq_work(t) end -# fast version of `schedule(t, arg); wait()` -function schedule_and_wait(t::Task, arg=nothing) - t.state == :runnable || error("schedule: Task not runnable") - if isempty(Workqueue) - return yieldto(t, arg) - else - t.result = arg - push!(Workqueue, t) - t.state = :queued - end - return wait() -end - -""" - yield() - -Switch to the scheduler to allow another scheduled task to run. A task that calls this -function is still runnable, and will be restarted immediately if there are no other runnable -tasks. -""" yield() = (enq_work(current_task()); wait()) -""" - yield(t::Task, arg = nothing) - -A fast, unfair-scheduling version of `schedule(t, arg); yield()` which -immediately yields to `t` before calling the scheduler. 
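Because a `Condition` is edge-triggered, consumers conventionally re-check their predicate in a `while` loop around `wait`; a hedged sketch of that pattern (illustrative only):

```julia
items = Any[]
cond = Condition()

consumer = @async begin
    while isempty(items)     # re-check after every wakeup; wakeups can race
        wait(cond)
    end
    popfirst!(items)
end
yield()                      # let the consumer block
push!(items, "work"); notify(cond)
fetch(consumer)              # == "work"
```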
-""" function yield(t::Task, @nospecialize x = nothing) t.state == :runnable || error("schedule: Task not runnable") t.result = x @@ -169,14 +110,6 @@ function yield(t::Task, @nospecialize x = nothing) return try_yieldto(ensure_rescheduled, Ref(t)) end -""" - yieldto(t::Task, arg = nothing) - -Switch to the given task. The first time a task is switched to, the task's function is -called with no arguments. On subsequent switches, `arg` is returned from the task's last -call to `yieldto`. This is a low-level call that only switches tasks, not considering states -or scheduling in any way. Its use is discouraged. -""" function yieldto(t::Task, @nospecialize x = nothing) t.result = x return try_yieldto(identity, Ref(t)) @@ -262,6 +195,126 @@ function wait() # unreachable end +end # JULIA_PARTR + +""" + isempty(condition) + +Return `true` if no tasks are waiting on the condition, `false` otherwise. +""" +isempty(c::Condition) + +""" + Condition() + +Create an edge-triggered event source that tasks can wait for. Tasks that call [`wait`](@ref) on a +`Condition` are suspended and queued. Tasks are woken up when [`notify`](@ref) is later called on +the `Condition`. Edge triggering means that only tasks waiting at the time [`notify`](@ref) is +called can be woken up. For level-triggered notifications, you must keep extra state to keep +track of whether a notification has happened. The [`Channel`](@ref) type does +this, and so can be used for level-triggered events. +""" +Condition + +""" + wait([x]) + +Block the current task until some event occurs, depending on the type of the argument: + +* [`Channel`](@ref): Wait for a value to be appended to the channel. +* [`Condition`](@ref): Wait for [`notify`](@ref) on a condition. +* `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process + can be used to determine success or failure. +* [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, the + exception is propagated (re-thrown in the task that called `wait`). +* [`RawFD`](@ref): Wait for changes on a file descriptor (see the `FileWatching` package). + +If no argument is passed, the task blocks for an undefined period. A task can only be +restarted by an explicit call to [`schedule`](@ref) or [`yieldto`](@ref). + +Often `wait` is called within a `while` loop to ensure a waited-for condition is met before +proceeding. +""" +wait + +""" + notify(condition, val=nothing; all=true, error=false) + +Wake up tasks waiting for a condition, passing them `val`. If `all` is `true` (the default), +all waiting tasks are woken, otherwise only one is. If `error` is `true`, the passed value +is raised as an exception in the woken tasks. + +Return the count of tasks woken up. Return 0 if no tasks are waiting on `condition`. +""" +notify + +""" + fetch(t::Task) + +Wait for a Task to finish, then return its result value. If the task fails with an +exception, the exception is propagated (re-thrown in the task that called fetch). +""" +fetch(t::Task) + +""" + yield() + +Switch to the scheduler to allow another scheduled task to run. A task that calls this +function is still runnable, and will be restarted immediately if there are no other runnable +tasks. +""" +yield + +""" + yield(t::Task, arg = nothing) + +A fast, unfair-scheduling version of `schedule(t, arg); yield()` which +immediately yields to `t` before calling the scheduler. +""" +yield(t::Task) + +""" + yieldto(t::Task, arg = nothing) + +Switch to the given task. 
The first time a task is switched to, the task's function is +called with no arguments. On subsequent switches, `arg` is returned from the task's last +call to `yieldto`. This is a low-level call that only switches tasks, not considering states +or scheduling in any way. Its use is discouraged. +""" +yieldto + +""" + schedule(t::Task, [val]; error=false) + +Add a [`Task`](@ref) to the scheduler's queue. This causes the task to run constantly when the system +is otherwise idle, unless the task performs a blocking operation such as [`wait`](@ref). + +If a second argument `val` is provided, it will be passed to the task (via the return value of +[`yieldto`](@ref)) when it runs again. If `error` is `true`, the value is raised as an exception in +the woken task. + +# Examples +```jldoctest +julia> a5() = sum(i for i in 1:1000); + +julia> b = Task(a5); + +julia> istaskstarted(b) +false + +julia> schedule(b); + +julia> yield(); + +julia> istaskstarted(b) +true + +julia> istaskdone(b) +true +``` +""" +schedule + if Sys.iswindows() pause() = ccall(:Sleep, stdcall, Cvoid, (UInt32,), 0xffffffff) else diff --git a/base/stream.jl b/base/stream.jl index 3d54865c89732..bee3e67b1b0ae 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -274,7 +274,7 @@ function wait_readbyte(x::LibuvStream, c::UInt8) wait(x.readnotify) end finally - if isempty(x.readnotify.waitq) + if isempty(x.readnotify) stop_reading(x) # stop reading iff there are currently no other read clients of the stream end unpreserve_handle(x) @@ -297,7 +297,7 @@ function wait_readnb(x::LibuvStream, nb::Int) wait(x.readnotify) end finally - if isempty(x.readnotify.waitq) + if isempty(x.readnotify) stop_reading(x) # stop reading iff there are currently no other read clients of the stream end if oldthrottle <= x.throttle <= nb @@ -703,7 +703,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) return bytesavailable(newbuf) finally s.buffer = sbuf - if !isempty(s.readnotify.waitq) + if !isempty(s.readnotify) start_reading(s) # resume reading iff there are currently other read clients of the stream end end @@ -739,7 +739,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt) nb == bytesavailable(newbuf) || throw(EOFError()) finally s.buffer = sbuf - if !isempty(s.readnotify.waitq) + if !isempty(s.readnotify) start_reading(s) # resume reading iff there are currently other read clients of the stream end end diff --git a/base/summarysize.jl b/base/summarysize.jl index a2974b967ce3e..7b54150546786 100644 --- a/base/summarysize.jl +++ b/base/summarysize.jl @@ -147,6 +147,16 @@ function (ss::SummarySize)(obj::Module) return size end +if JULIA_PARTR + +function (ss::SummarySize)(obj::Task) + haskey(ss.seen, obj) ? (return 0) : (ss.seen[obj] = true) + size::Int = Core.sizeof(obj) + return size +end + +else + function (ss::SummarySize)(obj::Task) haskey(ss.seen, obj) ? 
(return 0) : (ss.seen[obj] = true) size::Int = Core.sizeof(obj) @@ -161,3 +171,5 @@ function (ss::SummarySize)(obj::Task) # TODO: add stack size, and possibly traverse stack roots return size end + +end diff --git a/base/task.jl b/base/task.jl index 4045cde09ffa1..0a62394945a93 100644 --- a/base/task.jl +++ b/base/task.jl @@ -177,6 +177,15 @@ function task_local_storage(body::Function, key, val) end end +if JULIA_PARTR + +function wait(t::Task) + fetch(t) + return nothing +end + +else # !JULIA_PARTR + # NOTE: you can only wait for scheduled tasks function wait(t::Task) if !istaskdone(t) @@ -192,17 +201,12 @@ function wait(t::Task) end end -""" - fetch(t::Task) - -Wait for a Task to finish, then return its result value. If the task fails with an -exception, the exception is propagated (re-thrown in the task that called fetch). -""" function fetch(t::Task) wait(t) task_result(t) end +end # !JULIA_PARTR ## lexically-scoped waiting for multiple items @@ -248,8 +252,6 @@ macro sync(block) end end -# schedule an expression to run asynchronously - """ @async @@ -274,6 +276,39 @@ function register_taskdone_hook(t::Task, hook) t end +if JULIA_PARTR + +# runtime system hook called when a task finishes +function task_done_hook(t::Task) + # `finish_task` sets `sigatomic` before entering this function + err = istaskfailed(t) + result = task_result(t) + handled = false + if err + t.backtrace = catch_backtrace() + end + + # Execute any other hooks registered in the TLS + if isa(t.storage, IdDict) && haskey(t.storage, :TASKDONE_HOOKS) + foreach(hook -> hook(t), t.storage[:TASKDONE_HOOKS]) + delete!(t.storage, :TASKDONE_HOOKS) + handled = true + end + + if err && !handled + if isa(result,InterruptException) && isdefined(Base,:active_repl_backend) && + active_repl_backend.backend_task.state == :runnable && + #isempty(Workqueue) && # TODO + active_repl_backend.in_eval + throwto(active_repl_backend.backend_task, result) # this terminates the task + end + end + # Clear sigatomic before waiting + sigatomic_end() +end + +else # !JULIA_PARTR + # runtime system hook called when a task finishes function task_done_hook(t::Task) # `finish_task` sets `sigatomic` before entering this function @@ -321,6 +356,8 @@ function task_done_hook(t::Task) end end +end # !JULIA_PARTR + """ timedwait(testcb::Function, secs::Float64; pollint::Float64=0.1) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index 61a1f598546a6..ebe11096391ce 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -96,7 +96,11 @@ macro threads(args...) 
throw(ArgumentError("need an expression argument to @threads")) end if ex.head === :for - return _threadsfor(ex.args[1],ex.args[2]) + if Base.JULIA_PARTR + return esc(ex) + else + return _threadsfor(ex.args[1],ex.args[2]) + end else throw(ArgumentError("unrecognized argument to @threads")) end diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl index 8ac742fade6c1..db5a5f0c28de6 100755 --- a/contrib/julia-config.jl +++ b/contrib/julia-config.jl @@ -62,6 +62,9 @@ function cflags() if threadingOn() print(flags, " -DJULIA_ENABLE_THREADING=1") end + if Base.JULIA_PARTR + print(flags, " -DJULIA_ENABLE_PARTR") + end if Sys.isunix() print(flags, " -fPIC") end diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index e6ae749fd1b5a..4db755c652d89 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -774,8 +774,7 @@ julia> @sync for i in 1:3 You can lock your writes with a `ReentrantLock` like this: ```jldoctest -julia> l = ReentrantLock() -ReentrantLock(nothing, Condition(Any[]), 0) +julia> l = ReentrantLock(); julia> @sync for i in 1:3 @async begin diff --git a/src/Makefile b/src/Makefile index d8e1c6a49b115..cc8b459b7abb4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -42,7 +42,7 @@ SRCS := \ jltypes gf typemap ast builtins module interpreter symbol \ dlload sys init task array dump staticdata toplevel jl_uv datatype \ simplevector APInt-C runtime_intrinsics runtime_ccall precompile \ - threadgroup threading stackwalk gc gc-debug gc-pages gc-stacks method \ + threading forkjoin-ti partr stackwalk gc gc-debug gc-pages gc-stacks method \ jlapi signal-handling safepoint jloptions timing subtype rtutils \ crc32c processor @@ -203,7 +203,7 @@ $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c) $(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc) -$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h threadgroup.h) +$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h) $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h # archive library file rules diff --git a/src/atomics.h b/src/atomics.h index 493f0297892bc..ebfc66bbd83f4 100644 --- a/src/atomics.h +++ b/src/atomics.h @@ -62,8 +62,12 @@ // the __atomic builtins or c11 atomics with GNU extension or c11 _Generic # define jl_atomic_compare_exchange(obj, expected, desired) \ __sync_val_compare_and_swap(obj, expected, desired) +# define jl_atomic_bool_compare_exchange(obj, expected, desired) \ + __sync_bool_compare_and_swap(obj, expected, desired) # define jl_atomic_exchange(obj, desired) \ __atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST) +# define jl_atomic_exchange_generic(obj, desired, orig)\ + __atomic_exchange(obj, desired, orig, __ATOMIC_SEQ_CST) # define jl_atomic_exchange_relaxed(obj, desired) \ __atomic_exchange_n(obj, desired, __ATOMIC_RELAXED) // TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock @@ -115,6 +119,7 @@ jl_atomic_fetch_add(T *obj, T2 arg) { return (T)_InterlockedExchangeAdd64((volatile __int64*)obj, (__int64)arg); } +// TODO: jl_atomic_exchange_generic #define jl_atomic_fetch_add_relaxed(obj, 
arg) jl_atomic_fetch_add(obj, arg) // and @@ -200,6 +205,7 @@ jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired) return (T)_InterlockedCompareExchange64((volatile __int64*)obj, (__int64)desired, (__int64)expected); } +// TODO: jl_atomic_bool_compare_exchange // atomic exchange template static inline typename std::enable_if::type diff --git a/src/builtins.c b/src/builtins.c index edf5dc35ff4c1..c00ba5e189ec0 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1243,6 +1243,9 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("Ref", (jl_value_t*)jl_ref_type); add_builtin("Ptr", (jl_value_t*)jl_pointer_type); add_builtin("Task", (jl_value_t*)jl_task_type); +#ifdef JULIA_ENABLE_PARTR + add_builtin("Condition", (jl_value_t*)jl_condition_type); +#endif add_builtin("AbstractArray", (jl_value_t*)jl_abstractarray_type); add_builtin("DenseArray", (jl_value_t*)jl_densearray_type); diff --git a/src/dump.c b/src/dump.c index d29ae8565bdde..c54cfc4af11e6 100644 --- a/src/dump.c +++ b/src/dump.c @@ -3191,7 +3191,10 @@ void jl_init_serializer(void) jl_box_int64(12), jl_box_int64(13), jl_box_int64(14), jl_box_int64(15), jl_box_int64(16), jl_box_int64(17), jl_box_int64(18), jl_box_int64(19), jl_box_int64(20), - jl_box_int64(21), jl_box_int64(22), + jl_box_int64(21), +#ifndef JULIA_ENABLE_PARTR + jl_box_int64(22), +#endif jl_bool_type, jl_linenumbernode_type, jl_pinode_type, jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type, @@ -3205,6 +3208,9 @@ void jl_init_serializer(void) jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type, jl_typeofbottom_type, jl_namedtuple_type, jl_array_int32_type, jl_typedslot_type, jl_uint32_type, jl_uint64_type, +#ifdef JULIA_ENABLE_PARTR + jl_condition_type, +#endif ptls->root_task, diff --git a/src/forkjoin-ti.c b/src/forkjoin-ti.c new file mode 100644 index 0000000000000..8b7d2d3cc1620 --- /dev/null +++ b/src/forkjoin-ti.c @@ -0,0 +1,356 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include +#include +#include +#include +#include + +#include "julia.h" +#include "julia_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "options.h" +#include "threading.h" + +#ifdef JULIA_ENABLE_THREADING +#ifdef JULIA_ENABLE_FORKJOIN_TI + +// for the barrier +typedef struct { + int sense; +} ti_thread_sense_t; + +// thread group +typedef struct { + int16_t *tid_map, num_threads, added_threads; + uint8_t num_sockets, num_cores, num_threads_per_core; + + // fork/join/barrier + uint8_t group_sense; // Written only by master thread + ti_thread_sense_t **thread_sense; + void *envelope; + + // to let threads sleep + uv_mutex_t alarm_lock; + uv_cond_t alarm; +} ti_threadgroup_t; + +// thread state +enum { + TI_THREAD_INIT, + TI_THREAD_WORK +}; + +// passed to thread function +typedef struct { + int16_t volatile state; + ti_threadgroup_t *tg; +} ti_threadarg_t; + +// work command to thread function +typedef struct { + jl_method_instance_t *mfunc; + jl_callptr_t fptr; + jl_value_t **args; + uint32_t nargs; + jl_value_t *ret; + size_t world_age; +} ti_threadwork_t; + +// for broadcasting work to threads +static ti_threadwork_t threadwork; + +// only one thread group for now +static ti_threadgroup_t *tgworld; + +extern uint64_t jl_thread_sleep_threshold; + +// threadgroup functions +// --- +static int ti_threadgroup_create(uint8_t num_sockets, uint8_t num_cores, + uint8_t num_threads_per_core, + ti_threadgroup_t **newtg) +{ + int i; + ti_threadgroup_t *tg; + int num_threads = num_sockets * num_cores * num_threads_per_core; + + tg = (ti_threadgroup_t*)jl_malloc_aligned(sizeof(ti_threadgroup_t), 64); + tg->tid_map = (int16_t*)jl_malloc_aligned(num_threads * sizeof(int16_t), 64); + for (i = 0; i < num_threads; ++i) + tg->tid_map[i] = -1; + tg->num_sockets = num_sockets; + tg->num_cores = num_cores; + tg->num_threads_per_core = num_threads_per_core; + tg->num_threads = num_threads; + tg->added_threads = 0; + tg->thread_sense = (ti_thread_sense_t**) + jl_malloc_aligned(num_threads * sizeof(ti_thread_sense_t*), 64); + for (i = 0; i < num_threads; i++) + tg->thread_sense[i] = NULL; + jl_atomic_store_release(&tg->group_sense, 0); + + uv_mutex_init(&tg->alarm_lock); + uv_cond_init(&tg->alarm); + + *newtg = tg; + return 0; +} + +static int ti_threadgroup_addthread(ti_threadgroup_t *tg, int16_t ext_tid, + int16_t *tgtid) +{ + if (ext_tid < 0 || ext_tid >= tg->num_threads) + return -1; + if (tg->tid_map[ext_tid] != -1) + return -2; + if (tg->added_threads == tg->num_threads) + return -3; + + tg->tid_map[ext_tid] = tg->added_threads++; + if (tgtid) *tgtid = tg->tid_map[ext_tid]; + + return 0; +} + +static int ti_threadgroup_initthread(ti_threadgroup_t *tg, int16_t ext_tid) +{ + ti_thread_sense_t *ts; + + if (ext_tid < 0 || ext_tid >= tg->num_threads) + return -1; + if (tg->thread_sense[tg->tid_map[ext_tid]] != NULL) + return -2; + if (tg->num_threads == 0) + return -3; + + ts = (ti_thread_sense_t*)jl_malloc_aligned(sizeof(ti_thread_sense_t), 64); + ts->sense = 1; + tg->thread_sense[tg->tid_map[ext_tid]] = ts; + + return 0; +} + +static int ti_threadgroup_fork(ti_threadgroup_t *tg, int16_t ext_tid, void **bcast_val, int init) +{ + uint8_t *group_sense = &tg->group_sense; + int16_t tid = tg->tid_map[ext_tid]; + int thread_sense = tg->thread_sense[tid]->sense; + if (tid == 0) { + tg->envelope = bcast_val ? 
*bcast_val : NULL; + // synchronize `tg->envelope` and `tg->group_sense` + jl_atomic_store_release(group_sense, thread_sense); + + // if it's possible that threads are sleeping, signal them + if (jl_thread_sleep_threshold) { + uv_mutex_lock(&tg->alarm_lock); + uv_cond_broadcast(&tg->alarm); + uv_mutex_unlock(&tg->alarm_lock); + } + } + else { + // spin up to threshold ns (count sheep), then sleep + uint64_t spin_ns; + uint64_t spin_start = 0; + // synchronize `tg->envelope` and `tg->group_sense` + while (jl_atomic_load_acquire(group_sense) != thread_sense) { + if (jl_thread_sleep_threshold) { + if (!spin_start) { + // Lazily initialize spin_start since uv_hrtime is expensive + spin_start = uv_hrtime(); + continue; + } + spin_ns = uv_hrtime() - spin_start; + // In case uv_hrtime is not monotonic, we'll sleep earlier + if (init || spin_ns >= jl_thread_sleep_threshold) { + uv_mutex_lock(&tg->alarm_lock); + if (jl_atomic_load_acquire(group_sense) != thread_sense) { + uv_cond_wait(&tg->alarm, &tg->alarm_lock); + } + uv_mutex_unlock(&tg->alarm_lock); + spin_start = 0; + init = 0; + continue; + } + } + jl_cpu_pause(); + } + if (bcast_val) + *bcast_val = tg->envelope; + } + + return 0; +} + +static int ti_threadgroup_join(ti_threadgroup_t *tg, int16_t ext_tid) +{ + int *p_thread_sense = &tg->thread_sense[tg->tid_map[ext_tid]]->sense; + jl_atomic_store_release(p_thread_sense, !*p_thread_sense); + if (tg->tid_map[ext_tid] == 0) { + jl_ptls_t ptls = jl_get_ptls_states(); + int8_t group_sense = tg->group_sense; + for (int i = 1; i < tg->num_threads; ++i) { + while (jl_atomic_load_acquire(&tg->thread_sense[i]->sense) == group_sense) { + jl_gc_safepoint_(ptls); + jl_cpu_pause(); + } + } + } + + return 0; +} + + +// threading interface +// --- +void jl_init_threadinginfra(void) { } + +void jl_init_threadarg(jl_threadarg_t *targ) +{ + ti_threadarg_t *tiarg = (ti_threadarg_t *)malloc(sizeof (ti_threadarg_t)); + tiarg->state = TI_THREAD_INIT; + targ->arg = (void *)tiarg; +} + +void jl_init_started_threads(jl_threadarg_t **targs) +{ + // the analyzer doesn't know jl_n_threads doesn't change, help it + size_t nthreads = jl_n_threads; + + // set up the world thread group + ti_threadgroup_create(1, nthreads, 1, &tgworld); + for (int i = 0; i < nthreads; ++i) + ti_threadgroup_addthread(tgworld, i, NULL); + + jl_ptls_t ptls = jl_get_ptls_states(); + ti_threadgroup_initthread(tgworld, ptls->tid); + + // give the threads the world thread group; they will block waiting for fork + for (int i = 0; i < nthreads - 1; ++i) { + ti_threadarg_t *tiarg = (ti_threadarg_t *)targs[i]->arg; + tiarg->tg = tgworld; + jl_atomic_store_release(&tiarg->state, TI_THREAD_WORK); + } +} + +// thread function: used by all except the main thread +void jl_threadfun(void *arg) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + jl_threadarg_t *targ = (jl_threadarg_t *)arg; + ti_threadarg_t *tiarg = (ti_threadarg_t *)targ->arg; + ti_threadgroup_t *tg; + ti_threadwork_t *work; + + // initialize this thread (set tid, create heap, etc.) + jl_init_threadtls(targ->tid); + void *stack_lo, *stack_hi; + jl_init_stack_limits(0, &stack_lo, &stack_hi); + + // set up tasking + jl_init_root_task(stack_lo, stack_hi); + + // wait for a thread group + while (jl_atomic_load_acquire(&tiarg->state) == TI_THREAD_INIT) + jl_cpu_pause(); + + // Assuming the functions called below don't contain unprotected GC + // critical region. In general, the following part of this function + // shouldn't call any managed code without calling `jl_gc_unsafe_enter` + // first. 
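The fork half of this handshake is a sense-reversing broadcast: the master writes the work `envelope`, then release-stores the group sense; workers acquire-spin on the sense (sleeping once past `jl_thread_sleep_threshold`) and read the envelope only after observing the flip. A minimal Julia sketch of that publish/observe ordering (illustrative, not the C API):

```julia
using Base.Threads

const group_sense = Atomic{Int}(0)
const envelope = Ref{Any}(nothing)

@threads for tid in 1:2
    if tid == 1                        # "master": publish work, then flip the sense
        envelope[] = "broadcast work"
        atomic_xchg!(group_sense, 1)   # the C code uses a release store
    else                               # "worker": spin until the sense flips
        while group_sense[] != 1
            # spin (the C version sleeps after the spin threshold)
        end
        @assert envelope[] == "broadcast work"
    end
end
```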
+ jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0); + uv_barrier_wait(targ->barrier); + + // initialize this thread in the thread group + tg = tiarg->tg; + ti_threadgroup_initthread(tg, ptls->tid); + + // free the thread argument here + free(tiarg); + free(targ); + + int init = 1; + + // work loop + for (; ;) { + ti_threadgroup_fork(tg, ptls->tid, (void **)&work, init); + init = 0; + + JL_GC_PROMISE_ROOTED(work); + + if (work) { + // TODO: before we support getting return value from + // the work, and after we have proper GC transition + // support in the codegen and runtime we don't need to + // enter GC unsafe region when starting the work. + int8_t gc_state = jl_gc_unsafe_enter(ptls); + size_t last_age = ptls->world_age; + ptls->world_age = work->world_age; + jl_thread_run_fun(work->fptr, work->mfunc, work->args, work->nargs); + ptls->world_age = last_age; + jl_gc_unsafe_leave(ptls, gc_state); + } + + ti_threadgroup_join(tg, ptls->tid); + } +} + +// interface to user code: specialize and compile the user thread function +// and run it in all threads +JL_DLLEXPORT jl_value_t *jl_threading_run(jl_value_t *_args) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + // GC safe + uint32_t nargs; + jl_value_t **args; + if (!jl_is_svec(_args)) { + nargs = 1; + args = &_args; + } + else { + nargs = jl_svec_len(_args); + args = jl_svec_data(_args); + } + + int8_t gc_state = jl_gc_unsafe_enter(ptls); + + size_t world = jl_get_ptls_states()->world_age; + + threadwork.mfunc = jl_lookup_generic(args, nargs, + jl_int32hash_fast(jl_return_address()), ptls->world_age); + // Ignore constant return value for now. + threadwork.fptr = jl_compile_method_internal(&threadwork.mfunc, world); + if (threadwork.fptr == jl_fptr_const_return) + return jl_nothing; + threadwork.args = args; + threadwork.nargs = nargs; + threadwork.ret = jl_nothing; + threadwork.world_age = world; + + // fork the world thread group + ti_threadwork_t *tw = &threadwork; + ti_threadgroup_fork(tgworld, ptls->tid, (void **)&tw, 0); + + JL_GC_PROMISE_ROOTED(threadwork.mfunc); + + // this thread must do work too + tw->ret = jl_thread_run_fun(threadwork.fptr, threadwork.mfunc, args, nargs); + + // wait for completion + ti_threadgroup_join(tgworld, ptls->tid); + + jl_gc_unsafe_leave(ptls, gc_state); + + return tw->ret; +} + +#endif // JULIA_ENABLE_FORKJOIN_TI +#endif // JULIA_ENABLE_THREADING + +#ifdef __cplusplus +} +#endif diff --git a/src/forkjoin-ti.h b/src/forkjoin-ti.h new file mode 100644 index 0000000000000..0b882cbb7eed4 --- /dev/null +++ b/src/forkjoin-ti.h @@ -0,0 +1,21 @@ +// This file is a part of Julia. 
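`jl_threading_run` is the C entry point behind the non-partr `@threads`: it compiles the loop-body closure once, broadcasts it through the world thread group via `ti_threadgroup_fork`, runs it on the calling thread as well, and joins. The user-visible effect (illustrative; assumes Julia was started with multiple threads, though it also runs with one):

```julia
using Base.Threads

acc = zeros(Int, nthreads())
@threads for i in 1:8        # the body is broadcast to every thread
    acc[threadid()] += i     # each thread handles a contiguous slice of 1:8
end
sum(acc) == 36               # every iteration ran exactly once
```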
License is MIT: https://julialang.org/license + +#ifndef FORKJOINTI_H +#define FORKJOINTI_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// interface provided by this threading infrastructure +JL_DLLEXPORT jl_value_t *jl_threading_run(jl_value_t *_args); + + +#ifdef __cplusplus +} +#endif + +#endif /* FORKJOINTI_H */ + diff --git a/src/gc-debug.c b/src/gc-debug.c index 40dc55a4f0550..92c576cbec8bd 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -576,13 +576,15 @@ static void gc_scrub_range(char *low, char *high) static void gc_scrub_task(jl_task_t *ta) { - int16_t tid = ta->tid; + int16_t tid = ta->current_tid; jl_ptls_t ptls = jl_get_ptls_states(); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = NULL; + if (tid != -1) + ptls2 = jl_all_tls_states[tid]; char *low; char *high; - if (ta->copy_stack && ta == ptls2->current_task) { + if (ta->copy_stack && ptls2 && ta == ptls2->current_task) { low = (char*)ptls2->stackbase - ptls2->stacksize; high = (char*)ptls2->stackbase; } @@ -593,7 +595,7 @@ static void gc_scrub_task(jl_task_t *ta) else return; - if (ptls == ptls2 && ta == ptls2->current_task) { + if (ptls == ptls2 && ptls2 && ta == ptls2->current_task) { // scan up to current `sp` for current thread and task low = (char*)jl_get_frame_addr(); } diff --git a/src/gc.c b/src/gc.c index e5fb03cf95cc0..b3b2d236b856b 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1634,6 +1634,13 @@ STATIC_INLINE int gc_mark_queue_obj(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_ return (int)nptr; } +#ifdef JULIA_ENABLE_PARTR +int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, jl_value_t *obj) +{ + return gc_mark_queue_obj(gc_cache, sp, obj); +} +#endif + JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) { return gc_mark_queue_obj(&ptls->gc_cache, &ptls->gc_mark_sp, obj); @@ -2330,8 +2337,10 @@ mark: { jl_task_t *ta = (jl_task_t*)new_obj; gc_scrub_record_task(ta); void *stkbuf = ta->stkbuf; - int16_t tid = ta->tid; - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + int16_t tid = ta->current_tid; + jl_ptls_t ptls2 = NULL; + if (tid != -1) + ptls2 = jl_all_tls_states[tid]; if (gc_cblist_task_scanner) { export_gc_state(ptls, &sp); gc_invoke_callbacks(jl_gc_cb_task_scanner_t, @@ -2347,7 +2356,7 @@ mark: { uintptr_t offset = 0; uintptr_t lb = 0; uintptr_t ub = (uintptr_t)-1; - if (ta == ptls2->current_task) { + if (ptls2 && ta == ptls2->current_task) { s = ptls2->pgcstack; } else if (stkbuf) { @@ -2481,12 +2490,21 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception); } +#ifdef JULIA_ENABLE_PARTR +void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp); +#endif + // mark the initial root set static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) { // modules gc_mark_queue_obj(gc_cache, sp, jl_main_module); +#ifdef JULIA_ENABLE_PARTR + // tasks + jl_gc_mark_enqueued_tasks(gc_cache, sp); +#endif + // invisible builtin values if (jl_an_empty_vec_any != NULL) gc_mark_queue_obj(gc_cache, sp, jl_an_empty_vec_any); diff --git a/src/init.c b/src/init.c index cf3a420d76d98..2425b6c42ae4f 100644 --- a/src/init.c +++ b/src/init.c @@ -611,7 +611,6 @@ void _julia_init(JL_IMAGE_SEARCH rel) // Make sure we finalize the tls callback before starting any threads. 
jl_get_ptls_states_getter(); #endif - jl_ptls_t ptls = jl_get_ptls_states(); jl_safepoint_init(); libsupport_init(); htable_new(&jl_current_modules, 0); @@ -813,8 +812,10 @@ void jl_get_builtin_hooks(void) int t; for (t = 0; t < jl_n_threads; t++) { jl_ptls_t ptls2 = jl_all_tls_states[t]; - ptls2->root_task->tls = jl_nothing; + ptls2->root_task->storage = jl_nothing; +#ifndef JULIA_ENABLE_PARTR ptls2->root_task->donenotify = jl_nothing; +#endif ptls2->root_task->exception = jl_nothing; ptls2->root_task->result = jl_nothing; } diff --git a/src/julia.h b/src/julia.h index fbc1eb2461b66..47d03c054f3ce 100644 --- a/src/julia.h +++ b/src/julia.h @@ -542,6 +542,9 @@ extern JL_DLLEXPORT jl_unionall_t *jl_anytuple_type_type JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_unionall_t *jl_vararg_type JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_typename_t *jl_vararg_typename JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_datatype_t *jl_task_type JL_GLOBALLY_ROOTED; +#ifdef JULIA_ENABLE_PARTR +extern JL_DLLEXPORT jl_datatype_t *jl_condition_type JL_GLOBALLY_ROOTED; +#endif extern JL_DLLEXPORT jl_datatype_t *jl_function_type JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_datatype_t *jl_builtin_type JL_GLOBALLY_ROOTED; @@ -1606,44 +1609,124 @@ typedef struct _jl_handler_t { size_t world_age; } jl_handler_t; -typedef struct _jl_task_t { +typedef struct _jl_task_t jl_task_t; + +#if defined(JULIA_ENABLE_PARTR) +typedef struct _arriver_t arriver_t; +typedef struct _reducer_t reducer_t; + +typedef struct _jl_taskq_t jl_taskq_t; +typedef struct _jl_taskq_t jl_condition_t; + +struct _jl_taskq_t { + JL_DATA_TYPE + + jl_task_t *head; + jl_mutex_t lock; +}; +#endif + +struct _jl_task_t { JL_DATA_TYPE - jl_value_t *tls; + + /* task local storage */ + jl_value_t *storage; + + /* state */ jl_sym_t *state; + +#ifndef JULIA_ENABLE_PARTR + /* completion queue */ jl_value_t *donenotify; +#endif + + /* execution result */ jl_value_t *result; jl_value_t *exception; jl_value_t *backtrace; jl_value_t *logstate; - jl_function_t *start; -// hidden state: - jl_ucontext_t ctx; // saved thread state - void *stkbuf; // malloc'd memory (either copybuf or stack) - size_t bufsz; // actual sizeof stkbuf + /* task entry point */ + jl_function_t *taskentry; + +#ifdef JULIA_ENABLE_PARTR + /* reduction function entry point */ + jl_function_t *redentry; + + /* completion queue */ + jl_taskq_t cq; + + /* to link this task into queues */ + jl_task_t *next; + + /* parent (first) task of a parfor set */ + jl_task_t *parent; + + /* parfor reduction result */ + jl_value_t *redresult; +#endif + /* --- hidden --- */ + + /* context and stack */ + jl_ucontext_t ctx; // saved thread state + void *stkbuf; // malloc'd memory (either copybuf or stack) + size_t bufsz; // actual sizeof stkbuf unsigned int copy_stack:31; // sizeof stack for copybuf unsigned int started:1; - // current exception handler + /* current exception handler */ jl_handler_t *eh; - // saved gc stack top for context switches + + /* saved gc stack top for context switches */ jl_gcframe_t *gcstack; + // saved exception stack jl_excstack_t *excstack; // current world age size_t world_age; - // id of owning thread - // does not need to be defined until the task runs - int16_t tid; + /* thread currently running this task */ + int16_t current_tid; #ifdef JULIA_ENABLE_THREADING - // This is statically initialized when the task is not holding any locks arraylist_t locks; +#endif +#ifdef JULIA_ENABLE_PARTR + /* grain's range, for parfors */ + int64_t start, end; + + /* to synchronize/reduce grains of a 
parfor */ + arriver_t *arr; + reducer_t *red; + + /* tid of the thread to which this task is sticky */ + int16_t sticky_tid; + + /* the index of this task in the set of grains of a parfor */ + int16_t grain_num; + + /* for the multiqueue */ + int16_t prio; #endif jl_timing_block_t *timing_stack; -} jl_task_t; +}; JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t ssize); + +#ifdef JULIA_ENABLE_PARTR + +JL_DLLEXPORT jl_task_t *jl_task_spawn(jl_task_t *task, jl_value_t *arg, int8_t err, + int8_t unyielding, int8_t sticky); +JL_DLLEXPORT jl_task_t *jl_task_new_multi(jl_value_t *args, size_t ssize, + int64_t count, jl_value_t *rargs); +JL_DLLEXPORT int jl_task_spawn_multi(jl_task_t *task); +JL_DLLEXPORT jl_value_t *jl_task_sync(jl_task_t *task); +JL_DLLEXPORT jl_value_t *jl_task_yield(int requeue); +JL_DLLEXPORT jl_condition_t *jl_condition_new(void); +JL_DLLEXPORT jl_value_t *jl_task_wait(jl_condition_t *c); +JL_DLLEXPORT void jl_task_notify(jl_condition_t *c, jl_value_t *arg, int8_t all, int8_t err); + +#endif // !JULIA_ENABLE_PARTR + JL_DLLEXPORT void jl_switchto(jl_task_t **pt); JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_rethrow(void); diff --git a/src/julia_internal.h b/src/julia_internal.h index af463a86fc5f4..7d29bcfda695b 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -500,7 +500,6 @@ extern ssize_t jl_tls_offset; extern const int jl_tls_elf_support; void jl_init_threading(void); void jl_start_threads(void); -void jl_shutdown_threading(void); // Whether the GC is running extern char *jl_safepoint_pages; @@ -706,6 +705,22 @@ void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFEPOINT; // Returns time in nanosec JL_DLLEXPORT uint64_t jl_hrtime(void); +// congruential random number generator +STATIC_INLINE void seed_cong(uint64_t *seed) +{ + *seed = jl_hrtime(); +} +STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias) +{ + *unbias = UINT64_MAX - ((UINT64_MAX % max)+1); +} +STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) +{ + while ((*seed = 69069 * (*seed) + 362437) > unbias) + ; + return *seed % max; +} + // libuv stuff: JL_DLLEXPORT extern void *jl_dl_handle; JL_DLLEXPORT extern void *jl_RTLD_DEFAULT_handle; diff --git a/src/julia_threads.h b/src/julia_threads.h index eedab742ef6b1..cadb7cf236091 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -53,7 +53,7 @@ typedef ucontext_t jl_ucontext_t; // Recursive spin lock typedef struct { - volatile unsigned long owner; + volatile uintptr_t owner; uint32_t count; } jl_mutex_t; @@ -158,6 +158,10 @@ struct _jl_tls_states_t { jl_ucontext_t base_ctx; // base context of stack jl_jmp_buf *safe_restore; int16_t tid; +#ifdef JULIA_ENABLE_PARTR + uint64_t rngseed; + struct _jl_taskq_t *sticky_taskq; +#endif // Temp storage for exception thrown in signal handler. Not rooted. struct _jl_value_t *sig_exception; // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0. 
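The `cong` helpers added to `julia_internal.h` are a 64-bit linear congruential generator (`seed ← 69069·seed + 362437`, mod 2^64) with a rejection bound to avoid modulo bias; partr uses it for cheap per-thread random heap selection. A Julia transliteration (illustrative only):

```julia
# `unbias` is the largest draw for which `seed % max` remains uniform;
# anything above it is rejected and the generator is stepped again.
unbias_cong(max::UInt64) = typemax(UInt64) - (typemax(UInt64) % max + 1)

function cong(max::UInt64, unbias::UInt64, seed::Ref{UInt64})
    while (seed[] = 69069 * seed[] + 362437) > unbias   # LCG step, wraps mod 2^64
    end
    return seed[] % max
end

seed = Ref(UInt64(0x243f6a8885a308d3))   # the C code seeds from jl_hrtime()
ub = unbias_cong(UInt64(16))
cong(UInt64(16), ub, seed)               # uniform in 0:15
```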
diff --git a/src/locks.h b/src/locks.h index b030e8c20403f..bb53887164723 100644 --- a/src/locks.h +++ b/src/locks.h @@ -105,6 +105,22 @@ static inline void jl_mutex_lock(jl_mutex_t *lock) jl_gc_enable_finalizers(ptls, 0); } +static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock) +{ + unsigned long self = jl_thread_self(); + unsigned long owner = jl_atomic_load_acquire(&lock->owner); + if (owner == self) { + lock->count++; + return 1; + } + if (owner == 0 && + jl_atomic_compare_exchange(&lock->owner, 0, self) == 0) { + lock->count = 1; + return 1; + } + return 0; +} + /* Call this function for code that could be called from either a managed or an unmanaged thread */ static inline void jl_mutex_lock_maybe_nogc(jl_mutex_t *lock) diff --git a/src/options.h b/src/options.h index 5a4fc70a1f102..3d64805a03954 100644 --- a/src/options.h +++ b/src/options.h @@ -129,6 +129,12 @@ #define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE" #define DEFAULT_MACHINE_EXCLUSIVE 0 +// threading infrastructure selection +#ifndef JULIA_ENABLE_PARTR +#define JULIA_ENABLE_FORKJOIN_TI 1 + +#endif // !JULIA_ENABLE_PARTR + // sanitizer defaults --------------------------------------------------------- diff --git a/src/partr.c b/src/partr.c new file mode 100644 index 0000000000000..8b8672eb56089 --- /dev/null +++ b/src/partr.c @@ -0,0 +1,1154 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include +#include +#include +#include + +#include "julia.h" +#include "julia_internal.h" +#include "gc.h" +#include "threading.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef JULIA_ENABLE_THREADING +#ifdef JULIA_ENABLE_PARTR + +// empirically, finish_task needs about 64k stack space to infer/run +// and additionally, gc-stack reserves 64k for the guard pages +#if defined(MINSIGSTKSZ) && MINSIGSTKSZ > 131072 +#define MINSTKSZ MINSIGSTKSZ +#else +#define MINSTKSZ 131072 +#endif + +// task states and stack switching +extern jl_sym_t *done_sym; +extern jl_sym_t *failed_sym; +extern jl_sym_t *runnable_sym; +extern void jl_switchto(jl_task_t **pt); +extern char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner); + +// the lovely task-done-hook hack +extern jl_function_t *task_done_hook_func; + +// GC functions used +extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, + jl_gc_mark_sp_t *sp, jl_value_t *obj); + +// thread sleep threshold +extern uint64_t jl_thread_sleep_threshold; + +// multiq +// --- + +/* a task heap */ +typedef struct taskheap_tag { + jl_mutex_t lock; + jl_task_t **tasks; + int16_t ntasks, prio; +} taskheap_t; + +/* multiqueue parameters */ +static const int16_t heap_d = 8; +static const int heap_c = 4; + +/* size of each heap */ +static const int tasks_per_heap = 8192; // TODO: this should be smaller by default, but growable! 
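The multiqueue sketched above is a bank of `heap_c × nthreads` small binary heaps, each guarded by its own lock: insertion trylocks randomly chosen heaps until one yields, and deletion samples two heaps and pops from the one with the better minimum priority, so no global lock is ever taken. A compact Julia sketch of the idea (hypothetical names, assumes the DataStructures.jl package; not the C API):

```julia
using DataStructures   # for BinaryMinHeap

struct MultiQueue
    heaps::Vector{BinaryMinHeap{Int}}
    locks::Vector{ReentrantLock}
end
MultiQueue(p) = MultiQueue([BinaryMinHeap{Int}() for _ in 1:p],
                           [ReentrantLock() for _ in 1:p])

function mq_insert!(q::MultiQueue, prio::Int)
    while true                           # retry with a fresh random heap
        i = rand(1:length(q.heaps))
        trylock(q.locks[i]) || continue  # like jl_mutex_trylock_nogc
        try
            return push!(q.heaps[i], prio)
        finally
            unlock(q.locks[i])
        end
    end
end

function mq_deletemin!(q::MultiQueue)
    i, j = rand(1:length(q.heaps)), rand(1:length(q.heaps))
    minprio(h) = isempty(h) ? typemax(Int) : first(h)
    k = minprio(q.heaps[i]) <= minprio(q.heaps[j]) ? i : j   # two random choices
    lock(q.locks[k]) do
        isempty(q.heaps[k]) ? nothing : pop!(q.heaps[k])
    end
end

q = MultiQueue(4)
mq_insert!(q, 3); mq_insert!(q, 1)
mq_deletemin!(q)   # 1, unless both random samples missed that heap
```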
+ +/* the multiqueue's heaps */ +static taskheap_t *heaps; +static int16_t heap_p; + +/* unbias state for the RNG */ +static uint64_t cong_unbias; + +/* for thread sleeping */ +static uv_mutex_t sleep_lock; +static uv_cond_t sleep_alarm; + + +/* multiq_init() + */ +static inline void multiq_init(void) +{ + heap_p = heap_c * jl_n_threads; + heaps = (taskheap_t *)calloc(heap_p, sizeof(taskheap_t)); + for (int16_t i = 0; i < heap_p; ++i) { + jl_mutex_init(&heaps[i].lock); + heaps[i].tasks = (jl_task_t **)calloc(tasks_per_heap, sizeof(jl_task_t *)); + heaps[i].ntasks = 0; + heaps[i].prio = INT16_MAX; + } + unbias_cong(heap_p, &cong_unbias); +} + + +/* sift_up() + */ +static inline void sift_up(taskheap_t *heap, int16_t idx) +{ + if (idx > 0) { + int16_t parent = (idx-1)/heap_d; + if (heap->tasks[idx]->prio < heap->tasks[parent]->prio) { + jl_task_t *t = heap->tasks[parent]; + heap->tasks[parent] = heap->tasks[idx]; + heap->tasks[idx] = t; + sift_up(heap, parent); + } + } +} + + +/* sift_down() + */ +static inline void sift_down(taskheap_t *heap, int16_t idx) +{ + if (idx < heap->ntasks) { + for (int16_t child = heap_d*idx + 1; + child < tasks_per_heap && child <= heap_d*idx + heap_d; + ++child) { + if (heap->tasks[child] + && heap->tasks[child]->prio < heap->tasks[idx]->prio) { + jl_task_t *t = heap->tasks[idx]; + heap->tasks[idx] = heap->tasks[child]; + heap->tasks[child] = t; + sift_down(heap, child); + } + } + } +} + + +/* multiq_insert() + */ +static inline int multiq_insert(jl_task_t *task, int16_t priority) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + uint64_t rn; + + task->prio = priority; + do { + rn = cong(heap_p, cong_unbias, &ptls->rngseed); + } while (!jl_mutex_trylock_nogc(&heaps[rn].lock)); + + if (heaps[rn].ntasks >= tasks_per_heap) { + jl_mutex_unlock_nogc(&heaps[rn].lock); + jl_error("multiq insertion failed, increase #tasks per heap"); + return -1; + } + + heaps[rn].tasks[heaps[rn].ntasks++] = task; + sift_up(&heaps[rn], heaps[rn].ntasks-1); + jl_mutex_unlock_nogc(&heaps[rn].lock); + int16_t prio = jl_atomic_load(&heaps[rn].prio); + if (task->prio < prio) + jl_atomic_compare_exchange(&heaps[rn].prio, prio, task->prio); + + return 0; +} + + +/* multiq_deletemin() + */ +static inline jl_task_t *multiq_deletemin(void) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + uint64_t rn1 = 0, rn2; + int16_t i, prio1, prio2; + jl_task_t *task; + + for (i = 0; i < heap_p; ++i) { + rn1 = cong(heap_p, cong_unbias, &ptls->rngseed); + rn2 = cong(heap_p, cong_unbias, &ptls->rngseed); + prio1 = jl_atomic_load(&heaps[rn1].prio); + prio2 = jl_atomic_load(&heaps[rn2].prio); + if (prio1 > prio2) { + prio1 = prio2; + rn1 = rn2; + } + else if (prio1 == prio2 && prio1 == INT16_MAX) + continue; + if (jl_mutex_trylock_nogc(&heaps[rn1].lock)) { + if (prio1 == heaps[rn1].prio) + break; + jl_mutex_unlock_nogc(&heaps[rn1].lock); + } + } + if (i == heap_p) + return NULL; + + task = heaps[rn1].tasks[0]; + heaps[rn1].tasks[0] = heaps[rn1].tasks[--heaps[rn1].ntasks]; + heaps[rn1].tasks[heaps[rn1].ntasks] = NULL; + prio1 = INT16_MAX; + if (heaps[rn1].ntasks > 0) { + sift_down(&heaps[rn1], 0); + prio1 = heaps[rn1].tasks[0]->prio; + } + jl_atomic_store(&heaps[rn1].prio, prio1); + jl_mutex_unlock_nogc(&heaps[rn1].lock); + + return task; +} + + +// sync trees +// --- + +/* arrival tree */ +struct _arriver_t { + int16_t index, next_avail; + int16_t **tree; +}; + +/* reduction tree */ +struct _reducer_t { + int16_t index, next_avail; + jl_value_t ***tree; +}; + + +/* pool of arrival trees */ +static arriver_t 
*arriverpool; +static int16_t num_arrivers, num_arriver_tree_nodes, next_arriver; + +/* pool of reduction trees */ +static reducer_t *reducerpool; +static int16_t num_reducers, num_reducer_tree_nodes, next_reducer; + + +/* synctreepool_init() + */ +static inline void synctreepool_init(void) +{ + num_arriver_tree_nodes = (GRAIN_K * jl_n_threads) - 1; + num_reducer_tree_nodes = (2 * GRAIN_K * jl_n_threads) - 1; + + /* num_arrivers = ((GRAIN_K * jl_n_threads) ^ ARRIVERS_P) + 1 */ + num_arrivers = GRAIN_K * jl_n_threads; + for (int i = 1; i < ARRIVERS_P; ++i) + num_arrivers = num_arrivers * num_arrivers; + ++num_arrivers; + + num_reducers = num_arrivers * REDUCERS_FRAC; + + /* allocate */ + arriverpool = (arriver_t *)calloc(num_arrivers, sizeof (arriver_t)); + next_arriver = 0; + for (int i = 0; i < num_arrivers; ++i) { + arriverpool[i].index = i; + arriverpool[i].next_avail = i + 1; + arriverpool[i].tree = (int16_t **) + jl_malloc_aligned(num_arriver_tree_nodes * sizeof (int16_t *), 64); + for (int j = 0; j < num_arriver_tree_nodes; ++j) + arriverpool[i].tree[j] = (int16_t *)jl_malloc_aligned(sizeof (int16_t), 64); + } + arriverpool[num_arrivers - 1].next_avail = -1; + + reducerpool = (reducer_t *)calloc(num_reducers, sizeof (reducer_t)); + next_reducer = 0; + for (int i = 0; i < num_reducers; ++i) { + reducerpool[i].index = i; + reducerpool[i].next_avail = i + 1; + reducerpool[i].tree = (jl_value_t ***) + jl_malloc_aligned(num_reducer_tree_nodes * sizeof (jl_value_t **), 64); + for (int j = 0; j < num_reducer_tree_nodes; ++j) + reducerpool[i].tree[j] = (jl_value_t **)jl_malloc_aligned(sizeof (jl_value_t *), 64); + } + if (num_reducers > 0) + reducerpool[num_reducers - 1].next_avail = -1; + else + next_reducer = -1; +} + + +/* arriver_alloc() + */ +static inline arriver_t *arriver_alloc(void) +{ + int16_t candidate; + arriver_t *arr; + + do { + candidate = jl_atomic_load(&next_arriver); + if (candidate == -1) + return NULL; + arr = &arriverpool[candidate]; + } while (!jl_atomic_bool_compare_exchange(&next_arriver, + candidate, arr->next_avail)); + return arr; +} + + +/* arriver_free() + */ +static inline void arriver_free(arriver_t *arr) +{ + for (int i = 0; i < num_arriver_tree_nodes; ++i) + *arr->tree[i] = 0; + + jl_atomic_exchange_generic(&next_arriver, &arr->index, &arr->next_avail); +} + + +/* reducer_alloc() + */ +static inline reducer_t *reducer_alloc(void) +{ + int16_t candidate; + reducer_t *red; + + do { + candidate = jl_atomic_load(&next_reducer); + if (candidate == -1) + return NULL; + red = &reducerpool[candidate]; + } while (!jl_atomic_bool_compare_exchange(&next_reducer, + candidate, red->next_avail)); + return red; +} + + +/* reducer_free() + */ +static inline void reducer_free(reducer_t *red) +{ + for (int i = 0; i < num_reducer_tree_nodes; ++i) + *red->tree[i] = 0; + + jl_atomic_exchange_generic(&next_reducer, &red->index, &red->next_avail); +} + + +/* last_arriver() + */ +static inline int last_arriver(arriver_t *arr, int idx) +{ + int arrived, aidx = idx + (GRAIN_K * jl_n_threads) - 1; + + while (aidx > 0) { + --aidx; + aidx >>= 1; + arrived = jl_atomic_fetch_add(arr->tree[aidx], 1); + if (!arrived) return 0; + } + + return 1; +} + + +#if 0 +/* reduce() + */ +static inline jl_value_t *reduce(arriver_t *arr, reducer_t *red, jl_function_t *redfun, + jl_value_t *val, int idx) +{ + int arrived, aidx = idx + (GRAIN_K * jl_n_threads) - 1, ridx = aidx, nidx; + + *red->tree[ridx] = val; + while (aidx > 0) { + --aidx; + aidx >>= 1; + arrived = jl_atomic_fetch_add(arr->tree[aidx], 
1); + if (!arrived) return NULL; + + /* neighbor has already arrived, get its value and reduce it */ + nidx = ridx & 0x1 ? ridx + 1 : ridx - 1; + /* TODO: need to pass in val and red->tree[nidx] */ + JL_TRY { + val = fptr(mfunc, rargs, nrargs); + } + JL_CATCH { + val = jl_current_exception(); + } + + /* move up the tree */ + --ridx; + ridx >>= 1; + *red->tree[ridx] = val; + } + + return val; +} +#endif + +// parallel task runtime +// --- + +// sticky task queues need to be visible to all threads +jl_taskq_t *sticky_taskqs; + + +// initialize the threading infrastructure +void jl_init_threadinginfra(void) +{ + /* initialize the synchronization trees pool and the multiqueue */ + synctreepool_init(); + multiq_init(); + + /* allocate sticky task queues */ + sticky_taskqs = (jl_taskq_t *)jl_malloc_aligned(jl_n_threads * sizeof(jl_taskq_t), 64); + + /* initialize the sleep mechanism */ + uv_mutex_init(&sleep_lock); + uv_cond_init(&sleep_alarm); +} + + +// initialize the thread function argument +void jl_init_threadarg(jl_threadarg_t *targ) { } + + +// helper for final thread initialization +static void init_started_thread(void) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + /* allocate this thread's sticky task queue pointer and initialize the lock */ + seed_cong(&ptls->rngseed); + ptls->sticky_taskq = &sticky_taskqs[ptls->tid]; + ptls->sticky_taskq->head = NULL; + JL_MUTEX_INIT(&ptls->sticky_taskq->lock); +} + + +// once the threads are started, perform any final initializations +void jl_init_started_threads(jl_threadarg_t **targs) +{ + // master thread final initialization + init_started_thread(); +} + + +static int run_next(void); + + +// thread function: used by all except the main thread +void jl_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t *)arg; + + // initialize this thread (set tid, create heap, set up root task) + jl_init_threadtls(targ->tid); + void *stack_lo, *stack_hi; + jl_init_stack_limits(0, &stack_lo, &stack_hi); + init_started_thread(); + jl_init_root_task(stack_lo, stack_hi); + + // Assuming the functions called below don't contain unprotected GC + // critical region. In general, the following part of this function + // shouldn't call any managed code without calling `jl_gc_unsafe_enter` + // first. + jl_ptls_t ptls = jl_get_ptls_states(); + jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + jl_current_task->state = done_sym; + run_next(); + + // shouldn't get here + gc_debug_critical_error(); + abort(); +} + + +// enqueue the specified task for execution +static void enqueue_task(jl_task_t *task) +{ + /* sticky tasks go to the thread's sticky queue */ + if (task->sticky_tid != -1) { + jl_taskq_t *taskq = &sticky_taskqs[task->sticky_tid]; + JL_LOCK(&taskq->lock); + if (!taskq->head) + taskq->head = task; + else { + jl_task_t *pt = taskq->head; + while (pt->next) + pt = pt->next; + pt->next = task; + } + JL_UNLOCK(&taskq->lock); + } + + /* all others go back into the multiq */ + else + multiq_insert(task, task->prio); + + /* stop the event loop */ + uv_stop(jl_global_event_loop()); + + /* wake up threads */ + if (jl_thread_sleep_threshold) { + uv_mutex_lock(&sleep_lock); + uv_cond_broadcast(&sleep_alarm); + uv_mutex_unlock(&sleep_lock); + } +} + + +// parfor grains must synchronize/reduce as they end +static void sync_grains(jl_task_t *task) +{ + int was_last = 0; + + /* TODO kp: fix */ + /* TODO kp: cascade exception(s) if any */ + + /* reduce... 
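+       (each grain bumps the arrival counts on its path up the tree;
+       whichever grain reaches the root last is responsible for waking
+       the parent and freeing the sync trees)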
*/ + if (task->red) { + //task->result = reduce(task->arr, task->red, task->rfptr, task->mredfunc, + // task->rargs, task->result, task->grain_num); + jl_gc_wb(task, task->result); + + /* if this task is last, set the result in the parent task */ + if (task->result) { + task->parent->redresult = task->result; + jl_gc_wb(task->parent, task->parent->redresult); + was_last = 1; + } + } + /* ... or just sync */ + else { + if (last_arriver(task->arr, task->grain_num)) + was_last = 1; + } + + /* the last task to finish needs to finish up the loop */ + if (was_last) { + /* a non-parent task must wake up the parent */ + if (task->grain_num > 0) + enqueue_task(task->parent); + + /* this is the parent task which was last; it can just end */ + if (task->red) + reducer_free(task->red); + arriver_free(task->arr); + } + else { + /* the parent task needs to wait */ + if (task->grain_num == 0) { + jl_task_yield(0); + task->result = task->redresult; + jl_gc_wb(task, task->result); + } + } +} + + +// all tasks except the root task start and exit here +void NOINLINE JL_NORETURN start_task(void) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *task = ptls->current_task; + task->started = 1; + + jl_sym_t *new_state; + + if (task->exception != jl_nothing) { + ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE); + jl_push_excstack(&task->excstack, task->exception, + ptls->bt_data, ptls->bt_size); + task->result = task->exception; + jl_gc_wb(task, task->result); + new_state = failed_sym; + } + else { + JL_TRY { + if (ptls->defer_signal) { + ptls->defer_signal = 0; + jl_sigint_safepoint(ptls); + } + JL_TIMING(ROOT); + ptls->world_age = jl_world_counter; + task->result = jl_apply(&task->taskentry, 1); + jl_gc_wb(task, task->result); + new_state = done_sym; + } + JL_CATCH { + task->result = task->exception = jl_current_exception(); + jl_gc_wb(task, task->exception); + jl_gc_wb(task, task->result); + new_state = failed_sym; + goto skip_pop_exception; + } +skip_pop_exception:; + } + + /* grain tasks must synchronize */ + if (task->grain_num >= 0) + sync_grains(task); + + /* add back any tasks in this one's completion queue */ + JL_LOCK(&task->cq.lock); + jl_task_t *qtask = task->cq.head; + task->cq.head = NULL; + JL_UNLOCK(&task->cq.lock); + jl_task_t *qnext; + while (qtask) { + qnext = qtask->next; + qtask->next = NULL; + enqueue_task(qtask); + qtask = qnext; + } + + JL_SIGATOMIC_BEGIN(); + + task->state = new_state; + + if (task->copy_stack) // early free of stack + task->stkbuf = NULL; + + /* clear thread state */ + ptls->in_finalizer = 0; + ptls->in_pure_callback = 0; + ptls->world_age = jl_world_counter; + + /* run the task-is-done hook(s) */ + if (task_done_hook_func == NULL) + task_done_hook_func = (jl_function_t *)jl_get_global(jl_base_module, + jl_symbol("task_done_hook")); + if (task_done_hook_func != NULL) { + jl_value_t *args[2] = {task_done_hook_func, (jl_value_t *)task}; + JL_TRY { + jl_apply(args, 2); + } + JL_CATCH { + jl_no_exc_handler(jl_current_exception()); + } + } + + JL_SIGATOMIC_END(); + + /* next task */ + run_next(); + + /* shouldn't reach here */ + gc_debug_critical_error(); + abort(); +} + + +// get the next runnable task +static jl_task_t *get_next_task(void) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *task = NULL; + JL_GC_PUSH1(&task); + + /* first check for sticky tasks */ + JL_LOCK(&ptls->sticky_taskq->lock); + task = ptls->sticky_taskq->head; + if (task) { + ptls->sticky_taskq->head = task->next; + task->next = NULL; + } + 
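+    /* (FIFO: pop from the head here; enqueue_task() appends at the tail) */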
JL_UNLOCK(&ptls->sticky_taskq->lock); + + /* no sticky tasks, go to the multiq */ + if (!task) task = multiq_deletemin(); + + JL_GC_POP(); + return task; +} + + +// run the next available task +// TODO: deal with the case where another thread gets the task from which a thread is +// still trying to switch away +static int run_next(void) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *task = NULL; + JL_GC_PUSH1(&task); + + uint64_t spin_ns, spin_start = 0; + while (!task) { + if (jl_thread_sleep_threshold) { + if (spin_start == 0) { + spin_start = uv_hrtime(); + continue; + } + } + + task = get_next_task(); + + if (!task) { + if (ptls->tid == 0) + jl_process_events(jl_global_event_loop()); + else + jl_cpu_pause(); + + if (jl_thread_sleep_threshold) { + spin_ns = uv_hrtime() - spin_start; + if (spin_ns > jl_thread_sleep_threshold) { + uv_mutex_lock(&sleep_lock); + task = get_next_task(); + if (!task) { + // thread 0 makes a blocking call to the event loop + if (ptls->tid == 0) { + uv_mutex_unlock(&sleep_lock); + jl_run_once(jl_global_event_loop()); + } + // other threads just sleep + else { + uv_cond_wait(&sleep_alarm, &sleep_lock); + uv_mutex_unlock(&sleep_lock); + } + } + else uv_mutex_unlock(&sleep_lock); + spin_start = 0; + } + } + } + } + + jl_switchto(&task); + + JL_GC_POP(); + return 1; +} + + +// initialize a task +static void init_task(jl_task_t *task, size_t ssize) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + task->started = 0; + task->storage = jl_nothing; + task->state = runnable_sym; + task->result = jl_nothing; + task->exception = jl_nothing; + task->backtrace = jl_nothing; + task->logstate = jl_nothing; + task->taskentry = NULL; + task->redentry = NULL; + task->cq.head = NULL; + JL_MUTEX_INIT(&task->cq.lock); + task->next = NULL; + //task->parent = ptls->current_task; + task->parent = NULL; + task->redresult = jl_nothing; + + task->stkbuf = NULL; + task->copy_stack = 0; + if (ssize == 0) { + // stack size unspecified; use default +#if defined(COPY_STACKS) && defined(ALWAYS_COPY_STACKS) + task->copy_stack = 1; + task->bufsz = 0; +#else + task->bufsz = JL_STACK_SIZE; +#endif + } + else { + // user requested stack of a certain size + if (ssize < MINSTKSZ) + ssize = MINSTKSZ; + task->bufsz = ssize; + task->stkbuf = jl_alloc_fiber(&task->ctx, &task->bufsz, task); + if (task->stkbuf == NULL) + jl_throw(jl_memory_exception); + } +#if defined(JL_DEBUG_BUILD) + if (!task->copy_stack) + memset(&task->ctx, 0, sizeof(task->ctx)); +#endif +#ifdef COPY_STACKS + if (task->copy_stack) + memcpy(&task->ctx, &ptls->base_ctx, sizeof(task->ctx)); +#endif + + arraylist_new(&task->locks, 0); + task->eh = NULL; + task->gcstack = NULL; + task->excstack = NULL; + task->world_age = ptls->world_age; + task->current_tid = -1; + task->arr = NULL; + task->red = NULL; + task->sticky_tid = -1; + task->grain_num = -1; + +#ifdef ENABLE_TIMINGS + task->timing_stack = NULL; +#endif +} + + +/* jl_new_task() -- create a task for `f(arg)` + + The created task can then be spawned. + */ +JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *_taskentry, size_t ssize) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + jl_task_t *task = (jl_task_t *)jl_gc_alloc(ptls, sizeof (jl_task_t), jl_task_type); + init_task(task, ssize); + task->taskentry = _taskentry; + + return task; +} + + +/* jl_task_spawn() -- enqueue a task for execution + + If `sticky` is set, the task will only run on the current thread. Continues + the current task if `unyielding` is set or in a few other cases, otherwise + yields. 
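+
+   (The "other cases" are spelled out at the bottom of this function:
+   the spawn also does not yield when the caller is running a finalizer
+   or when the task was already started.)
+
+   A hypothetical call sequence, for illustration only (error handling
+   omitted; `f` is a zero-argument Julia function):
+
+       jl_task_t *t = jl_new_task(f, 0);       // default stack size
+       jl_task_spawn(t, jl_nothing, 0, 0, 0);  // enqueue; may yield here
+       jl_value_t *r = jl_task_sync(t);        // block until `t` completes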
+ */ +JL_DLLEXPORT jl_task_t *jl_task_spawn(jl_task_t *task, jl_value_t *arg, int8_t err, + int8_t unyielding, int8_t sticky) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + if (task->state != runnable_sym) + jl_error("schedule: Task not runnable"); + + if (!task->started) { + task->prio = ptls->tid; + if (sticky) task->sticky_tid = ptls->tid; + } + if (err) { + task->exception = arg; + jl_gc_wb(task, task->exception); + } + else { + task->result = arg; + if (arg != jl_nothing) + jl_gc_wb(task, task->result); + } + enqueue_task(task); + + /* Yielding here is important -- this is what allows depth first + scheduling. However, this breaks some assumptions made by parts of + the Julia runtime -- I/O and channels. So, we have to allow the caller + to disallow yielding. Also, if the task being scheduled has already + been started, we don't yield. + */ + if (!unyielding + && !ptls->in_finalizer // allow e.g. async printing from finalizers + && !task->started) + jl_task_yield(1); + + return task; +} + + +/* jl_task_new_multi() -- create multiple tasks for `f(arg)` + + Create multiple tasks, each of which invokes `f(arg, start, end)` such + that the sum of `end-start` for all tasks is `count`. If `_redentry` is + specified, the return values from the tasks are reduced; the result can + be retrieved by sync'ing on the parent task which is returned. All the + tasks can be spawned by passing the parent task to `jl_task_spawn_multi()`. + */ +JL_DLLEXPORT jl_task_t *jl_task_new_multi(jl_function_t *_taskentry, size_t ssize, + int64_t count, + jl_function_t *_redentry) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + int64_t n = GRAIN_K * jl_n_threads; + lldiv_t each = lldiv(count, n); + + /* allocate synchronization tree(s) */ + arriver_t *arr = arriver_alloc(); + if (arr == NULL) + return NULL; + reducer_t *red = NULL; + if (_redentry != NULL) { + red = reducer_alloc(); + if (red == NULL) { + arriver_free(arr); + return NULL; + } + } + + /* allocate (GRAIN_K * nthreads) tasks */ + int64_t start = 0, end = start + each.quot + (each.rem ? 1 : 0); + jl_task_t *parent = (jl_task_t *)jl_gc_alloc(ptls, sizeof (jl_task_t), jl_task_type); + JL_GC_PUSH1(&parent); + init_task(parent, ssize); + parent->taskentry = _taskentry; + parent->redentry = _redentry; + parent->start = start; + parent->end = end; + parent->grain_num = 0; + parent->arr = arr; + parent->red = red; + + jl_task_t *prev = parent, *task = NULL; + start = end; + for (int64_t i = 1; i < n; ++i) { + end = start + each.quot + (i < each.rem ? 1 : 0); + + task = (jl_task_t *)jl_gc_alloc(ptls, sizeof (jl_task_t), jl_task_type); + prev->next = task; + jl_gc_wb(prev, prev->next); + init_task(task, ssize); + task->parent = parent; + task->taskentry = _taskentry; + task->redentry = _redentry; + task->start = start; + task->end = end; + task->grain_num = i; + task->arr = arr; + task->red = red; + + prev = task; + start = end; + } + + JL_GC_POP(); + return parent; +} + + +/* jl_task_spawn_multi() -- spawn multiple tasks + + Spawns multiple tasks that were previously created with `jl_task_new_multi()`. + Yields. + */ +JL_DLLEXPORT int jl_task_spawn_multi(jl_task_t *task) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + /* enqueue (GRAIN_K * nthreads) tasks */ + jl_task_t *t = task; + for (int64_t i = 0; i < GRAIN_K * jl_n_threads; ++i) { + if (!t) // TODO: this should never happen + return -1; + if (multiq_insert(t, ptls->tid) != 0) // TODO: raise an error? 
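+            /* note: grains already inserted above remain queued on failure */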
+ return -2; + t = t->next; + } + + /* yield to allow depth-first scheduling */ + jl_task_yield(1); + + return 0; +} + + +static void taskq_delete(jl_task_t **pnext, jl_task_t *tgt) +{ + jl_task_t *pt = *pnext; + while (pt) { + if (pt == tgt) { + *pnext = pt->next; + break; + } + pnext = &pt->next; + pt = *pnext; + } + tgt->next = NULL; +} + + +/* jl_task_sync() -- get the return value of task `t` + + Returns only when task `t` has completed. + */ +JL_DLLEXPORT jl_value_t *jl_task_sync(jl_task_t *task) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + if (task == ptls->current_task) + jl_error("cannot sync on self"); + + /* if the target task has not finished, add the current task to its + completion queue; the thread that runs the target task will add + this task back to the ready queue + */ + if (task->state != done_sym && task->state != failed_sym) { + // TODO: problem if a grain task does a sync? + ptls->current_task->next = NULL; + JL_LOCK(&task->cq.lock); + + /* ensure the task didn't finish before we got the lock */ + if (task->state != done_sym && task->state != failed_sym) { + /* add the current task to the CQ */ + if (!task->cq.head) { + task->cq.head = ptls->current_task; + jl_gc_wb(task, task->cq.head); + } + else { + jl_task_t *pt = task->cq.head; + while (pt->next) + pt = pt->next; + pt->next = ptls->current_task; + jl_gc_wb(pt, pt->next); + } + + JL_UNLOCK(&task->cq.lock); + JL_TRY { + jl_task_yield(0); + } + JL_CATCH { + taskq_delete(&task->cq.head, ptls->current_task); + jl_rethrow(); + } + } + + /* the task finished before we could add to its CQ */ + else + JL_UNLOCK(&task->cq.lock); + } + + if (task->state == failed_sym) + jl_throw(task->exception); + + return task->grain_num >= 0 && task->red ? task->redresult : task->result; +} + + +/* jl_task_yield() -- cause the invoking task to yield + + If `requeue` is set, the task is inserted into the relevant queue + (sticky or multiqueue), otherwise it is assumed it will be re-queued + in some other way (e.g. from another task's completion queue). 
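+
+   For example, jl_task_wait() below parks the current task on a
+   condition's queue and then calls jl_task_yield(0); the matching
+   jl_task_notify() re-enqueues the task, so the yield itself must not.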
+ */ +JL_DLLEXPORT jl_value_t *jl_task_yield(int requeue) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + + if (ptls->in_finalizer) + jl_error("task switch not allowed from inside gc finalizer"); + if (ptls->in_pure_callback) + jl_error("task switch not allowed from inside staged nor pure functions"); + + if (requeue) + enqueue_task(ptls->current_task); + + // run the next available task + run_next(); + + // yielding task (eventually) continues + jl_value_t *exc = ptls->current_task->exception; + if (exc != jl_nothing) { + ptls->current_task->exception = jl_nothing; + jl_throw(exc); + } + + jl_value_t *res = ptls->current_task->result; + ptls->current_task->result = jl_nothing; + return res; +} + + +/* jl_condition_new() -- create a new Condition + */ +JL_DLLEXPORT jl_condition_t *jl_condition_new(void) +{ + jl_condition_t *cond = (jl_condition_t *) + jl_new_struct_uninit(jl_condition_type); + cond->head = NULL; + JL_GC_PUSH1(&cond); + JL_MUTEX_INIT(&cond->lock); + JL_GC_POP(); + + return cond; +} + + +/* jl_task_wait() -- deschedules the task until the specified condition is + triggered + */ +JL_DLLEXPORT jl_value_t *jl_task_wait(jl_condition_t *c) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + JL_LOCK(&c->lock); + if (!c->head) { + c->head = ptls->current_task; + jl_gc_wb(c, c->head); + } + else { + jl_task_t *pt = c->head; + while (pt->next) + pt = pt->next; + pt->next = ptls->current_task; + jl_gc_wb(pt, pt->next); + } + JL_UNLOCK(&c->lock); + jl_value_t *val = NULL; + JL_TRY { + val = jl_task_yield(0); + } + JL_CATCH { + taskq_delete(&c->head, ptls->current_task); + jl_rethrow(); + } + return val; +} + + +/* jl_task_notify() -- triggers the specified condition, causing all tasks + waiting on it to become schedulable + */ +JL_DLLEXPORT void jl_task_notify(jl_condition_t *c, jl_value_t *arg, int8_t all, int8_t err) +{ + JL_LOCK(&c->lock); + jl_task_t *qtask = c->head; + if (all) + c->head = NULL; + else { + if (c->head) { + c->head = c->head->next; + qtask->next = NULL; + } + } + JL_UNLOCK(&c->lock); + + jl_task_t *qnext; + while (qtask) { + qnext = qtask->next; + qtask->next = NULL; + if (err) { + qtask->exception = arg; + jl_gc_wb(qtask, qtask->exception); + } + else { + qtask->result = arg; + jl_gc_wb(qtask, qtask->result); + } + enqueue_task(qtask); + qtask = qnext; + } +} + + +JL_DLLEXPORT int jl_condition_isempty(jl_condition_t *c) +{ + return c->head ? 0 : 1; +} + + +void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) +{ + for (int16_t i = 0; i < heap_p; ++i) + for (int16_t j = 0; j < heaps[i].ntasks; ++j) + jl_gc_mark_queue_obj_explicit(gc_cache, sp, (jl_value_t *)heaps[i].tasks[j]); + for (int16_t i = 0; i < jl_n_threads; ++i) { + jl_task_t *t = sticky_taskqs[i].head; + while (t) { + jl_gc_mark_queue_obj_explicit(gc_cache, sp, (jl_value_t *)t); + t = t->next; + } + } +} + +#endif // JULIA_ENABLE_PARTR +#endif // JULIA_ENABLE_THREADING + +#ifdef __cplusplus +} +#endif diff --git a/src/partr.h b/src/partr.h new file mode 100644 index 0000000000000..a8c65da362a82 --- /dev/null +++ b/src/partr.h @@ -0,0 +1,46 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +/* partr -- parallel tasks runtime options + */ + +#ifndef PARTR_H +#define PARTR_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef JULIA_ENABLE_PARTR + +#include "julia.h" + + +/* multiq */ +#define MULTIQ_HEAP_C 4 + /* number of heaps = MULTIQ_HEAP_C * nthreads */ +#define MULTIQ_TASKS_PER_HEAP 129 + /* how many in each heap */ + +/* parfor */ +#define GRAIN_K 4 + /* tasks = niters / (GRAIN_K * nthreads) */ + +/* synchronization */ +#define ARRIVERS_P 2 + /* narrivers = ((GRAIN_K * nthreads) ^ ARRIVERS_P) + 1 + limit for number of recursive parfors */ +#define REDUCERS_FRAC 1 + /* nreducers = narrivers * REDUCERS_FRAC */ + + +#endif /* JULIA_ENABLE_PARTR */ + +#ifdef __cplusplus +} +#endif + +#endif /* PARTR_H */ + diff --git a/src/staticdata.c b/src/staticdata.c index 921abb4b770be..3bbc6af7441a9 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -1664,6 +1664,9 @@ static void jl_init_serializer2(int for_serialize) jl_globalref_type->name, jl_typeofbottom_type->name, jl_string_type->name, jl_abstractstring_type->name, jl_namedtuple_type, jl_namedtuple_typename, +#ifdef JULIA_ENABLE_PARTR + jl_condition_type, jl_condition_type->name, +#endif jl_int32_type, jl_int64_type, jl_bool_type, jl_uint8_type, jl_uint32_type, jl_uint64_type, diff --git a/src/task.c b/src/task.c index 3dce377a01a89..71d20805219d8 100644 --- a/src/task.c +++ b/src/task.c @@ -59,16 +59,22 @@ volatile int jl_in_stackwalk = 0; #define ROOT_TASK_STACK_ADJUSTMENT 3000000 -static jl_sym_t *done_sym; -static jl_sym_t *failed_sym; -static jl_sym_t *runnable_sym; +jl_sym_t *done_sym; +jl_sym_t *failed_sym; +jl_sym_t *runnable_sym; extern size_t jl_page_size; jl_datatype_t *jl_task_type; -static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner); -static void jl_set_fiber(jl_ucontext_t *t); -static void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t); -static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t); +#ifdef JULIA_ENABLE_PARTR +jl_datatype_t *jl_condition_type; + +void NOINLINE JL_NORETURN start_task(void); +#endif + +char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner); +void jl_set_fiber(jl_ucontext_t *t); +void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t); +void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t); #ifdef JL_HAVE_UNW_CONTEXT static JL_THREAD unw_cursor_t jl_basecursor; @@ -84,7 +90,7 @@ static void memcpy_a16(uint64_t *to, uint64_t *from, size_t nb) // *(to++) = *(from++); } -static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt) +void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt) { char *frame_addr = (char*)((uintptr_t)jl_get_frame_addr() & ~15); char *stackbase = (char*)ptls->stackbase; @@ -108,7 +114,7 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt jl_gc_wb_back(lastt); } -static void NOINLINE JL_NORETURN restore_stack(jl_ptls_t ptls, char *p) +void NOINLINE JL_NORETURN restore_stack(jl_ptls_t ptls, char *p) { jl_task_t *t = ptls->current_task; size_t nb = t->copy_stack; @@ -126,7 +132,8 @@ static void NOINLINE JL_NORETURN restore_stack(jl_ptls_t ptls, char *p) jl_set_fiber(&t->ctx); abort(); // unreachable } -static void restore_stack2(jl_ptls_t ptls, jl_task_t *lastt) + +void restore_stack2(jl_ptls_t ptls, jl_task_t *lastt) { jl_task_t *t = ptls->current_task; size_t nb = t->copy_stack; @@ -137,8 +144,9 @@ static void restore_stack2(jl_ptls_t ptls, jl_task_t 
*lastt) } #endif -static jl_function_t *task_done_hook_func = NULL; +jl_function_t *task_done_hook_func=NULL; +#ifndef JULIA_ENABLE_PARTR static void JL_NORETURN finish_task(jl_task_t *t, jl_value_t *resultval JL_MAYBE_UNROOTED) { jl_ptls_t ptls = jl_get_ptls_states(); @@ -180,6 +188,7 @@ static void JL_NORETURN finish_task(jl_task_t *t, jl_value_t *resultval JL_MAYBE gc_debug_critical_error(); abort(); } +#endif // JULIA_ENABLE_PARTR JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid) { @@ -296,6 +305,11 @@ static void ctx_switch(jl_ptls_t ptls, jl_task_t **pt) ptls->world_age = t->world_age; t->gcstack = NULL; ptls->current_task = t; +#ifdef JULIA_ENABLE_PARTR + if (!lastt->copy_stack) + lastt->current_tid = -1; + t->current_tid = ptls->tid; +#endif jl_ucontext_t *lastt_ctx = (killed ? NULL : &lastt->ctx); #ifdef COPY_STACKS @@ -338,6 +352,8 @@ JL_DLLEXPORT void jl_switchto(jl_task_t **pt) jl_ptls_t ptls = jl_get_ptls_states(); jl_task_t *t = *pt; if (t == ptls->current_task) { + if (t->state != runnable_sym) + jl_error("trying to switch to done task from itself"); return; } if (t->state == done_sym || t->state == failed_sym || @@ -360,6 +376,8 @@ JL_DLLEXPORT void jl_switchto(jl_task_t **pt) jl_sigint_safepoint(ptls); } +jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block); + JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e) JL_NOTSAFEPOINT { jl_printf(JL_STDERR, "fatal: error thrown and no exception handler available.\n"); @@ -452,6 +470,7 @@ JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED) throw_internal(NULL); } +#ifndef JULIA_ENABLE_PARTR JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t ssize) { jl_ptls_t ptls = jl_get_ptls_states(); @@ -475,9 +494,9 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t ssize) if (t->stkbuf == NULL) jl_throw(jl_memory_exception); } - t->tls = jl_nothing; + t->storage = jl_nothing; t->state = runnable_sym; - t->start = start; + t->taskentry = start; t->result = jl_nothing; t->donenotify = jl_nothing; t->exception = jl_nothing; @@ -486,11 +505,10 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t ssize) t->logstate = ptls->current_task->logstate; // there is no active exception handler available on this stack yet t->eh = NULL; - t->tid = 0; + t->current_tid = 0; t->gcstack = NULL; t->excstack = NULL; t->stkbuf = NULL; - t->tid = 0; t->started = 0; #ifdef ENABLE_TIMINGS t->timing_stack = NULL; @@ -507,8 +525,10 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t ssize) if (t->copy_stack) memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx)); #endif + return t; } +#endif // !JULIA_ENABLE_PARTR JL_DLLEXPORT jl_value_t *jl_get_current_task(void) { @@ -519,6 +539,7 @@ JL_DLLEXPORT jl_value_t *jl_get_current_task(void) // Do one-time initializations for task system void jl_init_tasks(void) JL_GC_DISABLED { +#ifndef JULIA_ENABLE_PARTR jl_task_type = (jl_datatype_t*) jl_new_datatype(jl_symbol("Task"), NULL, @@ -543,12 +564,57 @@ void jl_init_tasks(void) JL_GC_DISABLED jl_any_type, jl_any_type), 0, 1, 7); +#else /* JULIA_ENABLE_PARTR */ + jl_task_type = (jl_datatype_t*) + jl_new_datatype(jl_symbol("Task"), NULL, jl_any_type, jl_emptysvec, + jl_perm_symsvec(14, + "storage", + "state", + "result", + "exception", + "backtrace", + "logstate", + "code", + "redentry", + "cq_head", + "cq_lock_owner", + "cq_lock_count", + "next", + "parent", + "redresult"), + jl_svec(14, + jl_any_type, + jl_sym_type, + jl_any_type, + jl_any_type, + 
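+                            /* the next five Any entries are backtrace,
+                               logstate, code, redentry and cq_head;
+                               cq_head, next and parent are re-set to
+                               Task via jl_svecset below */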
jl_any_type, + jl_any_type, + jl_any_type, + jl_any_type, + jl_any_type, + jl_long_type, + jl_int32_type, + jl_any_type, + jl_any_type, + jl_any_type), + 0, 1, 6); + jl_svecset(jl_task_type->types, 8, (jl_value_t*)jl_task_type); + jl_svecset(jl_task_type->types, 11, (jl_value_t*)jl_task_type); + jl_svecset(jl_task_type->types, 12, (jl_value_t*)jl_task_type); + jl_condition_type = (jl_datatype_t*) + jl_new_datatype(jl_symbol("Condition"), NULL, jl_any_type, jl_emptysvec, + jl_perm_symsvec(3, "head", "lock_owner", "lock_count"), + jl_svec(3, jl_task_type, jl_long_type, jl_int32_type), + 0, 1, 0); +#endif /* JULIA_ENABLE_PARTR */ + done_sym = jl_symbol("done"); failed_sym = jl_symbol("failed"); runnable_sym = jl_symbol("runnable"); } -static void NOINLINE JL_NORETURN start_task(void) +#ifndef JULIA_ENABLE_PARTR +void NOINLINE JL_NORETURN start_task(void) { // this runs the first time we switch to a task jl_ptls_t ptls = jl_get_ptls_states(); @@ -558,7 +624,7 @@ static void NOINLINE JL_NORETURN start_task(void) if (t->exception != jl_nothing) { record_backtrace(ptls); jl_push_excstack(&t->excstack, t->exception, - ptls->bt_data, ptls->bt_size); + ptls->bt_data, ptls->bt_size); res = t->exception; } else { @@ -569,7 +635,7 @@ static void NOINLINE JL_NORETURN start_task(void) } JL_TIMING(ROOT); ptls->world_age = jl_world_counter; - res = jl_apply(&t->start, 1); + res = jl_apply(&t->taskentry, 1); } JL_CATCH { res = jl_current_exception(); @@ -583,7 +649,7 @@ skip_pop_exception:; gc_debug_critical_error(); abort(); } - +#endif /* JULIA_ENABLE_PARTR */ #if defined(JL_HAVE_UCONTEXT) #ifdef _OS_WINDOWS_ @@ -592,7 +658,7 @@ skip_pop_exception:; #define swapcontext jl_swapcontext #define makecontext jl_makecontext #endif -static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) +char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) { #ifndef _OS_WINDOWS_ int r = getcontext(t); @@ -612,22 +678,22 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) #endif return (char*)stk; } -static void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) +void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { if (lastt) swapcontext(lastt, t); else setcontext(t); } -static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) +void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { swapcontext(lastt, t); } -static void jl_set_fiber(jl_ucontext_t *t) +void jl_set_fiber(jl_ucontext_t *t) { setcontext(t); } -static void jl_init_basefiber(size_t ssize) +void jl_init_basefiber(size_t ssize) { jl_ptls_t ptls = jl_get_ptls_states(); char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL); @@ -637,7 +703,7 @@ static void jl_init_basefiber(size_t ssize) #endif #if defined(JL_HAVE_UNW_CONTEXT) -static void start_basefiber(void) +void start_basefiber(void) { jl_ptls_t ptls = jl_get_ptls_states(); if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0)) @@ -656,7 +722,7 @@ static void start_basefiber(void) #else #error please define how to simulate a CALL on this platform #endif -static char *jl_alloc_fiber(unw_context_t *t, size_t *ssize, jl_task_t *owner) +char *jl_alloc_fiber(unw_context_t *t, size_t *ssize, jl_task_t *owner) { char *stkbuf = (char*)jl_malloc_stack(ssize, owner); if (stkbuf == NULL) @@ -679,23 +745,23 @@ static char *jl_alloc_fiber(unw_context_t *t, size_t *ssize, jl_task_t *owner) } return stkbuf; } -static void jl_start_fiber(unw_context_t *lastt, unw_context_t *t) +void jl_start_fiber(unw_context_t *lastt, unw_context_t *t) { 
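+    /* save the outgoing context (if any), then jump through
+       jl_basecursor into the new fiber (does not return) */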
if (lastt && jl_setjmp(lastt->uc_mcontext, 0)) return; unw_resume(&jl_basecursor); // (doesn't return) } -static void jl_swap_fiber(unw_context_t *lastt, unw_context_t *t) +void jl_swap_fiber(unw_context_t *lastt, unw_context_t *t) { if (jl_setjmp(lastt->uc_mcontext, 0)) return; jl_longjmp(t->uc_mcontext, 1); // (doesn't return) } -static void jl_set_fiber(unw_context_t *t) +void jl_set_fiber(unw_context_t *t) { jl_longjmp(t->uc_mcontext, 1); } -static void jl_init_basefiber(size_t ssize) +void jl_init_basefiber(size_t ssize) { int r = unw_getcontext(&ptls->base_ctx); if (r != 0) @@ -714,7 +780,7 @@ static void jl_init_basefiber(size_t ssize) #endif #if defined(JL_HAVE_ASM) -static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) +char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) { char *stkbuf = (char*)jl_malloc_stack(ssize, owner); if (stkbuf == NULL) @@ -723,7 +789,7 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber return stkbuf; } -static void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) +void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { if (lastt && jl_setjmp(lastt->uc_mcontext, 0)) return; @@ -770,17 +836,17 @@ static void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) #endif __builtin_unreachable(); } -static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) +void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { if (jl_setjmp(lastt->uc_mcontext, 0)) return; jl_longjmp(t->uc_mcontext, 1); // (doesn't return) } -static void jl_set_fiber(jl_ucontext_t *t) +void jl_set_fiber(jl_ucontext_t *t) { jl_longjmp(t->uc_mcontext, 1); } -static void jl_init_basefiber(size_t ssize) +void jl_init_basefiber(size_t ssize) { #ifdef COPY_STACKS jl_ptls_t ptls = jl_get_ptls_states(); @@ -792,13 +858,13 @@ static void jl_init_basefiber(size_t ssize) #endif #if defined(JL_HAVE_SIGALTSTACK) -static void start_basefiber(void) +void start_basefiber(void) { jl_ptls_t ptls = jl_get_ptls_states(); if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0)) start_task(); } -static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) +char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) { stack_t uc_stack, osigstk; struct sigaction sa, osa; @@ -852,23 +918,23 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) memcpy(&ptls->base_ctx, &base_ctx, sizeof(ptls->base_ctx)); return (char*)stk; } -static void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) +void jl_start_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { if (lastt && jl_setjmp(lastt->uc_mcontext, 0)) return; jl_longjmp(t->uc_mcontext, 1); // (doesn't return) } -static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) +void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { if (jl_setjmp(lastt->uc_mcontext, 0)) return; jl_longjmp(t->uc_mcontext, 1); // (doesn't return) } -static void jl_set_fiber(jl_ucontext_t *t) +void jl_set_fiber(jl_ucontext_t *t) { jl_longjmp(t->uc_mcontext, 1); } -static void jl_init_basefiber(size_t ssize) +void jl_init_basefiber(size_t ssize) { #ifdef COPY_STACKS jl_ptls_t ptls = jl_get_ptls_states(); @@ -898,18 +964,31 @@ void jl_init_root_task(void *stack_lo, void *stack_hi) ptls->current_task->stkbuf = stack; ptls->current_task->bufsz = ssize; ptls->current_task->started = 1; - ptls->current_task->tls = jl_nothing; +#ifdef JULIA_ENABLE_PARTR + 
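+    /* partr-specific bookkeeping for the root task */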
ptls->current_task->redentry = NULL; + ptls->current_task->cq.head = NULL; + JL_MUTEX_INIT(&ptls->current_task->cq.lock); + ptls->current_task->next = NULL; + ptls->current_task->parent = ptls->current_task; + ptls->current_task->redresult = jl_nothing; + ptls->current_task->arr = NULL; + ptls->current_task->red = NULL; + ptls->current_task->sticky_tid = -1; + ptls->current_task->grain_num = -1; +#else + ptls->current_task->donenotify = jl_nothing; +#endif + ptls->current_task->current_tid = ptls->tid; + ptls->current_task->storage = jl_nothing; + ptls->current_task->taskentry = NULL; ptls->current_task->state = runnable_sym; - ptls->current_task->start = NULL; ptls->current_task->result = jl_nothing; - ptls->current_task->donenotify = jl_nothing; ptls->current_task->exception = jl_nothing; ptls->current_task->backtrace = jl_nothing; ptls->current_task->logstate = jl_nothing; ptls->current_task->eh = NULL; ptls->current_task->gcstack = NULL; ptls->current_task->excstack = NULL; - ptls->current_task->tid = ptls->tid; #ifdef JULIA_ENABLE_THREADING arraylist_new(&ptls->current_task->locks, 0); #endif diff --git a/src/threadgroup.c b/src/threadgroup.c deleted file mode 100644 index f2158423acc0e..0000000000000 --- a/src/threadgroup.c +++ /dev/null @@ -1,206 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -/* - threading infrastructure - . threadgroup abstraction - . fork/join/barrier -*/ - -#include -#include - -#include "julia.h" -#include "julia_internal.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "options.h" -#include "threadgroup.h" - -int ti_threadgroup_create(uint8_t num_sockets, uint8_t num_cores, - uint8_t num_threads_per_core, - ti_threadgroup_t **newtg) -{ - int i; - ti_threadgroup_t *tg; - int num_threads = num_sockets * num_cores * num_threads_per_core; - char *cp; - - tg = (ti_threadgroup_t*)jl_malloc_aligned(sizeof(ti_threadgroup_t), 64); - tg->tid_map = (int16_t*)jl_malloc_aligned(num_threads * sizeof(int16_t), 64); - for (i = 0; i < num_threads; ++i) - tg->tid_map[i] = -1; - tg->num_sockets = num_sockets; - tg->num_cores = num_cores; - tg->num_threads_per_core = num_threads_per_core; - tg->num_threads = num_threads; - tg->added_threads = 0; - tg->thread_sense = (ti_thread_sense_t**) - jl_malloc_aligned(num_threads * sizeof(ti_thread_sense_t*), 64); - for (i = 0; i < num_threads; i++) - tg->thread_sense[i] = NULL; - jl_atomic_store_release(&tg->group_sense, 0); - - uv_mutex_init(&tg->alarm_lock); - uv_cond_init(&tg->alarm); - - tg->sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD; - cp = getenv(THREAD_SLEEP_THRESHOLD_NAME); - if (cp) { - if (!strncasecmp(cp, "infinite", 8)) - tg->sleep_threshold = 0; - else - tg->sleep_threshold = (uint64_t)strtol(cp, NULL, 10); - } - - *newtg = tg; - return 0; -} - -int ti_threadgroup_addthread(ti_threadgroup_t *tg, int16_t ext_tid, - int16_t *tgtid) -{ - if (ext_tid < 0 || ext_tid >= tg->num_threads) - return -1; - if (tg->tid_map[ext_tid] != -1) - return -2; - if (tg->added_threads == tg->num_threads) - return -3; - - tg->tid_map[ext_tid] = tg->added_threads++; - if (tgtid) *tgtid = tg->tid_map[ext_tid]; - - return 0; -} - -int ti_threadgroup_initthread(ti_threadgroup_t *tg, int16_t ext_tid) -{ - ti_thread_sense_t *ts; - - if (ext_tid < 0 || ext_tid >= tg->num_threads) - return -1; - if (tg->thread_sense[tg->tid_map[ext_tid]] != NULL) - return -2; - if (tg->num_threads == 0) - return -3; - - ts = (ti_thread_sense_t*)jl_malloc_aligned(sizeof(ti_thread_sense_t), 64); - ts->sense = 1; - 
tg->thread_sense[tg->tid_map[ext_tid]] = ts; - - return 0; -} - -int ti_threadgroup_member(ti_threadgroup_t *tg, int16_t ext_tid, int16_t *tgtid) -{ - if (ext_tid < 0 || ext_tid >= tg->num_threads) - return -1; - if (tg == NULL) { - if (tgtid) *tgtid = -1; - return -2; - } - if (tg->tid_map[ext_tid] == -1) { - if (tgtid) *tgtid = -1; - return -3; - } - if (tgtid) *tgtid = tg->tid_map[ext_tid]; - - return 0; -} - -int ti_threadgroup_size(ti_threadgroup_t *tg, int16_t *tgsize) -{ - *tgsize = tg->num_threads; - return 0; -} - -int ti_threadgroup_fork(ti_threadgroup_t *tg, int16_t ext_tid, void **bcast_val, int init) -{ - uint8_t *group_sense = &tg->group_sense; - int16_t tid = tg->tid_map[ext_tid]; - int thread_sense = tg->thread_sense[tid]->sense; - if (tid == 0) { - tg->envelope = bcast_val ? *bcast_val : NULL; - // synchronize `tg->envelope` and `tg->group_sense` - jl_atomic_store_release(group_sense, thread_sense); - - // if it's possible that threads are sleeping, signal them - if (tg->sleep_threshold) { - uv_mutex_lock(&tg->alarm_lock); - uv_cond_broadcast(&tg->alarm); - uv_mutex_unlock(&tg->alarm_lock); - } - } - else { - // spin up to threshold ns (count sheep), then sleep - uint64_t spin_ns; - uint64_t spin_start = 0; - // synchronize `tg->envelope` and `tg->group_sense` - while (jl_atomic_load_acquire(group_sense) != thread_sense) { - if (tg->sleep_threshold) { - if (!spin_start) { - // Lazily initialize spin_start since uv_hrtime is expensive - spin_start = uv_hrtime(); - continue; - } - spin_ns = uv_hrtime() - spin_start; - // In case uv_hrtime is not monotonic, we'll sleep earlier - if (init || spin_ns >= tg->sleep_threshold) { - uv_mutex_lock(&tg->alarm_lock); - if (jl_atomic_load_acquire(group_sense) != thread_sense) { - uv_cond_wait(&tg->alarm, &tg->alarm_lock); - } - uv_mutex_unlock(&tg->alarm_lock); - spin_start = 0; - init = 0; - continue; - } - } - jl_cpu_pause(); - } - if (bcast_val) - *bcast_val = tg->envelope; - } - - return 0; -} - -int ti_threadgroup_join(ti_threadgroup_t *tg, int16_t ext_tid) -{ - int *p_thread_sense = &tg->thread_sense[tg->tid_map[ext_tid]]->sense; - jl_atomic_store_release(p_thread_sense, !*p_thread_sense); - if (tg->tid_map[ext_tid] == 0) { - jl_ptls_t ptls = jl_get_ptls_states(); - int8_t group_sense = tg->group_sense; - for (int i = 1; i < tg->num_threads; ++i) { - while (jl_atomic_load_acquire(&tg->thread_sense[i]->sense) == group_sense) { - jl_gc_safepoint_(ptls); - jl_cpu_pause(); - } - } - } - - return 0; -} - -int ti_threadgroup_destroy(ti_threadgroup_t *tg) -{ - int i; - - uv_mutex_destroy(&tg->alarm_lock); - uv_cond_destroy(&tg->alarm); - - for (i = 0; i < tg->num_threads; i++) - jl_free_aligned(tg->thread_sense[i]); - jl_free_aligned(tg->thread_sense); - jl_free_aligned(tg->tid_map); - jl_free_aligned(tg); - - return 0; -} - -#ifdef __cplusplus -} -#endif diff --git a/src/threadgroup.h b/src/threadgroup.h deleted file mode 100644 index 82fc59785cd05..0000000000000 --- a/src/threadgroup.h +++ /dev/null @@ -1,44 +0,0 @@ -// This file is a part of Julia. 
License is MIT: https://julialang.org/license - -#ifndef JL_THREADGROUP_H -#define JL_THREADGROUP_H - -#include -#include "uv.h" - -// for the barrier -typedef struct { - int sense; -} ti_thread_sense_t; - -// thread group -typedef struct { - int16_t *tid_map, num_threads, added_threads; - uint8_t num_sockets, num_cores, num_threads_per_core; - - // fork/join/barrier - uint8_t group_sense; // Written only by master thread - ti_thread_sense_t **thread_sense; - void *envelope; - - // to let threads sleep - uv_mutex_t alarm_lock; - uv_cond_t alarm; - uint64_t sleep_threshold; -} ti_threadgroup_t; - -int ti_threadgroup_create(uint8_t num_sockets, uint8_t num_cores, - uint8_t num_threads_per_core, - ti_threadgroup_t **newtg); -int ti_threadgroup_addthread(ti_threadgroup_t *tg, int16_t ext_tid, - int16_t *tgtid); -int ti_threadgroup_initthread(ti_threadgroup_t *tg, int16_t ext_tid); -int ti_threadgroup_member(ti_threadgroup_t *tg, int16_t ext_tid, - int16_t *tgtid); -int ti_threadgroup_size(ti_threadgroup_t *tg, int16_t *tgsize); -int ti_threadgroup_fork(ti_threadgroup_t *tg, int16_t ext_tid, - void **bcast_val, int init); -int ti_threadgroup_join(ti_threadgroup_t *tg, int16_t ext_tid); -int ti_threadgroup_destroy(ti_threadgroup_t *tg); - -#endif /* THREADGROUP_H */ diff --git a/src/threading.c b/src/threading.c index 92c0eac214bc4..e13ee570ad312 100644 --- a/src/threading.c +++ b/src/threading.c @@ -1,18 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -/* - threading infrastructure - . thread and threadgroup creation - . thread function - . invoke Julia function from multiple threads - -TODO: - . fix interface to properly support thread groups - . add queue per thread for tasks - . add reduction; reduce values returned from thread function - . 
make code generation thread-safe and remove the lock -*/ - #include #include #include @@ -47,7 +34,6 @@ extern "C" { #endif -#include "threadgroup.h" #include "threading.h" // The tls_states buffer: @@ -240,9 +226,9 @@ JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) } #endif -// thread ID -JL_DLLEXPORT int jl_n_threads; // # threads we're actually using +JL_DLLEXPORT int jl_n_threads; jl_ptls_t *jl_all_tls_states; +uint64_t jl_thread_sleep_threshold; // return calling thread's ID // Also update the suspended_threads list in signals-mach when changing the @@ -253,10 +239,19 @@ JL_DLLEXPORT int16_t jl_threadid(void) return ptls->tid; } -static void ti_initthread(int16_t tid) +void jl_init_threadtls(int16_t tid) { jl_ptls_t ptls = jl_get_ptls_states(); -#ifndef _OS_WINDOWS_ +#ifdef _OS_WINDOWS_ + if (tid == 0) { + if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), + GetCurrentProcess(), &hMainThread, 0, + FALSE, DUPLICATE_SAME_ACCESS)) { + jl_printf(JL_STDERR, "WARNING: failed to access handle to main thread\n"); + hMainThread = INVALID_HANDLE_VALUE; + } + } +#else ptls->system_id = pthread_self(); #endif assert(ptls->world_age == 0); @@ -293,24 +288,12 @@ static void ti_initthread(int16_t tid) jl_all_tls_states[tid] = ptls; } -static void ti_init_master_thread(void) -{ -#ifdef _OS_WINDOWS_ - if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), - GetCurrentProcess(), &hMainThread, 0, - FALSE, DUPLICATE_SAME_ACCESS)) { - jl_printf(JL_STDERR, "WARNING: failed to access handle to main thread\n"); - hMainThread = INVALID_HANDLE_VALUE; - } -#endif - ti_initthread(0); -} - // all threads call this function to run user code -static jl_value_t *ti_run_fun(jl_callptr_t fptr, jl_method_instance_t *mfunc, +jl_value_t *jl_thread_run_fun(jl_callptr_t fptr, jl_method_instance_t *mfunc, jl_value_t **args, uint32_t nargs) { jl_ptls_t ptls = jl_get_ptls_states(); + jl_value_t *res = jl_nothing; JL_TRY { fptr(mfunc, args, nargs); } @@ -323,129 +306,22 @@ static jl_value_t *ti_run_fun(jl_callptr_t fptr, jl_method_instance_t *mfunc, if (!jl_setjmp(buf, 0)) { // Set up the safe_restore context so that the printing uses the thread safe version ptls->safe_restore = &buf; - jl_printf(JL_STDERR, "\nError thrown in threaded loop on thread %d: ", + jl_printf(JL_STDERR, "\nError thrown in thread %d: ", (int)ptls->tid); jl_static_show(JL_STDERR, jl_current_exception()); } ptls->safe_restore = old_buf; JL_UNLOCK_NOGC(&lock); } - return jl_nothing; + return res; } - // lock for code generation jl_mutex_t codegen_lock; jl_mutex_t typecache_lock; #ifdef JULIA_ENABLE_THREADING -// only one thread group for now -static ti_threadgroup_t *tgworld; - -// for broadcasting work to threads -static ti_threadwork_t threadwork; - -#if PROFILE_JL_THREADING -uint64_t prep_ns; -uint64_t *fork_ns; -uint64_t *user_ns; -uint64_t *join_ns; -#endif - -static uv_barrier_t thread_init_done; - -// thread function: used by all except the main thread -void ti_threadfun(void *arg) -{ - jl_ptls_t ptls = jl_get_ptls_states(); - ti_threadarg_t *ta = (ti_threadarg_t *)arg; - ti_threadgroup_t *tg; - ti_threadwork_t *work; - - // initialize this thread (set tid, create heap, etc.) 
- ti_initthread(ta->tid); - void *stack_lo, *stack_hi; - jl_init_stack_limits(0, &stack_lo, &stack_hi); - - // set up tasking - jl_init_root_task(stack_lo, stack_hi); - - // set the thread-local tid and wait for a thread group - while (jl_atomic_load_acquire(&ta->state) == TI_THREAD_INIT) - jl_cpu_pause(); - - // Assuming the functions called below doesn't contain unprotected GC - // critical region. In general, the following part of this function - // shouldn't call any managed code without calling `jl_gc_unsafe_enter` - // first. - jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0); - uv_barrier_wait(&thread_init_done); - // initialize this thread in the thread group - tg = ta->tg; - ti_threadgroup_initthread(tg, ptls->tid); - - // free the thread argument here - free(ta); - - int init = 1; - - // work loop - for (; ;) { -#if PROFILE_JL_THREADING - uint64_t tstart = uv_hrtime(); -#endif - - ti_threadgroup_fork(tg, ptls->tid, (void **)&work, init); - init = 0; - JL_GC_PROMISE_ROOTED(work); - -#if PROFILE_JL_THREADING - uint64_t tfork = uv_hrtime(); - fork_ns[ptls->tid] += tfork - tstart; -#endif - - if (work) { - if (work->command == TI_THREADWORK_DONE) { - break; - } - else if (work->command == TI_THREADWORK_RUN) { - // TODO: return value? reduction? - // TODO: before we support getting return value from - // the work, and after we have proper GC transition - // support in the codegen and runtime we don't need to - // enter GC unsafe region when starting the work. - int8_t gc_state = jl_gc_unsafe_enter(ptls); - // This is probably always NULL for now - size_t last_age = ptls->world_age; - ptls->world_age = work->world_age; - ti_run_fun(work->fptr, work->mfunc, work->args, work->nargs); - ptls->world_age = last_age; - jl_gc_unsafe_leave(ptls, gc_state); - } - } - -#if PROFILE_JL_THREADING - uint64_t tuser = uv_hrtime(); - user_ns[ptls->tid] += tuser - tfork; -#endif - - ti_threadgroup_join(tg, ptls->tid); - -#if PROFILE_JL_THREADING - uint64_t tjoin = uv_hrtime(); - join_ns[ptls->tid] += tjoin - tuser; -#endif - - // TODO: - // nowait should skip the join, but confirm that fork is reentrant - } -} - -#if PROFILE_JL_THREADING -void ti_reset_timings(void); -#endif - ssize_t jl_tls_offset = -1; #ifdef JL_ELF_TLS_VARIANT @@ -556,36 +432,38 @@ void jl_init_threading(void) int max_threads = jl_cpu_threads(); jl_n_threads = JULIA_NUM_THREADS; cp = getenv(NUM_THREADS_NAME); - if (cp) { + if (cp) jl_n_threads = (uint64_t)strtol(cp, NULL, 10); - } if (jl_n_threads > max_threads) jl_n_threads = max_threads; if (jl_n_threads <= 0) jl_n_threads = 1; - jl_all_tls_states = (jl_ptls_t*)malloc(jl_n_threads * sizeof(void*)); + // thread sleep threshold + jl_thread_sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD; + cp = getenv(THREAD_SLEEP_THRESHOLD_NAME); + if (cp) { + if (!strncasecmp(cp, "infinite", 8)) + jl_thread_sleep_threshold = 0; + else + jl_thread_sleep_threshold = (uint64_t)strtol(cp, NULL, 10); + } -#if PROFILE_JL_THREADING - // set up space for profiling information - fork_ns = (uint64_t*)jl_malloc_aligned(jl_n_threads * sizeof(uint64_t), 64); - user_ns = (uint64_t*)jl_malloc_aligned(jl_n_threads * sizeof(uint64_t), 64); - join_ns = (uint64_t*)jl_malloc_aligned(jl_n_threads * sizeof(uint64_t), 64); - ti_reset_timings(); -#endif + jl_all_tls_states = (jl_ptls_t*)malloc(jl_n_threads * sizeof(void*)); - // initialize this master thread (set tid, create heap, etc.) - ti_init_master_thread(); + // initialize this thread (set tid, create heap, etc.) 
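+    // (the master thread is always tid 0)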
+ jl_init_threadtls(0); } +static uv_barrier_t thread_init_done; + void jl_start_threads(void) { - jl_ptls_t ptls = jl_get_ptls_states(); int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; uv_thread_t uvtid; - ti_threadarg_t **targs; + jl_threadarg_t **targs; if (cpumasksize < jl_n_threads) // also handles error case cpumasksize = jl_n_threads; char *mask = (char*)alloca(cpumasksize); @@ -607,19 +485,23 @@ void jl_start_threads(void) mask[0] = 0; } + // initialize threading infrastructure + jl_init_threadinginfra(); + // The analyzer doesn't know jl_n_threads doesn't change, help it size_t nthreads = jl_n_threads; // create threads - targs = (ti_threadarg_t **)malloc((nthreads - 1) * sizeof (ti_threadarg_t *)); + targs = (jl_threadarg_t **)malloc((nthreads - 1) * sizeof (jl_threadarg_t *)); uv_barrier_init(&thread_init_done, nthreads); for (i = 0; i < nthreads - 1; ++i) { - targs[i] = (ti_threadarg_t *)malloc(sizeof (ti_threadarg_t)); - targs[i]->state = TI_THREAD_INIT; + targs[i] = (jl_threadarg_t *)malloc(sizeof (jl_threadarg_t)); targs[i]->tid = i + 1; - uv_thread_create(&uvtid, ti_threadfun, targs[i]); + targs[i]->barrier = &thread_init_done; + jl_init_threadarg(targs[i]); + uv_thread_create(&uvtid, jl_threadfun, targs[i]); if (exclusive) { mask[i + 1] = 1; uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize); @@ -628,17 +510,7 @@ void jl_start_threads(void) uv_thread_detach(&uvtid); } - // set up the world thread group - ti_threadgroup_create(1, nthreads, 1, &tgworld); - for (i = 0; i < nthreads; ++i) - ti_threadgroup_addthread(tgworld, i, NULL); - ti_threadgroup_initthread(tgworld, ptls->tid); - - // give the threads the world thread group; they will block waiting for fork - for (i = 0; i < nthreads - 1; ++i) { - targs[i]->tg = tgworld; - jl_atomic_store_release(&targs[i]->state, TI_THREAD_WORK); - } + jl_init_started_threads(targs); uv_barrier_wait(&thread_init_done); @@ -646,155 +518,17 @@ void jl_start_threads(void) free(targs); } -// TODO: is this needed? where/when/how to call it? -void jl_shutdown_threading(void) -{ - jl_ptls_t ptls = jl_get_ptls_states(); - // stop the spinning threads by sending them a command - ti_threadwork_t *work = &threadwork; - - work->command = TI_THREADWORK_DONE; - ti_threadgroup_fork(tgworld, ptls->tid, (void **)&work, 0); - - sleep(1); - - // destroy the world thread group - ti_threadgroup_destroy(tgworld); - -#if PROFILE_JL_THREADING - jl_free_aligned(join_ns); - jl_free_aligned(user_ns); - jl_free_aligned(fork_ns); - fork_ns = user_ns = join_ns = NULL; -#endif -} - -// interface to user code: specialize and compile the user thread function -// and run it in all threads -JL_DLLEXPORT jl_value_t *jl_threading_run(jl_value_t *_args) -{ - jl_ptls_t ptls = jl_get_ptls_states(); - // GC safe -#if PROFILE_JL_THREADING - uint64_t tstart = uv_hrtime(); -#endif - uint32_t nargs; - jl_value_t **args; - if (!jl_is_svec(_args)) { - nargs = 1; - args = &_args; - } - else { - nargs = jl_svec_len(_args); - args = jl_svec_data(_args); - } - - int8_t gc_state = jl_gc_unsafe_enter(ptls); - - size_t world = jl_get_ptls_states()->world_age; - threadwork.command = TI_THREADWORK_RUN; - threadwork.mfunc = jl_lookup_generic(args, nargs, - jl_int32hash_fast(jl_return_address()), world); - // Ignore constant return value for now. 
- threadwork.fptr = jl_compile_method_internal(&threadwork.mfunc, world); - if (threadwork.fptr == jl_fptr_const_return) - return jl_nothing; - threadwork.args = args; - threadwork.nargs = nargs; - threadwork.ret = jl_nothing; - threadwork.world_age = world; - -#if PROFILE_JL_THREADING - uint64_t tcompile = uv_hrtime(); - prep_ns += (tcompile - tstart); -#endif - - // fork the world thread group - ti_threadwork_t *tw = &threadwork; - ti_threadgroup_fork(tgworld, ptls->tid, (void **)&tw, 0); - -#if PROFILE_JL_THREADING - uint64_t tfork = uv_hrtime(); - fork_ns[ptls->tid] += (tfork - tcompile); -#endif - - // this thread must do work too (TODO: reduction?) - JL_GC_PROMISE_ROOTED(threadwork.mfunc); - tw->ret = ti_run_fun(threadwork.fptr, threadwork.mfunc, args, nargs); - -#if PROFILE_JL_THREADING - uint64_t trun = uv_hrtime(); - user_ns[ptls->tid] += (trun - tfork); -#endif - - // wait for completion (TODO: nowait?) - ti_threadgroup_join(tgworld, ptls->tid); - -#if PROFILE_JL_THREADING - uint64_t tjoin = uv_hrtime(); - join_ns[ptls->tid] += (tjoin - trun); -#endif - - jl_gc_unsafe_leave(ptls, gc_state); - - return tw->ret; -} - -#if PROFILE_JL_THREADING - -void ti_reset_timings(void) -{ - int i; - prep_ns = 0; - for (i = 0; i < jl_n_threads; i++) - fork_ns[i] = user_ns[i] = join_ns[i] = 0; -} - -void ti_timings(uint64_t *times, uint64_t *min, uint64_t *max, uint64_t *avg) -{ - int i; - *min = UINT64_MAX; - *max = *avg = 0; - for (i = 0; i < jl_n_threads; i++) { - if (times[i] < *min) - *min = times[i]; - if (times[i] > *max) - *max = times[i]; - *avg += times[i]; - } - *avg /= jl_n_threads; -} - -#define NS_TO_SECS(t) ((t) / (double)1e9) - -JL_DLLEXPORT void jl_threading_profile(void) -{ - if (!fork_ns) return; - - printf("\nti profile:\n"); - printf("prep: %g (%" PRIu64 ")\n", NS_TO_SECS(prep_ns), prep_ns); - - uint64_t min, max, avg; - ti_timings(fork_ns, &min, &max, &avg); - printf("fork: %g (%g - %g)\n", NS_TO_SECS(min), NS_TO_SECS(max), - NS_TO_SECS(avg)); - ti_timings(user_ns, &min, &max, &avg); - printf("user: %g (%g - %g)\n", NS_TO_SECS(min), NS_TO_SECS(max), - NS_TO_SECS(avg)); - ti_timings(join_ns, &min, &max, &avg); - printf("join: %g (%g - %g)\n", NS_TO_SECS(min), NS_TO_SECS(max), - NS_TO_SECS(avg)); -} - -#else //!PROFILE_JL_THREADING +#else // !JULIA_ENABLE_THREADING -JL_DLLEXPORT void jl_threading_profile(void) +void jl_init_threading(void) { + static jl_ptls_t _jl_all_tls_states; + jl_all_tls_states = &_jl_all_tls_states; + jl_n_threads = 1; + jl_init_threadtls(0); } -#endif //!PROFILE_JL_THREADING - -#else // !JULIA_ENABLE_THREADING +void jl_start_threads(void) { } JL_DLLEXPORT jl_value_t *jl_threading_run(jl_value_t *_args) { @@ -815,19 +549,9 @@ JL_DLLEXPORT jl_value_t *jl_threading_run(jl_value_t *_args) jl_callptr_t fptr = jl_compile_method_internal(&mfunc, world); if (fptr == jl_fptr_const_return) return jl_nothing; - return ti_run_fun(fptr, mfunc, args, nargs); -} - -void jl_init_threading(void) -{ - static jl_ptls_t _jl_all_tls_states; - jl_all_tls_states = &_jl_all_tls_states; - jl_n_threads = 1; - ti_init_master_thread(); + return jl_thread_run_fun(fptr, mfunc, args, nargs); } -void jl_start_threads(void) { } - #endif // !JULIA_ENABLE_THREADING // Make gc alignment available for threading diff --git a/src/threading.h b/src/threading.h index 8c812ca3c2676..a2b4501a56272 100644 --- a/src/threading.h +++ b/src/threading.h @@ -8,50 +8,40 @@ extern "C" { #endif -#include "threadgroup.h" #include "julia.h" #define PROFILE_JL_THREADING 0 -// thread ID -extern jl_ptls_t 
*jl_all_tls_states; -extern JL_DLLEXPORT int jl_n_threads; // # threads we're actually using - -// thread state -enum { - TI_THREAD_INIT, - TI_THREAD_WORK -}; - -// passed to thread function -typedef struct { - int16_t volatile state; - int16_t tid; - ti_threadgroup_t *tg; -} ti_threadarg_t; - -// commands to thread function -enum { - TI_THREADWORK_DONE, - TI_THREADWORK_RUN -}; - -// work command to thread function -typedef struct { - uint8_t command; - jl_method_instance_t *mfunc; - jl_callptr_t fptr; - jl_value_t **args; - uint32_t nargs; - jl_value_t *ret; - size_t world_age; -} ti_threadwork_t; - -// thread function -void ti_threadfun(void *arg); - -// helpers for thread function -jl_value_t *ti_runthread(jl_function_t *f, jl_svec_t *args, size_t nargs); +extern jl_ptls_t *jl_all_tls_states; /* thread local storage */ +extern JL_DLLEXPORT int jl_n_threads; /* # threads we're actually using */ + +typedef struct _jl_threadarg_t { + int16_t tid; + uv_barrier_t *barrier; + void *arg; +} jl_threadarg_t; + +// each thread must initialize its TLS +void jl_init_threadtls(int16_t tid); + +// generic helper for a thread to run a function +jl_value_t *jl_thread_run_fun(jl_callptr_t fptr, jl_method_instance_t *mfunc, + jl_value_t **args, uint32_t nargs); + +// provided by a threading infrastructure +void jl_init_threadinginfra(void); +void jl_init_threadarg(jl_threadarg_t *targ); +void jl_init_started_threads(jl_threadarg_t **targs); +void jl_threadfun(void *arg); + +// interfaces defined by threading infrastructures +#ifdef JULIA_ENABLE_FORKJOIN_TI +#include "forkjoin-ti.h" +#else +#ifdef JULIA_ENABLE_PARTR +#include "partr.h" +#endif +#endif #ifdef __cplusplus } diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl index b82d66e685af9..ad6b6052905b5 100644 --- a/stdlib/Distributed/test/distributed_exec.jl +++ b/stdlib/Distributed/test/distributed_exec.jl @@ -756,11 +756,9 @@ function f13168(n) val end let t = schedule(@task f13168(100)) - @test t.state == :queued + wait(t) @test_throws ErrorException schedule(t) - yield() @test t.state == :done - @test_throws ErrorException schedule(t) @test isa(fetch(t),Float64) end diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 484540ba1c547..86aec8c997c84 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -338,7 +338,7 @@ function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32) else t.events |= events if t.active[1] || t.active[2] - if isempty(t.notify.waitq) + if isempty(t.notify) # if we keep hearing about events when nobody appears to be listening, # stop the poll to save cycles t.active = (false, false) @@ -400,7 +400,7 @@ function start_watching(t::PollingFileWatcher) end function stop_watching(t::PollingFileWatcher) - if t.active && isempty(t.notify.waitq) + if t.active && isempty(t.notify) t.active = false uv_error("PollingFileWatcher (stop)", ccall(:uv_fs_poll_stop, Int32, (Ptr{Cvoid},), t.handle)) @@ -420,7 +420,7 @@ function start_watching(t::FileMonitor) end function stop_watching(t::FileMonitor) - if t.active && isempty(t.notify.waitq) + if t.active && isempty(t.notify) t.active = false uv_error("FileMonitor (stop)", ccall(:uv_fs_event_stop, Int32, (Ptr{Cvoid},), t.handle)) diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl index 80af4d8e4b2fc..2c0a7fcbbf2dc 100644 --- a/stdlib/FileWatching/test/runtests.jl +++ 
b/stdlib/FileWatching/test/runtests.jl @@ -31,7 +31,7 @@ function pfd_tst_reads(idx, intvl) global ready += 1 wait(ready_c) t_elapsed = @elapsed begin - start_evt2 = Condition() + start_evt2 = Threads.Event() evt2 = @async (notify(start_evt2); poll_fd(pipe_fds[idx][1], intvl; readable=true, writable=false)) wait(start_evt2); yield() # make sure the async poll_fd is pumping events evt = poll_fd(pipe_fds[idx][1], intvl; readable=true, writable=false) @@ -59,7 +59,7 @@ function pfd_tst_timeout(idx, intvl) global ready += 1 wait(ready_c) t_elapsed = @elapsed begin - start_evt2 = Condition() + start_evt2 = Threads.Event() evt2 = @async (notify(start_evt2); poll_fd(pipe_fds[idx][1], intvl; readable=true, writable=false)) wait(start_evt2); yield() # make sure the async poll_fd is pumping events evt = poll_fd(pipe_fds[idx][1], intvl; readable=true, writable=false) @@ -384,7 +384,7 @@ mv(file * "~", file) let changes = [] while true let c - timeout = Sys.iswindows() ? 0.1 : 0.0 + timeout = 0.1 @test @elapsed(c = watch_folder(dir, timeout)) < 0.5 push!(changes, c) (c.second::FileWatching.FileEvent).timedout && break diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl index 59e4f2dae1088..c484d41671a01 100644 --- a/stdlib/Sockets/src/addrinfo.jl +++ b/stdlib/Sockets/src/addrinfo.jl @@ -16,7 +16,7 @@ function uv_getaddrinfocb(req::Ptr{Cvoid}, status::Cint, addrinfo::Ptr{Cvoid}) t = unsafe_pointer_to_objref(data)::Task uv_req_set_data(req, C_NULL) if status != 0 || addrinfo == C_NULL - schedule(t, _UVError("getaddrinfocb", status)) + Base.schedule(t, _UVError("getaddrinfocb", status)) else freeaddrinfo = addrinfo addrs = IPAddr[] @@ -33,7 +33,7 @@ function uv_getaddrinfocb(req::Ptr{Cvoid}, status::Cint, addrinfo::Ptr{Cvoid}) addrinfo = ccall(:jl_next_from_addrinfo, Ptr{Cvoid}, (Ptr{Cvoid},), addrinfo) end ccall(:uv_freeaddrinfo, Cvoid, (Ptr{Cvoid},), freeaddrinfo) - schedule(t, addrs) + Base.schedule(t, addrs) end else # no owner for this req, safe to just free it @@ -129,9 +129,9 @@ function uv_getnameinfocb(req::Ptr{Cvoid}, status::Cint, hostname::Cstring, serv t = unsafe_pointer_to_objref(data)::Task uv_req_set_data(req, C_NULL) if status != 0 - schedule(t, _UVError("getnameinfocb", status)) + Base.schedule(t, _UVError("getnameinfocb", status)) else - schedule(t, unsafe_string(hostname)) + Base.schedule(t, unsafe_string(hostname)) end else # no owner for this req, safe to just free it diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl index 6065debb2b8b7..0976d1e05c446 100644 --- a/stdlib/Sockets/test/runtests.jl +++ b/stdlib/Sockets/test/runtests.jl @@ -139,7 +139,7 @@ defaultport = rand(2000:4000) mktempdir() do tmpdir socketname = Sys.iswindows() ? 
("\\\\.\\pipe\\uv-test-" * randstring(6)) : joinpath(tmpdir, "socket") - c = Condition() + c = Threads.Event() tsk = @async begin s = listen(socketname) notify(c) @@ -415,7 +415,7 @@ end let addr = Sockets.InetAddr(ip"127.0.0.1", 4444) srv = listen(addr) - r = @async close(srv) + r = @async (sleep(1); close(srv)) @test_throws Base._UVError("accept", Base.UV_ECONNABORTED) accept(srv) fetch(r) end @@ -424,7 +424,7 @@ end srv = listen(addr) s = Sockets.TCPSocket() Sockets.connect!(s, addr) - r = @async close(s) + r = @async (sleep(1); close(s)) @test_throws Base._UVError("connect", Base.UV_ECANCELED) Sockets.wait_connected(s) fetch(r) end diff --git a/test/channels.jl b/test/channels.jl index a2dcf2c4ea2cf..c2ff40cb80b7e 100644 --- a/test/channels.jl +++ b/test/channels.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Random +using Random, Test @testset "various constructors" begin c = Channel(1) @@ -63,10 +63,11 @@ end push!(results, ii) end end - sleep(1.0) + sleep(0.2) for i in 1:5 put!(c,i) end + sleep(0.2) close(c) end @test sum(results) == 15 @@ -135,7 +136,7 @@ using Distributed # channeled_tasks for T in [Any, Int] - chnls, tasks = Base.channeled_tasks(2, (c1,c2)->(@assert take!(c1)==1; put!(c2,2)); ctypes=[T,T], csizes=[N,N]) + chnls, tasks = Base.channeled_tasks(2, (c1,c2)->(@assert take!(c1)==1; put!(c2,2); sleep(0.2)); ctypes=[T,T], csizes=[N,N]) put!(chnls[1], 1) @test take!(chnls[2]) == 2 @test_throws InvalidStateException wait(chnls[1]) @@ -248,6 +249,7 @@ end error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer") error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer") """ +#= TODO: there's no Workqueue any more # test for invalid state in Workqueue during yield t = @async nothing t.state = :invalid @@ -260,24 +262,16 @@ end close(newstderr[2]) end @test fetch(errstream) == "\nWARNING: Workqueue inconsistency detected: popfirst!(Workqueue).state != :queued\n" -end - -@testset "schedule_and_wait" begin - t = @async(nothing) - ct = current_task() - testobject = "testobject" - # note: there is a low probability this test could fail, due to receiving network traffic simultaneously - @test length(Base.Workqueue) == 1 - @test Base.schedule_and_wait(ct, 8) == 8 - @test isempty(Base.Workqueue) - @test Base.schedule_and_wait(ct, testobject) === testobject +=# end @testset "throwto" begin t = @task(nothing) ct = current_task() testerr = ErrorException("expected") - @async Base.throwto(t, testerr) + # TODO: throwto() is unimplemented + #@async Base.throwto(t, testerr) + @async schedule(t, testerr, error=true) @test try Base.wait(t) false @@ -286,26 +280,26 @@ end end === testerr end +#= TODO: these tests depend on task execution ordering and that makes no +# sense with threads! +=# @testset "Timer / AsyncCondition triggering and race #12719" begin tc = Ref(0) t = Timer(0) do t tc[] += 1 end - @test isopen(t) Base.process_events(false) - @test !isopen(t) - @test tc[] == 0 yield() + @test !isopen(t) @test tc[] == 1 tc = Ref(0) - t = Timer(0) do t + t = Timer(10) do t tc[] += 1 end @test isopen(t) close(t) @test !isopen(t) - sleep(0.1) @test tc[] == 0 tc = Ref(0) @@ -320,8 +314,10 @@ end @test tc[] == 0 yield() # consume event @test tc[] == 1 - sleep(0.1) # no further events - @test tc[] == 1 + # NOTE: this depended on the scheduler not calling process_events when there + # are tasks to run. Now, this is probabilistic. 
+    #sleep(0.1) # no further events
+    #@test tc[] == 1
     ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
     ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
     close(async)
diff --git a/test/file.jl b/test/file.jl
index 4bdb0a2ffb2ee..579dce536462d 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -974,7 +974,7 @@ cd(dirwalk) do
     @test files == ["file1", "file2"]
 
     rm(joinpath("sub_dir1"), recursive=true)
-    @test_throws SystemError take!(chnl_error) # throws an error because sub_dir1 do not exist
+    @test_throws SystemError collect(chnl_error) # throws an error because sub_dir1 does not exist
 
     root, dirs, files = take!(chnl_noerror)
     @test root == "."
diff --git a/test/misc.jl b/test/misc.jl
index defcba93c082a..cb4ca7a402af7 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -126,6 +126,7 @@ let c = Ref(0),
     yield()
     @test c[] == 1
     yield(t2)
+    wait(t2)
     @test c[] == 100
 end
 
diff --git a/test/read.jl b/test/read.jl
index 944b911748b63..4ae3d0070c7dd 100644
--- a/test/read.jl
+++ b/test/read.jl
@@ -559,8 +559,9 @@ let p = Pipe()
     t = @async read(p)
     @sync begin
         @async write(p, zeros(UInt16, 660_000))
+        order::UInt16 = 0
         for i = 1:typemax(UInt16)
-            @async write(p, UInt16(i))
+            @async (order+=1; write(p, order))
         end
         @async close(p.in)
     end
diff --git a/test/spawn.jl b/test/spawn.jl
index 0cfc23a8a02df..c166268f6976e 100644
--- a/test/spawn.jl
+++ b/test/spawn.jl
@@ -58,13 +58,13 @@ out = read(`$echocmd hello` & `$echocmd world`, String)
 Sys.isunix() && run(pipeline(yescmd, `head`, devnull))
 
 let a, p
-    a = Base.Condition()
+    a = Channel(0)
     t = @async begin
         p = run(pipeline(yescmd,devnull), wait=false)
-        Base.notify(a,p)
+        put!(a, p)
         @test !success(p)
     end
-    p = wait(a)
+    p = take!(a)
     kill(p)
     wait(t)
 end
diff --git a/test/threads.jl b/test/threads.jl
index 7b79b141b660b..ee2ca9d127ce6 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -369,7 +369,7 @@ using Dates
 for period in (0.06, Dates.Millisecond(60))
     let async = Base.AsyncCondition(), t
         c = Condition()
-        task = schedule(Task(function()
+        task = Base.schedule(Task(function()
             notify(c)
             wait(c)
             t = Timer(period)

From 6708d27df3e7bf2dd10c9e1473ba2d90a2862fd8 Mon Sep 17 00:00:00 2001
From: Jameson Nash
Date: Fri, 9 Nov 2018 16:23:11 -0500
Subject: [PATCH 2/4] revert invalid changes

---
 stdlib/Sockets/test/runtests.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl
index 0976d1e05c446..5d4d09e531bd5 100644
--- a/stdlib/Sockets/test/runtests.jl
+++ b/stdlib/Sockets/test/runtests.jl
@@ -415,7 +415,7 @@ end
 
 let addr = Sockets.InetAddr(ip"127.0.0.1", 4444)
     srv = listen(addr)
-    r = @async (sleep(1); close(srv))
+    r = @async close(srv)
     @test_throws Base._UVError("accept", Base.UV_ECONNABORTED) accept(srv)
     fetch(r)
 end
@@ -424,7 +424,7 @@ end
     srv = listen(addr)
     s = Sockets.TCPSocket()
     Sockets.connect!(s, addr)
-    r = @async (sleep(1); close(s))
+    r = @async close(s)
     @test_throws Base._UVError("connect", Base.UV_ECANCELED) Sockets.wait_connected(s)
     fetch(r)
 end

From 8714c98586aaa7e30522925713f2fbec6d883ffe Mon Sep 17 00:00:00 2001
From: Jeff Bezanson
Date: Wed, 5 Dec 2018 14:51:11 -0500
Subject: [PATCH 3/4] try enabling again

---
 Make.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Make.inc b/Make.inc
index 19e6265b65b3e..e89d6614dbe17 100644
--- a/Make.inc
+++ b/Make.inc
@@ -69,7 +69,7 @@ USEIFC ?= 0
 JULIA_THREADS := 1
 
 # Enable the parallel task runtime
-JULIA_PARTR ?= 0
+JULIA_PARTR ?= 1
ifeq ($(JULIA_THREADS), 0)
 JULIA_PARTR := 0
 endif

From 
70b4852f374cc21e29af0ba133e8924d6ae694b0 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 5 Dec 2018 21:54:31 +0000 Subject: [PATCH 4/4] fix behavior divergence with non-partr build --- src/partr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/partr.c b/src/partr.c index 8b8672eb56089..573971aa5c2bd 100644 --- a/src/partr.c +++ b/src/partr.c @@ -710,6 +710,8 @@ static int run_next(void) } jl_switchto(&task); + if (ptls->tid == 0) + jl_process_events(jl_global_event_loop()); JL_GC_POP(); return 1;
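
A recurring pattern in the test changes of this series is replacing `Condition()` with `Threads.Event()`. A bare `Condition` is edge-triggered: a `notify` that fires while no task is blocked in `wait` is simply lost. That was safe under the old cooperative single-threaded scheduler, but races once the notifying task can run concurrently on another thread. A minimal sketch of the pattern, assuming the level-triggered semantics of the `Threads.Event` type these patches adopt (variable names are illustrative):

```julia
# Level-triggered signaling: a notify() that happens before the wait()
# is not lost, regardless of which thread runs the task first.
started = Threads.Event()
t = @async begin
    notify(started)   # may execute before *or* after the wait() below
    # ... the work the test actually exercises ...
end
wait(started)         # returns immediately if already notified; a bare
                      # Condition would block forever under that ordering
wait(t)
```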
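
The `spawn.jl` change expresses the same handoff with an unbuffered channel: `put!` on a `Channel(0)` blocks until a matching `take!`, so the rendezvous succeeds no matter which side arrives first, whereas `notify(a, p)` on a `Condition` drops the value when the waiter has not yet blocked. Roughly, with a placeholder value standing in for the process object the real test hands over:

```julia
# Rendezvous through an unbuffered channel: the handed-off value cannot
# be dropped, because put! does not complete until a take! accepts it.
a = Channel(0)
t = @async put!(a, :some_result)   # blocks in put! until the take! below
p = take!(a)
wait(t)
@assert p === :some_result
```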
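
Several other edits (`distributed_exec.jl`, `misc.jl`, the Timer and AsyncCondition tests) drop assertions about transient scheduler state, such as `t.state == :queued` or event counts after a bare `yield()`. With a multithreaded runtime a scheduled task may start, or even finish, before the scheduling task looks at it, so only terminal states are reliable test targets. The surviving pattern, as a sketch:

```julia
using Test

# Assert only terminal task state: under partr a task may run as soon
# as it is scheduled, so :queued cannot be observed reliably.
t = schedule(Task(() -> 1 + 1))
wait(t)                                  # blocks until the task finishes
@test t.state == :done
@test fetch(t) == 2
@test_throws ErrorException schedule(t)  # rescheduling a finished task errors
```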