Skip to content
This repository has been archived by the owner on May 17, 2020. It is now read-only.

GPUifyLoops boundschecking on SubArray #87

Open
@ChrisRackauckas

Description

# In-place right-hand side: fills `du[1:3]` from `u[1:3]`.
# `p` and `t` are accepted but never read in the body.
# The constants 10, 28 and 8/3 are presumably the classic Lorenz-system
# parameters (suggested by the function name) — confirm if it matters.
function lorenz(du,u,p,t)
 # Bounds checks are elided for every index expression inside this block.
 # This is the variant the report describes as working.
 @inbounds begin
     # Numeric-literal coefficient: `10.0f0(...)` multiplies 10.0f0 by the parenthesised term.
     du[1] = 10.0f0(u[2]-u[1])
     du[2] = u[1]*(28.0f0-u[3]) - u[2]
     # `3f0` is a Float32 literal, so the 8/3 coefficient is a Float32.
     du[3] = u[1]*u[2] - (8/3f0)*u[3]
 end
 # Return `nothing` explicitly rather than the value of the last assignment.
 nothing
end
# Single initial state: a 3-element Float32 vector.
u0 = Float32[1.0;0.0;0.0]

using GPUifyLoops, CuArrays, CUDAnative
# Kernel body: applies `lorenz` to column `i` of `du` and `u` for each loop index `i`.
function f(du,u,p,t)
    # GPUifyLoops `@loop` with a two-part iteration spec: `1:size(u,2)` and
    # `CUDAnative.threadIdx().x`.
    # NOTE(review): presumably the range is used on the CPU and the thread index
    # on the GPU — confirm against the GPUifyLoops documentation.
    @loop for i in (1:size(u,2); CUDAnative.threadIdx().x)
        # `@views` turns the `[:,i]` slices into views instead of copies, so
        # `lorenz` writes into `du` itself. `@inbounds` is applied at this call site.
        @views @inbounds lorenz(du[:,i],u[:,i],p,t)
        nothing
    end
    nothing
end
# Host-side launcher: runs `f` through GPUifyLoops' `@launch` on the `CUDA()`
# device, requesting `size(u,2)` threads (one per column of `u`).
# No block count is passed here.
function _f(du,u,p,t)
    @launch CUDA() threads = size(u,2) f(du,u,p,t)
end

# Turn off scalar indexing of CuArrays so accidental element-wise access errors out.
CuArrays.allowscalar(false)
# 3×128 device matrix: 128 copies of `u0` concatenated as columns.
_u0 = CuArray(hcat([u0 for i in 1:128]...))

# `u` (input state) and `du` (output buffer) are independent copies of `_u0`.
u = copy(_u0)
du= copy(_u0)
# `p` and `t` are passed through to `lorenz`, which does not read them.
p = nothing
t = 0.0f0
# Launch the kernel; the report states this call succeeds for this variant.
_f(du,u,p,t)

This works, but removing the `@inbounds` block inside `lorenz`, like so:

# In-place right-hand side: fills `du[1:3]` from `u[1:3]`.
# `p` and `t` are accepted but never read in the body.
# This variant has no `@inbounds` of its own around the index expressions.
# It is the variant for which the report shows the GPU compilation error.
function lorenz(du,u,p,t)
 # Numeric-literal coefficient: `10.0f0(...)` multiplies 10.0f0 by the parenthesised term.
 du[1] = 10.0f0(u[2]-u[1])
 du[2] = u[1]*(28.0f0-u[3]) - u[2]
 # `3f0` is a Float32 literal, so the 8/3 coefficient is a Float32.
 du[3] = u[1]*u[2] - (8/3f0)*u[3]
 # Return `nothing` explicitly rather than the value of the last assignment.
 nothing
end
# Single initial state: a 3-element Float32 vector.
u0 = Float32[1.0;0.0;0.0]

using GPUifyLoops, CuArrays, CUDAnative
# Kernel body: applies `lorenz` to column `i` of `du` and `u` for each loop index `i`.
function f(du,u,p,t)
    # GPUifyLoops `@loop` with a two-part iteration spec: `1:size(u,2)` and
    # `CUDAnative.threadIdx().x`.
    # NOTE(review): presumably the range is used on the CPU and the thread index
    # on the GPU — confirm against the GPUifyLoops documentation.
    @loop for i in (1:size(u,2); CUDAnative.threadIdx().x)
        # `@views` turns the `[:,i]` slices into views instead of copies.
        # NOTE(review): the `@inbounds` here is only at the call site; the reported
        # error suggests it does not remove the bounds checks inside `lorenz`
        # on the resulting views — confirm.
        @views @inbounds lorenz(du[:,i],u[:,i],p,t)
        nothing
    end
    nothing
end
# Host-side launcher: runs `f` through GPUifyLoops' `@launch` on the `CUDA()`
# device, requesting `size(u,2)` threads (one per column of `u`).
# No block count is passed here.
function _f(du,u,p,t)
    @launch CUDA() threads = size(u,2) f(du,u,p,t)
end

# Turn off scalar indexing of CuArrays so accidental element-wise access errors out.
CuArrays.allowscalar(false)
# 3×128 device matrix: 128 copies of `u0` concatenated as columns.
_u0 = CuArray(hcat([u0 for i in 1:128]...))

# `u` (input state) and `du` (output buffer) are independent copies of `_u0`.
u = copy(_u0)
du= copy(_u0)
# `p` and `t` are passed through to `lorenz`, which does not read them.
p = nothing
t = 0.0f0
# Launch the kernel; for this variant the report shows an InvalidIRError at this call.
_f(du,u,p,t)

causes the following error:

julia> _f(du,u,p,t)
ERROR: InvalidIRError: compiling f(Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#397")),Nothing,Cassette.DisableHooks}, typeof(f), CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CuDeviceArray{Float32,2,CUDAnative.AS.Global}, Nothing, Float32) resulted in invalid LLVM IR
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] overdub at C:\Users\accou\.julia\packages\Cassette\xggAf\src\overdub.jl:508
 [2] multiple call sites at unknown:0
Reason: unsupported call to the Julia runtime (call to jl_f_getfield)
Stacktrace:
 [1] overdub at C:\Users\accou\.julia\packages\Cassette\xggAf\src\overdub.jl:508
 [2] multiple call sites at unknown:0
Stacktrace:
 [1] check_ir(::CUDAnative.CompilerJob, ::LLVM.Module) at C:\Users\accou\.julia\dev\CUDAnative\src\compiler\validation.jl:114
 [2] macro expansion at C:\Users\accou\.julia\packages\TimerOutputs\7zSea\src\TimerOutput.jl:216 [inlined]
 [3] #codegen#121(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at C:\Users\accou\.julia\dev\CUDAnative\src\compiler\driver.jl:186
 [4] #codegen at .\none:0 [inlined]
 [5] #compile#120(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at C:\Users\accou\.julia\dev\CUDAnative\src\compiler\driver.jl:47
 [6] #compile at C:\Users\accou\.julia\dev\CUDAnative\src\compiler\common.jl:0 [inlined]
 [7] #compile#119 at C:\Users\accou\.julia\dev\CUDAnative\src\compiler\driver.jl:28 [inlined]
 [8] #compile at .\none:0 [inlined] (repeats 2 times)
 [9] macro expansion at C:\Users\accou\.julia\dev\CUDAnative\src\execution.jl:388 [inlined]
 [10] #cufunction#161(::String, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(cufunction), ::typeof(Cassette.overdub), ::Type{Tuple{Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#397")),Nothing,Cassette.DisableHooks},typeof(f),CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},Nothing,Float32}}) at C:\Users\accou\.julia\dev\CUDAnative\src\execution.jl:356
 [11] (::getfield(CUDAnative, Symbol("#kw##cufunction")))(::NamedTuple{(:name,),Tuple{String}}, ::typeof(cufunction), ::Function, ::Type) at .\none:0
 [12] macro expansion at C:\Users\accou\.julia\packages\GPUifyLoops\HaVjN\src\GPUifyLoops.jl:125 [inlined]
 [13] macro expansion at .\gcutils.jl:87 [inlined]
 [14] #launch#46(::Base.Iterators.Pairs{Symbol,Int64,Tuple{Symbol},NamedTuple{(:threads,),Tuple{Int64}}}, ::typeof(GPUifyLoops.launch), ::CUDA, ::typeof(f), ::CuArray{Float32,2}, ::Vararg{Any,N} where N) at C:\Users\accou\.julia\packages\GPUifyLoops\HaVjN\src\GPUifyLoops.jl:121
 [15] #launch at .\none:0 [inlined]
 [16] macro expansion at C:\Users\accou\.julia\packages\GPUifyLoops\HaVjN\src\GPUifyLoops.jl:54 [inlined]
 [17] _f(::CuArray{Float32,2}, ::CuArray{Float32,2}, ::Nothing, ::Float32) at .\REPL[5]:2
 [18] top-level scope at REPL[12]:1

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions