Skip to content

Commit

Permalink
Some more of the endless stream of unrolling naming fixes...
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Apr 24, 2021
1 parent 905ee6c commit 8b5f640
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 138 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ SLEEFPirates = "0.6.14"
Static = "0.2"
ThreadingUtilities = "0.4.1"
UnPack = "1"
VectorizationBase = "0.19.29"
VectorizationBase = "0.19.30"
julia = "1.5"

[extras]
Expand Down
97 changes: 45 additions & 52 deletions src/codegen/lower_compute.jl
Original file line number Diff line number Diff line change
Expand Up @@ -274,31 +274,47 @@ end
q
end

function parent_op_name(
ls::LoopSet, parents_op::Vector{Operation}, n::Int, modsuffix, suffix_, parents_u₁syms, parents_u₂syms, u₁, opisvectorized, tiledouterreduction
function parent_op_name!(
q, ls::LoopSet, parents_op::Vector{Operation}, n::Int, modsuffix, suffix_, parents_u₁syms, parents_u₂syms, u₁, u₂max, u₂unrolledsym, op, tiledouterreduction
)
opp = parents_op[n]
parent = mangledvar(opp)
u = 0
if n == tiledouterreduction# && isvectorized(opp)
parent = Symbol(parent, modsuffix)
opp = parents_op[n]
opisvectorized = isvectorized(op)
parent = mangledvar(opp)
u = 0
if n == tiledouterreduction# && isvectorized(opp)
parent = Symbol(parent, modsuffix)
else
u = if !parents_u₁syms[n]
1
elseif isouterreduction(ls, opp) ≠ -1
getu₁full(ls, u₁)
else
if parents_u₂syms[n]
parent = Symbol(parent, suffix_)
end
u = if !parents_u₁syms[n]
1
elseif isouterreduction(ls, opp) ≠ -1
getu₁full(ls, u₁)
else
getu₁forreduct(ls, opp, u₁)
end
parent = Symbol(parent, '_', u)
getu₁forreduct(ls, opp, u₁)
end
if opisvectorized && isload(opp) && (!isvectorized(opp))
parent = Symbol(parent, "##broadcasted##")
if parents_u₂syms[n]
if isu₂unrolled(op) # u₂unrolledsym ||
parent = Symbol(parent, suffix_, '_', u)
elseif u₂max > 1
t = Expr(:tuple)
reduction = Expr(:call, GlobalRef(ArrayInterface, :reduce_tup), reduce_to_onevecunroll(instruction(opp)), t)
for u₂ ∈ 0:u₂max-1
push!(t.args, Symbol(parent, u₂, "__", u))
end
parent = gensym!(ls, parent)
push!(q.args, Expr(:(=), parent, reduction))
parent
else
# parent = Symbol(parent, '_', u)
parent = Symbol(parent, 0, "__", u)
end
else
parent = Symbol(parent, '_', u)
end
parent, u
end
if opisvectorized && isload(opp) && (!isvectorized(opp))
parent = Symbol(parent, "##broadcasted##")
end
parent, u
end
function getuouterreduct(ls::LoopSet, op::Operation, suffix)
us = ls.unrollspecification
Expand Down Expand Up @@ -413,9 +429,6 @@ function lower_compute!(
# parentsyms = [opp.variable for opp ∈ parents(op)]
Uiter = opunrolled ? u₁ - 1 : 0
isreduct = isreduction(op)
# if isreduct
# @show u₁unrolledsym, u₂unrolledsym, isu₁unrolled(op), isu₂unrolled(op) op
# end
if Base.libllvm_version < v"11.0.0" && (suffix ≠ -1) && isreduct# && (iszero(suffix) || (ls.unrollspecification.u₂ - 1 == suffix))
# if (length(reduceddependencies(op)) > 0) | (length(reducedchildren(op)) > 0)# && (iszero(suffix) || (ls.unrollspecification.u₂ - 1 == suffix))
# instrfid = findfirst(isequal(instr.instr), (:vfmadd, :vfnmadd, :vfmsub, :vfnmsub))
Expand Down Expand Up @@ -474,7 +487,6 @@ function lower_compute!(
# isouterreduct = true
isouterreduct = isanouterreduction(ls, op)
u₁reduct = isouterreduct ? getu₁full(ls, u₁) : getu₁forreduct(ls, op, u₁)
# @show isouterreduct, u₁reduct, op
dopartialmap = u₁reduct ≠ u₁
Symbol(mvar, '_', u₁reduct)
else
Expand All @@ -484,47 +496,35 @@ function lower_compute!(
Symbol(mvar, '_', 1)
end
selfopname = varsym
selfdep = 0
selfdep = 0
for n ∈ 1:nparents
opp = parents_op[n]
if isloopvalue(opp)
loopval = first(loopdependencies(opp))
add_loopvalue!(instrcall, loopval, ua, u₁)
elseif name(opp) === name(op)

selfdep = n
# @show mangledvar(op), name(opp), name(op)
if ((isvectorized(opp) && !isvectorized(op))) ||
(parents_u₁syms[n] != u₁unrolledsym) || (parents_u₂syms[n] != u₂unrolledsym)

selfopname, uₚ = parent_op_name(ls, parents_op, n, modsuffix, suffix_, parents_u₁syms, parents_u₂syms, u₁, opisvectorized, tiledouterreduction)
# if (uₚ ≠ 0) & (uₚ ≠ u₁)
# dopartialmap = true
# end
# @show selfopname, instr
push!(instrcall.args, selfopname)

selfopname, uₚ = parent_op_name!(q, ls, parents_op, n, modsuffix, suffix_, parents_u₁syms, parents_u₂syms, u₁, u₂max, u₂unrolledsym, op, tiledouterreduction)
push!(instrcall.args, selfopname)
else
push!(instrcall.args, varsym)
push!(instrcall.args, varsym)
end
elseif ((!isu₂unrolled(op)) & isu₂unrolled(opp)) && (parents_u₂syms[n] & (!u₂unrolledsym))
# elseif parents_u₂syms[n] & (!u₂unrolledsym)
# elseif parents_u₂syms[n] & (!u₂unrolledsym)
#&& (isouterreduction(ls, opp) != -1)
# this checks if the parent is u₂ unrolled but this operation is not, in which case we need to reduce it.
# @show op opp
reduced_u₂ = reduce_expr_u₂(mangledvar(opp), instruction(opp), ureduct(ls))
reducedparentname = gensym!(ls, "reducedop")
push!(q.args, Expr(:(=), reducedparentname, reduced_u₂))
reduced_u₂ = reduce_parent!(q, ls, op, opp, reducedparentname)
push!(instrcall.args, reduced_u₂)
else
parent, uₚ = parent_op_name(ls, parents_op, n, modsuffix, suffix_, parents_u₁syms, parents_u₂syms, u₁, opisvectorized, tiledouterreduction)
parent, uₚ = parent_op_name!(q, ls, parents_op, n, modsuffix, suffix_, parents_u₁syms, parents_u₂syms, u₁, u₂max, u₂unrolledsym, op, tiledouterreduction)
parent = reduce_parent!(q, ls, op, opp, parent)
# if instr.instr === :vfmadd_fast && tiledouterreduction > 0
# @show mvar, varsym, selfopname
# end
# @show opp
# if instr.instr === :identity
# @show isvectorized(op) isvectorized(opp)
# end
if (selfdep == 0) && search_tree(parents(opp), name(op))
selfdep = n
push!(instrcall.args, parent)
Expand All @@ -536,12 +536,6 @@ function lower_compute!(
end
end
selfdepreduce = ifelse(((!u₁unrolledsym) & isu₁unrolled(op)) & (u₁ > 1), selfdep, 0)
# if selfdep ≠ 0
# @show mvar
# # @show isu₁unrolled(op), u₁unrolledsym, u₁, u₂max
# # @show selfdep, selfdepreduce#, op
# end
# push!(q.args, (isreduct, u₁, (!u₁unrolledsym), isu₁unrolled(op), dopartialmap, varsym))
if maskreduct
ifelsefunc = if us.u₁ == 1
:ifelse # don't need to be fancy
Expand Down Expand Up @@ -575,7 +569,6 @@ function lower_compute!(
end
return
elseif selfdep != 0
# @show op, isouterreduct, maskreduct, instr
make_partial_map!(instrcall, selfopname, u₁, selfdepreduce)
end
elseif selfdep != 0 && (dopartialmap ||
Expand Down
161 changes: 89 additions & 72 deletions src/codegen/lower_constant.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,37 +48,41 @@ end
function lower_zero!(
q::Expr, op::Operation, ls::LoopSet, ua::UnrollArgs, zerotyp::NumberType = zerotype(ls, op)
)
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = ua
mvar, opu₁, opu₂ = variable_name_and_unrolled(op, u₁loopsym, u₂loopsym, vloopsym, suffix, ls)
!opu₂ && suffix > 0 && return
# TODO: for u₁, needs to consider if reducedchildren are u₁-unrolled
# reductions need to consider reduct-status
# if !opu₁
# opu₁ = u₁loopsym ∈ reducedchildren(op)
# end
mvar = Symbol(mvar, '_', Core.ifelse(opu₁, u₁, 1))
typeT = typeof_sym(ls, op, zerotyp)
# TODO: make should_broadcast_op handle everything.
if isvectorized(op) || vloopsym ∈ reducedchildren(op) || vloopsym ∈ reduceddependencies(op) || should_broadcast_op(op)
if opu₁ && u₁ > 1
call = Expr(:call, lv(:zero_vecunroll), staticexpr(u₁), VECTORWIDTHSYMBOL, typeT, staticexpr(reg_size(ls)))
else
call = Expr(:call, lv(:_vzero), VECTORWIDTHSYMBOL, typeT, staticexpr(reg_size(ls)))
end
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = ua
mvar, opu₁, opu₂ = variable_name_and_unrolled(op, u₁loopsym, u₂loopsym, vloopsym, suffix, ls)
!opu₂ && suffix > 0 && return
# TODO: for u₁, needs to consider if reducedchildren are u₁-unrolled
# reductions need to consider reduct-status
# if !opu₁
# opu₁ = u₁loopsym ∈ reducedchildren(op)
# end
typeT = typeof_sym(ls, op, zerotyp)
# TODO: make should_broadcast_op handle everything.
if isvectorized(op) || vloopsym ∈ reducedchildren(op) || vloopsym ∈ reduceddependencies(op) || should_broadcast_op(op)
if opu₁ && u₁ > 1
call = Expr(:call, lv(:zero_vecunroll), staticexpr(u₁), VECTORWIDTHSYMBOL, typeT, staticexpr(reg_size(ls)))
else
call = Expr(:call, :zero, typeT)
if opu₁ && u₁ > 1
# broadcastsym = Symbol(mvar, "_#init#")
# pushpreamble!(ls, Expr(:(=), broadcastsym, call))
t = Expr(:tuple)
for u ∈ 1:u₁
push!(t.args, call)
end
call = Expr(:call, lv(:VecUnroll), t)
end
call = Expr(:call, lv(:_vzero), VECTORWIDTHSYMBOL, typeT, staticexpr(reg_size(ls)))
end
else
call = Expr(:call, :zero, typeT)
if opu₁ && u₁ > 1
t = Expr(:tuple)
for u ∈ 1:u₁
push!(t.args, call)
end
call = Expr(:call, lv(:VecUnroll), t)
end
end
if (suffix == -1) && opu₂
for u ∈ 0:u₂max-1
push!(q.args, Expr(:(=), Symbol(mvar, u, "__", Core.ifelse(opu₁, u₁, 1)), call))
end
else
mvar = Symbol(mvar, '_', Core.ifelse(opu₁, u₁, 1))
push!(q.args, Expr(:(=), mvar, call))
nothing
end
nothing
end
# Have to awkwardly search through `operations(ls)` to try and find op's child
function getparentsreductzero(ls::LoopSet, op::Operation)::Float64
Expand All @@ -95,52 +99,65 @@ vecbasefunc(f) = Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:Vectorizat
function lower_constant!(
q::Expr, op::Operation, ls::LoopSet, ua::UnrollArgs
)
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = ua
mvar, opu₁, opu₂ = variable_name_and_unrolled(op, u₁loopsym, u₂loopsym, vloopsym, suffix, ls)
!opu₂ && suffix > 0 && return
mvar = Symbol(mvar, '_', Core.ifelse(opu₁, u₁, 1))
instruction = op.instruction
constsym = instruction.instr
# constsym = Symbol(instruction.instr, '_', 1)
reducedchildvectorized = vloopsym ∈ reducedchildren(op)
if reducedchildvectorized || isvectorized(op) || vloopsym ∈ reduceddependencies(op) || should_broadcast_op(op)
# call = Expr(:call, lv(:vbroadcast), W, Expr(:call, lv(:maybeconvert), typeT, constsym))
call = if reducedchildvectorized && vloopsym ∉ loopdependencies(op)
instrclass = getparentsreductzero(ls, op)
if instrclass == ADDITIVE_IN_REDUCTIONS
Expr(:call, vecbasefunc(:addscalar), Expr(:call, lv(:vzero), VECTORWIDTHSYMBOL, ELTYPESYMBOL), constsym)
elseif instrclass == MULTIPLICATIVE_IN_REDUCTIONS
Expr(:call, vecbasefunc(:mulscalar), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :one, ELTYPESYMBOL)), constsym)
elseif instrclass == MAX
Expr(:call, vecbasefunc(:maxscalar), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemin, ELTYPESYMBOL)), constsym)
elseif instrclass == MIN
Expr(:call, vecbasefunc(:minscalar), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemax, ELTYPESYMBOL)), constsym)
else
throw("Reductions of type $(reduction_zero(reinstrclass)) not yet supported; please file an issue as a reminder to take care of this.")
end
else
Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, constsym)
end
if opu₁ && u₁ > 1
# broadcastsym = Symbol(mvar, "_#init#")
# push!(q.args, Expr(:(=), broadcastsym, call))
t = Expr(:tuple)
for u ∈ 1:u₁
push!(t.args, call)
end
call = Expr(:call, lv(:VecUnroll), t)
end
push!(q.args, Expr(:(=), mvar, call))
elseif opu₁ && u₁ > 1
t = Expr(:tuple)
for u ∈ 1:u₁
push!(t.args, constsym)
end
push!(q.args, Expr(:(=), mvar, Expr(:call, lv(:VecUnroll), t)))
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = ua
mvar, opu₁, opu₂ = variable_name_and_unrolled(op, u₁loopsym, u₂loopsym, vloopsym, suffix, ls)
!opu₂ && suffix > 0 && return
instruction = op.instruction
constsym = instruction.instr
# constsym = Symbol(instruction.instr, '_', 1)
reducedchildvectorized = vloopsym ∈ reducedchildren(op)
if reducedchildvectorized || isvectorized(op) || vloopsym ∈ reduceddependencies(op) || should_broadcast_op(op)
# call = Expr(:call, lv(:vbroadcast), W, Expr(:call, lv(:maybeconvert), typeT, constsym))
call = if reducedchildvectorized && vloopsym ∉ loopdependencies(op)
instrclass = getparentsreductzero(ls, op)
if instrclass == ADDITIVE_IN_REDUCTIONS
Expr(:call, vecbasefunc(:addscalar), Expr(:call, lv(:vzero), VECTORWIDTHSYMBOL, ELTYPESYMBOL), constsym)
elseif instrclass == MULTIPLICATIVE_IN_REDUCTIONS
Expr(:call, vecbasefunc(:mulscalar), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :one, ELTYPESYMBOL)), constsym)
elseif instrclass == MAX
Expr(:call, vecbasefunc(:maxscalar), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemin, ELTYPESYMBOL)), constsym)
elseif instrclass == MIN
Expr(:call, vecbasefunc(:minscalar), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemax, ELTYPESYMBOL)), constsym)
else
throw("Reductions of type $(reduction_zero(reinstrclass)) not yet supported; please file an issue as a reminder to take care of this.")
end
else
push!(q.args, Expr(:(=), mvar, constsym))
Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, constsym)
end
nothing
if opu₁ && u₁ > 1
# broadcastsym = Symbol(mvar, "_#init#")
# push!(q.args, Expr(:(=), broadcastsym, call))
t = Expr(:tuple)
for u ∈ 1:u₁
push!(t.args, call)
end
call = Expr(:call, lv(:VecUnroll), t)
end
elseif opu₁ && u₁ > 1
t = Expr(:tuple)
for u ∈ 1:u₁
push!(t.args, constsym)
end
call = Expr(:call, lv(:VecUnroll), t)
elseif opu₂ & (suffix == -1)
for u ∈ 0:u₂max-1
push!(q.args, Expr(:(=), Symbol(mvar, u, "__", 1), constsym))
end
return nothing
else
push!(q.args, Expr(:(=), Symbol(mvar, '_', 1), constsym))
return nothing
end
u₁tag = Core.ifelse(opu₁, u₁, 1)
if opu₂ & (suffix == -1)
for u ∈ 0:u₂max-1
push!(q.args, Expr(:(=), Symbol(mvar, u, "__", u₁tag), call))
end
else
mvar = Symbol(mvar, '_', u₁tag)
push!(q.args, Expr(:(=), mvar, call))
end
nothing
end

"""
    isconstantop(op::Operation)

Return `true` when the instruction of `op` is `LOOPCONSTANT`, or when `op` is a
constant operation that has no loop dependencies; return `false` otherwise.
"""
function isconstantop(op::Operation)
    # A loop-constant instruction qualifies regardless of the operation's dependencies.
    instruction(op) === LOOPCONSTANT && return true
    # Otherwise the operation must be a constant with an empty dependency list.
    return isconstant(op) && iszero(length(loopdependencies(op)))
end
Expand Down
32 changes: 20 additions & 12 deletions src/modeling/determinestrategy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -578,22 +578,30 @@ function solve_unroll(
u₁L = length(u₁loop)
u₂L = length(u₂loop)
if isstaticloop(u₂loop)
if u₂loopsym !== vloopsym && u₂L ≤ 4
u₁ = max(1, solve_unroll_constT(reg_pressure, u₂L))
u₁ = isstaticloop(u₁loop) ? maybedemotesize(u₁, u₁loopsym === vloopsym ? cld(u₁L,W) : u₁L) : u₁
return u₁, u₂L, unroll_cost(cost_vec, u₁, u₂L, u₁L, u₂L)
if u₂loopsym !== vloopsym && u₂L ≤ 4
if isstaticloop(u₁loop)
u₁ = max(solve_unroll_constT(reg_pressure, u₂L), 1)
u₁ = maybedemotesize(u₁, u₁loopsym === vloopsym ? cld(u₁L,W) : u₁L)
else
u₁ = clamp(solve_unroll_constT(reg_pressure, u₂L), 1, 8)
end
u₂Ltemp = u₂loopsym === vloopsym ? cld(u₂L, W) : u₂L
maxu₂ = min(4maxu₂, u₂Ltemp)
return u₁, u₂L, unroll_cost(cost_vec, u₁, u₂L, u₁L, u₂L)
end
u₂Ltemp = u₂loopsym === vloopsym ? cld(u₂L, W) : u₂L
maxu₂ = min(4maxu₂, u₂Ltemp)
end
if isstaticloop(u₁loop)
if u₁loopsym !== vloopsym && u₁L ≤ 4
u₂ = max(1, solve_unroll_constU(reg_pressure, u₁L))
u₂ = isstaticloop(u₂loop) ? maybedemotesize(u₂, u₂loopsym === vloopsym ? cld(u₂L,W) : u₂L) : u₂
return u₁L, u₂, unroll_cost(cost_vec, u₁L, u₂, u₁L, u₂L)
if u₁loopsym !== vloopsym && u₁L ≤ 4
if isstaticloop(u₂loop)
u₂ = max(solve_unroll_constU(reg_pressure, u₁L), 1)
u₂ = maybedemotesize(u₂, u₂loopsym === vloopsym ? cld(u₂L,W) : u₂L)
else
u₂ = clamp(solve_unroll_constU(reg_pressure, u₁L), 1, 8)
end
u₁Ltemp = u₁loopsym === vloopsym ? cld(u₁L, W) : u₁L
maxu₁ = min(4maxu₁, u₁Ltemp)
return u₁L, u₂, unroll_cost(cost_vec, u₁L, u₂, u₁L, u₂L)
end
u₁Ltemp = u₁loopsym === vloopsym ? cld(u₁L, W) : u₁L
maxu₁ = min(4maxu₁, u₁Ltemp)
end
if u₁loopsym === vloopsym
u₁Lf = u₁L / W
Expand Down
Loading

0 comments on commit 8b5f640

Please sign in to comment.