Skip to content

Commit

Permalink
clamp unroll extrema and add div to cost table (copying fdiv for now...)
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Apr 26, 2021
1 parent 8b5f640 commit 96aef96
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/codegen/operation_evaluation_order.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ function fillorder!(ls::LoopSet, order::Vector{Symbol}, u₁loop::Symbol, u₂lo
ro[_n] = loopsym = order[n]
#loopsym = order[n]
for op ∈ ops
addoptoorder!( ls, included_vars, place_after_loop, op, loopsym, _n, u₁loop, u₂loop, vectorized, u₂max )
addoptoorder!( ls, included_vars, place_after_loop, op, loopsym, _n, u₁loop, u₂loop, vectorized, u₂max )
end
end
end
Expand Down
3 changes: 3 additions & 0 deletions src/modeling/costs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ const COST = Dict{Symbol,InstructionCost}(
:rem_fast => InstructionCost(13,4.0,-2.0), # FIXME
:div_fast => InstructionCost(13,4.0,-2.0),
:vdiv_fast => InstructionCost(20,4.0,-2.0), # FIXME
:÷ => InstructionCost(13,4.0,-2.0),
# :evadd => InstructionCost(4,0.5),
# :evsub => InstructionCost(4,0.5),
# :evmul => InstructionCost(4,0.5),
Expand Down Expand Up @@ -494,6 +495,8 @@ const FUNCTIONSYMBOLS = IdDict{Type{<:Function},Instruction}(
# typeof(VectorizationBase.vfdiv) => :(/),
# typeof(VectorizationBase.vfdiv!) => :(/),
typeof(VectorizationBase.vdiv) => :(/),
typeof(÷) => :(÷),
typeof(Base.FastMath.div_fast) => :div_fast,
typeof(Base.FastMath.div_fast) => :div_fast,
typeof(Base.FastMath.rem_fast) => :rem_fast,
typeof(==) => :(==),
Expand Down
10 changes: 4 additions & 6 deletions src/modeling/determinestrategy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -469,19 +469,17 @@ function solve_unroll_lagrange(X, R, u₁L, u₂L, u₁step::Int, u₂step::Int,
u₂low = max(u₂step, floor(Int, 0.8u₂float)) # must be at least 1
u₁high = solve_unroll_constT(R, u₂low) + u₁step
u₂high = solve_unroll_constU(R, u₁low) + u₂step
# @show u₁low, u₁high, u₂low, u₂high
if u₁low ≥ u₁high
u₁low = solve_unroll_constT(R, u₂high)
end
if u₂low ≥ u₂high
u₂low = solve_unroll_constU(R, u₁high)
end
maxunroll = atleast32registers ? (((X₂ > 0) & (X₃ > 0)) ? 10 : 8) : 6
u₁low = (min(u₁low, maxunroll) ÷ u₁step) * u₁step
u₂low = (min(u₂low, maxunroll) ÷ u₂step) * u₂step
u₁high = min(u₁high, maxunroll)
u₂high = min(u₂high, maxunroll)
# @show u₁low, u₁high, u₂low, u₂high, u₁float, u₂float
u₁low = (clamp(u₁low, 1, maxunroll) ÷ u₁step) * u₁step
u₂low = (clamp(u₂low, 1, maxunroll) ÷ u₂step) * u₂step
u₁high = clamp(u₁high, 1, maxunroll)
u₂high = clamp(u₂high, 1, maxunroll)
solve_unroll_iter(X, R, u₁L, u₂L, reverse(u₁low:u₁step:u₁high), reverse(u₂low:u₂step:u₂high))
end

Expand Down

0 comments on commit 96aef96

Please sign in to comment.