Skip to content
This repository has been archived by the owner on Jul 19, 2023. It is now read-only.

Commit

Permalink
Merge pull request #431 from mjsheikh/loopconv
Browse files Browse the repository at this point in the history
Optimizing convolutions
  • Loading branch information
ChrisRackauckas authored Aug 1, 2021
2 parents a44098e + dc5c25a commit 40aa4b9
Show file tree
Hide file tree
Showing 9 changed files with 292 additions and 174 deletions.
148 changes: 110 additions & 38 deletions src/derivative_operators/convolutions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,39 @@ function convolve_interior!(x_temp::AbstractVector{T1}, x::AbstractVector{T2}, A
T = promote_type(T1,T2,T3)
@assert length(x_temp)+2 == length(x)
stencil = A.stencil_coefs

# Initialize cur_stencil so that LoopVectorization.check_args(curr_stencil) doesn't throw undef variable for cur_stencil
cur_stencil = 0

coeff = A.coefficients
len = length(x_temp)
mid = div(A.stencil_length,2)
if !add_range
for i in (1+A.boundary_point_count) : (length(x_temp)-A.boundary_point_count)
xtempi = zero(T)
cur_stencil = eltype(stencil) <: AbstractVector ? stencil[i-A.boundary_point_count] : stencil
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i] : coeff isa Number ? coeff : true
for idx in 1:A.stencil_length
xtempi += cur_coeff * cur_stencil[idx] * x[i - mid + idx]
if eltype(stencil) <: AbstractVector
@turbo for i in (1+A.boundary_point_count) : (len-A.boundary_point_count)
xtempi = zero(T)
cur_stencil = stencil[i-A.boundary_point_count]
cur_coeff = coeff[i]
for idx in 1:A.stencil_length
xtempi += cur_coeff * cur_stencil[idx] * x[i - mid + idx]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
else
@turbo for i in (1+A.boundary_point_count) : (len-A.boundary_point_count)
xtempi = zero(T)
cur_coeff = coeff[i]
for idx in 1:A.stencil_length
xtempi += cur_coeff * stencil[idx] * x[i - mid + idx]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
else
for i in [(1+A.boundary_point_count):(A.boundary_point_count+offset); (length(x_temp)-A.boundary_point_count-offset+1):(length(x_temp)-A.boundary_point_count)]
for i in [(1+A.boundary_point_count):(A.boundary_point_count+offset); (len-A.boundary_point_count-offset+1):(len-A.boundary_point_count)]
xtempi = zero(T)
cur_stencil = eltype(stencil) <: AbstractVector ? stencil[i-A.boundary_point_count] : stencil
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i] : coeff isa Number ? coeff : true
cur_coeff = coeff[i]
for idx in 1:A.stencil_length
xtempi += cur_coeff * cur_stencil[idx] * x[i - mid + idx]
end
Expand All @@ -55,12 +71,16 @@ end
function convolve_BC_left!(x_temp::AbstractVector{T1}, x::AbstractVector{T2}, A::DerivativeOperator{T3,N,false}; overwrite = true) where {T1, T2, T3, N}
T = promote_type(T1,T2,T3)
stencil = A.low_boundary_coefs

# Initialize cur_stencil so that LoopVectorization.check_args(curr_stencil) doesn't throw undef variable for cur_stencil
cur_stencil = 0

coeff = A.coefficients
for i in 1 : A.boundary_point_count
@turbo for i in 1 : A.boundary_point_count
cur_stencil = stencil[i]
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i] : coeff isa Number ? coeff : true
xtempi = cur_coeff*stencil[i][1]*x[1]
for idx in 2:A.boundary_stencil_length
cur_coeff = coeff[i]
xtempi = zero(T)
for idx in 1:A.boundary_stencil_length
xtempi += cur_coeff * cur_stencil[idx] * x[idx]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
Expand All @@ -70,12 +90,16 @@ end
function convolve_BC_right!(x_temp::AbstractVector{T1}, x::AbstractVector{T2}, A::DerivativeOperator{T3,N,false}; overwrite = true) where {T1, T2, T3, N}
T = promote_type(T1,T2,T3)
stencil = A.high_boundary_coefs

# Initialize cur_stencil so that LoopVectorization.check_args(curr_stencil) doesn't throw undef variable for cur_stencil
cur_stencil = 0

coeff = A.coefficients
for i in 1 : A.boundary_point_count
@turbo for i in 1 : A.boundary_point_count
cur_stencil = stencil[i]
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i] : coeff isa Number ? coeff : true
xtempi = cur_coeff*stencil[i][end]*x[end]
for idx in (A.boundary_stencil_length-1):-1:1
cur_coeff = coeff[i]
xtempi = zero(T)
for idx in (A.boundary_stencil_length-1):-1:0
xtempi += cur_coeff * cur_stencil[end-idx] * x[end-idx]
end
x_temp[end-A.boundary_point_count+i] = xtempi + !overwrite*x_temp[end-A.boundary_point_count+i]
Expand Down Expand Up @@ -169,23 +193,48 @@ function convolve_interior!(x_temp::AbstractVector{T}, x::AbstractVector{T}, A::
stl = A.stencil_length
coeff = A.coefficients

for i in bpc+1:len-bpc-A.offside
cur_coeff = coeff[i]
if cur_coeff >= 0
# Initialize cur_stencil so that LoopVectorization.check_args(curr_stencil) doesn't throw undef variable for cur_stencil
cur_stencil = 0

if A.coeff_func isa Number && A.coeff_func >= 0
@turbo for i in bpc+1:len-bpc-A.offside
cur_coeff = coeff[i]
xtempi = zero(T)
cur_stencil = A.stencil_coefs[1,i-bpc]
for idx in 1:stl
xtempi += cur_coeff * cur_stencil[idx]*x[i+idx-A.offside]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
else
end
elseif A.coeff_func isa Number && A.coeff_func < 0
@turbo for i in bpc+1:len-bpc-A.offside
cur_coeff = coeff[i]
xtempi = zero(T)
cur_stencil = A.stencil_coefs[2,i-bpc]
for idx in 1:stl
xtempi += cur_coeff * cur_stencil[idx]*x[i-stl+1+idx + A.offside]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
else
for i in bpc+1:len-bpc-A.offside
cur_coeff = coeff[i]
if cur_coeff >= 0
xtempi = zero(T)
cur_stencil = A.stencil_coefs[1,i-bpc]
for idx in 1:stl
xtempi += cur_coeff * cur_stencil[idx]*x[i+idx-A.offside]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
else
xtempi = zero(T)
cur_stencil = A.stencil_coefs[2,i-bpc]
for idx in 1:stl
xtempi += cur_coeff * cur_stencil[idx]*x[i-stl+1+idx + A.offside]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
end
end
end

Expand Down Expand Up @@ -272,30 +321,49 @@ end
function convolve_interior!(x_temp::AbstractVector{T1}, _x::BoundaryPaddedVector, A::DerivativeOperator{T2,N,false}; overwrite = true) where {T1, T2, N}
T = promote_type(T1,T2)
stencil = A.stencil_coefs

# Initialize cur_stencil so that LoopVectorization.check_args(curr_stencil) doesn't throw undef variable for cur_stencil
cur_stencil = 0

coeff = A.coefficients
x = _x.u
mid = div(A.stencil_length,2) + 1
# Just do the middle parts
for i in (2+A.boundary_point_count) : (length(x_temp)-A.boundary_point_count)-1
xtempi = zero(T)
cur_stencil = eltype(stencil) <: AbstractVector ? stencil[i-A.boundary_point_count] : stencil
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i-A.boundary_point_count] : coeff isa Number ? coeff : true
@inbounds for idx in 1:A.stencil_length
xtempi += cur_coeff * cur_stencil[idx] * x[(i-1) - (mid-idx) + 1]
if eltype(stencil) <: AbstractVector
@turbo for i in (2+A.boundary_point_count) : (length(x_temp)-A.boundary_point_count)-1
xtempi = zero(T)
cur_stencil = stencil[i-A.boundary_point_count]
cur_coeff = coeff[i-A.boundary_point_count]
for idx in 1:A.stencil_length
xtempi += cur_coeff * cur_stencil[idx] * x[(i-1) - (mid-idx) + 1]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
else
@turbo for i in (2+A.boundary_point_count) : (length(x_temp)-A.boundary_point_count)-1
xtempi = zero(T)
cur_coeff = coeff[i-A.boundary_point_count]
for idx in 1:A.stencil_length
xtempi += cur_coeff * stencil[idx] * x[(i-1) - (mid-idx) + 1]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
end
end

function convolve_BC_left!(x_temp::AbstractVector{T1}, _x::BoundaryPaddedVector, A::DerivativeOperator{T2,N,false}; overwrite = true) where {T1, T2, N}
T = promote_type(T1,T2)
stencil = A.low_boundary_coefs
coeff = A.coefficients
for i in 1 : A.boundary_point_count

# Initialize cur_stencil so that LoopVectorization.check_args(curr_stencil) doesn't throw undef variable for cur_stencil
cur_stencil = 0

@turbo for i in 1 : A.boundary_point_count
cur_stencil = stencil[i]
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i] : coeff isa Number ? coeff : true
cur_coeff = coeff[i]
xtempi = cur_coeff*cur_stencil[1]*_x.l
@inbounds for idx in 2:A.boundary_stencil_length
for idx in 2:A.boundary_stencil_length
xtempi += cur_coeff * cur_stencil[idx] * _x.u[idx-1]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
Expand All @@ -306,9 +374,9 @@ function convolve_BC_left!(x_temp::AbstractVector{T1}, _x::BoundaryPaddedVector,
i = 1 + A.boundary_point_count
xtempi = zero(T)
cur_stencil = eltype(A.stencil_coefs) <: AbstractVector ? A.stencil_coefs[i-A.boundary_point_count] : A.stencil_coefs
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i-A.boundary_point_count] : coeff isa Number ? coeff : true
cur_coeff = coeff[i-A.boundary_point_count]
xtempi = cur_coeff*cur_stencil[1]*_x.l
@inbounds for idx in 2:A.stencil_length
@turbo for idx in 2:A.stencil_length
xtempi += cur_coeff * cur_stencil[idx] * x[(i-1) - (mid-idx) + 1]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
Expand All @@ -317,6 +385,10 @@ end
function convolve_BC_right!(x_temp::AbstractVector{T1}, _x::BoundaryPaddedVector, A::DerivativeOperator{T2,N,false}; overwrite = true) where {T1, T2, N}
T = promote_type(T1,T2)
stencil = A.high_boundary_coefs

# Initialize cur_stencil so that LoopVectorization.check_args(curr_stencil) doesn't throw undef variable for cur_stencil
cur_stencil = 0

coeff = A.coefficients
bc_start = length(_x.u) - A.boundary_point_count
# need to account for _x.r in last interior convolution
Expand All @@ -325,17 +397,17 @@ function convolve_BC_right!(x_temp::AbstractVector{T1}, _x::BoundaryPaddedVector
i = length(x_temp)-A.boundary_point_count
xtempi = zero(T)
cur_stencil = eltype(A.stencil_coefs) <: AbstractVector ? A.stencil_coefs[i-A.boundary_point_count] : A.stencil_coefs
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[i-A.boundary_point_count] : coeff isa Number ? coeff : true
cur_coeff = coeff[i-A.boundary_point_count]
xtempi = cur_coeff*cur_stencil[end]*_x.r
@inbounds for idx in 1:A.stencil_length-1
@turbo for idx in 1:A.stencil_length-1
xtempi += cur_coeff * cur_stencil[idx] * x[(i-1) - (mid-idx) + 1]
end
x_temp[i] = xtempi + !overwrite*x_temp[i]
for i in 1 : A.boundary_point_count
@turbo for i in 1 : A.boundary_point_count
cur_stencil = stencil[i]
cur_coeff = typeof(coeff) <: AbstractVector ? coeff[bc_start + i] : coeff isa Number ? coeff : true
cur_coeff = coeff[bc_start + i]
xtempi = cur_coeff*cur_stencil[end]*_x.r
@inbounds for idx in (A.boundary_stencil_length-1):-1:1
for idx in (A.boundary_stencil_length-1):-1:1
xtempi += cur_coeff * cur_stencil[end-idx] * _x.u[end-idx+1]
end
x_temp[bc_start + i] = xtempi + !overwrite*x_temp[bc_start + i]
Expand Down
32 changes: 11 additions & 21 deletions src/derivative_operators/derivative_operator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

index(i::Int, N::Int) = i + div(N, 2) + 1

struct DerivativeOperator{T<:Real,N,Wind,T2,S1,S2,S3<:SArray,T3,F} <: AbstractDerivativeOperator{T}
struct DerivativeOperator{T<:Real,N,Wind,T2,S1,S2,S3,T3,F} <: AbstractDerivativeOperator{T}
derivative_order :: Int
approximation_order :: Int
dx :: T2
Expand Down Expand Up @@ -64,7 +64,7 @@ struct CenteredDifference{N} end

function CenteredDifference{N}(derivative_order::Int,
approximation_order::Int, dx::T,
len::Int, coeff_func=nothing) where {T<:Real,N}
len::Int, coeff_func=1) where {T<:Real,N}
@assert approximation_order>1 "approximation_order must be greater than 1."
stencil_length = derivative_order + approximation_order - 1 + (derivative_order+approximation_order)%2
boundary_stencil_length = derivative_order + approximation_order
Expand All @@ -83,17 +83,13 @@ function CenteredDifference{N}(derivative_order::Int,
low_boundary_coefs = convert(SVector{boundary_point_count},_low_boundary_coefs)

# _high_boundary_coefs = SVector{boundary_stencil_length, T}[convert(SVector{boundary_stencil_length, T}, (1/dx^derivative_order) * calculate_weights(derivative_order, oneunit(T)*x0, reverse(right_boundary_x))) for x0 in R_boundary_deriv_spots]
# high_boundary_coefs = convert(SVector{boundary_point_count},reverse(_high_boundary_coefs))

high_boundary_coefs = convert(SVector{boundary_point_count},reverse(map(reverse, _low_boundary_coefs*(-1)^derivative_order)))

offside = 0

coefficients = coeff_func isa Nothing ? nothing : fill!(Vector{T}(undef,len),0)
coefficients = fill!(Vector{T}(undef,len),0)

if coeff_func != nothing
compute_coeffs!(coeff_func, coefficients)
end
compute_coeffs!(coeff_func, coefficients)



Expand Down Expand Up @@ -123,7 +119,7 @@ end

function CenteredDifference{N}(derivative_order::Int,
approximation_order::Int, dx::AbstractVector{T},
len::Int, coeff_func=nothing) where {T<:Real,N}
len::Int, coeff_func=1) where {T<:Real,N}

stencil_length = derivative_order + approximation_order - 1 + (derivative_order+approximation_order)%2
boundary_stencil_length = derivative_order + approximation_order
Expand All @@ -147,11 +143,9 @@ function CenteredDifference{N}(derivative_order::Int,

offside = 0

coefficients = coeff_func isa Nothing ? nothing : zeros(T,len)
coefficients = zeros(T,len)

if coeff_func != nothing
compute_coeffs!(coeff_func, coefficients)
end
compute_coeffs!(coeff_func, coefficients)


DerivativeOperator{T,N,false,typeof(dx),typeof(stencil_coefs),
Expand Down Expand Up @@ -198,7 +192,7 @@ julia> Array(L2 * Q)[1]
"""
function UpwindDifference{N}(derivative_order::Int,
approximation_order::Int, dx::T,
len::Int, coeff_func=nothing; offside::Int=0) where {T<:Real,N}
len::Int, coeff_func=1; offside::Int=0) where {T<:Real,N}

@assert offside > -1 "Number of offside points should be non-negative"
@assert offside <= div(derivative_order + approximation_order - 1,2) "Number of offside points should not exceed the primary wind points"
Expand All @@ -222,9 +216,7 @@ function UpwindDifference{N}(derivative_order::Int,
high_boundary_coefs = convert(SVector{boundary_point_count + offside},_high_boundary_coefs)

coefficients = zeros(T,len)
if coeff_func != nothing
compute_coeffs!(coeff_func, coefficients)
end
compute_coeffs!(coeff_func, coefficients)

DerivativeOperator{T,N,true,T,typeof(stencil_coefs),
typeof(low_boundary_coefs),typeof(high_boundary_coefs),Vector{T},
Expand All @@ -241,7 +233,7 @@ end
# TODO implement the non-uniform grid
function UpwindDifference{N}(derivative_order::Int,
approximation_order::Int, dx::AbstractVector{T},
len::Int, coeff_func=nothing; offside::Int=0) where {T<:Real,N}
len::Int, coeff_func=1; offside::Int=0) where {T<:Real,N}

@assert offside > -1 "Number of offside points should be non-negative"
@assert offside <= div(derivative_order + approximation_order - 1,2) "Number of offside points should not exceed the primary wind points"
Expand Down Expand Up @@ -281,9 +273,7 @@ function UpwindDifference{N}(derivative_order::Int,

# Compute coefficients
coefficients = zeros(T,len)
if coeff_func != nothing
compute_coeffs!(coeff_func, coefficients)
end
compute_coeffs!(coeff_func, coefficients)

DerivativeOperator{T,N,true,typeof(dx),typeof(stencil_coefs),
typeof(low_boundary_coefs),typeof(high_boundary_coefs),Vector{T},
Expand Down
Loading

0 comments on commit 40aa4b9

Please sign in to comment.