From e1539f91fddc288354f38d6e7305e43d6b60dab9 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 26 Apr 2019 17:12:16 -0400 Subject: [PATCH 01/40] first draft of new Hessenberg operations --- stdlib/LinearAlgebra/src/hessenberg.jl | 139 +++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 9 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index e06f5ff4f9630..32a76f6e0387b 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -1,20 +1,28 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -struct Hessenberg{T,S<:AbstractMatrix{T}} <: Factorization{T} +struct Hessenberg{T,S<:AbstractMatrix{T},V<:Number} <: Factorization{T} factors::S τ::Vector{T} + μ::V # represents a shifted Hessenberg matrix H-μI - function Hessenberg{T,S}(factors, τ) where {T,S<:AbstractMatrix{T}} + function Hessenberg{T,S,V}(factors, τ, μ::V=zero(V)) where {T,S<:AbstractMatrix{T},V<:Number} require_one_based_indexing(factors, τ) - new{T,S}(factors, τ) + new{T,S}(factors, τ, μ) end end -Hessenberg(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = Hessenberg{T,typeof(factors)}(factors, τ) -function Hessenberg{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} - Hessenberg(convert(AbstractMatrix{T}, factors), convert(Vector{T}, v)) +Hessenberg(factors::AbstractMatrix{T}, τ::Vector{T}, μ::V=false) where {T,V<:Number} = Hessenberg{T,typeof(factors),V}(factors, τ, μ) +function Hessenberg{T}(factors::AbstractMatrix, τ::AbstractVector, μ::V=false) where {T,V<:Number} + Hessenberg(convert(AbstractMatrix{T}, factors), convert(Vector{T}, v), μ) end +Hessenberg(F::Hessenberg, μ::Number=F.μ) = Hessenberg(F.factors, F.τ, μ) +Hessenberg{T}(F::Hessenberg{T}) where {T} = copy(F) # copying simplifies promotion logic below +Hessenberg{T}(F::Hessenberg) where {T} = Hessenberg{T}(F.factors, F.τ, F.μ) -Hessenberg(A::StridedMatrix) = Hessenberg(LAPACK.gehrd!(A)...) +Hessenberg(A::StridedMatrix, μ::Number=false) = Hessenberg(LAPACK.gehrd!(A)..., μ) + +copy(F::Hessenberg) = Hessenberg(copy(F.factors), copy(F.τ), F.μ) +size(F::Hessenberg, d) = size(F.factors, d) +size(F::Hessenberg) = size(F.factors) # iteration for destructuring into components Base.iterate(S::Hessenberg) = (S.Q, Val(:H)) @@ -40,7 +48,14 @@ matrix with `F.H`. When `Q` is extracted, the resulting type is the `HessenbergQ and may be converted to a regular matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short). -Iterating the decomposition produces the factors `F.Q` and `F.H`. +Note that the shifted factorization `A-μI = Q (H - μI) Q'` can be +constructed efficiently by `F - μ*I` using the [`UniformScaling`](@ref) +object [`I`](@ref), which creates a new `Hessenberg` with the same `Q` and `H` +(shared storage) and a modified shift. The shift of a given `F` is +obtained by `F.μ`. + +Iterating the decomposition produces the factors `F.Q` and `F.H` (not including +the shift `F.μ`). # Examples ```jldoctest @@ -89,10 +104,23 @@ Base.propertynames(F::Hessenberg, private::Bool=false) = ## reconstruct the original matrix Matrix{T}(Q::HessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) -AbstractMatrix(F::Hessenberg) = (fq = Array(F.Q); (fq * F.H) * fq') AbstractArray(F::Hessenberg) = AbstractMatrix(F) Matrix(F::Hessenberg) = Array(AbstractArray(F)) Array(F::Hessenberg) = Matrix(F) +function AbstractMatrix(F::Hessenberg{T,S,V}) where {T,S,V} + fq = Array(F.Q) + A = rmul!(lmul!(fq, F.H), fq') + if iszero(F.μ) + return A + elseif promote_type(T,V) <: T # can shift A in-place + for i = 1:size(A,1) + @inbounds A[i,i] -= F.μ + end + return A + else + return A - F.μ*I # allocate another matrix, e.g. if A is real and μ is complex + end +end lmul!(Q::HessenbergQ{T}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) @@ -102,3 +130,96 @@ lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::StridedVecOrMat{T}) where {T<:Bl (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) + +# multiply x by the entries of M in the upper-k triangle, which contains +# the entries of the upper-Hessenberg matrix H for k=-1 +function rmul_triu!(M::AbstractMatrix, x, k::Integer=0) + require_one_based_indexing(M) + m, n = size(M) + for j = 1:n, i = 1:min(j-k,m) + @inbounds M[i,j] *= x + end + return M +end +function lmul_triu!(x, M::AbstractMatrix, k::Integer=0) + require_one_based_indexing(M) + m, n = size(M) + for j = 1:n, i = 1:min(j-k,m) + @inbounds M[i,j] = x * M[i,j] + end + return M +end + +# multiply Hessenberg by scalar +rmul!(F::Hessenberg{T}, x::T) where {T} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ, F.μ*x) +lmul!(x::T, F::Hessenberg{T}) where {T} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) +(*)(F::Hessenberg{T}, x::T) where {T} = rmul!(copy(F), x) +(*)(x::T, F::Hessenberg{T}) where {T} = lmul!(x, copy(F)) +function (*)(F::Hessenberg{T}, x::S) where {T,S} + TS = promote_type(T, S) + rmul!(Hessenberg{TS}(F), convert(TS, x)) +end +function (*)(x::S, F::Hessenberg{T}) where {T,S} + TS = promote_type(T, S) + return lmul!(convert(TS, x), Hessenberg{TS}(F)) +end +(-)(F::Hessenberg{T}) where {T} = F * -one(T) + +# shift Hessenberg by λI +(+)(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ) +(+)(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ) +(-)(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) +(-)(J::UniformScaling, F::Hessenberg{T}) where {T} = Hessenberg(-F, J.λ - F.μ) + +# Solving (H-µI)x = b: we can do this in O(n²) time and O(n) memory +# (in-place in x) by the RQ algorithm from: +# +# G. Henry, "The shifted Hessenberg system solve computation," Tech. Rep. 94–163, +# Center for Appl. Math., Cornell University (1994). +# +# as reviewed in +# +# C. Beattie et al., "A note on shifted Hessenberg systems and frequency +# response computation," ACM Trans. Math. Soft. 38, pp. 12:6–12:16 (2011) +# +# Essentially, it works by doing a Givens RQ factorization of H-µI from +# right to left, and doing backsubstitution *simultaneously*. + +# solve (H-μ)x = b, storing result in b +function ldiv_H!(F::Hessenberg, b::AbstractVector) + n = size(F, 1) + n != length(b) = throw(DimensionMismatch("wrong right-hand-side length != $n")) + require_one_based_indexing(b) + H = F.factors + μ = F.μ + u = H[:,n] # temporary vector + cs = Vector{Tuple{eltype(u),eltype(u)}}(undef, length(u)) # store Givens rotations + x = b # not a copy, just rename to match paper + u[n] -= μ + @inbounds for k = n:-1:2 + Φ, ρ = givens(H[k,k] - μ, H[k,k-1], 1,2) + cs[k] = c, s = Φ.c, Φ.s + x[k] /= ρ + t₁ = s * x[k]; t₂ = c * x[k] + @simd for j = 1:k-2 + x[j] -= u[j]*t₂ + H[j,k-1]*t₁ + u[j] = H[j,k-1]*c - s'u[j] + end + x[k-1] -= u[k-1]*t₂ + (H[k-1,k-1] - μ) * t₁ + u[k-1] = (H[k-1,k-1] - μ) * c - s'u[k-1] + end + τ₁ = x[1] / u[1] + @inbounds for k = 2:n + τ₂ = x[k] + c, s = cs[k] + x[k-1] = c*τ₁ + s*τ₂ + τ₁ = c*τ₂ - s'τ₁ + end + x[n] = τ₁ + return x +end + +function ldiv!(F::Hessenberg, b::AbstractVector) + Q = F.Q + return lmul!(Q, ldiv_H!(F, lmul!(Q', b))) +end \ No newline at end of file From 6ca092d18e2ad529d5086cc3bbdeba819c33d6e8 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 26 Apr 2019 22:10:07 -0400 Subject: [PATCH 02/40] fixes --- stdlib/LinearAlgebra/src/givens.jl | 2 + stdlib/LinearAlgebra/src/hessenberg.jl | 70 ++++++++++++++------------ 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/stdlib/LinearAlgebra/src/givens.jl b/stdlib/LinearAlgebra/src/givens.jl index fc5f48e99c2cc..3bf571d1c53e9 100644 --- a/stdlib/LinearAlgebra/src/givens.jl +++ b/stdlib/LinearAlgebra/src/givens.jl @@ -248,6 +248,8 @@ function givensAlgorithm(f::Complex{T}, g::Complex{T}) where T<:AbstractFloat return cs, sn, r end +givensAlgorithm(f, g) = givensAlgorithm(promote(float(f), float(g))...) + """ givens(f::T, g::T, i1::Integer, i2::Integer) where {T} -> (G::Givens, r::T) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 32a76f6e0387b..a9632bb75068e 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -156,11 +156,11 @@ lmul!(x::T, F::Hessenberg{T}) where {T} = Hessenberg(lmul_triu!(x, F.factors, -1 (*)(F::Hessenberg{T}, x::T) where {T} = rmul!(copy(F), x) (*)(x::T, F::Hessenberg{T}) where {T} = lmul!(x, copy(F)) function (*)(F::Hessenberg{T}, x::S) where {T,S} - TS = promote_type(T, S) + TS = typeof(zero(T) * x) rmul!(Hessenberg{TS}(F), convert(TS, x)) end function (*)(x::S, F::Hessenberg{T}) where {T,S} - TS = promote_type(T, S) + TS = typeof(zero(T) * x) return lmul!(convert(TS, x), Hessenberg{TS}(F)) end (-)(F::Hessenberg{T}) where {T} = F * -one(T) @@ -185,41 +185,49 @@ end # Essentially, it works by doing a Givens RQ factorization of H-µI from # right to left, and doing backsubstitution *simultaneously*. -# solve (H-μ)x = b, storing result in b -function ldiv_H!(F::Hessenberg, b::AbstractVector) - n = size(F, 1) - n != length(b) = throw(DimensionMismatch("wrong right-hand-side length != $n")) - require_one_based_indexing(b) +# solve (H-μ)X = B, storing result in B +function ldiv_H!(F::Hessenberg, B::AbstractVecOrMat) + m = size(F,1) + m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side length != $m")) + require_one_based_indexing(B) + n = size(B,2) H = F.factors μ = F.μ - u = H[:,n] # temporary vector - cs = Vector{Tuple{eltype(u),eltype(u)}}(undef, length(u)) # store Givens rotations - x = b # not a copy, just rename to match paper - u[n] -= μ - @inbounds for k = n:-1:2 - Φ, ρ = givens(H[k,k] - μ, H[k,k-1], 1,2) - cs[k] = c, s = Φ.c, Φ.s - x[k] /= ρ - t₁ = s * x[k]; t₂ = c * x[k] + u = Vector{typeof(zero(eltype(H))-μ)}(undef, m) # for last rotated col of H-μI + copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m] + u[m] -= μ + X = B # not a copy, just rename to match paper + cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations + @inbounds for k = m:-1:2 + c, s, ρ = givensAlgorithm(u[k], H[k,k-1]) + cs[k] = (c, s) + for i = 1:n + X[k,i] /= ρ + t₁ = s * X[k,i]; t₂ = c * X[k,i] + @simd for j = 1:k-2 + X[j,i] -= u[j]*t₂ + H[j,k-1]*t₁ + end + X[k-1,i] -= u[k-1]*t₂ + (H[k-1,k-1] - μ) * t₁ + end @simd for j = 1:k-2 - x[j] -= u[j]*t₂ + H[j,k-1]*t₁ - u[j] = H[j,k-1]*c - s'u[j] + u[j] = H[j,k-1]*c - u[j]*s' end - x[k-1] -= u[k-1]*t₂ + (H[k-1,k-1] - μ) * t₁ - u[k-1] = (H[k-1,k-1] - μ) * c - s'u[k-1] + u[k-1] = (H[k-1,k-1] - μ) * c - u[k-1]*s' end - τ₁ = x[1] / u[1] - @inbounds for k = 2:n - τ₂ = x[k] - c, s = cs[k] - x[k-1] = c*τ₁ + s*τ₂ - τ₁ = c*τ₂ - s'τ₁ + for i = 1:n + τ₁ = X[1,i] / u[1] + @inbounds for j = 2:m + τ₂ = X[j,i] + c, s = cs[j] + X[j-1,i] = c*τ₁ + s*τ₂ + τ₁ = c*τ₂ - s'τ₁ + end + X[m,i] = τ₁ end - x[n] = τ₁ - return x + return X end -function ldiv!(F::Hessenberg, b::AbstractVector) +function ldiv!(F::Hessenberg, B::AbstractVecOrMat) Q = F.Q - return lmul!(Q, ldiv_H!(F, lmul!(Q', b))) -end \ No newline at end of file + return lmul!(Q, ldiv_H!(F, lmul!(Q', B))) +end From c914120b6fdcd2ad50f28e772d192017b36e2b62 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 26 Apr 2019 22:58:45 -0400 Subject: [PATCH 03/40] more fixes and tests --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 2 +- stdlib/LinearAlgebra/src/hessenberg.jl | 41 ++++++++++++----------- stdlib/LinearAlgebra/src/qr.jl | 2 +- stdlib/LinearAlgebra/test/hessenberg.jl | 35 ++++++++++++------- 4 files changed, 46 insertions(+), 34 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index dc11793bb085a..aba5d603fb5a5 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -356,7 +356,6 @@ include("triangular.jl") include("factorization.jl") include("qr.jl") -include("hessenberg.jl") include("lq.jl") include("eigen.jl") include("svd.jl") @@ -367,6 +366,7 @@ include("bunchkaufman.jl") include("diagonal.jl") include("bidiag.jl") include("uniformscaling.jl") +include("hessenberg.jl") include("givens.jl") include("special.jl") include("bitarray.jl") diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index a9632bb75068e..ede3e7799794a 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -3,11 +3,11 @@ struct Hessenberg{T,S<:AbstractMatrix{T},V<:Number} <: Factorization{T} factors::S τ::Vector{T} - μ::V # represents a shifted Hessenberg matrix H-μI + μ::V # represents a shifted Hessenberg matrix H+μI function Hessenberg{T,S,V}(factors, τ, μ::V=zero(V)) where {T,S<:AbstractMatrix{T},V<:Number} require_one_based_indexing(factors, τ) - new{T,S}(factors, τ, μ) + new{T,S,V}(factors, τ, μ) end end Hessenberg(factors::AbstractMatrix{T}, τ::Vector{T}, μ::V=false) where {T,V<:Number} = Hessenberg{T,typeof(factors),V}(factors, τ, μ) @@ -48,8 +48,8 @@ matrix with `F.H`. When `Q` is extracted, the resulting type is the `HessenbergQ and may be converted to a regular matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short). -Note that the shifted factorization `A-μI = Q (H - μI) Q'` can be -constructed efficiently by `F - μ*I` using the [`UniformScaling`](@ref) +Note that the shifted factorization `A+μI = Q (H - μI) Q'` can be +constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref) object [`I`](@ref), which creates a new `Hessenberg` with the same `Q` and `H` (shared storage) and a modified shift. The shift of a given `F` is obtained by `F.μ`. @@ -108,17 +108,17 @@ AbstractArray(F::Hessenberg) = AbstractMatrix(F) Matrix(F::Hessenberg) = Array(AbstractArray(F)) Array(F::Hessenberg) = Matrix(F) function AbstractMatrix(F::Hessenberg{T,S,V}) where {T,S,V} - fq = Array(F.Q) + fq = F.Q A = rmul!(lmul!(fq, F.H), fq') if iszero(F.μ) return A elseif promote_type(T,V) <: T # can shift A in-place for i = 1:size(A,1) - @inbounds A[i,i] -= F.μ + @inbounds A[i,i] += F.μ end return A else - return A - F.μ*I # allocate another matrix, e.g. if A is real and μ is complex + return A + F.μ*I # allocate another matrix, e.g. if A is real and μ is complex end end @@ -153,8 +153,8 @@ end # multiply Hessenberg by scalar rmul!(F::Hessenberg{T}, x::T) where {T} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ, F.μ*x) lmul!(x::T, F::Hessenberg{T}) where {T} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) -(*)(F::Hessenberg{T}, x::T) where {T} = rmul!(copy(F), x) -(*)(x::T, F::Hessenberg{T}) where {T} = lmul!(x, copy(F)) +*(F::Hessenberg{T}, x::T) where {T} = rmul!(copy(F), x) +*(x::T, F::Hessenberg{T}) where {T} = lmul!(x, copy(F)) function (*)(F::Hessenberg{T}, x::S) where {T,S} TS = typeof(zero(T) * x) rmul!(Hessenberg{TS}(F), convert(TS, x)) @@ -163,13 +163,13 @@ function (*)(x::S, F::Hessenberg{T}) where {T,S} TS = typeof(zero(T) * x) return lmul!(convert(TS, x), Hessenberg{TS}(F)) end -(-)(F::Hessenberg{T}) where {T} = F * -one(T) +-(F::Hessenberg{T}) where {T} = F * -one(T) # shift Hessenberg by λI -(+)(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ) -(+)(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ) -(-)(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) -(-)(J::UniformScaling, F::Hessenberg{T}) where {T} = Hessenberg(-F, J.λ - F.μ) ++(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ) ++(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ) +-(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) +-(J::UniformScaling, F::Hessenberg{T}) where {T} = Hessenberg(-F, J.λ - F.μ) # Solving (H-µI)x = b: we can do this in O(n²) time and O(n) memory # (in-place in x) by the RQ algorithm from: @@ -182,10 +182,13 @@ end # C. Beattie et al., "A note on shifted Hessenberg systems and frequency # response computation," ACM Trans. Math. Soft. 38, pp. 12:6–12:16 (2011) # +# (Note, however, that there is apparently a typo in Algorithm 1 of the +# Beattie paper: the Givens rotation uses u(k), not H(k,k) - σ.) +# # Essentially, it works by doing a Givens RQ factorization of H-µI from # right to left, and doing backsubstitution *simultaneously*. -# solve (H-μ)X = B, storing result in B +# solve (H+μ)X = B, storing result in B function ldiv_H!(F::Hessenberg, B::AbstractVecOrMat) m = size(F,1) m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side length != $m")) @@ -193,9 +196,9 @@ function ldiv_H!(F::Hessenberg, B::AbstractVecOrMat) n = size(B,2) H = F.factors μ = F.μ - u = Vector{typeof(zero(eltype(H))-μ)}(undef, m) # for last rotated col of H-μI + u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated col of H-μI copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m] - u[m] -= μ + u[m] += μ X = B # not a copy, just rename to match paper cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations @inbounds for k = m:-1:2 @@ -207,12 +210,12 @@ function ldiv_H!(F::Hessenberg, B::AbstractVecOrMat) @simd for j = 1:k-2 X[j,i] -= u[j]*t₂ + H[j,k-1]*t₁ end - X[k-1,i] -= u[k-1]*t₂ + (H[k-1,k-1] - μ) * t₁ + X[k-1,i] -= u[k-1]*t₂ + (H[k-1,k-1] + μ) * t₁ end @simd for j = 1:k-2 u[j] = H[j,k-1]*c - u[j]*s' end - u[k-1] = (H[k-1,k-1] - μ) * c - u[k-1]*s' + u[k-1] = (H[k-1,k-1] + μ) * c - u[k-1]*s' end for i = 1:n τ₁ = X[1,i] / u[1] diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl index 97f3683e6336c..9ebbfdfd7640c 100644 --- a/stdlib/LinearAlgebra/src/qr.jl +++ b/stdlib/LinearAlgebra/src/qr.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# QR and Hessenberg Factorizations +# QR Factorization """ QR <: Factorization diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 4442c11282fef..c6081eb2484e9 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -9,26 +9,35 @@ let n = 10 Areal = randn(n,n)/2 Aimg = randn(n,n)/2 + b = randn(n) + B = randn(n,3) - @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) + @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) A = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(Areal, Aimg) : Areal) - if eltya != BigFloat - H = hessenberg(A) - @test size(H.Q, 1) == size(A, 1) - @test size(H.Q, 2) == size(A, 2) - @test size(H.Q) == size(A) - @test_throws ErrorException H.Z - @test convert(Array, H) ≈ A - @test (H.Q * H.H) * H.Q' ≈ A - @test (H.Q' *A) * H.Q ≈ H.H - #getindex for HessenbergQ - @test H.Q[1,1] ≈ Array(H.Q)[1,1] - end + H = hessenberg(A) + @test size(H.Q, 1) == size(A, 1) + @test size(H.Q, 2) == size(A, 2) + @test size(H.Q) == size(A) + @test_throws ErrorException H.Z + @test convert(Array, H) ≈ A + @test (H.Q * H.H) * H.Q' ≈ A + @test (H.Q' *A) * H.Q ≈ H.H + #getindex for HessenbergQ + @test H.Q[1,1] ≈ Array(H.Q)[1,1] + + @test convert(Array, 2.3 * H) ≈ 2.3 * A ≈ convert(Array, H * 2.3) + @test convert(Array, H + 2.3I) ≈ A + 2.3I ≈ convert(Array, 2.3I + H) + @test convert(Array, H + (2.3+4im)*I) ≈ A + (2.3+4im)*I ≈ convert(Array, (2.3+4im)*I + H) + @test convert(Array, H - 2.3I) ≈ A - 2.3I ≈ -convert(Array, 2.3I - H) + @test convert(Array, -H) == -convert(Array, H) + @test H \ b ≈ A \ b + @test H \ B ≈ A \ B + @test (H - (3+4im)*I) \ B ≈ (A - (3+4im)*I) \ B end end From 02c4c02ec842238e963295914d5c3e3671f32e3e Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 26 Apr 2019 23:36:56 -0400 Subject: [PATCH 04/40] fixes and tests --- stdlib/LinearAlgebra/src/hessenberg.jl | 51 +++++++++++++------------ stdlib/LinearAlgebra/test/hessenberg.jl | 18 +++++---- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index ede3e7799794a..86db536fe3cab 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -1,22 +1,17 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -struct Hessenberg{T,S<:AbstractMatrix{T},V<:Number} <: Factorization{T} +struct Hessenberg{T,TH,S<:AbstractMatrix{TH},V<:Number} <: Factorization{T} factors::S - τ::Vector{T} + τ::Vector{TH} μ::V # represents a shifted Hessenberg matrix H+μI - function Hessenberg{T,S,V}(factors, τ, μ::V=zero(V)) where {T,S<:AbstractMatrix{T},V<:Number} + function Hessenberg{T,TH,S,V}(factors, τ, μ::V=zero(V)) where {T,TH,S<:AbstractMatrix{TH},V<:Number} require_one_based_indexing(factors, τ) - new{T,S,V}(factors, τ, μ) + new{T,TH,S,V}(factors, τ, μ) end end -Hessenberg(factors::AbstractMatrix{T}, τ::Vector{T}, μ::V=false) where {T,V<:Number} = Hessenberg{T,typeof(factors),V}(factors, τ, μ) -function Hessenberg{T}(factors::AbstractMatrix, τ::AbstractVector, μ::V=false) where {T,V<:Number} - Hessenberg(convert(AbstractMatrix{T}, factors), convert(Vector{T}, v), μ) -end +Hessenberg(factors::AbstractMatrix{T}, τ::Vector{T}, μ::V=false) where {T,V<:Number} = Hessenberg{promote_type(T,V),T,typeof(factors),V}(factors, τ, μ) Hessenberg(F::Hessenberg, μ::Number=F.μ) = Hessenberg(F.factors, F.τ, μ) -Hessenberg{T}(F::Hessenberg{T}) where {T} = copy(F) # copying simplifies promotion logic below -Hessenberg{T}(F::Hessenberg) where {T} = Hessenberg{T}(F.factors, F.τ, F.μ) Hessenberg(A::StridedMatrix, μ::Number=false) = Hessenberg(LAPACK.gehrd!(A)..., μ) @@ -107,12 +102,12 @@ Matrix{T}(Q::HessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q AbstractArray(F::Hessenberg) = AbstractMatrix(F) Matrix(F::Hessenberg) = Array(AbstractArray(F)) Array(F::Hessenberg) = Matrix(F) -function AbstractMatrix(F::Hessenberg{T,S,V}) where {T,S,V} - fq = F.Q - A = rmul!(lmul!(fq, F.H), fq') +function AbstractMatrix(F::Hessenberg) + Q = F.Q + A = rmul!(lmul!(Q, F.H), Q') if iszero(F.μ) return A - elseif promote_type(T,V) <: T # can shift A in-place + elseif typeof(zero(eltype(A))+F.μ) <: eltype(A) # can shift A in-place for i = 1:size(A,1) @inbounds A[i,i] += F.μ end @@ -151,25 +146,33 @@ function lmul_triu!(x, M::AbstractMatrix, k::Integer=0) end # multiply Hessenberg by scalar -rmul!(F::Hessenberg{T}, x::T) where {T} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ, F.μ*x) -lmul!(x::T, F::Hessenberg{T}) where {T} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) -*(F::Hessenberg{T}, x::T) where {T} = rmul!(copy(F), x) -*(x::T, F::Hessenberg{T}) where {T} = lmul!(x, copy(F)) -function (*)(F::Hessenberg{T}, x::S) where {T,S} +rmul!(F::Hessenberg{<:Any,T}, x::T) where {T} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ, F.μ*x) +lmul!(x::T, F::Hessenberg{<:Any,T}) where {T} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) +*(F::Hessenberg{<:Any,T}, x::T) where {T} = rmul!(copy(F), x) +*(x::T, F::Hessenberg{<:Any,T}) where {T} = lmul!(x, copy(F)) +function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S} TS = typeof(zero(T) * x) - rmul!(Hessenberg{TS}(F), convert(TS, x)) + if TS === T + return rmul!(copy(F), convert(T, x)) + else + return rmul!(Hessenberg(Matrix{TS}(F.factors), Vector{TS}(F.τ), F.μ), convert(TS, x)) + end end -function (*)(x::S, F::Hessenberg{T}) where {T,S} +function (*)(x::S, F::Hessenberg{<:Any,T}) where {T,S} TS = typeof(zero(T) * x) - return lmul!(convert(TS, x), Hessenberg{TS}(F)) + if TS === T + return lmul!(convert(T, x), copy(F)) + else + return lmul!(convert(TS, x), Hessenberg(Matrix{TS}(F.factors), Vector{TS}(F.τ), F.μ)) + end end --(F::Hessenberg{T}) where {T} = F * -one(T) +-(F::Hessenberg{<:Any,T}) where {T} = F * -one(T) # shift Hessenberg by λI +(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ) +(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ) -(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) --(J::UniformScaling, F::Hessenberg{T}) where {T} = Hessenberg(-F, J.λ - F.μ) +-(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.μ) # Solving (H-µI)x = b: we can do this in O(n²) time and O(n) memory # (in-place in x) by the RQ algorithm from: diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index c6081eb2484e9..127b4e3161c71 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -9,8 +9,8 @@ let n = 10 Areal = randn(n,n)/2 Aimg = randn(n,n)/2 - b = randn(n) - B = randn(n,3) + b_ = randn(n) + B_ = randn(n,3) @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) A = eltya == Int ? @@ -20,6 +20,7 @@ let n = 10 Areal) H = hessenberg(A) + eltyh = eltype(H) @test size(H.Q, 1) == size(A, 1) @test size(H.Q, 2) == size(A, 2) @test size(H.Q) == size(A) @@ -30,14 +31,17 @@ let n = 10 #getindex for HessenbergQ @test H.Q[1,1] ≈ Array(H.Q)[1,1] - @test convert(Array, 2.3 * H) ≈ 2.3 * A ≈ convert(Array, H * 2.3) - @test convert(Array, H + 2.3I) ≈ A + 2.3I ≈ convert(Array, 2.3I + H) - @test convert(Array, H + (2.3+4im)*I) ≈ A + (2.3+4im)*I ≈ convert(Array, (2.3+4im)*I + H) - @test convert(Array, H - 2.3I) ≈ A - 2.3I ≈ -convert(Array, 2.3I - H) + @test convert(Array, 2 * H) ≈ 2 * A ≈ convert(Array, H * 2) + @test convert(Array, H + 2I) ≈ A + 2I ≈ convert(Array, 2I + H) + @test convert(Array, H + (2+4im)*I) ≈ A + (2+4im)*I ≈ convert(Array, (2+4im)*I + H) + @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H) @test convert(Array, -H) == -convert(Array, H) + + b = convert(Vector{eltype(H)}, b_) + B = convert(Matrix{eltype(H)}, B_) @test H \ b ≈ A \ b @test H \ B ≈ A \ B - @test (H - (3+4im)*I) \ B ≈ (A - (3+4im)*I) \ B + @test (H - I) \ B ≈ (A - I) \ B end end From 7e1e452d9a3e24a591ff62d2188f2d071fe809da Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 09:09:01 -0400 Subject: [PATCH 05/40] fix ambiguity --- stdlib/LinearAlgebra/src/hessenberg.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 86db536fe3cab..8a077c85177a9 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -150,7 +150,7 @@ rmul!(F::Hessenberg{<:Any,T}, x::T) where {T} = Hessenberg(rmul_triu!(F.factors, lmul!(x::T, F::Hessenberg{<:Any,T}) where {T} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) *(F::Hessenberg{<:Any,T}, x::T) where {T} = rmul!(copy(F), x) *(x::T, F::Hessenberg{<:Any,T}) where {T} = lmul!(x, copy(F)) -function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S} +function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S<:Number} TS = typeof(zero(T) * x) if TS === T return rmul!(copy(F), convert(T, x)) @@ -158,7 +158,7 @@ function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S} return rmul!(Hessenberg(Matrix{TS}(F.factors), Vector{TS}(F.τ), F.μ), convert(TS, x)) end end -function (*)(x::S, F::Hessenberg{<:Any,T}) where {T,S} +function (*)(x::S, F::Hessenberg{<:Any,T}) where {T,S<:Number} TS = typeof(zero(T) * x) if TS === T return lmul!(convert(T, x), copy(F)) From 600abb42b6eb771f4adc48eefca8074ecdc1cfa5 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 09:48:24 -0400 Subject: [PATCH 06/40] fix complex shift, pretty-print --- stdlib/LinearAlgebra/src/hessenberg.jl | 26 +++++++++++++++++++++++++ stdlib/LinearAlgebra/test/hessenberg.jl | 3 ++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 8a077c85177a9..02467cd15b281 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -77,6 +77,20 @@ true hessenberg(A::StridedMatrix{T}) where T = hessenberg!(copy_oftype(A, eigtype(T))) + +function show(io::IO, mime::MIME"text/plain", F::Hessenberg) + summary(io, F); println(io) + if iszero(F.μ) + println(io, "Factorization QHQ': ") + else + println(io, "Factorization Q(H+μI)Q' for shift μ = ", F.μ, ':') + end + println(io, "Q factor:") + show(io, mime, F.Q) + println(io, "\nH factor:") + show(io, mime, F.H) +end + struct HessenbergQ{T,S<:AbstractMatrix} <: AbstractQ{T} factors::S τ::Vector{T} @@ -237,3 +251,15 @@ function ldiv!(F::Hessenberg, B::AbstractVecOrMat) Q = F.Q return lmul!(Q, ldiv_H!(F, lmul!(Q', B))) end + +# handle case of real H and complex μ — we need to work around the +# fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK +function ldiv!(F::Hessenberg{<:Complex,<:Real}, B::AbstractVecOrMat{<:Complex}) + Q = F.Q + Br = lmul!(Q', real(B)) + Bi = lmul!(Q', imag(B)) + ldiv_H!(F, B .= Complex.(Br,Bi)) + Br = lmul!(Q, real(B)) + Bi = lmul!(Q, imag(B)) + return B .= Complex.(Br,Bi) +end diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 127b4e3161c71..8e9f95703cfdf 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -33,7 +33,7 @@ let n = 10 @test convert(Array, 2 * H) ≈ 2 * A ≈ convert(Array, H * 2) @test convert(Array, H + 2I) ≈ A + 2I ≈ convert(Array, 2I + H) - @test convert(Array, H + (2+4im)*I) ≈ A + (2+4im)*I ≈ convert(Array, (2+4im)*I + H) + @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H) @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H) @test convert(Array, -H) == -convert(Array, H) @@ -42,6 +42,7 @@ let n = 10 @test H \ b ≈ A \ b @test H \ B ≈ A \ B @test (H - I) \ B ≈ (A - I) \ B + @test (H - (3+4im)I) \ B ≈ (A - (3+4im)I) \ B end end From c029fd927c69151420a91d6970a322c6a7dda8e9 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 14:56:57 -0400 Subject: [PATCH 07/40] ambiguity fix --- stdlib/LinearAlgebra/src/hessenberg.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 02467cd15b281..af4d4cce397ac 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -160,10 +160,10 @@ function lmul_triu!(x, M::AbstractMatrix, k::Integer=0) end # multiply Hessenberg by scalar -rmul!(F::Hessenberg{<:Any,T}, x::T) where {T} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ, F.μ*x) -lmul!(x::T, F::Hessenberg{<:Any,T}) where {T} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) -*(F::Hessenberg{<:Any,T}, x::T) where {T} = rmul!(copy(F), x) -*(x::T, F::Hessenberg{<:Any,T}) where {T} = lmul!(x, copy(F)) +rmul!(F::Hessenberg{<:Any,T}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ, F.μ*x) +lmul!(x::T, F::Hessenberg{<:Any,T}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) +*(F::Hessenberg{<:Any,T}, x::T) where {T<:Number} = rmul!(copy(F), x) +*(x::T, F::Hessenberg{<:Any,T}) where {T<:Number} = lmul!(x, copy(F)) function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S<:Number} TS = typeof(zero(T) * x) if TS === T From 98abf8ff81ed361417430b59fa72d17eadbc65cf Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 15:39:02 -0400 Subject: [PATCH 08/40] rdiv support --- stdlib/LinearAlgebra/src/factorization.jl | 26 ++++++++- stdlib/LinearAlgebra/src/hessenberg.jl | 71 +++++++++++++++++++++-- stdlib/LinearAlgebra/test/hessenberg.jl | 8 ++- 3 files changed, 98 insertions(+), 7 deletions(-) diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl index 7ac4441e893d4..0d6f647840c70 100644 --- a/stdlib/LinearAlgebra/src/factorization.jl +++ b/stdlib/LinearAlgebra/src/factorization.jl @@ -71,13 +71,18 @@ function Base.show(io::IO, ::MIME"text/plain", x::Transpose{<:Any,<:Factorizatio end # With a real lhs and complex rhs with the same precision, we can reinterpret -# the complex rhs as a real rhs with twice the number of columns +# the complex rhs as a real rhs with twice the number of columns or rows function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal require_one_based_indexing(B) c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2)) x = ldiv!(F, c2r) return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(x, div(length(x), 2), 2))))), _ret_size(F, B)) end +function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where T<:BlasReal + require_one_based_indexing(B) + x = rdiv!(copy(reinterpret(T, B)), F) + return copy(reinterpret(Complex{T}, x)) +end function \(F::Factorization, B::AbstractVecOrMat) require_one_based_indexing(B) @@ -95,6 +100,22 @@ function \(adjF::Adjoint{<:Any,<:Factorization}, B::AbstractVecOrMat) ldiv!(adjoint(F), BB) end +function /(B::AbstractMatrix, F::Factorization) + require_one_based_indexing(B) + TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F))) + BB = similar(B, TFB, size(B)) + copyto!(BB, B) + rdiv!(BB, F) +end +function /(B::AbstractMatrix, adjF::Adjoint{<:Any,<:Factorization}) + require_one_based_indexing(B) + F = adjF.parent + TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F))) + BB = similar(B, TFB, size(B)) + copyto!(BB, B) + rdiv!(BB, adjoint(F)) +end + # support the same 3-arg idiom as in our other in-place A_*_B functions: function ldiv!(Y::AbstractVecOrMat, A::Factorization, B::AbstractVecOrMat) require_one_based_indexing(Y, B) @@ -120,3 +141,6 @@ end # fallback methods for transposed solves \(F::Transpose{<:Any,<:Factorization{<:Real}}, B::AbstractVecOrMat) = adjoint(F.parent) \ B \(F::Transpose{<:Any,<:Factorization}, B::AbstractVecOrMat) = conj.(adjoint(F.parent) \ conj.(B)) + +/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent) +/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent)) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index af4d4cce397ac..5174009e70919 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -188,7 +188,7 @@ end -(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) -(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.μ) -# Solving (H-µI)x = b: we can do this in O(n²) time and O(n) memory +# Solving (H+µI)x = b: we can do this in O(n²) time and O(n) memory # (in-place in x) by the RQ algorithm from: # # G. Henry, "The shifted Hessenberg system solve computation," Tech. Rep. 94–163, @@ -202,13 +202,13 @@ end # (Note, however, that there is apparently a typo in Algorithm 1 of the # Beattie paper: the Givens rotation uses u(k), not H(k,k) - σ.) # -# Essentially, it works by doing a Givens RQ factorization of H-µI from +# Essentially, it works by doing a Givens RQ factorization of H+µI from # right to left, and doing backsubstitution *simultaneously*. -# solve (H+μ)X = B, storing result in B +# solve (H+μI)X = B, storing result in B function ldiv_H!(F::Hessenberg, B::AbstractVecOrMat) m = size(F,1) - m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side length != $m")) + m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m")) require_one_based_indexing(B) n = size(B,2) H = F.factors @@ -247,11 +247,65 @@ function ldiv_H!(F::Hessenberg, B::AbstractVecOrMat) return X end +# solve X(H+μI) = B, storing result in B +# +# Note: this can be derived from the Henry (1994) algorithm +# by transformation to F(Hᵀ+µI)F FXᵀ = FBᵀ, where +# F is the permutation matrix that reverses the order +# of rows/cols. Essentially, we take the ldiv_H! algorithm, +# swap indices of H and X to transpose, and reverse the +# order of the H indices (or the order of the loops). +function rdiv_H!(B::AbstractMatrix, F::Hessenberg) + m = size(F,1) + m != size(B,2) && throw(DimensionMismatch("wrong right-hand-side # cols != $m")) + require_one_based_indexing(B) + n = size(B,1) + H = F.factors + μ = F.μ + u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated row of H-μI + u .= @view H[1,:] + u[1] += μ + X = B # not a copy, just rename to match paper + cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations + @inbounds for k = 1:m-1 + c, s, ρ = givensAlgorithm(u[k], H[k+1,k]) + cs[k] = (c, s) + for i = 1:n + X[i,k] /= ρ + t₁ = s * X[i,k]; t₂ = c * X[i,k] + @simd for j = k+2:m + X[i,j] -= u[j]*t₂ + H[k+1,j]*t₁ + end + X[i,k+1] -= u[k+1]*t₂ + (H[k+1,k+1] + μ) * t₁ + end + @simd for j = k+2:m + u[j] = H[k+1,j]*c - u[j]*s' + end + u[k+1] = (H[k+1,k+1] + μ) * c - u[k+1]*s' + end + for i = 1:n + τ₁ = X[i,m] / u[m] + @inbounds for j = m-1:-1:1 + τ₂ = X[i,j] + c, s = cs[j] + X[i,j+1] = c*τ₁ + s*τ₂ + τ₁ = c*τ₂ - s'τ₁ + end + X[i,1] = τ₁ + end + return X +end + function ldiv!(F::Hessenberg, B::AbstractVecOrMat) Q = F.Q return lmul!(Q, ldiv_H!(F, lmul!(Q', B))) end +function rdiv!(B::AbstractMatrix, F::Hessenberg) + Q = F.Q + return rmul!(rdiv_H!(rmul!(B, Q), F), Q') +end + # handle case of real H and complex μ — we need to work around the # fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK function ldiv!(F::Hessenberg{<:Complex,<:Real}, B::AbstractVecOrMat{<:Complex}) @@ -263,3 +317,12 @@ function ldiv!(F::Hessenberg{<:Complex,<:Real}, B::AbstractVecOrMat{<:Complex}) Bi = lmul!(Q, imag(B)) return B .= Complex.(Br,Bi) end +function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Real}) + Q = F.Q + Br = rmul!(real(B), Q) + Bi = rmul!(imag(B), Q) + rdiv_H!(B .= Complex.(Br,Bi), F) + Br = rmul!(real(B), Q') + Bi = rmul!(imag(B), Q') + return B .= Complex.(Br,Bi) +end diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 8e9f95703cfdf..f708d3bf8fed3 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -39,10 +39,14 @@ let n = 10 b = convert(Vector{eltype(H)}, b_) B = convert(Matrix{eltype(H)}, B_) - @test H \ b ≈ A \ b - @test H \ B ≈ A \ B + @test H \ b ≈ A \ b ≈ H \ complex(b) + @test H \ B ≈ A \ B ≈ H \ complex(B) @test (H - I) \ B ≈ (A - I) \ B @test (H - (3+4im)I) \ B ≈ (A - (3+4im)I) \ B + @test b' / H ≈ b' / A ≈ complex.(b') / H + @test B' / H ≈ B' / A ≈ complex(B') / H + @test B' / (H - I) ≈ B' / (A - I) + @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I) end end From 61ed19f19e327e3cf5418d18e4f163bd05f911f4 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 17:13:31 -0400 Subject: [PATCH 09/40] NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 1b038c5b2b9bf..51be440bf7a66 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,6 +36,8 @@ Standard library changes * The BLAS submodule no longer exports `dot`, which conflicts with that in LinearAlgebra ([#31838]). * `diagm` and `spdiagm` now accept optional `m,n` initial arguments to specify a size ([#31654]). +* `Hessenberg` factorizations `H` now support efficient shifted solves `(H+µI) \ b` and related operations ([#31853]). + #### SparseArrays From 6efdafea9664a2165ad3e77c510a689f85c8271b Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 17:28:34 -0400 Subject: [PATCH 10/40] adjoint support --- stdlib/LinearAlgebra/src/hessenberg.jl | 16 ++++++++++++++-- stdlib/LinearAlgebra/test/hessenberg.jl | 2 ++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 5174009e70919..a207b577d2b7e 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -19,6 +19,8 @@ copy(F::Hessenberg) = Hessenberg(copy(F.factors), copy(F.τ), F.μ) size(F::Hessenberg, d) = size(F.factors, d) size(F::Hessenberg) = size(F.factors) +adjoint(F::Hessenberg) = Adjoint(F) + # iteration for destructuring into components Base.iterate(S::Hessenberg) = (S.Q, Val(:H)) Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:done)) @@ -43,11 +45,13 @@ matrix with `F.H`. When `Q` is extracted, the resulting type is the `HessenbergQ and may be converted to a regular matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short). -Note that the shifted factorization `A+μI = Q (H - μI) Q'` can be +Note that the shifted factorization `A+μI = Q (H+μI) Q'` can be constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref) object [`I`](@ref), which creates a new `Hessenberg` with the same `Q` and `H` (shared storage) and a modified shift. The shift of a given `F` is -obtained by `F.μ`. +obtained by `F.μ`. This is useful because multiple shifted solves `(F + μ*I) \\ b` +(for different `μ` and/or `b`) can be performed efficiently once `F` is created. + Iterating the decomposition produces the factors `F.Q` and `F.H` (not including the shift `F.μ`). @@ -140,6 +144,11 @@ lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::StridedVecOrMat{T}) where {T<:Bl rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) +lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', Q')' +rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T<:BlasFloat} = lmul!(Q', X')' +lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', adjQ')' +rmul!(X::Adjoint{T,<:StridedMatrix{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = lmul!(adjQ', X')' + # multiply x by the entries of M in the upper-k triangle, which contains # the entries of the upper-Hessenberg matrix H for k=-1 function rmul_triu!(M::AbstractMatrix, x, k::Integer=0) @@ -326,3 +335,6 @@ function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Real}) Bi = rmul!(imag(B), Q') return B .= Complex.(Br,Bi) end + +ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' +rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index f708d3bf8fed3..30797d8649e42 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -47,6 +47,8 @@ let n = 10 @test B' / H ≈ B' / A ≈ complex(B') / H @test B' / (H - I) ≈ B' / (A - I) @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I) + @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B + @test B' / (H - (3+4im)I)' ≈ B' / (A - (3+4im)I)' end end From b9b1786f9edb77b9a7690ca7c537c9e4ad0b32a0 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 21:08:55 -0400 Subject: [PATCH 11/40] add fast determinant for Hessenberg --- stdlib/LinearAlgebra/src/hessenberg.jl | 36 +++++++++++++++++++++++++ stdlib/LinearAlgebra/test/hessenberg.jl | 4 +++ 2 files changed, 40 insertions(+) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index a207b577d2b7e..ccd73c8065eb7 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -338,3 +338,39 @@ end ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' + +# Hessenberg-matrix determinant formula based on Theorem 2.1 of +# +# K. Kaygisiz and A. Sahin, "Determinant and permanent of Hessenberg matrix and generalized Lucas polynomials," +# arXiv:1111.4067 (2011). +# +# which in turn cites: +# +# N. D. Cahill, J. R. D’Errico, D. A. Narayan, and J. Y. Narayan, "Fibonacci determinants," +# College Math. J. 33, pp. 221-225 (2003). +# +# Cost is O(n²) with O(n) storage. +function det(F::Hessenberg) + H = F.factors + m = size(H,1) + μ = F.μ + m == 0 && return one(zero(eltype(H)) + μ) + determinant = H[1,1] + μ + prevdeterminant = one(determinant) + m == 1 && return determinant + prods = Vector{typeof(determinant)}(undef, m-1) # temporary storage for partial products + @inbounds for n = 2:m + prods[n-1] = prevdeterminant + prevdeterminant = determinant + determinant *= H[n,n] + μ + h = H[n,n-1] + @simd for r = n-1:-2:2 + determinant -= H[r,n] * (prods[r] *= h) + determinant += H[r-1,n] * (prods[r-1] *= h) + end + if iseven(n) + determinant -= H[1,n] * (prods[1] *= h) + end + end + return determinant +end \ No newline at end of file diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 30797d8649e42..65c880d505a67 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -49,6 +49,10 @@ let n = 10 @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I) @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B @test B' / (H - (3+4im)I)' ≈ B' / (A - (3+4im)I)' + + @test det(H) ≈ det(A) + @test det(H + I) ≈ det(A + I) + @test det(H - (3+4im)I) ≈ det(A - (3+4im)I) end end From 984263c6e34c7a7a7a9ece6b24e54a0208985327 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 21:16:26 -0400 Subject: [PATCH 12/40] slight simplification --- stdlib/LinearAlgebra/src/hessenberg.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index ccd73c8065eb7..a7922ebc9a612 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -365,8 +365,7 @@ function det(F::Hessenberg) determinant *= H[n,n] + μ h = H[n,n-1] @simd for r = n-1:-2:2 - determinant -= H[r,n] * (prods[r] *= h) - determinant += H[r-1,n] * (prods[r-1] *= h) + determinant -= H[r,n] * (prods[r] *= h) - H[r-1,n] * (prods[r-1] *= h) end if iseven(n) determinant -= H[1,n] * (prods[1] *= h) From 69bcc9b5de5228f6f60a2b8dcc1277c610244652 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 21:19:15 -0400 Subject: [PATCH 13/40] clarify citation --- stdlib/LinearAlgebra/src/hessenberg.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index a7922ebc9a612..8641fccc9908b 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -339,15 +339,16 @@ end ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' -# Hessenberg-matrix determinant formula based on Theorem 2.1 of +# Hessenberg-matrix determinant formula based on: +# +# N. D. Cahill, J. R. D’Errico, D. A. Narayan, and J. Y. Narayan, "Fibonacci determinants," +# College Math. J. 33, pp. 221-225 (2003). +# +# as reviewed in Theorem 2.1 of: # # K. Kaygisiz and A. Sahin, "Determinant and permanent of Hessenberg matrix and generalized Lucas polynomials," # arXiv:1111.4067 (2011). # -# which in turn cites: -# -# N. D. Cahill, J. R. D’Errico, D. A. Narayan, and J. Y. Narayan, "Fibonacci determinants," -# College Math. J. 33, pp. 221-225 (2003). # # Cost is O(n²) with O(n) storage. function det(F::Hessenberg) From af0d995715f1b6e65135c9a982084f3e5cf19b53 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 21:19:48 -0400 Subject: [PATCH 14/40] tweaks --- stdlib/LinearAlgebra/src/hessenberg.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 8641fccc9908b..9a9517a50e9f6 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -197,7 +197,7 @@ end -(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) -(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.μ) -# Solving (H+µI)x = b: we can do this in O(n²) time and O(n) memory +# Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory # (in-place in x) by the RQ algorithm from: # # G. Henry, "The shifted Hessenberg system solve computation," Tech. Rep. 94–163, @@ -349,8 +349,7 @@ rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' # K. Kaygisiz and A. Sahin, "Determinant and permanent of Hessenberg matrix and generalized Lucas polynomials," # arXiv:1111.4067 (2011). # -# -# Cost is O(n²) with O(n) storage. +# Cost is O(m²) with O(m) storage. function det(F::Hessenberg) H = F.factors m = size(H,1) From 915c8cf9710832b3f055555b7e16b12a4a1de5b6 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 27 Apr 2019 22:13:24 -0400 Subject: [PATCH 15/40] fast logdet --- stdlib/LinearAlgebra/src/hessenberg.jl | 32 +++++++++++++++++++++++++ stdlib/LinearAlgebra/test/hessenberg.jl | 15 +++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 9a9517a50e9f6..d1721e2a94270 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -372,4 +372,36 @@ function det(F::Hessenberg) end end return determinant +end + +# O(m²) determinant based on first doing Givens rotations to put H into upper-triangular form and then +# taking the product of the diagonal entries. The trick is that we only need O(m) temporary storage, +# because we don't need to store the whole Givens-rotated matrix, only the most recent row. +# (Slightly slower than the Cahill algorithm above, but useful to compute logdet.) +function logabsdet(F::Hessenberg) + H = F.factors + m = size(H,1) + μ = F.μ + P = one(zero(eltype(H)) + μ) + logdeterminant = zero(real(P)) + m == 0 && return (logdeterminant, P) + g = Vector{typeof(P)}(undef, m) + g .= @view H[1,:] # below, g is the i-th row of Givens-rotated H+μI matrix + g[1] += μ + @inbounds for i = 1:m-1 + c, s, ρ = givensAlgorithm(g[i], H[i+1,i]) + logdeterminant += log(abs(ρ)) + P *= sign(ρ) + g[i+1] = c*(H[i+1,i+1] + μ) - s'*g[i+1] + @simd for j = i+2:m + g[j] = c*H[i+1,j] - s'*g[j] + end + end + logdeterminant += log(abs(g[m])) + P *= sign(g[m]) + return (logdeterminant, P) +end +function logdet(F::Hessenberg) + d,s = logabsdet(F) + return d + log(s) end \ No newline at end of file diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 65c880d505a67..8e8472ed3451d 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -4,6 +4,9 @@ module TestHessenberg using Test, LinearAlgebra, Random +# for tuple tests below +≅(x,y) = all(p -> p[1] ≈ p[2], zip(x,y)) + let n = 10 Random.seed!(1234321) @@ -50,10 +53,16 @@ let n = 10 @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B @test B' / (H - (3+4im)I)' ≈ B' / (A - (3+4im)I)' - @test det(H) ≈ det(A) - @test det(H + I) ≈ det(A + I) - @test det(H - (3+4im)I) ≈ det(A - (3+4im)I) + for shift in (0,1,3+4im) + @test det(H + shift*I) ≈ det(A + shift*I) + @test logabsdet(H + shift*I) ≅ logabsdet(A + shift*I) + end end end +# check logdet on a matrix that has a positive determinant +let A = [0.5 0.1 0.9 0.4; 0.9 0.7 0.5 0.4; 0.3 0.4 0.9 0.0; 0.4 0.0 0.0 0.5] + @test logdet(hessenberg(A)) ≈ logdet(A) ≈ -3.5065578973199822 +end + end # module TestHessenberg From d462234c034a01850fd4dae29d8acca01773f22c Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 28 Apr 2019 07:11:34 -0400 Subject: [PATCH 16/40] fix propertynames --- stdlib/LinearAlgebra/src/hessenberg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index d1721e2a94270..be6b98bb8cbc1 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -113,7 +113,7 @@ function getproperty(F::Hessenberg, d::Symbol) end Base.propertynames(F::Hessenberg, private::Bool=false) = - (:Q, :H, (private ? fieldnames(typeof(F)) : ())...) + (:Q, :H, :μ, (private ? (:factors, :τ) : ())...) ## reconstruct the original matrix Matrix{T}(Q::HessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) From 63e7b5d0b99d9017de7ffa8751a9248a3a5a8dcf Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 28 Apr 2019 15:47:52 -0400 Subject: [PATCH 17/40] use RQ rather than QR for logabsdet to make memory access more consecutive --- stdlib/LinearAlgebra/src/hessenberg.jl | 29 ++++++++++++++------------ 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index be6b98bb8cbc1..8e58e4d62e83e 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -374,10 +374,13 @@ function det(F::Hessenberg) return determinant end -# O(m²) determinant based on first doing Givens rotations to put H into upper-triangular form and then +# O(m²) log-determinant based on first doing Givens RQ to put H into upper-triangular form and then # taking the product of the diagonal entries. The trick is that we only need O(m) temporary storage, -# because we don't need to store the whole Givens-rotated matrix, only the most recent row. -# (Slightly slower than the Cahill algorithm above, but useful to compute logdet.) +# because we don't need to store the whole Givens-rotated matrix, only the most recent column. +# We do RQ (column rotations rather than QR (row rotations) for more consecutive memory access. +# (We could also use it for det instead of the Cahill algorithm above. Cahill is slightly faster +# for very small matrices where you are likely to use det, and also uses only ± and * so it can +# be applied to Hessenberg matrices over other number fields.) function logabsdet(F::Hessenberg) H = F.factors m = size(H,1) @@ -385,20 +388,20 @@ function logabsdet(F::Hessenberg) P = one(zero(eltype(H)) + μ) logdeterminant = zero(real(P)) m == 0 && return (logdeterminant, P) - g = Vector{typeof(P)}(undef, m) - g .= @view H[1,:] # below, g is the i-th row of Givens-rotated H+μI matrix - g[1] += μ - @inbounds for i = 1:m-1 - c, s, ρ = givensAlgorithm(g[i], H[i+1,i]) + g = Vector{typeof(P)}(undef, m) # below, g is the k-th col of Givens-rotated H+μI matrix + copyto!(g, 1, H, m*(m-1)+1, m) # g .= H[:,m] + g[m] += μ + @inbounds for k = m:-1:2 + c, s, ρ = givensAlgorithm(g[k], H[k,k-1]) logdeterminant += log(abs(ρ)) P *= sign(ρ) - g[i+1] = c*(H[i+1,i+1] + μ) - s'*g[i+1] - @simd for j = i+2:m - g[j] = c*H[i+1,j] - s'*g[j] + g[k-1] = c*(H[k-1,k-1] + μ) - s'*g[k-1] + @simd for j = 1:k-2 + g[j] = c*H[j,k-1] - s'*g[j] end end - logdeterminant += log(abs(g[m])) - P *= sign(g[m]) + logdeterminant += log(abs(g[1])) + P *= sign(g[1]) return (logdeterminant, P) end function logdet(F::Hessenberg) From f02bc4a496968414e2b9fcec66f370b4c559a730 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 28 Apr 2019 15:53:56 -0400 Subject: [PATCH 18/40] comment fix --- stdlib/LinearAlgebra/src/adjtrans.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 06c042ba9c773..84fa4a30140e4 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -273,7 +273,7 @@ pinv(v::TransposeAbsVec, tol::Real = 0) = pinv(conj(v.parent)).parent \(u::AdjOrTransAbsVec, v::AdjOrTransAbsVec) = pinv(u) * v -## right-division \ +## right-division / /(u::AdjointAbsVec, A::AbstractMatrix) = adjoint(adjoint(A) \ u.parent) /(u::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A) \ u.parent) /(u::AdjointAbsVec, A::Transpose{<:Any,<:AbstractMatrix}) = adjoint(conj(A.parent) \ u.parent) # technically should be adjoint(copy(adjoint(copy(A))) \ u.parent) From 014b9da33aee7383eeff92f195e2f4299b271338 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 28 Apr 2019 17:52:43 -0400 Subject: [PATCH 19/40] fix ambiguities --- stdlib/LinearAlgebra/src/factorization.jl | 6 ++++++ stdlib/LinearAlgebra/src/lu.jl | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl index 0d6f647840c70..c90f1b8c087d5 100644 --- a/stdlib/LinearAlgebra/src/factorization.jl +++ b/stdlib/LinearAlgebra/src/factorization.jl @@ -115,6 +115,8 @@ function /(B::AbstractMatrix, adjF::Adjoint{<:Any,<:Factorization}) copyto!(BB, B) rdiv!(BB, adjoint(F)) end +/(adjB::AdjointAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjB.parent) +/(B::TransposeAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjoint(B)) # support the same 3-arg idiom as in our other in-place A_*_B functions: function ldiv!(Y::AbstractVecOrMat, A::Factorization, B::AbstractVecOrMat) @@ -144,3 +146,7 @@ end /(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent) /(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent)) +/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent) +/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent) +/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent)) +/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent)) diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl index 01f45f57e663f..5748acd7eb0c0 100644 --- a/stdlib/LinearAlgebra/src/lu.jl +++ b/stdlib/LinearAlgebra/src/lu.jl @@ -652,7 +652,9 @@ function ldiv!(adjA::Adjoint{<:Any,LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) return B end -/(B::AbstractMatrix, A::LU) = copy(transpose(transpose(A) \ transpose(B))) +rdiv!(B::AbstractMatrix, A::LU) = transpose(ldiv!(transpose(A), transpose(B))) +rdiv!(B::AbstractMatrix, A::Transpose{<:Any,<:LU}) = transpose(ldiv!(A.parent, transpose(B))) +rdiv!(B::AbstractMatrix, A::Adjoint{<:Any,<:LU}) = adjoint(ldiv!(A.parent, adjoint(B))) # Conversions AbstractMatrix(F::LU) = (F.L * F.U)[invperm(F.p),:] From 47786bcaf7080012ea672a4c5957967a4c137480 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 28 Apr 2019 17:55:19 -0400 Subject: [PATCH 20/40] comment typo --- stdlib/LinearAlgebra/src/hessenberg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 8e58e4d62e83e..b9b3fef0443d2 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -377,7 +377,7 @@ end # O(m²) log-determinant based on first doing Givens RQ to put H into upper-triangular form and then # taking the product of the diagonal entries. The trick is that we only need O(m) temporary storage, # because we don't need to store the whole Givens-rotated matrix, only the most recent column. -# We do RQ (column rotations rather than QR (row rotations) for more consecutive memory access. +# We do RQ (column rotations) rather than QR (row rotations) for more consecutive memory access. # (We could also use it for det instead of the Cahill algorithm above. Cahill is slightly faster # for very small matrices where you are likely to use det, and also uses only ± and * so it can # be applied to Hessenberg matrices over other number fields.) From c4aef93ad9292b161f68c587f48408c5369d1e1c Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 29 Apr 2019 08:54:04 -0400 Subject: [PATCH 21/40] add another test --- stdlib/LinearAlgebra/test/hessenberg.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 8e8472ed3451d..a937f89e007f1 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -39,6 +39,7 @@ let n = 10 @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H) @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H) @test convert(Array, -H) == -convert(Array, H) + @test convert(Array, 2*(H + (2+4im)I)) ≈ 2A + (4+8im)I b = convert(Vector{eltype(H)}, b_) B = convert(Matrix{eltype(H)}, B_) From eecbc5713ab1f99678be740fdaf578e77757ba54 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 29 Apr 2019 18:36:06 -0400 Subject: [PATCH 22/40] tweak --- stdlib/LinearAlgebra/src/hessenberg.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index b9b3fef0443d2..8d4a9a554e187 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -52,7 +52,6 @@ object [`I`](@ref), which creates a new `Hessenberg` with the same `Q` and `H` obtained by `F.μ`. This is useful because multiple shifted solves `(F + μ*I) \\ b` (for different `μ` and/or `b`) can be performed efficiently once `F` is created. - Iterating the decomposition produces the factors `F.Q` and `F.H` (not including the shift `F.μ`). @@ -339,7 +338,7 @@ end ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' -# Hessenberg-matrix determinant formula based on: +# Hessenberg-matrix determinant formula for H+μI based on: # # N. D. Cahill, J. R. D’Errico, D. A. Narayan, and J. Y. Narayan, "Fibonacci determinants," # College Math. J. 33, pp. 221-225 (2003). @@ -374,7 +373,7 @@ function det(F::Hessenberg) return determinant end -# O(m²) log-determinant based on first doing Givens RQ to put H into upper-triangular form and then +# O(m²) log-determinant based on first doing Givens RQ to put H+μI into upper-triangular form and then # taking the product of the diagonal entries. The trick is that we only need O(m) temporary storage, # because we don't need to store the whole Givens-rotated matrix, only the most recent column. # We do RQ (column rotations) rather than QR (row rotations) for more consecutive memory access. From 1b978e65de03c2ff799f9ceaa28c04511d0da73a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 1 May 2019 13:57:16 -0400 Subject: [PATCH 23/40] add UpperHessenberg type --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 1 + stdlib/LinearAlgebra/src/hessenberg.jl | 562 ++++++++++++++-------- stdlib/LinearAlgebra/test/hessenberg.jl | 26 + 3 files changed, 377 insertions(+), 212 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index aba5d603fb5a5..d73807e83a0ae 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -52,6 +52,7 @@ export UpperTriangular, UnitLowerTriangular, UnitUpperTriangular, + UpperHessenberg, Diagonal, UniformScaling, diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 8d4a9a554e187..5cbe4dfe05d55 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -1,200 +1,125 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -struct Hessenberg{T,TH,S<:AbstractMatrix{TH},V<:Number} <: Factorization{T} - factors::S - τ::Vector{TH} - μ::V # represents a shifted Hessenberg matrix H+μI - - function Hessenberg{T,TH,S,V}(factors, τ, μ::V=zero(V)) where {T,TH,S<:AbstractMatrix{TH},V<:Number} - require_one_based_indexing(factors, τ) - new{T,TH,S,V}(factors, τ, μ) - end -end -Hessenberg(factors::AbstractMatrix{T}, τ::Vector{T}, μ::V=false) where {T,V<:Number} = Hessenberg{promote_type(T,V),T,typeof(factors),V}(factors, τ, μ) -Hessenberg(F::Hessenberg, μ::Number=F.μ) = Hessenberg(F.factors, F.τ, μ) - -Hessenberg(A::StridedMatrix, μ::Number=false) = Hessenberg(LAPACK.gehrd!(A)..., μ) - -copy(F::Hessenberg) = Hessenberg(copy(F.factors), copy(F.τ), F.μ) -size(F::Hessenberg, d) = size(F.factors, d) -size(F::Hessenberg) = size(F.factors) - -adjoint(F::Hessenberg) = Adjoint(F) - -# iteration for destructuring into components -Base.iterate(S::Hessenberg) = (S.Q, Val(:H)) -Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:done)) -Base.iterate(S::Hessenberg, ::Val{:done}) = nothing +###################################################################################### +# Upper-Hessenberg matrices H+μI, analogous to the UpperTriangular type +# but including a shift μ. """ - hessenberg!(A) -> Hessenberg + UpperHessenberg(A::AbstractMatrix, μ=0) -`hessenberg!` is the same as [`hessenberg`](@ref), but saves space by overwriting -the input `A`, instead of creating a copy. -""" -hessenberg!(A::StridedMatrix{<:BlasFloat}) = Hessenberg(A) +Construct an `UpperHessenberg` view of the matrix `A+μI`, where `μ` is an +optional diagonal shift. (Entries of `A` below the first subdiagonal are ignored.) -hessenberg(A::StridedMatrix{<:BlasFloat}) = hessenberg!(copy(A)) +Given an `UpperHessenberg` matrix `H`, subsequent shifts `H+λ*I` are also +performed efficiently by constructing a new `UpperHessenberg` that shares +same underlying data array. Efficient algorithms are also implemented for +`H \\ b`, `det(H)`, and similar, so shifted solves `(H+λ*I) \\ b` can be performed +for multiple shifts `λ` without making a copy of the matrix. -""" - hessenberg(A) -> Hessenberg - -Compute the Hessenberg decomposition of `A` and return a `Hessenberg` object. If `F` is the -factorization object, the unitary matrix can be accessed with `F.Q` and the Hessenberg -matrix with `F.H`. When `Q` is extracted, the resulting type is the `HessenbergQ` object, -and may be converted to a regular matrix with [`convert(Array, _)`](@ref) - (or `Array(_)` for short). - -Note that the shifted factorization `A+μI = Q (H+μI) Q'` can be -constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref) -object [`I`](@ref), which creates a new `Hessenberg` with the same `Q` and `H` -(shared storage) and a modified shift. The shift of a given `F` is -obtained by `F.μ`. This is useful because multiple shifted solves `(F + μ*I) \\ b` -(for different `μ` and/or `b`) can be performed efficiently once `F` is created. - -Iterating the decomposition produces the factors `F.Q` and `F.H` (not including -the shift `F.μ`). +See also the [`hessenberg`](@ref) function to factor any matrix into a similar +upper-Hessenberg matrix. # Examples ```jldoctest -julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.] -3×3 Array{Float64,2}: - 4.0 9.0 7.0 - 4.0 4.0 1.0 - 4.0 3.0 2.0 - -julia> F = hessenberg(A); - -julia> F.Q * F.H * F.Q' -3×3 Array{Float64,2}: - 4.0 9.0 7.0 - 4.0 4.0 1.0 - 4.0 3.0 2.0 - -julia> q, h = F; # destructuring via iteration - -julia> q == F.Q && h == F.H -true +julia> A = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16] +4×4 Array{Int64,2}: + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + 13 14 15 16 + +julia> UpperHessenberg(A, 100) +4×4 UpperHessenberg{Int64,Array{Int64,2},Int64}: + 101 2 3 4 + 5 106 7 8 + ⋅ 10 111 12 + ⋅ ⋅ 15 116 ``` """ -hessenberg(A::StridedMatrix{T}) where T = - hessenberg!(copy_oftype(A, eigtype(T))) - +struct UpperHessenberg{T,S<:AbstractMatrix,V<:Number} <: AbstractMatrix{T} + data::S + μ::V # represents a shifted Hessenberg matrix H+μI -function show(io::IO, mime::MIME"text/plain", F::Hessenberg) - summary(io, F); println(io) - if iszero(F.μ) - println(io, "Factorization QHQ': ") - else - println(io, "Factorization Q(H+μI)Q' for shift μ = ", F.μ, ':') + function UpperHessenberg{T,S,V}(data, μ) where {T,S<:AbstractMatrix,V<:Number} + require_one_based_indexing(data) + new{T,S,V}(data, μ) end - println(io, "Q factor:") - show(io, mime, F.Q) - println(io, "\nH factor:") - show(io, mime, F.H) end - -struct HessenbergQ{T,S<:AbstractMatrix} <: AbstractQ{T} - factors::S - τ::Vector{T} - function HessenbergQ{T,S}(factors, τ) where {T,S<:AbstractMatrix} - require_one_based_indexing(factors) - new(factors, τ) +UpperHessenberg(H::UpperHessenberg) = H +UpperHessenberg(H::UpperHessenberg, μ::Number) = UpperHessenberg(H.data, H.μ + μ) +UpperHessenberg{T}(A::S, μ::V=false) where {T,S<:AbstractMatrix,V<:Number} = + UpperHessenberg{T,S,V}(A, μ) +UpperHessenberg{T}(H::UpperHessenberg, μ::Number=false) where {T} = + UpperHessenberg{T}(H.data, H.μ+μ) +UpperHessenberg(A::AbstractMatrix{T}, μ::Number=false) where {T} = + UpperHessenberg{typeof(zero(T) + μ)}(A, μ) +Matrix(H::UpperHessenberg{T}) where {T} = Matrix{T}(H) +Array(H::UpperHessenberg) = Matrix(H) +size(H::UpperHessenberg, d) = size(H.data, d) +size(H::UpperHessenberg) = size(H.data) +parent(H::UpperHessenberg) = H.data + +# similar behaves like UpperTriangular +similar(H::UpperHessenberg, ::Type{T}) where {T} = UpperHessenberg(similar(H.data, T)) +similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.data, T, dims) + +copy(H::UpperHessenberg{T}) where {T} = UpperHessenberg{T}(copy(H.data), H.μ) +real(H::UpperHessenberg{<:Real}) = H +real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1), real(H.μ)) +imag(H::UpperHessenberg) = UpperHessenberg(triu!(imag(H.data),-1), imag(H.μ)) + +function Matrix{T}(H::UpperHessenberg) where T + m,n = size(H) + B = triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1) + if !iszero(H.μ) + for i = 1:min(m,n) + @inbounds B[i,i] += H.μ + end end + return B end -HessenbergQ(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = HessenbergQ{T,typeof(factors)}(factors, τ) -HessenbergQ(A::Hessenberg) = HessenbergQ(A.factors, A.τ) -function getproperty(F::Hessenberg, d::Symbol) - d == :Q && return HessenbergQ(F) - d == :H && return triu(getfield(F, :factors), -1) - return getfield(F, d) -end - -Base.propertynames(F::Hessenberg, private::Bool=false) = - (:Q, :H, :μ, (private ? (:factors, :τ) : ())...) - -## reconstruct the original matrix -Matrix{T}(Q::HessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) -AbstractArray(F::Hessenberg) = AbstractMatrix(F) -Matrix(F::Hessenberg) = Array(AbstractArray(F)) -Array(F::Hessenberg) = Matrix(F) -function AbstractMatrix(F::Hessenberg) - Q = F.Q - A = rmul!(lmul!(Q, F.H), Q') - if iszero(F.μ) - return A - elseif typeof(zero(eltype(A))+F.μ) <: eltype(A) # can shift A in-place - for i = 1:size(A,1) - @inbounds A[i,i] += F.μ - end - return A +getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} = + i < j ? convert(T, H.data[i,j]) : + i == j ? convert(T, H.data[i,i] + H.μ) : + i == j+1 ? convert(T, H.data[i,j]) : zero(T) + +function setindex!(A::UpperHessenberg, x, i::Integer, j::Integer) + if i > j + x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " * + "($i, $j) of an UpperHessenberg matrix to a nonzero value ($x)")) + elseif i == j + A.data[i,j] = x - A.μ else - return A + F.μ*I # allocate another matrix, e.g. if A is real and μ is complex + A.data[i,j] = x end + return x end -lmul!(Q::HessenbergQ{T}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -rmul!(X::StridedMatrix{T}, Q::HessenbergQ{T}) where {T<:BlasFloat} = - LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) -rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = - (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) +function Base.replace_in_print_matrix(A::UpperHessenberg, i::Integer, j::Integer, s::AbstractString) + return i <= j+1 ? s : Base.replace_with_centered_mark(s) +end -lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', Q')' -rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T<:BlasFloat} = lmul!(Q', X')' -lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', adjQ')' -rmul!(X::Adjoint{T,<:StridedMatrix{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = lmul!(adjQ', X')' +Base.copy(A::Adjoint{<:Any,<:UpperHessenberg}) = adjoint!(copy(A.parent)) +Base.copy(A::Transpose{<:Any,<:UpperHessenberg}) = transpose!(copy(A.parent)) -# multiply x by the entries of M in the upper-k triangle, which contains -# the entries of the upper-Hessenberg matrix H for k=-1 -function rmul_triu!(M::AbstractMatrix, x, k::Integer=0) - require_one_based_indexing(M) - m, n = size(M) - for j = 1:n, i = 1:min(j-k,m) - @inbounds M[i,j] *= x - end - return M -end -function lmul_triu!(x, M::AbstractMatrix, k::Integer=0) - require_one_based_indexing(M) - m, n = size(M) - for j = 1:n, i = 1:min(j-k,m) - @inbounds M[i,j] = x * M[i,j] - end - return M -end +-(A::UpperHessenberg) = UpperHessenberg(-A.data, -A.μ) +rmul!(H::UpperHessenberg, x::Number) = UpperHessenberg(rmul!(H.data, x), H.μ*x) +lmul!(x::Number, H::UpperHessenberg) = UpperHessenberg(lmul!(x, H.data), x*H.μ) -# multiply Hessenberg by scalar -rmul!(F::Hessenberg{<:Any,T}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ, F.μ*x) -lmul!(x::T, F::Hessenberg{<:Any,T}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ, x*F.μ) -*(F::Hessenberg{<:Any,T}, x::T) where {T<:Number} = rmul!(copy(F), x) -*(x::T, F::Hessenberg{<:Any,T}) where {T<:Number} = lmul!(x, copy(F)) -function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S<:Number} - TS = typeof(zero(T) * x) - if TS === T - return rmul!(copy(F), convert(T, x)) - else - return rmul!(Hessenberg(Matrix{TS}(F.factors), Vector{TS}(F.τ), F.μ), convert(TS, x)) - end -end -function (*)(x::S, F::Hessenberg{<:Any,T}) where {T,S<:Number} - TS = typeof(zero(T) * x) - if TS === T - return lmul!(convert(T, x), copy(F)) - else - return lmul!(convert(TS, x), Hessenberg(Matrix{TS}(F.factors), Vector{TS}(F.τ), F.μ)) - end -end --(F::Hessenberg{<:Any,T}) where {T} = F * -one(T) +# (future: we could also have specialized routines for UpperHessenberg * UpperTriangular) + +fillstored!(H::UpperHessenberg, x) = (H′ = UpperHessenberg(fillband!(H.data, x, -1, size(H,2)-1)); H′) + ++(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data+B.data, A.μ + B.μ) +-(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data-B.data, A.μ - B.μ) +# (future: we could also have specialized routines for UpperHessenberg ± UpperTriangular) # shift Hessenberg by λI -+(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ) -+(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ) --(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) --(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.μ) ++(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H, H.μ + J.λ) ++(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(H, J.λ + H.μ) +-(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H, H.μ - J.λ) +-(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(-H.data, J.λ - H.μ) # Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory # (in-place in x) by the RQ algorithm from: @@ -214,12 +139,13 @@ end # right to left, and doing backsubstitution *simultaneously*. # solve (H+μI)X = B, storing result in B -function ldiv_H!(F::Hessenberg, B::AbstractVecOrMat) +function ldiv!(F::UpperHessenberg, B::AbstractVecOrMat) + checksquare(F) m = size(F,1) m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m")) require_one_based_indexing(B) n = size(B,2) - H = F.factors + H = F.data μ = F.μ u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated col of H-μI copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m] @@ -263,12 +189,13 @@ end # of rows/cols. Essentially, we take the ldiv_H! algorithm, # swap indices of H and X to transpose, and reverse the # order of the H indices (or the order of the loops). -function rdiv_H!(B::AbstractMatrix, F::Hessenberg) +function rdiv!(B::AbstractMatrix, F::UpperHessenberg) + checksquare(F) m = size(F,1) m != size(B,2) && throw(DimensionMismatch("wrong right-hand-side # cols != $m")) require_one_based_indexing(B) n = size(B,1) - H = F.factors + H = F.data μ = F.μ u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated row of H-μI u .= @view H[1,:] @@ -304,40 +231,6 @@ function rdiv_H!(B::AbstractMatrix, F::Hessenberg) return X end -function ldiv!(F::Hessenberg, B::AbstractVecOrMat) - Q = F.Q - return lmul!(Q, ldiv_H!(F, lmul!(Q', B))) -end - -function rdiv!(B::AbstractMatrix, F::Hessenberg) - Q = F.Q - return rmul!(rdiv_H!(rmul!(B, Q), F), Q') -end - -# handle case of real H and complex μ — we need to work around the -# fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK -function ldiv!(F::Hessenberg{<:Complex,<:Real}, B::AbstractVecOrMat{<:Complex}) - Q = F.Q - Br = lmul!(Q', real(B)) - Bi = lmul!(Q', imag(B)) - ldiv_H!(F, B .= Complex.(Br,Bi)) - Br = lmul!(Q, real(B)) - Bi = lmul!(Q, imag(B)) - return B .= Complex.(Br,Bi) -end -function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Real}) - Q = F.Q - Br = rmul!(real(B), Q) - Bi = rmul!(imag(B), Q) - rdiv_H!(B .= Complex.(Br,Bi), F) - Br = rmul!(real(B), Q') - Bi = rmul!(imag(B), Q') - return B .= Complex.(Br,Bi) -end - -ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' -rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' - # Hessenberg-matrix determinant formula for H+μI based on: # # N. D. Cahill, J. R. D’Errico, D. A. Narayan, and J. Y. Narayan, "Fibonacci determinants," @@ -349,8 +242,9 @@ rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' # arXiv:1111.4067 (2011). # # Cost is O(m²) with O(m) storage. -function det(F::Hessenberg) - H = F.factors +function det(F::UpperHessenberg) + checksquare(F) + H = F.data m = size(H,1) μ = F.μ m == 0 && return one(zero(eltype(H)) + μ) @@ -380,8 +274,9 @@ end # (We could also use it for det instead of the Cahill algorithm above. Cahill is slightly faster # for very small matrices where you are likely to use det, and also uses only ± and * so it can # be applied to Hessenberg matrices over other number fields.) -function logabsdet(F::Hessenberg) - H = F.factors +function logabsdet(F::UpperHessenberg) + checksquare(F) + H = F.data m = size(H,1) μ = F.μ P = one(zero(eltype(H)) + μ) @@ -403,6 +298,249 @@ function logabsdet(F::Hessenberg) P *= sign(g[1]) return (logdeterminant, P) end + +###################################################################################### +# Hessenberg factorizations Q(H+μI)Q' of A+μI: + +""" + Hessenberg <: Factorization + +A `Hessenberg` object represents the Hessenberg factorization `QHQ'` of a square +matrix, and is produced by the [`hessenberg`](@ref) function. +""" +struct Hessenberg{T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector{TH}} <: Factorization{T} + H::S # upper triangle is H, lower triangle is Q data (reflectors) + τ::W # more Q (reflector) data + + function Hessenberg{T,TH,S,W}(H, τ) where {T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector{TH}} + require_one_based_indexing(τ) + new{T,TH,S,W}(H, τ) + end +end +function Hessenberg(factors::AbstractMatrix{T}, τ::AbstractVector{T}, μ::V=false) where {T,V<:Number} + H = UpperHessenberg(factors, μ) + return Hessenberg{eltype(H),T,typeof(H),typeof(τ)}(H, τ) +end +Hessenberg(F::Hessenberg) = F +Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.H.data, F.τ, μ) + +copy(F::Hessenberg) = Hessenberg(copy(F.H.data), copy(F.τ), F.H.μ) +size(F::Hessenberg, d) = size(F.H, d) +size(F::Hessenberg) = size(F.H) + +adjoint(F::Hessenberg) = Adjoint(F) + +# iteration for destructuring into components +Base.iterate(S::Hessenberg) = (S.Q, Val(:H)) +Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:done)) +Base.iterate(S::Hessenberg, ::Val{:done}) = nothing + +""" + hessenberg!(A) -> Hessenberg + +`hessenberg!` is the same as [`hessenberg`](@ref), but saves space by overwriting +the input `A`, instead of creating a copy. +""" +hessenberg!(A::StridedMatrix{<:BlasFloat}) = Hessenberg(LAPACK.gehrd!(A)...) + +hessenberg(A::StridedMatrix{<:BlasFloat}) = hessenberg!(copy(A)) + +""" + hessenberg(A) -> Hessenberg + +Compute the Hessenberg decomposition of `A` and return a `Hessenberg` object. If `F` is the +factorization object, the unitary matrix can be accessed with `F.Q` (of type [`HessenbergQ`](@ref)) +and the Hessenberg matrix with `F.H` (of type [`UpperHessenberg`](@ref)), either of +which may be converted to a regular matrix with `Matrix(F.H)` or `Matrix(F.Q)`. + +Note that the shifted factorization `A+μI = Q (H+μI) Q'` can be +constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref) +object [`I`](@ref), which creates a new `Hessenberg` object with shared storage +and a modified shift. The shift of a given `F` is obtained by `F.H.μ`. +This is useful because multiple shifted solves `(F + μ*I) \\ b` +(for different `μ` and/or `b`) can be performed efficiently once `F` is created. + +Iterating the decomposition produces the factors `F.Q` and `F.H`. + +# Examples +```jldoctest +julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.] +3×3 Array{Float64,2}: + 4.0 9.0 7.0 + 4.0 4.0 1.0 + 4.0 3.0 2.0 + +julia> F = hessenberg(A); + +julia> F.Q * F.H * F.Q' +3×3 Array{Float64,2}: + 4.0 9.0 7.0 + 4.0 4.0 1.0 + 4.0 3.0 2.0 + +julia> q, h = F; # destructuring via iteration + +julia> q == F.Q && h == F.H +true +``` +""" +hessenberg(A::StridedMatrix{T}) where T = + hessenberg!(copy_oftype(A, eigtype(T))) + +hessenberg(H::UpperHessenberg) = H + +function show(io::IO, mime::MIME"text/plain", F::Hessenberg) + summary(io, F) + println(io, "\nQ factor:") + show(io, mime, F.Q) + println(io, "\nH factor:") + show(io, mime, F.H) +end + +""" + HessenbergQ <: AbstractQ + +Given a [`Hessenberg`](@ref) factorization object `F`, `F.Q` returns +a `HessenbergQ` object, which is an implicit representation of the unitary +matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`. +This `F.Q` object can be efficiently multiplied by matrices or vectors, +and can be converted to an ordinary matrix type with `Matrix(F.Q)`. +""" +struct HessenbergQ{T,S<:AbstractMatrix} <: AbstractQ{T} + factors::S + τ::Vector{T} + function HessenbergQ{T,S}(factors, τ) where {T,S<:AbstractMatrix} + require_one_based_indexing(factors) + new(factors, τ) + end +end +HessenbergQ(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = HessenbergQ{T,typeof(factors)}(factors, τ) +HessenbergQ(F::Hessenberg) = HessenbergQ(F.H.data, F.τ) + +function getproperty(F::Hessenberg, d::Symbol) + d == :Q && return HessenbergQ(F) + return getfield(F, d) +end + +Base.propertynames(F::Hessenberg, private::Bool=false) = + (:Q, :H, (private ? (:τ,) : ())...) + +## reconstruct the original matrix +Matrix{T}(Q::HessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) +AbstractArray(F::Hessenberg) = AbstractMatrix(F) +Matrix(F::Hessenberg) = Array(AbstractArray(F)) +Array(F::Hessenberg) = Matrix(F) +function AbstractMatrix(F::Hessenberg) + Q = F.Q + A = rmul!(lmul!(Q, triu(F.H.data,-1)), Q') # don't include μ for efficiency if Q is real and μ is complex + if iszero(F.H.μ) + return A + elseif typeof(zero(eltype(A))+F.H.μ) <: eltype(A) # can shift A in-place + for i = 1:size(A,1) + @inbounds A[i,i] += F.H.μ + end + return A + else + return A + F.H.μ*I # allocate another matrix, e.g. if A is real and μ is complex + end +end + +lmul!(Q::HessenbergQ{T}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) +rmul!(X::StridedMatrix{T}, Q::HessenbergQ{T}) where {T<:BlasFloat} = + LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) +lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) +rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = + (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) + +lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', Q')' +rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T<:BlasFloat} = lmul!(Q', X')' +lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', adjQ')' +rmul!(X::Adjoint{T,<:StridedMatrix{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = lmul!(adjQ', X')' + +# multiply x by the entries of M in the upper-k triangle, which contains +# the entries of the upper-Hessenberg matrix H for k=-1 +function rmul_triu!(M::AbstractMatrix, x, k::Integer=0) + require_one_based_indexing(M) + m, n = size(M) + for j = 1:n, i = 1:min(j-k,m) + @inbounds M[i,j] *= x + end + return M +end +function lmul_triu!(x, M::AbstractMatrix, k::Integer=0) + require_one_based_indexing(M) + m, n = size(M) + for j = 1:n, i = 1:min(j-k,m) + @inbounds M[i,j] = x * M[i,j] + end + return M +end + +# multiply Hessenberg by scalar (but don't modify lower triangle of F.H.data) +rmul!(F::Hessenberg{<:Any,T}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.H.data, x, -1), F.τ, F.H.μ*x) +lmul!(x::T, F::Hessenberg{<:Any,T}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.H.data, -1), F.τ, x*F.H.μ) +function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S<:Number} + TS = typeof(zero(T) * x) + if TS === T + return rmul!(copy(F), convert(T, x)) + else + return rmul!(Hessenberg(Matrix{TS}(F.H.data), Vector{TS}(F.τ), F.H.μ), convert(TS, x)) + end +end +function (*)(x::S, F::Hessenberg{<:Any,T}) where {T,S<:Number} + TS = typeof(zero(T) * x) + if TS === T + return lmul!(convert(T, x), copy(F)) + else + return lmul!(convert(TS, x), Hessenberg(Matrix{TS}(F.H.data), Vector{TS}(F.τ), F.H.μ)) + end +end +-(F::Hessenberg) = F * -one(eltype(F.H.data)) + +# shift Hessenberg by λI ++(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.H.μ + J.λ) ++(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.H.μ) +-(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.H.μ - J.λ) +-(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.H.μ) + +function ldiv!(F::Hessenberg, B::AbstractVecOrMat) + Q = F.Q + return lmul!(Q, ldiv!(F.H, lmul!(Q', B))) +end + +function rdiv!(B::AbstractMatrix, F::Hessenberg) + Q = F.Q + return rmul!(rdiv!(rmul!(B, Q), F.H), Q') +end + +# handle case of real H and complex μ — we need to work around the +# fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK +function ldiv!(F::Hessenberg{<:Complex,<:Real}, B::AbstractVecOrMat{<:Complex}) + Q = F.Q + Br = lmul!(Q', real(B)) + Bi = lmul!(Q', imag(B)) + ldiv!(F.H, B .= Complex.(Br,Bi)) + Br = lmul!(Q, real(B)) + Bi = lmul!(Q, imag(B)) + return B .= Complex.(Br,Bi) +end +function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Real}) + Q = F.Q + Br = rmul!(real(B), Q) + Bi = rmul!(imag(B), Q) + rdiv!(B .= Complex.(Br,Bi), F.H) + Br = rmul!(real(B), Q') + Bi = rmul!(imag(B), Q') + return B .= Complex.(Br,Bi) +end + +ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' +rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' + +det(F::Hessenberg) = det(F.H) +logabsdet(F::Hessenberg) = logabsdet(F.H) function logdet(F::Hessenberg) d,s = logabsdet(F) return d + log(s) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index a937f89e007f1..6164b7624a94c 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -15,6 +15,27 @@ let n = 10 b_ = randn(n) B_ = randn(n,3) + # UpperHessenberg methods not covered by the tests below + @testset "UpperHessenberg" begin + A = Areal + H = UpperHessenberg(A) + AH = triu(A,-1) + @test Matrix(H) == H == AH + for x in (2,2+3im) + @test x*H == H*x == x*AH + for op in (+,-) + @test op(H,x*I) == op(AH,x*I) == op(op(x*I,H)) + @test op(H,x*I).data === H.data + @test op(H,x*I)*x == op(AH,x*I)*x == x*op(H,x*I) + @test op(H+x*I, 2H+I) ≈ UpperHessenberg(op(1,2)*H, op(x,1)) + end + end + @test UpperHessenberg(H+I, 3) == H+4I + H1 = LinearAlgebra.fillstored!(copy(H), 1) + @test H1 == triu(fill(1, n,n), -1) + @test tril(H1.data,-2) == tril(H.data,-2) + end + @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) A = eltya == Int ? rand(1:7, n, n) : @@ -34,6 +55,11 @@ let n = 10 #getindex for HessenbergQ @test H.Q[1,1] ≈ Array(H.Q)[1,1] + #iterate + q,h = H + @test q == H.Q + @test h == H.H + @test convert(Array, 2 * H) ≈ 2 * A ≈ convert(Array, H * 2) @test convert(Array, H + 2I) ≈ A + 2I ≈ convert(Array, 2I + H) @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H) From 53d870ef80206825c6e060171550cff5ba16fe22 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 1 May 2019 14:16:22 -0400 Subject: [PATCH 24/40] hessenberg docs --- stdlib/LinearAlgebra/docs/src/index.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md index 81faa64084b05..4cf816cac51c8 100644 --- a/stdlib/LinearAlgebra/docs/src/index.md +++ b/stdlib/LinearAlgebra/docs/src/index.md @@ -168,8 +168,9 @@ as well as whether hooks to various optimized methods for them in LAPACK are ava | [`Hermitian`](@ref) | [Hermitian matrix](https://en.wikipedia.org/wiki/Hermitian_matrix) | | [`UpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) | | [`UnitUpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal | -| [`LowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) | +| [`LowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) | | | [`UnitLowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal | +| [`UpperHessenberg`](@ref) | Upper [Hessenberg matrix](https://en.wikipedia.org/wiki/Hessenberg_matrix) | [`Tridiagonal`](@ref) | [Tridiagonal matrix](https://en.wikipedia.org/wiki/Tridiagonal_matrix) | | [`SymTridiagonal`](@ref) | Symmetric tridiagonal matrix | | [`Bidiagonal`](@ref) | Upper/lower [bidiagonal matrix](https://en.wikipedia.org/wiki/Bidiagonal_matrix) | @@ -186,6 +187,7 @@ as well as whether hooks to various optimized methods for them in LAPACK are ava | [`UnitUpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) | | [`LowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) | | [`UnitLowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) | +| [`UpperHessenberg`](@ref) | | | | MM | [`inv`](@ref), [`det`](@ref) | | [`SymTridiagonal`](@ref) | M | M | MS | MV | [`eigmax`](@ref), [`eigmin`](@ref) | | [`Tridiagonal`](@ref) | M | M | MS | MV | | | [`Bidiagonal`](@ref) | M | M | MS | MV | | @@ -269,6 +271,12 @@ Stacktrace: [...] ``` +If you need to solve many systems of the form `(A+μI)x = b`, it might be beneficial +to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](@ref) function. +Given `F`, Julia implements an efficient algorithm for `(F+μ*I) \ b` and related +operations. + + ## [Matrix factorizations](@id man-linalg-factorizations) [Matrix factorizations (a.k.a. matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition) @@ -319,6 +327,7 @@ LinearAlgebra.LowerTriangular LinearAlgebra.UpperTriangular LinearAlgebra.UnitLowerTriangular LinearAlgebra.UnitUpperTriangular +LinearAlgebra.UpperHessenberg LinearAlgebra.UniformScaling LinearAlgebra.lu LinearAlgebra.lu! From 7ba75c451c2dc2aeeba331ee619d633afa7ff3a1 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 1 May 2019 14:17:28 -0400 Subject: [PATCH 25/40] tweak --- stdlib/LinearAlgebra/docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md index 4cf816cac51c8..ab03bfc646d3a 100644 --- a/stdlib/LinearAlgebra/docs/src/index.md +++ b/stdlib/LinearAlgebra/docs/src/index.md @@ -273,7 +273,7 @@ Stacktrace: If you need to solve many systems of the form `(A+μI)x = b`, it might be beneficial to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](@ref) function. -Given `F`, Julia implements an efficient algorithm for `(F+μ*I) \ b` and related +Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` and related operations. From 4b6c12b67b8ba610c962ddf6fd89d16a53108628 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 1 May 2019 14:42:05 -0400 Subject: [PATCH 26/40] fix doc reference for non-exported HessenbergQ --- stdlib/LinearAlgebra/src/hessenberg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 5cbe4dfe05d55..77750ee9cc3ce 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -349,7 +349,7 @@ hessenberg(A::StridedMatrix{<:BlasFloat}) = hessenberg!(copy(A)) hessenberg(A) -> Hessenberg Compute the Hessenberg decomposition of `A` and return a `Hessenberg` object. If `F` is the -factorization object, the unitary matrix can be accessed with `F.Q` (of type [`HessenbergQ`](@ref)) +factorization object, the unitary matrix can be accessed with `F.Q` (of type `LinearAlgebra.HessenbergQ`) and the Hessenberg matrix with `F.H` (of type [`UpperHessenberg`](@ref)), either of which may be converted to a regular matrix with `Matrix(F.H)` or `Matrix(F.Q)`. From 330ca95b551e881e62b184da3ce0a4cbb45e582e Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 1 May 2019 17:06:12 -0400 Subject: [PATCH 27/40] =?UTF-8?q?make=20=CF=84=20type=20more=20generic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stdlib/LinearAlgebra/src/hessenberg.jl | 34 ++++++++++++++------------ 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 77750ee9cc3ce..12910518af52e 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -308,11 +308,11 @@ end A `Hessenberg` object represents the Hessenberg factorization `QHQ'` of a square matrix, and is produced by the [`hessenberg`](@ref) function. """ -struct Hessenberg{T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector{TH}} <: Factorization{T} +struct Hessenberg{T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector} <: Factorization{T} H::S # upper triangle is H, lower triangle is Q data (reflectors) τ::W # more Q (reflector) data - function Hessenberg{T,TH,S,W}(H, τ) where {T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector{TH}} + function Hessenberg{T,TH,S,W}(H, τ) where {T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector} require_one_based_indexing(τ) new{T,TH,S,W}(H, τ) end @@ -406,15 +406,14 @@ matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`. This `F.Q` object can be efficiently multiplied by matrices or vectors, and can be converted to an ordinary matrix type with `Matrix(F.Q)`. """ -struct HessenbergQ{T,S<:AbstractMatrix} <: AbstractQ{T} +struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector} <: AbstractQ{T} factors::S - τ::Vector{T} - function HessenbergQ{T,S}(factors, τ) where {T,S<:AbstractMatrix} - require_one_based_indexing(factors) + τ::W + function HessenbergQ{T,S,W}(factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector} new(factors, τ) end end -HessenbergQ(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = HessenbergQ{T,typeof(factors)}(factors, τ) +HessenbergQ(factors::AbstractMatrix{T}, τ::AbstractVector) where {T} = HessenbergQ{T,typeof(factors),typeof(τ)}(factors, τ) HessenbergQ(F::Hessenberg) = HessenbergQ(F.H.data, F.τ) function getproperty(F::Hessenberg, d::Symbol) @@ -425,8 +424,11 @@ end Base.propertynames(F::Hessenberg, private::Bool=false) = (:Q, :H, (private ? (:τ,) : ())...) +# HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra) +const BlasHessenbergQ{T} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T}} where {T<:BlasFloat} + ## reconstruct the original matrix -Matrix{T}(Q::HessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) +Matrix{T}(Q::BlasHessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) AbstractArray(F::Hessenberg) = AbstractMatrix(F) Matrix(F::Hessenberg) = Array(AbstractArray(F)) Array(F::Hessenberg) = Matrix(F) @@ -445,19 +447,19 @@ function AbstractMatrix(F::Hessenberg) end end -lmul!(Q::HessenbergQ{T}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = +lmul!(Q::BlasHessenbergQ{T}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -rmul!(X::StridedMatrix{T}, Q::HessenbergQ{T}) where {T<:BlasFloat} = +rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T}) where {T<:BlasFloat} = LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = +lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) -rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = +rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T}}) where {T<:BlasFloat} = (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) -lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', Q')' -rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T<:BlasFloat} = lmul!(Q', X')' -lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = rmul!(X', adjQ')' -rmul!(X::Adjoint{T,<:StridedMatrix{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T<:BlasFloat} = lmul!(adjQ', X')' +lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')' +rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')' +lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', adjQ')' +rmul!(X::Adjoint{T,<:StridedMatrix{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')' # multiply x by the entries of M in the upper-k triangle, which contains # the entries of the upper-Hessenberg matrix H for k=-1 From 3d37fb28e76fbaf33e310dfed95812e6876f3d11 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 1 May 2019 17:10:03 -0400 Subject: [PATCH 28/40] update NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 51be440bf7a66..97fdd3d51ff34 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,7 +36,7 @@ Standard library changes * The BLAS submodule no longer exports `dot`, which conflicts with that in LinearAlgebra ([#31838]). * `diagm` and `spdiagm` now accept optional `m,n` initial arguments to specify a size ([#31654]). -* `Hessenberg` factorizations `H` now support efficient shifted solves `(H+µI) \ b` and related operations ([#31853]). +* `Hessenberg` factorizations `H` now support efficient shifted solves `(H+µI) \ b` and related operations, and there is also an `UpperHessenberg` matrix type ([#31853]). #### SparseArrays From 8e2dbea16a7cda3d4993b642401445f07d631d2f Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 3 May 2019 12:11:52 -0400 Subject: [PATCH 29/40] put shifts into Hessenberg object, store factors separately in preparation for SymTridiagonal factorization of Hermitian A --- stdlib/LinearAlgebra/src/hessenberg.jl | 197 +++++++++++------------- stdlib/LinearAlgebra/test/hessenberg.jl | 9 +- 2 files changed, 96 insertions(+), 110 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 12910518af52e..aa2c3c28af7b8 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -5,16 +5,12 @@ # but including a shift μ. """ - UpperHessenberg(A::AbstractMatrix, μ=0) + UpperHessenberg(A::AbstractMatrix) -Construct an `UpperHessenberg` view of the matrix `A+μI`, where `μ` is an -optional diagonal shift. (Entries of `A` below the first subdiagonal are ignored.) +Construct an `UpperHessenberg` view of the matrix `A`. +Entries of `A` below the first subdiagonal are ignored. -Given an `UpperHessenberg` matrix `H`, subsequent shifts `H+λ*I` are also -performed efficiently by constructing a new `UpperHessenberg` that shares -same underlying data array. Efficient algorithms are also implemented for -`H \\ b`, `det(H)`, and similar, so shifted solves `(H+λ*I) \\ b` can be performed -for multiple shifts `λ` without making a copy of the matrix. +Efficient algorithms are implemented for `H \\ b`, `det(H)`, and similar. See also the [`hessenberg`](@ref) function to factor any matrix into a similar upper-Hessenberg matrix. @@ -28,31 +24,26 @@ julia> A = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16] 9 10 11 12 13 14 15 16 -julia> UpperHessenberg(A, 100) -4×4 UpperHessenberg{Int64,Array{Int64,2},Int64}: - 101 2 3 4 - 5 106 7 8 - ⋅ 10 111 12 - ⋅ ⋅ 15 116 +julia> UpperHessenberg(A) +4×4 UpperHessenberg{Int64,Array{Int64,2}}: + 1 2 3 4 + 5 6 7 8 + ⋅ 10 11 12 + ⋅ ⋅ 15 16 ``` """ -struct UpperHessenberg{T,S<:AbstractMatrix,V<:Number} <: AbstractMatrix{T} +struct UpperHessenberg{T,S<:AbstractMatrix{T}} <: AbstractMatrix{T} data::S - μ::V # represents a shifted Hessenberg matrix H+μI - function UpperHessenberg{T,S,V}(data, μ) where {T,S<:AbstractMatrix,V<:Number} + function UpperHessenberg{T,S}(data) where {T,S<:AbstractMatrix{T}} require_one_based_indexing(data) - new{T,S,V}(data, μ) + new{T,S}(data) end end UpperHessenberg(H::UpperHessenberg) = H -UpperHessenberg(H::UpperHessenberg, μ::Number) = UpperHessenberg(H.data, H.μ + μ) -UpperHessenberg{T}(A::S, μ::V=false) where {T,S<:AbstractMatrix,V<:Number} = - UpperHessenberg{T,S,V}(A, μ) -UpperHessenberg{T}(H::UpperHessenberg, μ::Number=false) where {T} = - UpperHessenberg{T}(H.data, H.μ+μ) -UpperHessenberg(A::AbstractMatrix{T}, μ::Number=false) where {T} = - UpperHessenberg{typeof(zero(T) + μ)}(A, μ) +UpperHessenberg{T}(A::AbstractMatrix) where {T} = UpperHessenberg(convert(AbstractMatrix{T}, A)) +UpperHessenberg{T}(H::UpperHessenberg) where {T} = UpperHessenberg{T}(H.data) +UpperHessenberg(A::AbstractMatrix) = UpperHessenberg{eltype(A),typeof(A)}(A) Matrix(H::UpperHessenberg{T}) where {T} = Matrix{T}(H) Array(H::UpperHessenberg) = Matrix(H) size(H::UpperHessenberg, d) = size(H.data, d) @@ -63,63 +54,53 @@ parent(H::UpperHessenberg) = H.data similar(H::UpperHessenberg, ::Type{T}) where {T} = UpperHessenberg(similar(H.data, T)) similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.data, T, dims) -copy(H::UpperHessenberg{T}) where {T} = UpperHessenberg{T}(copy(H.data), H.μ) +copy(H::UpperHessenberg) = UpperHessenberg(copy(H.data)) real(H::UpperHessenberg{<:Real}) = H -real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1), real(H.μ)) -imag(H::UpperHessenberg) = UpperHessenberg(triu!(imag(H.data),-1), imag(H.μ)) +real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1)) +imag(H::UpperHessenberg) = UpperHessenberg(triu!(imag(H.data),-1)) function Matrix{T}(H::UpperHessenberg) where T m,n = size(H) - B = triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1) - if !iszero(H.μ) - for i = 1:min(m,n) - @inbounds B[i,i] += H.μ - end - end - return B + return triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1) end getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} = - i < j ? convert(T, H.data[i,j]) : - i == j ? convert(T, H.data[i,i] + H.μ) : - i == j+1 ? convert(T, H.data[i,j]) : zero(T) + i <= j+1 ? convert(T, H.data[i,j]) : zero(T) function setindex!(A::UpperHessenberg, x, i::Integer, j::Integer) - if i > j + if i > j+1 x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " * "($i, $j) of an UpperHessenberg matrix to a nonzero value ($x)")) - elseif i == j - A.data[i,j] = x - A.μ else A.data[i,j] = x end - return x + return A end function Base.replace_in_print_matrix(A::UpperHessenberg, i::Integer, j::Integer, s::AbstractString) return i <= j+1 ? s : Base.replace_with_centered_mark(s) end -Base.copy(A::Adjoint{<:Any,<:UpperHessenberg}) = adjoint!(copy(A.parent)) -Base.copy(A::Transpose{<:Any,<:UpperHessenberg}) = transpose!(copy(A.parent)) +Base.copy(A::Adjoint{<:Any,<:UpperHessenberg}) = tril!(adjoint!(similar(A.parent.data), A.parent.data), 1) +Base.copy(A::Transpose{<:Any,<:UpperHessenberg}) = tril!(transpose!(similar(A.parent.data), A.parent.data), 1) --(A::UpperHessenberg) = UpperHessenberg(-A.data, -A.μ) -rmul!(H::UpperHessenberg, x::Number) = UpperHessenberg(rmul!(H.data, x), H.μ*x) -lmul!(x::Number, H::UpperHessenberg) = UpperHessenberg(lmul!(x, H.data), x*H.μ) +-(A::UpperHessenberg) = UpperHessenberg(-A.data) +rmul!(H::UpperHessenberg, x::Number) = (rmul!(H.data, x); H) +lmul!(x::Number, H::UpperHessenberg) = (lmul!(x, H.data); H) # (future: we could also have specialized routines for UpperHessenberg * UpperTriangular) -fillstored!(H::UpperHessenberg, x) = (H′ = UpperHessenberg(fillband!(H.data, x, -1, size(H,2)-1)); H′) +fillstored!(H::UpperHessenberg, x) = (fillband!(H.data, x, -1, size(H,2)-1); H) -+(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data+B.data, A.μ + B.μ) --(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data-B.data, A.μ - B.μ) ++(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data+B.data) +-(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data-B.data) # (future: we could also have specialized routines for UpperHessenberg ± UpperTriangular) # shift Hessenberg by λI -+(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H, H.μ + J.λ) -+(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(H, J.λ + H.μ) --(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H, H.μ - J.λ) --(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(-H.data, J.λ - H.μ) ++(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H.data + J) ++(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(J + H.data) +-(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H.data - J) +-(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(J - H.data) # Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory # (in-place in x) by the RQ algorithm from: @@ -139,14 +120,14 @@ fillstored!(H::UpperHessenberg, x) = (H′ = UpperHessenberg(fillband!(H.data, x # right to left, and doing backsubstitution *simultaneously*. # solve (H+μI)X = B, storing result in B -function ldiv!(F::UpperHessenberg, B::AbstractVecOrMat) +function ldiv!(F::UpperHessenberg, B::AbstractVecOrMat; shift::Number=false) checksquare(F) m = size(F,1) m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m")) require_one_based_indexing(B) n = size(B,2) H = F.data - μ = F.μ + μ = shift u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated col of H-μI copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m] u[m] += μ @@ -189,14 +170,14 @@ end # of rows/cols. Essentially, we take the ldiv_H! algorithm, # swap indices of H and X to transpose, and reverse the # order of the H indices (or the order of the loops). -function rdiv!(B::AbstractMatrix, F::UpperHessenberg) +function rdiv!(B::AbstractMatrix, F::UpperHessenberg; shift::Number=false) checksquare(F) m = size(F,1) m != size(B,2) && throw(DimensionMismatch("wrong right-hand-side # cols != $m")) require_one_based_indexing(B) n = size(B,1) H = F.data - μ = F.μ + μ = shift u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated row of H-μI u .= @view H[1,:] u[1] += μ @@ -242,11 +223,11 @@ end # arXiv:1111.4067 (2011). # # Cost is O(m²) with O(m) storage. -function det(F::UpperHessenberg) +function det(F::UpperHessenberg; shift::Number=false) checksquare(F) H = F.data m = size(H,1) - μ = F.μ + μ = shift m == 0 && return one(zero(eltype(H)) + μ) determinant = H[1,1] + μ prevdeterminant = one(determinant) @@ -274,11 +255,11 @@ end # (We could also use it for det instead of the Cahill algorithm above. Cahill is slightly faster # for very small matrices where you are likely to use det, and also uses only ± and * so it can # be applied to Hessenberg matrices over other number fields.) -function logabsdet(F::UpperHessenberg) +function logabsdet(F::UpperHessenberg; shift::Number=false) checksquare(F) H = F.data m = size(H,1) - μ = F.μ + μ = shift P = one(zero(eltype(H)) + μ) logdeterminant = zero(real(P)) m == 0 && return (logdeterminant, P) @@ -306,25 +287,20 @@ end Hessenberg <: Factorization A `Hessenberg` object represents the Hessenberg factorization `QHQ'` of a square -matrix, and is produced by the [`hessenberg`](@ref) function. +matrix, or a shift `Q(H+μI)Q'` thereof, which is produced by the [`hessenberg`](@ref) function. """ -struct Hessenberg{T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector} <: Factorization{T} - H::S # upper triangle is H, lower triangle is Q data (reflectors) +struct Hessenberg{T,SH<:AbstractMatrix,S<:AbstractMatrix,W<:AbstractVector,V<:Number} <: Factorization{T} + H::SH # UpperHessenberg or SymTridiagonal + factors::S # reflector data in lower triangle, may share data with H τ::W # more Q (reflector) data - - function Hessenberg{T,TH,S,W}(H, τ) where {T,TH,S<:UpperHessenberg{T,<:AbstractMatrix{TH}},W<:AbstractVector} - require_one_based_indexing(τ) - new{T,TH,S,W}(H, τ) - end -end -function Hessenberg(factors::AbstractMatrix{T}, τ::AbstractVector{T}, μ::V=false) where {T,V<:Number} - H = UpperHessenberg(factors, μ) - return Hessenberg{eltype(H),T,typeof(H),typeof(τ)}(H, τ) + μ::V # diagonal shift for copy-free (F+μI) \ b solves and similar end +Hessenberg(factors::AbstractMatrix, τ::AbstractVector, H::AbstractMatrix=UpperHessenberg(factors); μ::Number=false) = + Hessenberg{typeof(zero(eltype(factors))+μ),typeof(H),typeof(factors),typeof(τ),typeof(μ)}(H, factors, τ, μ) Hessenberg(F::Hessenberg) = F -Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.H.data, F.τ, μ) +Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ; μ=μ) -copy(F::Hessenberg) = Hessenberg(copy(F.H.data), copy(F.τ), F.H.μ) +copy(F::Hessenberg) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ) size(F::Hessenberg, d) = size(F.H, d) size(F::Hessenberg) = size(F.H) @@ -332,7 +308,8 @@ adjoint(F::Hessenberg) = Adjoint(F) # iteration for destructuring into components Base.iterate(S::Hessenberg) = (S.Q, Val(:H)) -Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:done)) +Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:μ)) +Base.iterate(S::Hessenberg, ::Val{:μ}) = (S.μ, Val(:done)) Base.iterate(S::Hessenberg, ::Val{:done}) = nothing """ @@ -356,11 +333,11 @@ which may be converted to a regular matrix with `Matrix(F.H)` or `Matrix(F.Q)`. Note that the shifted factorization `A+μI = Q (H+μI) Q'` can be constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref) object [`I`](@ref), which creates a new `Hessenberg` object with shared storage -and a modified shift. The shift of a given `F` is obtained by `F.H.μ`. +and a modified shift. The shift of a given `F` is obtained by `F.μ`. This is useful because multiple shifted solves `(F + μ*I) \\ b` (for different `μ` and/or `b`) can be performed efficiently once `F` is created. -Iterating the decomposition produces the factors `F.Q` and `F.H`. +Iterating the decomposition produces the factors `F.Q, F.H, F.μ`. # Examples ```jldoctest @@ -387,10 +364,11 @@ true hessenberg(A::StridedMatrix{T}) where T = hessenberg!(copy_oftype(A, eigtype(T))) -hessenberg(H::UpperHessenberg) = H - function show(io::IO, mime::MIME"text/plain", F::Hessenberg) summary(io, F) + if !iszero(F.μ) + print("\nwith shift μI for μ = ", F.μ) + end println(io, "\nQ factor:") show(io, mime, F.Q) println(io, "\nH factor:") @@ -414,7 +392,7 @@ struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector} <: AbstractQ{T} end end HessenbergQ(factors::AbstractMatrix{T}, τ::AbstractVector) where {T} = HessenbergQ{T,typeof(factors),typeof(τ)}(factors, τ) -HessenbergQ(F::Hessenberg) = HessenbergQ(F.H.data, F.τ) +HessenbergQ(F::Hessenberg) = HessenbergQ(F.factors, F.τ) function getproperty(F::Hessenberg, d::Symbol) d == :Q && return HessenbergQ(F) @@ -422,7 +400,7 @@ function getproperty(F::Hessenberg, d::Symbol) end Base.propertynames(F::Hessenberg, private::Bool=false) = - (:Q, :H, (private ? (:τ,) : ())...) + (:Q, :H, :μ, (private ? (:τ, :factors) : ())...) # HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra) const BlasHessenbergQ{T} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T}} where {T<:BlasFloat} @@ -434,16 +412,17 @@ Matrix(F::Hessenberg) = Array(AbstractArray(F)) Array(F::Hessenberg) = Matrix(F) function AbstractMatrix(F::Hessenberg) Q = F.Q - A = rmul!(lmul!(Q, triu(F.H.data,-1)), Q') # don't include μ for efficiency if Q is real and μ is complex - if iszero(F.H.μ) + A = rmul!(lmul!(Q, Matrix{eltype(Q)}(F.H)), Q') + μ = F.μ + if iszero(μ) return A - elseif typeof(zero(eltype(A))+F.H.μ) <: eltype(A) # can shift A in-place + elseif typeof(zero(eltype(A))+μ) <: eltype(A) # can shift A in-place for i = 1:size(A,1) - @inbounds A[i,i] += F.H.μ + @inbounds A[i,i] += μ end return A else - return A + F.H.μ*I # allocate another matrix, e.g. if A is real and μ is complex + return A + μ*I # allocate another matrix, e.g. if A is real and μ is complex end end @@ -480,59 +459,63 @@ function lmul_triu!(x, M::AbstractMatrix, k::Integer=0) return M end +# when H is UpperHessenberg, it shares data with F.factors # multiply Hessenberg by scalar (but don't modify lower triangle of F.H.data) -rmul!(F::Hessenberg{<:Any,T}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.H.data, x, -1), F.τ, F.H.μ*x) -lmul!(x::T, F::Hessenberg{<:Any,T}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.H.data, -1), F.τ, x*F.H.μ) -function (*)(F::Hessenberg{<:Any,T}, x::S) where {T,S<:Number} +rmul!(F::Hessenberg{<:Any,<:UpperHessenberg{T}}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ; μ=F.μ*x) +lmul!(x::T, F::Hessenberg{<:Any,<:UpperHessenberg{T}}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ; μ=x*F.μ) + +# Promote F * x or x * F. In general, we don't know how to do promotions +# that would change the element type of F.H, however. +function (*)(F::Hessenberg{<:Any,<:AbstractMatrix{T}}, x::S) where {T,S<:Number} TS = typeof(zero(T) * x) if TS === T return rmul!(copy(F), convert(T, x)) else - return rmul!(Hessenberg(Matrix{TS}(F.H.data), Vector{TS}(F.τ), F.H.μ), convert(TS, x)) + throw(MethodError(*, (F, x))) end end -function (*)(x::S, F::Hessenberg{<:Any,T}) where {T,S<:Number} +function (*)(x::S, F::Hessenberg{<:Any,<:AbstractMatrix{T}}) where {T,S<:Number} TS = typeof(zero(T) * x) if TS === T return lmul!(convert(T, x), copy(F)) else - return lmul!(convert(TS, x), Hessenberg(Matrix{TS}(F.H.data), Vector{TS}(F.τ), F.H.μ)) + throw(MethodError(*, (x, F))) end end --(F::Hessenberg) = F * -one(eltype(F.H.data)) +-(F::Hessenberg) = F * -one(eltype(F.H)) # shift Hessenberg by λI -+(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.H.μ + J.λ) -+(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.H.μ) --(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.H.μ - J.λ) --(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.H.μ) ++(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ) ++(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ) +-(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) +-(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.μ) function ldiv!(F::Hessenberg, B::AbstractVecOrMat) Q = F.Q - return lmul!(Q, ldiv!(F.H, lmul!(Q', B))) + return lmul!(Q, ldiv!(F.H, lmul!(Q', B); shift=F.μ)) end function rdiv!(B::AbstractMatrix, F::Hessenberg) Q = F.Q - return rmul!(rdiv!(rmul!(B, Q), F.H), Q') + return rmul!(rdiv!(rmul!(B, Q), F.H; shift=F.μ), Q') end # handle case of real H and complex μ — we need to work around the # fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK -function ldiv!(F::Hessenberg{<:Complex,<:Real}, B::AbstractVecOrMat{<:Complex}) +function ldiv!(F::Hessenberg{<:Complex,<:AbstractMatrix{<:Real}}, B::AbstractVecOrMat{<:Complex}) Q = F.Q Br = lmul!(Q', real(B)) Bi = lmul!(Q', imag(B)) - ldiv!(F.H, B .= Complex.(Br,Bi)) + ldiv!(F.H, B .= Complex.(Br,Bi); shift=F.μ) Br = lmul!(Q, real(B)) Bi = lmul!(Q, imag(B)) return B .= Complex.(Br,Bi) end -function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Real}) +function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:AbstractMatrix{<:Real}}) Q = F.Q Br = rmul!(real(B), Q) Bi = rmul!(imag(B), Q) - rdiv!(B .= Complex.(Br,Bi), F.H) + rdiv!(B .= Complex.(Br,Bi), F.H; shift=F.μ) Br = rmul!(real(B), Q') Bi = rmul!(imag(B), Q') return B .= Complex.(Br,Bi) @@ -541,8 +524,8 @@ end ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' -det(F::Hessenberg) = det(F.H) -logabsdet(F::Hessenberg) = logabsdet(F.H) +det(F::Hessenberg) = det(F.H; shift=F.μ) +logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ) function logdet(F::Hessenberg) d,s = logabsdet(F) return d + log(s) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 6164b7624a94c..e9e812d89e73f 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -25,15 +25,18 @@ let n = 10 @test x*H == H*x == x*AH for op in (+,-) @test op(H,x*I) == op(AH,x*I) == op(op(x*I,H)) - @test op(H,x*I).data === H.data @test op(H,x*I)*x == op(AH,x*I)*x == x*op(H,x*I) - @test op(H+x*I, 2H+I) ≈ UpperHessenberg(op(1,2)*H, op(x,1)) end end - @test UpperHessenberg(H+I, 3) == H+4I + @test [H[i,j] for i=1:size(H,1), j=1:size(H,2)] == triu(A,-1) H1 = LinearAlgebra.fillstored!(copy(H), 1) @test H1 == triu(fill(1, n,n), -1) @test tril(H1.data,-2) == tril(H.data,-2) + A2, H2 = copy(A), copy(H) + A2[1:4,3]=H2[1:4,3]=1:4 + H2[5,3]=0 + @test H2 == triu(A2,-1) + @test_throws ArgumentError H[5,3]=1 end @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) From 8d612b82b1622f0950b7d17048d78418f9e57d98 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 3 May 2019 15:00:37 -0400 Subject: [PATCH 30/40] correct comment --- stdlib/LinearAlgebra/src/hessenberg.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index aa2c3c28af7b8..9cbd7613596d1 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -2,7 +2,6 @@ ###################################################################################### # Upper-Hessenberg matrices H+μI, analogous to the UpperTriangular type -# but including a shift μ. """ UpperHessenberg(A::AbstractMatrix) From 9056d42ef36c25ac304bb0afae6a8348525c1e46 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 May 2019 14:33:53 -0400 Subject: [PATCH 31/40] hessenberg for Hermitian matrices -> real-symmetric tridiagonal --- stdlib/LinearAlgebra/src/hessenberg.jl | 91 ++++++++++----- stdlib/LinearAlgebra/src/lapack.jl | 154 +++++++++++++++++++++++++ 2 files changed, 218 insertions(+), 27 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 9cbd7613596d1..0940ac58d3956 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -166,7 +166,7 @@ end # Note: this can be derived from the Henry (1994) algorithm # by transformation to F(Hᵀ+µI)F FXᵀ = FBᵀ, where # F is the permutation matrix that reverses the order -# of rows/cols. Essentially, we take the ldiv_H! algorithm, +# of rows/cols. Essentially, we take the ldiv! algorithm, # swap indices of H and X to transpose, and reverse the # order of the H indices (or the order of the loops). function rdiv!(B::AbstractMatrix, F::UpperHessenberg; shift::Number=false) @@ -290,14 +290,15 @@ matrix, or a shift `Q(H+μI)Q'` thereof, which is produced by the [`hessenberg`] """ struct Hessenberg{T,SH<:AbstractMatrix,S<:AbstractMatrix,W<:AbstractVector,V<:Number} <: Factorization{T} H::SH # UpperHessenberg or SymTridiagonal - factors::S # reflector data in lower triangle, may share data with H + uplo::Char + factors::S # reflector data in uplo triangle, may share data with H τ::W # more Q (reflector) data μ::V # diagonal shift for copy-free (F+μI) \ b solves and similar end -Hessenberg(factors::AbstractMatrix, τ::AbstractVector, H::AbstractMatrix=UpperHessenberg(factors); μ::Number=false) = - Hessenberg{typeof(zero(eltype(factors))+μ),typeof(H),typeof(factors),typeof(τ),typeof(μ)}(H, factors, τ, μ) +Hessenberg(factors::AbstractMatrix, τ::AbstractVector, H::AbstractMatrix=UpperHessenberg(factors), uplo::AbstractChar='L'; μ::Number=false) = + Hessenberg{typeof(zero(eltype(factors))+μ),typeof(H),typeof(factors),typeof(τ),typeof(μ)}(H, uplo, factors, τ, μ) Hessenberg(F::Hessenberg) = F -Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ; μ=μ) +Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ, F.H, F.uplo; μ=μ) copy(F::Hessenberg) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ) size(F::Hessenberg, d) = size(F.H, d) @@ -311,15 +312,20 @@ Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:μ)) Base.iterate(S::Hessenberg, ::Val{:μ}) = (S.μ, Val(:done)) Base.iterate(S::Hessenberg, ::Val{:done}) = nothing +hessenberg!(A::StridedMatrix{<:BlasFloat}) = Hessenberg(LAPACK.gehrd!(A)...) + +function hessenberg!(A::Union{Symmetric{<:BlasReal},Hermitian{<:BlasFloat}}) + factors, τ, d, e = LAPACK.hetrd!(A.uplo, A.data) + return Hessenberg(factors, τ, SymTridiagonal(d, e), A.uplo) +end + """ hessenberg!(A) -> Hessenberg `hessenberg!` is the same as [`hessenberg`](@ref), but saves space by overwriting the input `A`, instead of creating a copy. """ -hessenberg!(A::StridedMatrix{<:BlasFloat}) = Hessenberg(LAPACK.gehrd!(A)...) - -hessenberg(A::StridedMatrix{<:BlasFloat}) = hessenberg!(copy(A)) +hessenberg!(A::AbstractMatrix) """ hessenberg(A) -> Hessenberg @@ -329,6 +335,10 @@ factorization object, the unitary matrix can be accessed with `F.Q` (of type `Li and the Hessenberg matrix with `F.H` (of type [`UpperHessenberg`](@ref)), either of which may be converted to a regular matrix with `Matrix(F.H)` or `Matrix(F.Q)`. +If `A` is [`Hermitian`](@ref) or real-[`Symmetric`](@ref), then the Hessenberg +decomposition produces a real-symmetric tridiagonal matrix and `F.H` is of type +[`SymTridiagonal`](@ref). + Note that the shifted factorization `A+μI = Q (H+μI) Q'` can be constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref) object [`I`](@ref), which creates a new `Hessenberg` object with shared storage @@ -360,7 +370,7 @@ julia> q == F.Q && h == F.H true ``` """ -hessenberg(A::StridedMatrix{T}) where T = +hessenberg(A::AbstractMatrix{T}) where T = hessenberg!(copy_oftype(A, eigtype(T))) function show(io::IO, mime::MIME"text/plain", F::Hessenberg) @@ -383,15 +393,16 @@ matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`. This `F.Q` object can be efficiently multiplied by matrices or vectors, and can be converted to an ordinary matrix type with `Matrix(F.Q)`. """ -struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector} <: AbstractQ{T} +struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector,sym} <: AbstractQ{T} + uplo::Char factors::S τ::W - function HessenbergQ{T,S,W}(factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector} - new(factors, τ) + function HessenbergQ{T,S,W,sym}(uplo::AbstractChar, factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector,sym} + new(uplo, factors, τ) end end -HessenbergQ(factors::AbstractMatrix{T}, τ::AbstractVector) where {T} = HessenbergQ{T,typeof(factors),typeof(τ)}(factors, τ) -HessenbergQ(F::Hessenberg) = HessenbergQ(F.factors, F.τ) +HessenbergQ(F::Hessenberg{<:Any,<:UpperHessenberg,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,false}(F.uplo, F.factors, F.τ) +HessenbergQ(F::Hessenberg{<:Any,<:SymTridiagonal,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,true}(F.uplo, F.factors, F.τ) function getproperty(F::Hessenberg, d::Symbol) d == :Q && return HessenbergQ(F) @@ -399,13 +410,14 @@ function getproperty(F::Hessenberg, d::Symbol) end Base.propertynames(F::Hessenberg, private::Bool=false) = - (:Q, :H, :μ, (private ? (:τ, :factors) : ())...) + (:Q, :H, :μ, (private ? (:τ, :factors, :uplo) : ())...) # HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra) -const BlasHessenbergQ{T} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T}} where {T<:BlasFloat} +const BlasHessenbergQ{T,sym} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T},sym} where {T<:BlasFloat,sym} ## reconstruct the original matrix -Matrix{T}(Q::BlasHessenbergQ) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) +Matrix{T}(Q::BlasHessenbergQ{<:Any,false}) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) +Matrix{T}(Q::BlasHessenbergQ{<:Any,true}) where {T} = convert(Matrix{T}, LAPACK.orgtr!(Q.uplo, copy(Q.factors), Q.τ)) AbstractArray(F::Hessenberg) = AbstractMatrix(F) Matrix(F::Hessenberg) = Array(AbstractArray(F)) Array(F::Hessenberg) = Matrix(F) @@ -425,15 +437,24 @@ function AbstractMatrix(F::Hessenberg) end end -lmul!(Q::BlasHessenbergQ{T}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = +lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T}) where {T<:BlasFloat} = +rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} = LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = +lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) -rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T}}) where {T<:BlasFloat} = +rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} = (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) +lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X) +rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} = + LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X) +lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + (Q = adjQ.parent; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) +rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} = + (Q = adjQ.parent; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) + lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')' rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')' lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', adjQ')' @@ -491,12 +512,20 @@ end function ldiv!(F::Hessenberg, B::AbstractVecOrMat) Q = F.Q - return lmul!(Q, ldiv!(F.H, lmul!(Q', B); shift=F.μ)) + if iszero(F.μ) + return lmul!(Q, ldiv!(F.H, lmul!(Q', B))) + else + return lmul!(Q, ldiv!(F.H, lmul!(Q', B); shift=F.μ)) + end end function rdiv!(B::AbstractMatrix, F::Hessenberg) Q = F.Q - return rmul!(rdiv!(rmul!(B, Q), F.H; shift=F.μ), Q') + if iszero(F.μ) + return rmul!(rdiv!(rmul!(B, Q), F.H), Q') + else + return rmul!(rdiv!(rmul!(B, Q), F.H; shift=F.μ), Q') + end end # handle case of real H and complex μ — we need to work around the @@ -505,7 +534,11 @@ function ldiv!(F::Hessenberg{<:Complex,<:AbstractMatrix{<:Real}}, B::AbstractVec Q = F.Q Br = lmul!(Q', real(B)) Bi = lmul!(Q', imag(B)) - ldiv!(F.H, B .= Complex.(Br,Bi); shift=F.μ) + if iszero(F.μ) + ldiv!(F.H, B .= Complex.(Br,Bi)) + else + ldiv!(F.H, B .= Complex.(Br,Bi); shift=F.μ) + end Br = lmul!(Q, real(B)) Bi = lmul!(Q, imag(B)) return B .= Complex.(Br,Bi) @@ -514,7 +547,11 @@ function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Abstrac Q = F.Q Br = rmul!(real(B), Q) Bi = rmul!(imag(B), Q) - rdiv!(B .= Complex.(Br,Bi), F.H; shift=F.μ) + if iszero(F.μ) + rdiv!(B .= Complex.(Br,Bi), F.H) + else + rdiv!(B .= Complex.(Br,Bi), F.H; shift=F.μ) + end Br = rmul!(real(B), Q') Bi = rmul!(imag(B), Q') return B .= Complex.(Br,Bi) @@ -523,8 +560,8 @@ end ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' -det(F::Hessenberg) = det(F.H; shift=F.μ) -logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ) +det(F::Hessenberg) = iszero(F.μ) ? det(F.H) : det(F.H; shift=F.μ) +logabsdet(F::Hessenberg) = iszero(F.μ) ? logabsdet(F.H) : logabsdet(F.H; shift=F.μ) function logdet(F::Hessenberg) d,s = logabsdet(F) return d + log(s) diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl index e8c44cf732363..e26a7e5e9ae69 100644 --- a/stdlib/LinearAlgebra/src/lapack.jl +++ b/stdlib/LinearAlgebra/src/lapack.jl @@ -5634,6 +5634,160 @@ for (ormhr, elty) in end end +for (hetrd, elty) in + ((:dsytrd_,Float64), + (:ssytrd_,Float32), + (:zhetrd_,ComplexF64), + (:chetrd_,ComplexF32)) + relty = real(elty) + @eval begin + + # .. Scalar Arguments .. + # CHARACTER UPLO + # INTEGER INFO, LDA, LWORK, N + # * .. + # * .. Array Arguments .. + # DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), WORK( * ) + function hetrd!(uplo::AbstractChar, A::AbstractMatrix{$elty}) + chkstride1(A) + n = checksquare(A) + chkuplo(uplo) + chkfinite(A) # balancing routines don't support NaNs and Infs + tau = similar(A, $elty, max(0,n - 1)) + d = Vector{$relty}(undef, n) + e = Vector{$relty}(undef, max(0,n - 1)) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($hetrd), liblapack), Cvoid, + (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, + Ptr{$relty}, Ptr{$relty}, + Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), + uplo, n, A, max(1, stride(A, 2)), d, e, tau, work, lwork, info) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + end + end + A, tau, d, e + end + end +end + +""" + hetrd!(uplo, A) -> (A, tau, d, e) + +Converts a Hermitian matrix `A` to real-symmetric tridiagonal Hessenberg form. +If `uplo = U`, the upper half of `A` is stored; if `uplo = L`, the lower half is stored. +`tau` contains the elementary reflectors of the factorization, `d` contains the +diagonal and `e` contains the upper/lower diagonal. +""" +hetrd!(uplo::AbstractChar, A::AbstractMatrix) + +for (orgtr, elty) in + ((:dorgtr_,:Float64), + (:sorgtr_,:Float32), + (:zungtr_,:ComplexF64), + (:cungtr_,:ComplexF32)) + @eval begin + # * .. Scalar Arguments .. + # CHARACTER UPLO + # INTEGER INFO, LDA, LWORK, N + # * .. + # * .. Array Arguments .. + # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) + function orgtr!(uplo::AbstractChar, A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}) + require_one_based_indexing(A, tau) + chkstride1(A, tau) + n = checksquare(A) + if n - length(tau) != 1 + throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)")) + end + chkuplo(uplo) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($orgtr), liblapack), Cvoid, + (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, + Ptr{BlasInt}), + uplo, n, A, + max(1, stride(A, 2)), tau, work, lwork, + info) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + end + end + A + end + end +end + +""" + orgtr!(uplo, A, tau) + +Explicitly finds `Q`, the orthogonal/unitary matrix from `hetrd!`. `uplo`, +`A`, and `tau` must correspond to the input/output to `hetrd!`. +""" +orgtr!(uplo::AbstractChar, A::AbstractMatrix, tau::AbstractVector) + +for (ormtr, elty) in + ((:dormtr_,:Float64), + (:sormtr_,:Float32), + (:zunmtr_,:ComplexF64), + (:cunmtr_,:ComplexF32)) + @eval begin + # .. Scalar Arguments .. + # CHARACTER side, trans, uplo + # INTEGER info, lda, ldc, lwork, m, n + # .. + # .. Array Arguments .. + # DOUBLE PRECISION a( lda, * ), c( ldc, * ), tau( * ), work( * ) + function ormtr!(side::AbstractChar, uplo::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, + tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty}) + + require_one_based_indexing(A, tau, C) + chkstride1(A, tau, C) + n = checksquare(A) + chkuplo(uplo) + mC, nC = size(C, 1), size(C, 2) + + if n - length(tau) != 1 + throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)")) + end + if (side == 'L' && mC != n) || (side == 'R' && nC != n) + throw(DimensionMismatch("A and C matrices are not conformable")) + end + + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($ormtr), liblapack), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, + Ptr{$elty}, Ref{BlasInt}, + Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ptr{BlasInt}), + side, uplo, trans, mC, nC, + A, max(1, stride(A, 2)), + tau, C, max(1, stride(C, 2)), work, + lwork, info) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + end + end + C + end + end +end + for (gees, gges, elty) in ((:dgees_,:dgges_,:Float64), (:sgees_,:sgges_,:Float32)) From 9f5c32d5a4a6774605dd9bf8f6825dceba59b791 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 May 2019 18:26:07 -0400 Subject: [PATCH 32/40] shifted solvers for symtridiag --- stdlib/LinearAlgebra/src/hessenberg.jl | 42 +++++++++++-------------- stdlib/LinearAlgebra/src/ldlt.jl | 15 ++++++--- stdlib/LinearAlgebra/src/tridiag.jl | 16 +++++++--- stdlib/LinearAlgebra/test/hessenberg.jl | 5 +-- 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 0940ac58d3956..1266d125cf5c3 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -300,7 +300,8 @@ Hessenberg(factors::AbstractMatrix, τ::AbstractVector, H::AbstractMatrix=UpperH Hessenberg(F::Hessenberg) = F Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ, F.H, F.uplo; μ=μ) -copy(F::Hessenberg) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ) +copy(F::Hessenberg{<:Any,<:UpperHessenberg}) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ) +copy(F::Hessenberg{<:Any,<:SymTridiagonal}) = Hessenberg(copy(F.factors), copy(F.τ), copy(F.H), F.uplo; μ=F.μ) size(F::Hessenberg, d) = size(F.H, d) size(F::Hessenberg) = size(F.H) @@ -484,6 +485,9 @@ end rmul!(F::Hessenberg{<:Any,<:UpperHessenberg{T}}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ; μ=F.μ*x) lmul!(x::T, F::Hessenberg{<:Any,<:UpperHessenberg{T}}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ; μ=x*F.μ) +rmul!(F::Hessenberg{<:Any,<:SymTridiagonal{T}}, x::T) where {T<:Number} = Hessenberg(F.factors, F.τ, SymTridiagonal(F.H.dv*x, F.H.ev*x), F.uplo; μ=F.μ*x) +lmul!(x::T, F::Hessenberg{<:Any,<:SymTridiagonal{T}}) where {T<:Number} = Hessenberg(F.factors, F.τ, SymTridiagonal(x*F.H.dv, x*F.H.ev), F.uplo; μ=x*F.μ) + # Promote F * x or x * F. In general, we don't know how to do promotions # that would change the element type of F.H, however. function (*)(F::Hessenberg{<:Any,<:AbstractMatrix{T}}, x::S) where {T,S<:Number} @@ -521,47 +525,37 @@ end function rdiv!(B::AbstractMatrix, F::Hessenberg) Q = F.Q - if iszero(F.μ) - return rmul!(rdiv!(rmul!(B, Q), F.H), Q') - else - return rmul!(rdiv!(rmul!(B, Q), F.H; shift=F.μ), Q') - end + return rmul!(rdiv!(rmul!(B, Q), F.H; shift=F.μ), Q') end # handle case of real H and complex μ — we need to work around the # fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK -function ldiv!(F::Hessenberg{<:Complex,<:AbstractMatrix{<:Real}}, B::AbstractVecOrMat{<:Complex}) +function ldiv!(F::Hessenberg{<:Complex,<:Any,<:AbstractMatrix{<:Real}}, B::AbstractVecOrMat{<:Complex}) Q = F.Q Br = lmul!(Q', real(B)) Bi = lmul!(Q', imag(B)) - if iszero(F.μ) - ldiv!(F.H, B .= Complex.(Br,Bi)) - else - ldiv!(F.H, B .= Complex.(Br,Bi); shift=F.μ) - end - Br = lmul!(Q, real(B)) - Bi = lmul!(Q, imag(B)) + ldiv!(F.H, B .= Complex.(Br,Bi); shift=F.μ) + Br .= real.(B); Bi .= imag.(B) + Br = lmul!(Q, Br) + Bi = lmul!(Q, Bi) return B .= Complex.(Br,Bi) end -function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:AbstractMatrix{<:Real}}) +function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Any,<:AbstractMatrix{<:Real}}) Q = F.Q Br = rmul!(real(B), Q) Bi = rmul!(imag(B), Q) - if iszero(F.μ) - rdiv!(B .= Complex.(Br,Bi), F.H) - else - rdiv!(B .= Complex.(Br,Bi), F.H; shift=F.μ) - end - Br = rmul!(real(B), Q') - Bi = rmul!(imag(B), Q') + rdiv!(B .= Complex.(Br,Bi), F.H; shift=F.μ) + Br .= real.(B); Bi .= imag.(B) + Br = rmul!(Br, Q') + Bi = rmul!(Bi, Q') return B .= Complex.(Br,Bi) end ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' -det(F::Hessenberg) = iszero(F.μ) ? det(F.H) : det(F.H; shift=F.μ) -logabsdet(F::Hessenberg) = iszero(F.μ) ? logabsdet(F.H) : logabsdet(F.H; shift=F.μ) +det(F::Hessenberg) = det(F.H; shift=F.μ) +logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ) function logdet(F::Hessenberg) d,s = logabsdet(F) return d + log(s) diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl index 84b8ae5fa8465..2cc84dd2cc8bc 100644 --- a/stdlib/LinearAlgebra/src/ldlt.jl +++ b/stdlib/LinearAlgebra/src/ldlt.jl @@ -91,14 +91,18 @@ julia> S \\ b 1.3488372093023255 ``` """ -function ldlt(M::SymTridiagonal{T}) where T - S = typeof(zero(T)/one(T)) - return S == T ? ldlt!(copy(M)) : ldlt!(SymTridiagonal{S}(M)) +function ldlt(M::SymTridiagonal{T}; shift::Number=false) where T + S = typeof((zero(T)+shift)/one(T)) + Mₛ = SymTridiagonal{S}(copy_oftype(M.dv, S), copy_oftype(M.ev, S)) + if !iszero(shift) + Mₛ.dv .+= shift + end + return ldlt!(Mₛ) end factorize(S::SymTridiagonal) = ldlt(S) -function ldiv!(S::LDLt{T,M}, B::AbstractVecOrMat{T}) where {T,M<:SymTridiagonal{T}} +function ldiv!(S::LDLt{<:Any,<:SymTridiagonal}, B::AbstractVecOrMat) require_one_based_indexing(B) n, nrhs = size(B, 1), size(B, 2) if size(S,1) != n @@ -129,6 +133,9 @@ function ldiv!(S::LDLt{T,M}, B::AbstractVecOrMat{T}) where {T,M<:SymTridiagonal{ return B end +rdiv!(B::AbstractVecOrMat, S::LDLt{<:Any,<:SymTridiagonal}) = + transpose(ldiv!(S, transpose(B))) + function logabsdet(F::LDLt{<:Any,<:SymTridiagonal}) it = (F.data[i,i] for i in 1:size(F, 1)) return sum(log∘abs, it), prod(sign, it) diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl index 094fc5e184d9a..763a54775e5cb 100644 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ b/stdlib/LinearAlgebra/src/tridiag.jl @@ -197,6 +197,10 @@ end (\)(T::SymTridiagonal, B::StridedVecOrMat) = ldlt(T)\B +# division with optional shift for use in shifted-Hessenberg solvers (hessenberg.jl): +ldiv!(A::SymTridiagonal, B::AbstractVecOrMat; shift::Number=false) = ldiv!(ldlt(A, shift=shift), B) +rdiv!(B::AbstractVecOrMat, A::SymTridiagonal; shift::Number=false) = rdiv!(B, ldlt(A, shift=shift)) + eigen!(A::SymTridiagonal{<:BlasReal}) = Eigen(LAPACK.stegr!('V', A.dv, A.ev)...) eigen(A::SymTridiagonal{T}) where T = eigen!(copy_oftype(A, eigtype(T))) @@ -330,21 +334,23 @@ end # R. Usmani, "Inversion of a tridiagonal Jacobi matrix", # Linear Algebra and its Applications 212-213 (1994), pp.413-414 # doi:10.1016/0024-3795(94)90414-6 -function det_usmani(a::V, b::V, c::V) where {T,V<:AbstractVector{T}} +function det_usmani(a::V, b::V, c::V, shift::Number=0) where {T,V<:AbstractVector{T}} require_one_based_indexing(a, b, c) n = length(b) - θa = one(T) + θa = oneunit(T)+zero(shift) if n == 0 return θa end - θb = b[1] + θb = b[1]+shift for i in 2:n - θb, θa = b[i]*θb - a[i-1]*c[i-1]*θa, θb + θb, θa = (b[i]+shift)*θb - a[i-1]*c[i-1]*θa, θb end return θb end -det(A::SymTridiagonal) = det_usmani(A.ev, A.dv, A.ev) +# det with optional diagonal shift for use with shifted Hessenberg factorizations +det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift) +logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift)) function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T if !(1 <= i <= size(A,2) && 1 <= j <= size(A,2)) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index e9e812d89e73f..10fc755256b6b 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -39,12 +39,13 @@ let n = 10 @test_throws ArgumentError H[5,3]=1 end - @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) - A = eltya == Int ? + @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int), herm in (false, true) + A_ = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(Areal, Aimg) : Areal) + A = herm ? Hermitian(A_ + A_') : A_ H = hessenberg(A) eltyh = eltype(H) From 1ecd267edcc19f028480bb2139716c8ec4ddffcb Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 May 2019 20:03:34 -0400 Subject: [PATCH 33/40] rm redundant methods --- stdlib/LinearAlgebra/src/hessenberg.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 1266d125cf5c3..7c0fd1dad3652 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -97,8 +97,6 @@ fillstored!(H::UpperHessenberg, x) = (fillband!(H.data, x, -1, size(H,2)-1); H) # shift Hessenberg by λI +(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H.data + J) -+(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(J + H.data) --(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H.data - J) -(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(J - H.data) # Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory From ce059289f54ed206f2595eb562f11b688d8f0673 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 May 2019 20:16:59 -0400 Subject: [PATCH 34/40] test workaround --- stdlib/LinearAlgebra/test/hessenberg.jl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 10fc755256b6b..923dbf921dcba 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -46,6 +46,7 @@ let n = 10 complex.(Areal, Aimg) : Areal) A = herm ? Hermitian(A_ + A_') : A_ + Am = Matrix(A) # workaround for #32001 H = hessenberg(A) eltyh = eltype(H) @@ -66,27 +67,27 @@ let n = 10 @test convert(Array, 2 * H) ≈ 2 * A ≈ convert(Array, H * 2) @test convert(Array, H + 2I) ≈ A + 2I ≈ convert(Array, 2I + H) - @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H) + @test convert(Array, H + (2+4im)I) ≈ Am + (2+4im)I ≈ convert(Array, (2+4im)I + H) @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H) @test convert(Array, -H) == -convert(Array, H) - @test convert(Array, 2*(H + (2+4im)I)) ≈ 2A + (4+8im)I + @test convert(Array, 2*(H + (2+4im)I)) ≈ 2Am + (4+8im)I b = convert(Vector{eltype(H)}, b_) B = convert(Matrix{eltype(H)}, B_) @test H \ b ≈ A \ b ≈ H \ complex(b) @test H \ B ≈ A \ B ≈ H \ complex(B) @test (H - I) \ B ≈ (A - I) \ B - @test (H - (3+4im)I) \ B ≈ (A - (3+4im)I) \ B + @test (H - (3+4im)I) \ B ≈ (Am - (3+4im)I) \ B @test b' / H ≈ b' / A ≈ complex.(b') / H @test B' / H ≈ B' / A ≈ complex(B') / H @test B' / (H - I) ≈ B' / (A - I) - @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I) - @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B - @test B' / (H - (3+4im)I)' ≈ B' / (A - (3+4im)I)' + @test B' / (H - (3+4im)I) ≈ B' / (Am - (3+4im)I) + @test (H - (3+4im)I)' \ B ≈ (Am - (3+4im)I)' \ B + @test B' / (H - (3+4im)I)' ≈ B' / (Am - (3+4im)I)' for shift in (0,1,3+4im) - @test det(H + shift*I) ≈ det(A + shift*I) - @test logabsdet(H + shift*I) ≅ logabsdet(A + shift*I) + @test det(H + shift*I) ≈ det(Am + shift*I) + @test logabsdet(H + shift*I) ≅ logabsdet(Am + shift*I) end end end From 01df020ec3938a77b6a63f2820100f31796450aa Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 May 2019 20:24:06 -0400 Subject: [PATCH 35/40] news for tridiagonal Hessenberg --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 97fdd3d51ff34..d2aa12962dc31 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,7 +36,7 @@ Standard library changes * The BLAS submodule no longer exports `dot`, which conflicts with that in LinearAlgebra ([#31838]). * `diagm` and `spdiagm` now accept optional `m,n` initial arguments to specify a size ([#31654]). -* `Hessenberg` factorizations `H` now support efficient shifted solves `(H+µI) \ b` and related operations, and there is also an `UpperHessenberg` matrix type ([#31853]). +* `Hessenberg` factorizations `H` now support efficient shifted solves `(H+µI) \ b` and related operations, does a specialized tridiagonal factorization for Hermitian matrices, and there is also a new `UpperHessenberg` matrix type ([#31853]). #### SparseArrays From e4afbc4c2173cc55ac2351ca7d828c5a25b53029 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 13 May 2019 11:37:37 -0400 Subject: [PATCH 36/40] grammar --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index d2aa12962dc31..43d0e2691622e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,7 +36,7 @@ Standard library changes * The BLAS submodule no longer exports `dot`, which conflicts with that in LinearAlgebra ([#31838]). * `diagm` and `spdiagm` now accept optional `m,n` initial arguments to specify a size ([#31654]). -* `Hessenberg` factorizations `H` now support efficient shifted solves `(H+µI) \ b` and related operations, does a specialized tridiagonal factorization for Hermitian matrices, and there is also a new `UpperHessenberg` matrix type ([#31853]). +* `Hessenberg` factorizations `H` now support efficient shifted solves `(H+µI) \ b` and determinants, and use a specialized tridiagonal factorization for Hermitian matrices. There is also a new `UpperHessenberg` matrix type ([#31853]). #### SparseArrays From 2a91795ec42762cf987ca0252eead85d33eef2d5 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 15 May 2019 09:52:16 -0400 Subject: [PATCH 37/40] make sure Matrix(H.Q) is tested --- stdlib/LinearAlgebra/test/hessenberg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 923dbf921dcba..84fb08237362f 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -55,7 +55,7 @@ let n = 10 @test size(H.Q) == size(A) @test_throws ErrorException H.Z @test convert(Array, H) ≈ A - @test (H.Q * H.H) * H.Q' ≈ A + @test (H.Q * H.H) * H.Q' ≈ A ≈ (Matrix(H.Q) * Matrix(H.H)) * Matrix(H.Q)' @test (H.Q' *A) * H.Q ≈ H.H #getindex for HessenbergQ @test H.Q[1,1] ≈ Array(H.Q)[1,1] From ef3644f5ed5f5a59361f4212465576c8133dfae4 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 15 May 2019 09:54:43 -0400 Subject: [PATCH 38/40] rm workaround now that 32001 is merged --- stdlib/LinearAlgebra/test/hessenberg.jl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 84fb08237362f..94ed21c091fcd 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -46,7 +46,6 @@ let n = 10 complex.(Areal, Aimg) : Areal) A = herm ? Hermitian(A_ + A_') : A_ - Am = Matrix(A) # workaround for #32001 H = hessenberg(A) eltyh = eltype(H) @@ -67,7 +66,7 @@ let n = 10 @test convert(Array, 2 * H) ≈ 2 * A ≈ convert(Array, H * 2) @test convert(Array, H + 2I) ≈ A + 2I ≈ convert(Array, 2I + H) - @test convert(Array, H + (2+4im)I) ≈ Am + (2+4im)I ≈ convert(Array, (2+4im)I + H) + @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H) @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H) @test convert(Array, -H) == -convert(Array, H) @test convert(Array, 2*(H + (2+4im)I)) ≈ 2Am + (4+8im)I @@ -77,17 +76,17 @@ let n = 10 @test H \ b ≈ A \ b ≈ H \ complex(b) @test H \ B ≈ A \ B ≈ H \ complex(B) @test (H - I) \ B ≈ (A - I) \ B - @test (H - (3+4im)I) \ B ≈ (Am - (3+4im)I) \ B + @test (H - (3+4im)I) \ B ≈ (A - (3+4im)I) \ B @test b' / H ≈ b' / A ≈ complex.(b') / H @test B' / H ≈ B' / A ≈ complex(B') / H @test B' / (H - I) ≈ B' / (A - I) - @test B' / (H - (3+4im)I) ≈ B' / (Am - (3+4im)I) - @test (H - (3+4im)I)' \ B ≈ (Am - (3+4im)I)' \ B - @test B' / (H - (3+4im)I)' ≈ B' / (Am - (3+4im)I)' + @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I) + @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B + @test B' / (H - (3+4im)I)' ≈ B' / (A - (3+4im)I)' for shift in (0,1,3+4im) - @test det(H + shift*I) ≈ det(Am + shift*I) - @test logabsdet(H + shift*I) ≅ logabsdet(Am + shift*I) + @test det(H + shift*I) ≈ det(A + shift*I) + @test logabsdet(H + shift*I) ≅ logabsdet(A + shift*I) end end end From 41ea262812a244bc4b97bdbdc97250f5b8dc1324 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 15 May 2019 10:54:28 -0400 Subject: [PATCH 39/40] clarifications --- stdlib/LinearAlgebra/docs/src/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md index ab03bfc646d3a..1db9a6ed9d8a5 100644 --- a/stdlib/LinearAlgebra/docs/src/index.md +++ b/stdlib/LinearAlgebra/docs/src/index.md @@ -271,10 +271,10 @@ Stacktrace: [...] ``` -If you need to solve many systems of the form `(A+μI)x = b`, it might be beneficial +If you need to solve many systems of the form `(A+μI)x = b` for the same `A` and different `μ`, it might be beneficial to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](@ref) function. -Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` and related -operations. +Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I)x \ b`) and related +operations like determinants. ## [Matrix factorizations](@id man-linalg-factorizations) From b3d33ed89135a5235f5ac75d7e0e3ac961d4ab5b Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 17 May 2019 12:04:25 -0400 Subject: [PATCH 40/40] fix typo --- stdlib/LinearAlgebra/test/hessenberg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 94ed21c091fcd..4db168dfe9189 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -69,7 +69,7 @@ let n = 10 @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H) @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H) @test convert(Array, -H) == -convert(Array, H) - @test convert(Array, 2*(H + (2+4im)I)) ≈ 2Am + (4+8im)I + @test convert(Array, 2*(H + (2+4im)I)) ≈ 2A + (4+8im)I b = convert(Vector{eltype(H)}, b_) B = convert(Matrix{eltype(H)}, B_)