From 624a21c3da1495c2a8a9d01046b20053ad491d8a Mon Sep 17 00:00:00 2001 From: geoffroyleconte <47035783+geoffroyleconte@users.noreply.github.com> Date: Mon, 3 Oct 2022 14:31:17 -0400 Subject: [PATCH] Factorization in lower precision (#72) * factorization in lower precision * Apply suggestions from code review Co-authored-by: Alexis <35051714+amontoison@users.noreply.github.com> * remove inline macro * fix type instabilities * end items docs Co-authored-by: Alexis <35051714+amontoison@users.noreply.github.com> --- Project.toml | 2 +- src/LimitedLDLFactorizations.jl | 137 ++++++++++++++++++-------------- test/runtests.jl | 39 +++++++++ 3 files changed, 116 insertions(+), 62 deletions(-) diff --git a/Project.toml b/Project.toml index 2e520c3..d591f9f 100644 --- a/Project.toml +++ b/Project.toml @@ -8,8 +8,8 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] -julia = "^1.6.0" AMD = "0.4, 0.5" +julia = "^1.6.0" [extras] AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" diff --git a/src/LimitedLDLFactorizations.jl b/src/LimitedLDLFactorizations.jl index 11e821c..767050b 100644 --- a/src/LimitedLDLFactorizations.jl +++ b/src/LimitedLDLFactorizations.jl @@ -64,44 +64,44 @@ function LimitedLDLFactorization( T::SparseMatrixCSC{Tv, Ti}, P::AbstractVector{<:Integer}, memory::Int, - α::Tv, + α::Number, n::Int, nnzT::Int, -) where {Tv <: Number, Ti} + ::Type{Tf}, +) where {Tv <: Number, Ti, Tf <: Real} np = n * memory Pinv = similar(P) nnz_diag = 0 - adiag = Vector{Tv}(undef, n) + adiag = Vector{Tf}(undef, n) for col = 1:n k = T.colptr[col] row = (k ≤ nnzT) ? T.rowval[k] : 0 if row == col nnz_diag += 1 - adiag[col] = T.nzval[k] + adiag[col] = Tf(T.nzval[k]) else - adiag[col] = zero(Tv) + adiag[col] = zero(Tf) end end # Make room to store L. nnzLmax = nnzT + np - nnz_diag - d = Vector{Tv}(undef, n) # Diagonal matrix D. - lvals = Vector{Tv}(undef, nnzLmax) # Strict lower triangle of L. + d = Vector{Tf}(undef, n) # Diagonal matrix D. + lvals = Vector{Tf}(undef, nnzLmax) # Strict lower triangle of L. rowind = Vector{Ti}(undef, nnzLmax) colptr = Vector{Ti}(undef, n + 1) - wa1 = Vector{Tv}(undef, n) - s = Vector{Tv}(undef, n) + wa1 = Vector{Tf}(undef, n) + s = Vector{Tf}(undef, n) - w = Vector{Tv}(undef, n) # contents of the current column of A. + w = Vector{Tf}(undef, n) # contents of the current column of A. indr = Vector{Ti}(undef, n) # row indices of the nonzeros in the current column after it's been loaded into w. indf = Vector{Ti}(undef, n) # indf[col] = position in w of the next entry in column col to be used during the factorization. list = zeros(Ti, n) # list[col] = linked list of columns that will update column col. - pos = findall(adiag[P] .> Tv(0)) - neg = findall(adiag[P] .≤ Tv(0)) + pos = findall(adiag[P] .> Tf(0)) + neg = findall(adiag[P] .≤ Tf(0)) - nz = colptr[end] - 1 Lrowind = view(rowind, 1:nnzLmax) Lnzvals = view(lvals, 1:nnzLmax) @@ -116,7 +116,7 @@ function LimitedLDLFactorization( adiag, d, P, - α, + Tf(α), memory, Pinv, wa1, @@ -132,35 +132,47 @@ function LimitedLDLFactorization( end """ - LLDL = LimitedLDLFactorization(T, P; memory = 0, α = 0.0) + LLDL = LimitedLDLFactorization(T; P = amd(T), memory = 0, α = 0) + LLDL = LimitedLDLFactorization(T, ::Type{Tf}; P = amd(T), memory = 0, α = 0) Perform the allocations for the LLDL factorization of symmetric matrix whose lower triangle is `T` with the permutation vector `P`. # Arguments -- `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize. +- `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize; +- `::Type{Tf}`: type used for the factorization, by default the type of the elements of `A`. # Keyword arguments -- `P::AbstractVector{<:Integer} = amd(T)`: permutation vector. +- `P::AbstractVector{<:Integer} = amd(T)`: permutation vector; - `memory::Int=0`: extra amount of memory to allocate for the incomplete factor `L`. The total memory allocated is nnz(T) + n * `memory`, where - `T` is the strict lower triangle of A and `n` is the size of `A`. -- `α::Tv=Tv(0)`: initial value of the shift in case the incomplete LDLᵀ + `T` is the strict lower triangle of A and `n` is the size of `A`; +- `α::Number=0`: initial value of the shift in case the incomplete LDLᵀ factorization of `A` is found to not exist. The shift will be gradually increased from this initial value until success. + +# Example + A = sprand(Float64, 10, 10, 0.2) + T = tril(A * A' + I) + LLDL = LimitedLDLFactorization(T) # Float64 factorization + LLDL = LimitedLDLFactorization(T, Float32) # Float32 factorization """ function LimitedLDLFactorization( - T::SparseMatrixCSC{Tv, Ti}; + T::SparseMatrixCSC{Tv, Ti}, + ::Type{Tf}; P::AbstractVector{<:Integer} = amd(T), memory::Int = 0, - α::Tv = Tv(0), -) where {Tv <: Number, Ti <: Integer} + α::Number = 0, +) where {Tv <: Number, Ti <: Integer, Tf <: Real} memory < 0 && error("limited-memory parameter must be nonnegative") n = size(T, 1) n != size(T, 2) && error("input matrix must be square") - return LimitedLDLFactorization(T, P, memory, α, n, nnz(T)) + return LimitedLDLFactorization(T, P, memory, α, n, nnz(T), Tf) end +LimitedLDLFactorization(T::SparseMatrixCSC{Tv, Ti}; kwargs...) where {Tv <: Number, Ti <: Integer} = + LimitedLDLFactorization(T, Tv; kwargs...) + # Here T is the lower triangle of A. """ lldl_factorize!(S, T; droptol = 0.0) @@ -169,19 +181,19 @@ Perform the in-place factorization of a symmetric matrix whose lower triangle is with the permutation vector. # Arguments -- `S::LimitedLDLFactorization{Tv, Ti}`. +- `S::LimitedLDLFactorization{Tf, Ti}`; - `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize. `T` should keep the same nonzero pattern and the sign of its diagonal elements. # Keyword arguments -- `droptol::Tv=Tv(0)`: to further sparsify `L`, all elements with magnitude smaller +- `droptol::Tf=Tf(0)`: to further sparsify `L`, all elements with magnitude smaller than `droptol` are dropped. """ function lldl_factorize!( - S::LimitedLDLFactorization{Tv, Ti}, + S::LimitedLDLFactorization{Tf, Ti}, T::SparseMatrixCSC{Tv, Ti}; - droptol::Tv = Tv(0), -) where {Tv <: Number, Ti <: Integer} + droptol::Tf = Tf(0), +) where {Tf <: Number, Tv <: Number, Ti <: Integer} n = size(T, 1) n != size(T, 2) && error("input matrix must be square") @@ -205,7 +217,7 @@ function lldl_factorize!( for col = 1:n k = T.colptr[col] row = (k ≤ nnzT) ? T.rowval[k] : 0 - adiag[col] = (row == col) ? T.nzval[k] : zero(Tv) + adiag[col] = (row == col) ? Tf(T.nzval[k]) : zero(Tf) end d = S.D # Diagonal matrix D. @@ -216,14 +228,14 @@ function lldl_factorize!( # Compute the 2-norm of columns of A # and the diagonal scaling matrix. wa1 = S.wa1 - wa1 .= zero(Tv) + wa1 .= zero(Tf) s = S.s @inbounds for col = 1:n - s[col] = Tv(1) # Initialization + s[col] = Tf(1) # Initialization @inbounds for k = T.colptr[col]:(T.colptr[col + 1] - 1) row = T.rowval[k] (row == col) && continue - val = T.nzval[k] + val = Tf(T.nzval[k]) val2 = val * val wa1[Pinv[col]] += val2 # Contribution to column Pinv[col]. wa1[Pinv[row]] += val2 # Contribution to column Pinv[row]. @@ -264,7 +276,7 @@ function lldl_factorize!( if !(S.computed_posneg) for i = 1:n adiagPi = adiag[P[i]] - if adiagPi > Tv(0) + if adiagPi > Tf(0) cpos += 1 pos[cpos] = i else @@ -362,40 +374,54 @@ function lldl_factorize!( end """ - lldl(A) + lldl(A; P = amd(A), memory = 0, α = 0, droptol = 0, check_tril = true) + lldl(A, ::Type{Tf}; P = amd(A), memory = 0, α = 0, droptol = 0, check_tril = true) Compute the limited-memory LDLᵀ factorization of `A`. `A` should be a lower triangular matrix. # Arguments - `A::SparseMatrixCSC{Tv,Ti}`: matrix to factorize (its strict lower triangle and - diagonal will be extracted) + diagonal will be extracted); +- `::Type{Tf}`: type used for the factorization, by default the type of the elements of `A`. # Keyword arguments - `P::AbstractVector{<:Integer} = amd(A)`: permutation vector. - `memory::Int=0`: extra amount of memory to allocate for the incomplete factor `L`. The total memory allocated is nnz(T) + n * `memory`, where - `T` is the strict lower triangle of A and `n` is the size of `A`. + `T` is the strict lower triangle of A and `n` is the size of `A`; - `α::Tv=Tv(0)`: initial value of the shift in case the incomplete LDLᵀ factorization of `A` is found to not exist. The shift will be - gradually increased from this initial value until success. + gradually increased from this initial value until success; - `droptol::Tv=Tv(0)`: to further sparsify `L`, all elements with magnitude smaller - than `droptol` are dropped. -- `check_tril::Bool = true`: check if `A` is a lower triangular matrix. + than `droptol` are dropped; +- `check_tril::Bool = true`: check if `A` is a lower triangular matrix. + +# Example + A = sprand(Float64, 10, 10, 0.2) + As = A * A' + I + LLDL = lldl(As) # lower triangle is extracted + T = tril(As) + LLDL = lldl(T) # Float64 factorization + LLDL = lldl(T, Float32) # Float32 factorization """ function lldl( - A::SparseMatrixCSC{Tv, Ti}; + A::SparseMatrixCSC{Tv, Ti}, + ::Type{Tf}; P::AbstractVector{<:Integer} = amd(A), memory::Int = 0, - α::Tv = Tv(0), - droptol::Tv = Tv(0), + α::Number = 0, + droptol::Number = 0, check_tril::Bool = true, -) where {Tv <: Number, Ti <: Integer} +) where {Tv <: Number, Ti <: Integer, Tf <: Real} T = (!check_tril || istril(A)) ? A : tril(A) - S = LimitedLDLFactorization(T; P = P, memory = memory, α = α) - lldl_factorize!(S, T, droptol = droptol) + S = LimitedLDLFactorization(T, Tf; P = P, memory = memory, α = α) + lldl_factorize!(S, T, droptol = Tf(droptol)) end +lldl(A::SparseMatrixCSC{Tv, Ti}; kwargs...) where {Tv <: Number, Ti <: Integer} = + lldl(A, Tv; kwargs...) + lldl(A::Matrix{Tv}; kwargs...) where {Tv <: Number} = lldl(sparse(A); kwargs...) # symmetric matrix input @@ -709,35 +735,24 @@ function lldl_solve!(n, b, Lp, Li, Lx, D, P) end import Base.(\) -function (\)( - LLDL::LimitedLDLFactorization{T, Ti}, - b::AbstractVector{T}, -) where {T <: Real, Ti <: Integer} +function (\)(LLDL::LimitedLDLFactorization, b::AbstractVector) y = copy(b) lldl_solve!(LLDL.n, y, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P) end import LinearAlgebra.ldiv! -@inline ldiv!( - LLDL::LimitedLDLFactorization{T, Ti}, - b::AbstractVector{T}, -) where {T <: Real, Ti <: Integer} = +ldiv!(LLDL::LimitedLDLFactorization, b::AbstractVector) = lldl_solve!(LLDL.n, b, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P) -function ldiv!( - y::AbstractVector{T}, - LLDL::LimitedLDLFactorization{T, Ti}, - b::AbstractVector{T}, -) where {T <: Real, Ti <: Integer} +function ldiv!(y::AbstractVector, LLDL::LimitedLDLFactorization, b::AbstractVector) y .= b lldl_solve!(LLDL.n, y, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P) end import SparseArrays.nnz -@inline nnz(LLDL::LimitedLDLFactorization{T, Ti}) where {T <: Real, Ti <: Integer} = - length(LLDL.Lrowind) + length(LLDL.D) +nnz(LLDL::LimitedLDLFactorization) = length(LLDL.Lrowind) + length(LLDL.D) -@inline function Base.getproperty(LLDL::LimitedLDLFactorization, prop::Symbol) +function Base.getproperty(LLDL::LimitedLDLFactorization, prop::Symbol) if prop == :L nz = LLDL.colptr[end] - 1 return SparseMatrixCSC(LLDL.n, LLDL.n, LLDL.colptr, LLDL.Lrowind[1:nz], LLDL.Lnzvals[1:nz]) diff --git a/test/runtests.jl b/test/runtests.jl index 3dc099d..820e41e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -219,3 +219,42 @@ end LLDL = lldl(A, memory = 5) @test LLDL.α == 0 end + +@testset "test lower-precision factorization" begin + # Lower triangle only + Tf = Float32 + A = [ + 1.7 0 0 0 0 0 0 0 0.13 0 + 0 1.0 0 0 0.02 0 0 0 0 0.01 + 0 0 1.5 0 0 0 0 0 0 0 + 0 0 0 1.1 0 0 0 0 0 0 + 0 0.02 0 0 2.6 0 0.16 0.09 0.52 0.53 + 0 0 0 0 0 1.2 0 0 0 0 + 0 0 0 0 0.16 0 1.3 0 0 0.56 + 0 0 0 0 0.09 0 0 1.6 0.11 0 + 0.13 0 0 0 0.52 0 0 0.11 1.4 0 + 0 0.01 0 0 0.53 0 0.56 0 0 3.1 + ] + A = sparse(A) + Alow = tril(A) + perm = amd(A) + LLDL = lldl(Alow, Tf, P = perm, memory = 10) + @test eltype(LLDL.D) == Tf + + L = LLDL.L + I + @test norm(L * diagm(0 => LLDL.D) * L' - A[perm, perm]) ≤ sqrt(eps(Tf)) * norm(A) + + sol = ones(Tf, A.n) + b = similar(sol) + mul!(b, A, sol) + x = LLDL \ b + @test x ≈ sol + @test eltype(x) == Tf + + y = similar(b) + ldiv!(y, LLDL, b) + @test y ≈ sol + + ldiv!(LLDL, b) + @test b ≈ sol +end \ No newline at end of file