Skip to content

Commit

Permalink
Factorization in lower precision (#72)
Browse files Browse the repository at this point in the history
* factorization in lower precision

* Apply suggestions from code review

Co-authored-by: Alexis <[email protected]>

* remove inline macro

* fix type instabilities

* end items docs

Co-authored-by: Alexis <[email protected]>
  • Loading branch information
geoffroyleconte and amontoison authored Oct 3, 2022
1 parent 0ab4769 commit 624a21c
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 62 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[compat]
julia = "^1.6.0"
AMD = "0.4, 0.5"
julia = "^1.6.0"

[extras]
AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
Expand Down
137 changes: 76 additions & 61 deletions src/LimitedLDLFactorizations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,44 +64,44 @@ function LimitedLDLFactorization(
T::SparseMatrixCSC{Tv, Ti},
P::AbstractVector{<:Integer},
memory::Int,
α::Tv,
α::Number,
n::Int,
nnzT::Int,
) where {Tv <: Number, Ti}
::Type{Tf},
) where {Tv <: Number, Ti, Tf <: Real}
np = n * memory
Pinv = similar(P)

nnz_diag = 0
adiag = Vector{Tv}(undef, n)
adiag = Vector{Tf}(undef, n)
for col = 1:n
k = T.colptr[col]
row = (k nnzT) ? T.rowval[k] : 0
if row == col
nnz_diag += 1
adiag[col] = T.nzval[k]
adiag[col] = Tf(T.nzval[k])
else
adiag[col] = zero(Tv)
adiag[col] = zero(Tf)
end
end

# Make room to store L.
nnzLmax = nnzT + np - nnz_diag
d = Vector{Tv}(undef, n) # Diagonal matrix D.
lvals = Vector{Tv}(undef, nnzLmax) # Strict lower triangle of L.
d = Vector{Tf}(undef, n) # Diagonal matrix D.
lvals = Vector{Tf}(undef, nnzLmax) # Strict lower triangle of L.
rowind = Vector{Ti}(undef, nnzLmax)
colptr = Vector{Ti}(undef, n + 1)
wa1 = Vector{Tv}(undef, n)
s = Vector{Tv}(undef, n)
wa1 = Vector{Tf}(undef, n)
s = Vector{Tf}(undef, n)

w = Vector{Tv}(undef, n) # contents of the current column of A.
w = Vector{Tf}(undef, n) # contents of the current column of A.
indr = Vector{Ti}(undef, n) # row indices of the nonzeros in the current column after it's been loaded into w.
indf = Vector{Ti}(undef, n) # indf[col] = position in w of the next entry in column col to be used during the factorization.
list = zeros(Ti, n) # list[col] = linked list of columns that will update column col.

pos = findall(adiag[P] .> Tv(0))
neg = findall(adiag[P] .≤ Tv(0))
pos = findall(adiag[P] .> Tf(0))
neg = findall(adiag[P] .≤ Tf(0))

nz = colptr[end] - 1
Lrowind = view(rowind, 1:nnzLmax)
Lnzvals = view(lvals, 1:nnzLmax)

Expand All @@ -116,7 +116,7 @@ function LimitedLDLFactorization(
adiag,
d,
P,
α,
Tf(α),
memory,
Pinv,
wa1,
Expand All @@ -132,35 +132,47 @@ function LimitedLDLFactorization(
end

"""
LLDL = LimitedLDLFactorization(T, P; memory = 0, α = 0.0)
LLDL = LimitedLDLFactorization(T; P = amd(T), memory = 0, α = 0)
LLDL = LimitedLDLFactorization(T, ::Type{Tf}; P = amd(T), memory = 0, α = 0)
Perform the allocations for the LLDL factorization of symmetric matrix whose lower triangle is `T`
with the permutation vector `P`.
# Arguments
- `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize.
- `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize;
- `::Type{Tf}`: type used for the factorization, by default the type of the elements of `A`.
# Keyword arguments
- `P::AbstractVector{<:Integer} = amd(T)`: permutation vector.
- `P::AbstractVector{<:Integer} = amd(T)`: permutation vector;
- `memory::Int=0`: extra amount of memory to allocate for the incomplete factor `L`.
The total memory allocated is nnz(T) + n * `memory`, where
`T` is the strict lower triangle of A and `n` is the size of `A`.
- `α::Tv=Tv(0)`: initial value of the shift in case the incomplete LDLᵀ
`T` is the strict lower triangle of A and `n` is the size of `A`;
- `α::Number=0`: initial value of the shift in case the incomplete LDLᵀ
factorization of `A` is found to not exist. The shift will be
gradually increased from this initial value until success.
# Example
A = sprand(Float64, 10, 10, 0.2)
T = tril(A * A' + I)
LLDL = LimitedLDLFactorization(T) # Float64 factorization
LLDL = LimitedLDLFactorization(T, Float32) # Float32 factorization
"""
function LimitedLDLFactorization(
T::SparseMatrixCSC{Tv, Ti};
T::SparseMatrixCSC{Tv, Ti},
::Type{Tf};
P::AbstractVector{<:Integer} = amd(T),
memory::Int = 0,
α::Tv = Tv(0),
) where {Tv <: Number, Ti <: Integer}
α::Number = 0,
) where {Tv <: Number, Ti <: Integer, Tf <: Real}
memory < 0 && error("limited-memory parameter must be nonnegative")
n = size(T, 1)
n != size(T, 2) && error("input matrix must be square")
return LimitedLDLFactorization(T, P, memory, α, n, nnz(T))
return LimitedLDLFactorization(T, P, memory, α, n, nnz(T), Tf)
end

LimitedLDLFactorization(T::SparseMatrixCSC{Tv, Ti}; kwargs...) where {Tv <: Number, Ti <: Integer} =
LimitedLDLFactorization(T, Tv; kwargs...)

# Here T is the lower triangle of A.
"""
lldl_factorize!(S, T; droptol = 0.0)
Expand All @@ -169,19 +181,19 @@ Perform the in-place factorization of a symmetric matrix whose lower triangle is
with the permutation vector.
# Arguments
- `S::LimitedLDLFactorization{Tv, Ti}`.
- `S::LimitedLDLFactorization{Tf, Ti}`;
- `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize.
`T` should keep the same nonzero pattern and the sign of its diagonal elements.
# Keyword arguments
- `droptol::Tv=Tv(0)`: to further sparsify `L`, all elements with magnitude smaller
- `droptol::Tf=Tf(0)`: to further sparsify `L`, all elements with magnitude smaller
than `droptol` are dropped.
"""
function lldl_factorize!(
S::LimitedLDLFactorization{Tv, Ti},
S::LimitedLDLFactorization{Tf, Ti},
T::SparseMatrixCSC{Tv, Ti};
droptol::Tv = Tv(0),
) where {Tv <: Number, Ti <: Integer}
droptol::Tf = Tf(0),
) where {Tf <: Number, Tv <: Number, Ti <: Integer}
n = size(T, 1)
n != size(T, 2) && error("input matrix must be square")

Expand All @@ -205,7 +217,7 @@ function lldl_factorize!(
for col = 1:n
k = T.colptr[col]
row = (k nnzT) ? T.rowval[k] : 0
adiag[col] = (row == col) ? T.nzval[k] : zero(Tv)
adiag[col] = (row == col) ? Tf(T.nzval[k]) : zero(Tf)
end

d = S.D # Diagonal matrix D.
Expand All @@ -216,14 +228,14 @@ function lldl_factorize!(
# Compute the 2-norm of columns of A
# and the diagonal scaling matrix.
wa1 = S.wa1
wa1 .= zero(Tv)
wa1 .= zero(Tf)
s = S.s
@inbounds for col = 1:n
s[col] = Tv(1) # Initialization
s[col] = Tf(1) # Initialization
@inbounds for k = T.colptr[col]:(T.colptr[col + 1] - 1)
row = T.rowval[k]
(row == col) && continue
val = T.nzval[k]
val = Tf(T.nzval[k])
val2 = val * val
wa1[Pinv[col]] += val2 # Contribution to column Pinv[col].
wa1[Pinv[row]] += val2 # Contribution to column Pinv[row].
Expand Down Expand Up @@ -264,7 +276,7 @@ function lldl_factorize!(
if !(S.computed_posneg)
for i = 1:n
adiagPi = adiag[P[i]]
if adiagPi > Tv(0)
if adiagPi > Tf(0)
cpos += 1
pos[cpos] = i
else
Expand Down Expand Up @@ -362,40 +374,54 @@ function lldl_factorize!(
end

"""
lldl(A)
lldl(A; P = amd(A), memory = 0, α = 0, droptol = 0, check_tril = true)
lldl(A, ::Type{Tf}; P = amd(A), memory = 0, α = 0, droptol = 0, check_tril = true)
Compute the limited-memory LDLᵀ factorization of `A`.
`A` should be a lower triangular matrix.
# Arguments
- `A::SparseMatrixCSC{Tv,Ti}`: matrix to factorize (its strict lower triangle and
diagonal will be extracted)
diagonal will be extracted);
- `::Type{Tf}`: type used for the factorization, by default the type of the elements of `A`.
# Keyword arguments
- `P::AbstractVector{<:Integer} = amd(A)`: permutation vector.
- `memory::Int=0`: extra amount of memory to allocate for the incomplete factor `L`.
The total memory allocated is nnz(T) + n * `memory`, where
`T` is the strict lower triangle of A and `n` is the size of `A`.
`T` is the strict lower triangle of A and `n` is the size of `A`;
- `α::Tv=Tv(0)`: initial value of the shift in case the incomplete LDLᵀ
factorization of `A` is found to not exist. The shift will be
gradually increased from this initial value until success.
gradually increased from this initial value until success;
- `droptol::Tv=Tv(0)`: to further sparsify `L`, all elements with magnitude smaller
than `droptol` are dropped.
- `check_tril::Bool = true`: check if `A` is a lower triangular matrix.
than `droptol` are dropped;
- `check_tril::Bool = true`: check if `A` is a lower triangular matrix.
# Example
A = sprand(Float64, 10, 10, 0.2)
As = A * A' + I
LLDL = lldl(As) # lower triangle is extracted
T = tril(As)
LLDL = lldl(T) # Float64 factorization
LLDL = lldl(T, Float32) # Float32 factorization
"""
function lldl(
A::SparseMatrixCSC{Tv, Ti};
A::SparseMatrixCSC{Tv, Ti},
::Type{Tf};
P::AbstractVector{<:Integer} = amd(A),
memory::Int = 0,
α::Tv = Tv(0),
droptol::Tv = Tv(0),
α::Number = 0,
droptol::Number = 0,
check_tril::Bool = true,
) where {Tv <: Number, Ti <: Integer}
) where {Tv <: Number, Ti <: Integer, Tf <: Real}
T = (!check_tril || istril(A)) ? A : tril(A)
S = LimitedLDLFactorization(T; P = P, memory = memory, α = α)
lldl_factorize!(S, T, droptol = droptol)
S = LimitedLDLFactorization(T, Tf; P = P, memory = memory, α = α)
lldl_factorize!(S, T, droptol = Tf(droptol))
end

lldl(A::SparseMatrixCSC{Tv, Ti}; kwargs...) where {Tv <: Number, Ti <: Integer} =
lldl(A, Tv; kwargs...)

lldl(A::Matrix{Tv}; kwargs...) where {Tv <: Number} = lldl(sparse(A); kwargs...)

# symmetric matrix input
Expand Down Expand Up @@ -709,35 +735,24 @@ function lldl_solve!(n, b, Lp, Li, Lx, D, P)
end

import Base.(\)
function (\)(
LLDL::LimitedLDLFactorization{T, Ti},
b::AbstractVector{T},
) where {T <: Real, Ti <: Integer}
function (\)(LLDL::LimitedLDLFactorization, b::AbstractVector)
y = copy(b)
lldl_solve!(LLDL.n, y, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P)
end

import LinearAlgebra.ldiv!
@inline ldiv!(
LLDL::LimitedLDLFactorization{T, Ti},
b::AbstractVector{T},
) where {T <: Real, Ti <: Integer} =
ldiv!(LLDL::LimitedLDLFactorization, b::AbstractVector) =
lldl_solve!(LLDL.n, b, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P)

function ldiv!(
y::AbstractVector{T},
LLDL::LimitedLDLFactorization{T, Ti},
b::AbstractVector{T},
) where {T <: Real, Ti <: Integer}
function ldiv!(y::AbstractVector, LLDL::LimitedLDLFactorization, b::AbstractVector)
y .= b
lldl_solve!(LLDL.n, y, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P)
end

import SparseArrays.nnz
@inline nnz(LLDL::LimitedLDLFactorization{T, Ti}) where {T <: Real, Ti <: Integer} =
length(LLDL.Lrowind) + length(LLDL.D)
nnz(LLDL::LimitedLDLFactorization) = length(LLDL.Lrowind) + length(LLDL.D)

@inline function Base.getproperty(LLDL::LimitedLDLFactorization, prop::Symbol)
function Base.getproperty(LLDL::LimitedLDLFactorization, prop::Symbol)
if prop == :L
nz = LLDL.colptr[end] - 1
return SparseMatrixCSC(LLDL.n, LLDL.n, LLDL.colptr, LLDL.Lrowind[1:nz], LLDL.Lnzvals[1:nz])
Expand Down
39 changes: 39 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,42 @@ end
LLDL = lldl(A, memory = 5)
@test LLDL.α == 0
end

@testset "test lower-precision factorization" begin
# Lower triangle only
Tf = Float32
A = [
1.7 0 0 0 0 0 0 0 0.13 0
0 1.0 0 0 0.02 0 0 0 0 0.01
0 0 1.5 0 0 0 0 0 0 0
0 0 0 1.1 0 0 0 0 0 0
0 0.02 0 0 2.6 0 0.16 0.09 0.52 0.53
0 0 0 0 0 1.2 0 0 0 0
0 0 0 0 0.16 0 1.3 0 0 0.56
0 0 0 0 0.09 0 0 1.6 0.11 0
0.13 0 0 0 0.52 0 0 0.11 1.4 0
0 0.01 0 0 0.53 0 0.56 0 0 3.1
]
A = sparse(A)
Alow = tril(A)
perm = amd(A)
LLDL = lldl(Alow, Tf, P = perm, memory = 10)
@test eltype(LLDL.D) == Tf

L = LLDL.L + I
@test norm(L * diagm(0 => LLDL.D) * L' - A[perm, perm]) sqrt(eps(Tf)) * norm(A)

sol = ones(Tf, A.n)
b = similar(sol)
mul!(b, A, sol)
x = LLDL \ b
@test x sol
@test eltype(x) == Tf

y = similar(b)
ldiv!(y, LLDL, b)
@test y sol

ldiv!(LLDL, b)
@test b sol
end

0 comments on commit 624a21c

Please sign in to comment.