From 624a21c3da1495c2a8a9d01046b20053ad491d8a Mon Sep 17 00:00:00 2001
From: geoffroyleconte <47035783+geoffroyleconte@users.noreply.github.com>
Date: Mon, 3 Oct 2022 14:31:17 -0400
Subject: [PATCH] Factorization in lower precision (#72)

* factorization in lower precision

* Apply suggestions from code review

Co-authored-by: Alexis <35051714+amontoison@users.noreply.github.com>

* remove inline macro

* fix type instabilities

* end items docs

Co-authored-by: Alexis <35051714+amontoison@users.noreply.github.com>
---
 Project.toml                    |   2 +-
 src/LimitedLDLFactorizations.jl | 137 ++++++++++++++++++--------------
 test/runtests.jl                |  39 +++++++++
 3 files changed, 116 insertions(+), 62 deletions(-)

diff --git a/Project.toml b/Project.toml
index 2e520c3..d591f9f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -8,8 +8,8 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [compat]
-julia = "^1.6.0"
 AMD = "0.4, 0.5"
+julia = "^1.6.0"
 
 [extras]
 AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
diff --git a/src/LimitedLDLFactorizations.jl b/src/LimitedLDLFactorizations.jl
index 11e821c..767050b 100644
--- a/src/LimitedLDLFactorizations.jl
+++ b/src/LimitedLDLFactorizations.jl
@@ -64,44 +64,44 @@ function LimitedLDLFactorization(
   T::SparseMatrixCSC{Tv, Ti},
   P::AbstractVector{<:Integer},
   memory::Int,
-  α::Tv,
+  α::Number,
   n::Int,
   nnzT::Int,
-) where {Tv <: Number, Ti}
+  ::Type{Tf},
+) where {Tv <: Number, Ti, Tf <: Real}
   np = n * memory
   Pinv = similar(P)
 
   nnz_diag = 0
-  adiag = Vector{Tv}(undef, n)
+  adiag = Vector{Tf}(undef, n)
   for col = 1:n
     k = T.colptr[col]
     row = (k ≤ nnzT) ? T.rowval[k] : 0
     if row == col
       nnz_diag += 1
-      adiag[col] = T.nzval[k]
+      adiag[col] = Tf(T.nzval[k])
     else
-      adiag[col] = zero(Tv)
+      adiag[col] = zero(Tf)
     end
   end
 
   # Make room to store L.
   nnzLmax = nnzT + np - nnz_diag
-  d = Vector{Tv}(undef, n)  # Diagonal matrix D.
-  lvals = Vector{Tv}(undef, nnzLmax)  # Strict lower triangle of L.
+  d = Vector{Tf}(undef, n)  # Diagonal matrix D.
+  lvals = Vector{Tf}(undef, nnzLmax)  # Strict lower triangle of L.
   rowind = Vector{Ti}(undef, nnzLmax)
   colptr = Vector{Ti}(undef, n + 1)
-  wa1 = Vector{Tv}(undef, n)
-  s = Vector{Tv}(undef, n)
+  wa1 = Vector{Tf}(undef, n)
+  s = Vector{Tf}(undef, n)
 
-  w = Vector{Tv}(undef, n)     # contents of the current column of A.
+  w = Vector{Tf}(undef, n)     # contents of the current column of A.
   indr = Vector{Ti}(undef, n)  # row indices of the nonzeros in the current column after it's been loaded into w.
   indf = Vector{Ti}(undef, n)  # indf[col] = position in w of the next entry in column col to be used during the factorization.
   list = zeros(Ti, n)  # list[col] = linked list of columns that will update column col.
 
-  pos = findall(adiag[P] .> Tv(0))
-  neg = findall(adiag[P] .≤ Tv(0))
+  pos = findall(adiag[P] .> Tf(0))
+  neg = findall(adiag[P] .≤ Tf(0))
 
-  nz = colptr[end] - 1
   Lrowind = view(rowind, 1:nnzLmax)
   Lnzvals = view(lvals, 1:nnzLmax)
 
@@ -116,7 +116,7 @@ function LimitedLDLFactorization(
     adiag,
     d,
     P,
-    α,
+    Tf(α),
     memory,
     Pinv,
     wa1,
@@ -132,35 +132,47 @@ function LimitedLDLFactorization(
 end
 
 """
-    LLDL = LimitedLDLFactorization(T, P; memory = 0, α = 0.0)
+    LLDL = LimitedLDLFactorization(T; P = amd(T), memory = 0, α = 0)
+    LLDL = LimitedLDLFactorization(T, ::Type{Tf}; P = amd(T), memory = 0, α = 0)
 
 Perform the allocations for the LLDL factorization of symmetric matrix whose lower triangle is `T` 
 with the permutation vector `P`.
 
 # Arguments
-- `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize.
+- `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize;
+- `::Type{Tf}`: type used for the factorization, by default the type of the elements of `A`.
 
 # Keyword arguments
-- `P::AbstractVector{<:Integer} = amd(T)`: permutation vector.
+- `P::AbstractVector{<:Integer} = amd(T)`: permutation vector;
 - `memory::Int=0`: extra amount of memory to allocate for the incomplete factor `L`.
                    The total memory allocated is nnz(T) + n * `memory`, where
-                   `T` is the strict lower triangle of A and `n` is the size of `A`.
-- `α::Tv=Tv(0)`: initial value of the shift in case the incomplete LDLᵀ
+                   `T` is the strict lower triangle of A and `n` is the size of `A`;
+- `α::Number=0`: initial value of the shift in case the incomplete LDLᵀ
                  factorization of `A` is found to not exist. The shift will be
                  gradually increased from this initial value until success.
+
+# Example
+    A = sprand(Float64, 10, 10, 0.2)
+    T = tril(A * A' + I)
+    LLDL = LimitedLDLFactorization(T) # Float64 factorization
+    LLDL = LimitedLDLFactorization(T, Float32) # Float32 factorization
 """
 function LimitedLDLFactorization(
-  T::SparseMatrixCSC{Tv, Ti};
+  T::SparseMatrixCSC{Tv, Ti},
+  ::Type{Tf};
   P::AbstractVector{<:Integer} = amd(T),
   memory::Int = 0,
-  α::Tv = Tv(0),
-) where {Tv <: Number, Ti <: Integer}
+  α::Number = 0,
+) where {Tv <: Number, Ti <: Integer, Tf <: Real}
   memory < 0 && error("limited-memory parameter must be nonnegative")
   n = size(T, 1)
   n != size(T, 2) && error("input matrix must be square")
-  return LimitedLDLFactorization(T, P, memory, α, n, nnz(T))
+  return LimitedLDLFactorization(T, P, memory, α, n, nnz(T), Tf)
 end
 
+LimitedLDLFactorization(T::SparseMatrixCSC{Tv, Ti}; kwargs...) where {Tv <: Number, Ti <: Integer} = 
+  LimitedLDLFactorization(T, Tv; kwargs...)
+
 # Here T is the lower triangle of A.
 """
     lldl_factorize!(S, T; droptol = 0.0)
@@ -169,19 +181,19 @@ Perform the in-place factorization of a symmetric matrix whose lower triangle is
 with the permutation vector.
 
 # Arguments
-- `S::LimitedLDLFactorization{Tv, Ti}`.
+- `S::LimitedLDLFactorization{Tf, Ti}`;
 - `T::SparseMatrixCSC{Tv,Ti}`: lower triangle of the matrix to factorize.
 `T` should keep the same nonzero pattern and the sign of its diagonal elements.
 
 # Keyword arguments
-- `droptol::Tv=Tv(0)`: to further sparsify `L`, all elements with magnitude smaller
+- `droptol::Tf=Tf(0)`: to further sparsify `L`, all elements with magnitude smaller
                        than `droptol` are dropped.
 """
 function lldl_factorize!(
-  S::LimitedLDLFactorization{Tv, Ti},
+  S::LimitedLDLFactorization{Tf, Ti},
   T::SparseMatrixCSC{Tv, Ti};
-  droptol::Tv = Tv(0),
-) where {Tv <: Number, Ti <: Integer}
+  droptol::Tf = Tf(0),
+) where {Tf <: Number, Tv <: Number, Ti <: Integer}
   n = size(T, 1)
   n != size(T, 2) && error("input matrix must be square")
 
@@ -205,7 +217,7 @@ function lldl_factorize!(
   for col = 1:n
     k = T.colptr[col]
     row = (k ≤ nnzT) ? T.rowval[k] : 0
-    adiag[col] = (row == col) ? T.nzval[k] : zero(Tv)
+    adiag[col] = (row == col) ? Tf(T.nzval[k]) : zero(Tf)
   end
 
   d = S.D  # Diagonal matrix D.
@@ -216,14 +228,14 @@ function lldl_factorize!(
   # Compute the 2-norm of columns of A
   # and the diagonal scaling matrix.
   wa1 = S.wa1
-  wa1 .= zero(Tv)
+  wa1 .= zero(Tf)
   s = S.s
   @inbounds for col = 1:n
-    s[col] = Tv(1) # Initialization
+    s[col] = Tf(1) # Initialization
     @inbounds for k = T.colptr[col]:(T.colptr[col + 1] - 1)
       row = T.rowval[k]
       (row == col) && continue
-      val = T.nzval[k]
+      val = Tf(T.nzval[k])
       val2 = val * val
       wa1[Pinv[col]] += val2  # Contribution to column Pinv[col].
       wa1[Pinv[row]] += val2  # Contribution to column Pinv[row].
@@ -264,7 +276,7 @@ function lldl_factorize!(
   if !(S.computed_posneg)
     for i = 1:n
       adiagPi = adiag[P[i]]
-      if adiagPi > Tv(0)
+      if adiagPi > Tf(0)
         cpos += 1
         pos[cpos] = i
       else
@@ -362,40 +374,54 @@ function lldl_factorize!(
 end
 
 """
-    lldl(A)
+    lldl(A; P = amd(A), memory = 0, α = 0, droptol = 0, check_tril = true)
+    lldl(A, ::Type{Tf}; P = amd(A), memory = 0, α = 0, droptol = 0, check_tril = true)
 
 Compute the limited-memory LDLᵀ factorization of `A`.
 `A` should be a lower triangular matrix.
 
 # Arguments
 - `A::SparseMatrixCSC{Tv,Ti}`: matrix to factorize (its strict lower triangle and
-                               diagonal will be extracted)
+                               diagonal will be extracted);
+- `::Type{Tf}`: type used for the factorization, by default the type of the elements of `A`.
 
 # Keyword arguments
 - `P::AbstractVector{<:Integer} = amd(A)`: permutation vector.
 - `memory::Int=0`: extra amount of memory to allocate for the incomplete factor `L`.
                    The total memory allocated is nnz(T) + n * `memory`, where
-                   `T` is the strict lower triangle of A and `n` is the size of `A`.
+                   `T` is the strict lower triangle of A and `n` is the size of `A`;
 - `α::Tv=Tv(0)`: initial value of the shift in case the incomplete LDLᵀ
                  factorization of `A` is found to not exist. The shift will be
-                 gradually increased from this initial value until success.
+                 gradually increased from this initial value until success;
 - `droptol::Tv=Tv(0)`: to further sparsify `L`, all elements with magnitude smaller
-                       than `droptol` are dropped.
-- `check_tril::Bool = true`: check if `A` is a lower triangular matrix. 
+                       than `droptol` are dropped;
+- `check_tril::Bool = true`: check if `A` is a lower triangular matrix.
+
+# Example
+    A = sprand(Float64, 10, 10, 0.2)
+    As = A * A' + I
+    LLDL = lldl(As) # lower triangle is extracted
+    T = tril(As)
+    LLDL = lldl(T) # Float64 factorization
+    LLDL = lldl(T, Float32) # Float32 factorization
 """
 function lldl(
-  A::SparseMatrixCSC{Tv, Ti};
+  A::SparseMatrixCSC{Tv, Ti},
+  ::Type{Tf};
   P::AbstractVector{<:Integer} = amd(A),
   memory::Int = 0,
-  α::Tv = Tv(0),
-  droptol::Tv = Tv(0),
+  α::Number = 0,
+  droptol::Number = 0,
   check_tril::Bool = true,
-) where {Tv <: Number, Ti <: Integer}
+) where {Tv <: Number, Ti <: Integer, Tf <: Real}
   T = (!check_tril || istril(A)) ? A : tril(A)
-  S = LimitedLDLFactorization(T; P = P, memory = memory, α = α)
-  lldl_factorize!(S, T, droptol = droptol)
+  S = LimitedLDLFactorization(T, Tf; P = P, memory = memory, α = α)
+  lldl_factorize!(S, T, droptol = Tf(droptol))
 end
 
+lldl(A::SparseMatrixCSC{Tv, Ti}; kwargs...) where {Tv <: Number, Ti <: Integer} =
+  lldl(A, Tv; kwargs...)
+
 lldl(A::Matrix{Tv}; kwargs...) where {Tv <: Number} = lldl(sparse(A); kwargs...)
 
 # symmetric matrix input
@@ -709,35 +735,24 @@ function lldl_solve!(n, b, Lp, Li, Lx, D, P)
 end
 
 import Base.(\)
-function (\)(
-  LLDL::LimitedLDLFactorization{T, Ti},
-  b::AbstractVector{T},
-) where {T <: Real, Ti <: Integer}
+function (\)(LLDL::LimitedLDLFactorization, b::AbstractVector)
   y = copy(b)
   lldl_solve!(LLDL.n, y, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P)
 end
 
 import LinearAlgebra.ldiv!
-@inline ldiv!(
-  LLDL::LimitedLDLFactorization{T, Ti},
-  b::AbstractVector{T},
-) where {T <: Real, Ti <: Integer} =
+ldiv!(LLDL::LimitedLDLFactorization, b::AbstractVector) =
   lldl_solve!(LLDL.n, b, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P)
 
-function ldiv!(
-  y::AbstractVector{T},
-  LLDL::LimitedLDLFactorization{T, Ti},
-  b::AbstractVector{T},
-) where {T <: Real, Ti <: Integer}
+function ldiv!(y::AbstractVector, LLDL::LimitedLDLFactorization, b::AbstractVector)
   y .= b
   lldl_solve!(LLDL.n, y, LLDL.colptr, LLDL.Lrowind, LLDL.Lnzvals, LLDL.D, LLDL.P)
 end
 
 import SparseArrays.nnz
-@inline nnz(LLDL::LimitedLDLFactorization{T, Ti}) where {T <: Real, Ti <: Integer} =
-  length(LLDL.Lrowind) + length(LLDL.D)
+nnz(LLDL::LimitedLDLFactorization) = length(LLDL.Lrowind) + length(LLDL.D)
 
-@inline function Base.getproperty(LLDL::LimitedLDLFactorization, prop::Symbol)
+function Base.getproperty(LLDL::LimitedLDLFactorization, prop::Symbol)
   if prop == :L
     nz = LLDL.colptr[end] - 1
     return SparseMatrixCSC(LLDL.n, LLDL.n, LLDL.colptr, LLDL.Lrowind[1:nz], LLDL.Lnzvals[1:nz])
diff --git a/test/runtests.jl b/test/runtests.jl
index 3dc099d..820e41e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -219,3 +219,42 @@ end
   LLDL = lldl(A, memory = 5)
   @test LLDL.α == 0
 end
+
+@testset "test lower-precision factorization" begin
+  # Lower triangle only
+  Tf = Float32
+  A = [
+    1.7 0 0 0 0 0 0 0 0.13 0
+    0 1.0 0 0 0.02 0 0 0 0 0.01
+    0 0 1.5 0 0 0 0 0 0 0
+    0 0 0 1.1 0 0 0 0 0 0
+    0 0.02 0 0 2.6 0 0.16 0.09 0.52 0.53
+    0 0 0 0 0 1.2 0 0 0 0
+    0 0 0 0 0.16 0 1.3 0 0 0.56
+    0 0 0 0 0.09 0 0 1.6 0.11 0
+    0.13 0 0 0 0.52 0 0 0.11 1.4 0
+    0 0.01 0 0 0.53 0 0.56 0 0 3.1
+  ]
+  A = sparse(A)
+  Alow = tril(A)
+  perm = amd(A)
+  LLDL = lldl(Alow, Tf, P = perm, memory = 10)
+  @test eltype(LLDL.D) == Tf
+
+  L = LLDL.L + I
+  @test norm(L * diagm(0 => LLDL.D) * L' - A[perm, perm]) ≤ sqrt(eps(Tf)) * norm(A)
+
+  sol = ones(Tf, A.n)
+  b = similar(sol)
+  mul!(b, A, sol)
+  x = LLDL \ b
+  @test x ≈ sol
+  @test eltype(x) == Tf
+
+  y = similar(b)
+  ldiv!(y, LLDL, b)
+  @test y ≈ sol
+
+  ldiv!(LLDL, b)
+  @test b ≈ sol
+end
\ No newline at end of file