diff --git a/Project.toml b/Project.toml index dfeb1db7..d43a183c 100644 --- a/Project.toml +++ b/Project.toml @@ -19,6 +19,6 @@ ForwardDiff = "0.9.0, 0.10.0" NLPModels = "0.18, 0.19, 0.20, 0.21" Requires = "1" ReverseDiff = "1" -SparseConnectivityTracer = "0.6" -SparseMatrixColorings = "0.3.5" +SparseConnectivityTracer = "0.6.1" +SparseMatrixColorings = "0.4.0" julia = "^1.6" diff --git a/docs/src/sparse.md b/docs/src/sparse.md index 17ecebbb..da3d9af1 100644 --- a/docs/src/sparse.md +++ b/docs/src/sparse.md @@ -31,14 +31,14 @@ x = rand(T, 2) H = hess(nlp, x) ``` -The available backends for sparse derivatives (`SparseADJacobian`, `SparseADHessian` and `SparseReverseADHessian`) have keyword arguments `detector` and `coloring` to specify the sparsity pattern detector and the coloring algorithm, respectively. +The available backends for sparse derivatives (`SparseADJacobian`, `SparseADHessian` and `SparseReverseADHessian`) have keyword arguments `detector` and `coloring_algorithm` to specify the sparsity pattern detector and the coloring algorithm, respectively. -- **Detector**: A `detector` must be of type `ADTypes.AbstractSparsityDetector`. +- A **`detector`** must be of type `ADTypes.AbstractSparsityDetector`. The default detector is `TracerSparsityDetector()` from the package `SparseConnectivityTracer.jl`. Prior to version 0.8.0, the default detector was `SymbolicSparsityDetector()` from `Symbolics.jl`. -- **Coloring**: A `coloring` must be of type `ADTypes.AbstractColoringAlgorithm`. -The default algorithm is `GreedyColoringAlgorithm()` from the package `SparseMatrixColorings.jl`. +- A **`coloring_algorithm`** must be of type `SparseMatrixColorings.GreedyColoringAlgorithm`. +The default algorithm is `GreedyColoringAlgorithm{:direct}()` from the package `SparseMatrixColorings.jl`. If the sparsity pattern of the Jacobian of the constraint or the Hessian of the Lagrangian is available, you can directly provide them. ```@example ex2 @@ -95,4 +95,4 @@ nlp = ADNLPModel!(f, x0, lvar, uvar, c!, lcon, ucon, jacobian_backend=J_backend, The package [`SparseConnectivityTracer.jl`](https://github.com/adrhill/SparseConnectivityTracer.jl) is used to compute the sparsity pattern of Jacobians and Hessians. The evaluation of the number of directional derivatives and the seeds required to compute compressed Jacobians and Hessians is performed using [`SparseMatrixColorings.jl`](https://github.com/gdalle/SparseMatrixColorings.jl). As of release v0.8.1, it has replaced [`ColPack.jl`](https://github.com/exanauts/ColPack.jl). -We acknowledge Guillaume Dalle (@gdalle), Adrian Hill (@adrhill), and Michel Schanen (@michel2323) for the development of these packages. +We acknowledge Guillaume Dalle (@gdalle), Adrian Hill (@adrhill), Alexis Montoison (@amontoison), and Michel Schanen (@michel2323) for the development of these packages. diff --git a/src/ADNLPModels.jl b/src/ADNLPModels.jl index e1f034cf..a50d1005 100644 --- a/src/ADNLPModels.jl +++ b/src/ADNLPModels.jl @@ -6,8 +6,7 @@ using LinearAlgebra, SparseArrays # external using ADTypes: ADTypes, AbstractColoringAlgorithm, AbstractSparsityDetector using SparseConnectivityTracer: TracerSparsityDetector -using SparseMatrixColorings: - GreedyColoringAlgorithm, symmetric_coloring_detailed, symmetric_coefficient +using SparseMatrixColorings using ForwardDiff, ReverseDiff # JSO diff --git a/src/sparse_hessian.jl b/src/sparse_hessian.jl index d8649fbd..ccff097f 100644 --- a/src/sparse_hessian.jl +++ b/src/sparse_hessian.jl @@ -1,11 +1,11 @@ -struct SparseADHessian{Tag, GT, S, T} <: ADNLPModels.ADBackend - d::BitVector +struct SparseADHessian{Tag, R, T, C, S, GT} <: ADNLPModels.ADBackend + nvar::Int rowval::Vector{Int} colptr::Vector{Int} - colors::Vector{Int} - ncolors::Int - dcolors::Dict{Int, Vector{Tuple{Int, Int}}} - res::S + nzval::Vector{R} + result_coloring::C + compressed_hessian::S + seed::BitVector lz::Vector{ForwardDiff.Dual{Tag, T, 1}} glz::Vector{ForwardDiff.Dual{Tag, T, 1}} sol::S @@ -21,12 +21,12 @@ function SparseADHessian( ncon, c!; x0::AbstractVector = rand(nvar), - coloring::AbstractColoringAlgorithm = GreedyColoringAlgorithm(), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:direct}(), detector::AbstractSparsityDetector = TracerSparsityDetector(), kwargs..., ) H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector) - SparseADHessian(nvar, f, ncon, c!, H; x0, coloring, kwargs...) + SparseADHessian(nvar, f, ncon, c!, H; x0, coloring_algorithm, kwargs...) end function SparseADHessian( @@ -36,25 +36,20 @@ function SparseADHessian( c!, H::SparseMatrixCSC{Bool, Int64}; x0::S = rand(nvar), - coloring::AbstractColoringAlgorithm = GreedyColoringAlgorithm(), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:direct}(), kwargs..., ) where {S} T = eltype(S) - colors, star_set = symmetric_coloring_detailed(H, coloring) - ncolors = maximum(colors) - - d = BitVector(undef, nvar) + problem = ColoringProblem{:symmetric, :column}() + result_coloring = coloring(H, problem, coloring_algorithm, decompression_eltype=T) trilH = tril(H) rowval = trilH.rowval colptr = trilH.colptr - - # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`. - dcolors = nnz_colors(trilH, star_set, colors, ncolors) - - # prepare directional derivatives - res = similar(x0) + nzval = T.(trilH.nzval) + compressed_hessian = similar(x0) + seed = BitVector(undef, nvar) function lag(z; nvar = nvar, ncon = ncon, f = f, c! = c!) cx, x, y, ob = view(z, 1:ncon), @@ -82,13 +77,13 @@ function SparseADHessian( y = fill!(S(undef, ncon), 0) return SparseADHessian( - d, + nvar, rowval, colptr, - colors, - ncolors, - dcolors, - res, + nzval, + result_coloring, + compressed_hessian, + seed, lz, glz, sol, @@ -99,14 +94,14 @@ function SparseADHessian( ) end -struct SparseReverseADHessian{T, S, Tagf, F, Tagψ, P} <: ADNLPModels.ADBackend - d::BitVector +struct SparseReverseADHessian{Tagf, Tagψ, R, T, C, S, F, P} <: ADNLPModels.ADBackend + nvar::Int rowval::Vector{Int} colptr::Vector{Int} - colors::Vector{Int} - ncolors::Int - dcolors::Dict{Int, Vector{Tuple{Int, Int}}} - res::S + nzval::Vector{R} + result_coloring::C + compressed_hessian::S + seed::BitVector z::Vector{ForwardDiff.Dual{Tagf, T, 1}} gz::Vector{ForwardDiff.Dual{Tagf, T, 1}} ∇f!::F @@ -125,12 +120,12 @@ function SparseReverseADHessian( ncon, c!; x0::AbstractVector = rand(nvar), - coloring::AbstractColoringAlgorithm = GreedyColoringAlgorithm(), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:direct}(), detector::AbstractSparsityDetector = TracerSparsityDetector(), kwargs..., ) H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector) - SparseReverseADHessian(nvar, f, ncon, c!, H; x0, coloring, kwargs...) + SparseReverseADHessian(nvar, f, ncon, c!, H; x0, coloring_algorithm, kwargs...) end function SparseReverseADHessian( @@ -138,25 +133,20 @@ function SparseReverseADHessian( f, ncon, c!, - H::SparseMatrixCSC{Bool, Int64}; + H::SparseMatrixCSC{Bool, Int}; x0::AbstractVector{T} = rand(nvar), - coloring::AbstractColoringAlgorithm = GreedyColoringAlgorithm(), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:direct}(), kwargs..., ) where {T} - colors, star_set = symmetric_coloring_detailed(H, coloring) - ncolors = maximum(colors) - - d = BitVector(undef, nvar) + problem = ColoringProblem{:symmetric, :column}() + result_coloring = coloring(H, problem, coloring_algorithm, decompression_eltype=T) trilH = tril(H) rowval = trilH.rowval colptr = trilH.colptr - - # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`. - dcolors = nnz_colors(trilH, star_set, colors, ncolors) - - # prepare directional derivatives - res = similar(x0) + nzval = T.(trilH.nzval) + compressed_hessian = similar(x0) + seed = BitVector(undef, nvar) # unconstrained Hessian tagf = ForwardDiff.Tag{typeof(f), T} @@ -188,13 +178,13 @@ function SparseReverseADHessian( y = similar(x0, ncon) return SparseReverseADHessian( - d, + nvar, rowval, colptr, - colors, - ncolors, - dcolors, - res, + nzval, + result_coloring, + compressed_hessian, + seed, z, gz, ∇f!, @@ -237,76 +227,80 @@ function NLPModels.hess_structure_residual!( end function sparse_hess_coord!( - b::SparseADHessian{Tag, GT, S, T}, + b::SparseADHessian{Tag}, x::AbstractVector, obj_weight, y::AbstractVector, vals::AbstractVector, -) where {Tag, GT, S, T} - nvar = length(x) +) where {Tag} ncon = length(y) - - b.sol[1:ncon] .= zero(T) # cx - b.sol[(ncon + 1):(ncon + nvar)] .= x - b.sol[(ncon + nvar + 1):(2 * ncon + nvar)] .= y + T = eltype(x) + b.sol[1:ncon] .= zero(T) # cx + b.sol[(ncon + 1):(ncon + b.nvar)] .= x + b.sol[(ncon + b.nvar + 1):(2 * ncon + b.nvar)] .= y b.sol[end] = obj_weight b.longv .= 0 - for icol = 1:(b.ncolors) - dcolor_icol = b.dcolors[icol] - if !isempty(dcolor_icol) - b.d .= (b.colors .== icol) - b.longv[(ncon + 1):(ncon + nvar)] .= b.d - map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv) - b.∇φ!(b.glz, b.lz) - ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz) - b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar)) - - # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol` - for (row, k) in dcolor_icol - vals[k] = b.res[row] - end + # SparseMatrixColorings.jl requires a SparseMatrixCSC for the decompression + A = SparseMatrixCSC(b.nvar, b.nvar, b.colptr, b.rowval, b.nzval) + + groups = column_groups(b.result_coloring) + for (icol, cols) in enumerate(groups) + # Update the seed + b.seed .= false + for col in cols + b.seed[col] = true end - end + b.longv[(ncon + 1):(ncon + b.nvar)] .= b.seed + map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv) + b.∇φ!(b.glz, b.lz) + ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz) + b.compressed_hessian .= view(b.Hvp, (ncon + 1):(ncon + b.nvar)) + + # Update the coefficients of the lower triangular part of the Hessian that are related to the color `icol` + decompress_single_color!(A, b.compressed_hessian, icol, b.result_coloring, :L) + end + vals .= b.nzval return vals end function sparse_hess_coord!( - b::SparseReverseADHessian{T, S, Tagf, F, Tagψ, P}, + b::SparseReverseADHessian{Tagf, Tagψ}, x::AbstractVector, obj_weight, y::AbstractVector, vals::AbstractVector, -) where {T, S, Tagf, F, Tagψ, P} - nvar = length(x) - - for icol = 1:(b.ncolors) - dcolor_icol = b.dcolors[icol] - if !isempty(dcolor_icol) - b.d .= (b.colors .== icol) - - # objective - map!(ForwardDiff.Dual{Tagf}, b.z, x, b.d) # x + ε * v - b.∇f!(b.gz, b.z) - ForwardDiff.extract_derivative!(Tagf, b.res, b.gz) - b.res .*= obj_weight - - # constraints - map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.d) - b.yψ .= y - b.∇l!(b.gzψ, b.gyψ, b.zψ, b.yψ) - ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ) - b.res .+= b.Hv_temp - - # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol` - for (row, k) in dcolor_icol - vals[k] = b.res[row] - end +) where {Tagf, Tagψ} + # SparseMatrixColorings.jl requires a SparseMatrixCSC for the decompression + A = SparseMatrixCSC(b.nvar, b.nvar, b.colptr, b.rowval, b.nzval) + + groups = column_groups(b.result_coloring) + for (icol, cols) in enumerate(groups) + # Update the seed + b.seed .= false + for col in cols + b.seed[col] = true end - end + # objective + map!(ForwardDiff.Dual{Tagf}, b.z, x, b.seed) # x + ε * v + b.∇f!(b.gz, b.z) + ForwardDiff.extract_derivative!(Tagf, b.compressed_hessian, b.gz) + b.compressed_hessian .*= obj_weight + + # constraints + map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.seed) + b.yψ .= y + b.∇l!(b.gzψ, b.gyψ, b.zψ, b.yψ) + ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ) + b.compressed_hessian .+= b.Hv_temp + + # Update the coefficients of the lower triangular part of the Hessian that are related to the color `icol` + decompress_single_color!(A, b.compressed_hessian, icol, b.result_coloring, :L) + vals .= b.nzval + end return vals end diff --git a/src/sparse_jacobian.jl b/src/sparse_jacobian.jl index d4826b33..75b2e994 100644 --- a/src/sparse_jacobian.jl +++ b/src/sparse_jacobian.jl @@ -1,13 +1,14 @@ -struct SparseADJacobian{T, Tag, S} <: ADBackend - d::BitVector +struct SparseADJacobian{Tag, R, T, C, S} <: ADBackend + nvar::Int + ncon::Int rowval::Vector{Int} colptr::Vector{Int} - colors::Vector{Int} - ncolors::Int - dcolors::Dict{Int, Vector{UnitRange{Int}}} + nzval::Vector{R} + result_coloring::C + compressed_jacobian::S + seed::BitVector z::Vector{ForwardDiff.Dual{Tag, T, 1}} cz::Vector{ForwardDiff.Dual{Tag, T, 1}} - res::S end function SparseADJacobian( @@ -16,13 +17,13 @@ function SparseADJacobian( ncon, c!; x0::AbstractVector = rand(nvar), - coloring::AbstractColoringAlgorithm = GreedyColoringAlgorithm(), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:direct}(), detector::AbstractSparsityDetector = TracerSparsityDetector(), kwargs..., ) output = similar(x0, ncon) J = compute_jacobian_sparsity(c!, output, x0, detector = detector) - SparseADJacobian(nvar, f, ncon, c!, J; x0, coloring, kwargs...) + SparseADJacobian(nvar, f, ncon, c!, J; x0, coloring_algorithm, kwargs...) end function SparseADJacobian( @@ -30,34 +31,26 @@ function SparseADJacobian( f, ncon, c!, - J::SparseMatrixCSC{Bool, Int64}; + J::SparseMatrixCSC{Bool, Int}; x0::AbstractVector{T} = rand(nvar), - coloring::AbstractColoringAlgorithm = GreedyColoringAlgorithm(), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:direct}(), kwargs..., ) where {T} - # TODO: use ADTypes.row_coloring instead if you have the right decompression and some heuristic recommends it - colors = ADTypes.column_coloring(J, coloring) - ncolors = maximum(colors) - - d = BitVector(undef, nvar) + # We should support :row and :bidirectional in the future + problem = ColoringProblem{:nonsymmetric, :column}() + result_coloring = coloring(J, problem, coloring_algorithm, decompression_eltype=T) rowval = J.rowval colptr = J.colptr - - # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`. - dcolors = Dict(i => UnitRange{Int}[] for i = 1:ncolors) - for (i, color) in enumerate(colors) - range_vals = colptr[i]:(colptr[i + 1] - 1) - push!(dcolors[color], range_vals) - end + nzval = T.(J.nzval) + compressed_jacobian = similar(x0, ncon) + seed = BitVector(undef, nvar) tag = ForwardDiff.Tag{typeof(c!), T} - z = Vector{ForwardDiff.Dual{tag, T, 1}}(undef, nvar) cz = similar(z, ncon) - res = similar(x0, ncon) - SparseADJacobian(d, rowval, colptr, colors, ncolors, dcolors, z, cz, res) + SparseADJacobian(nvar, ncon, rowval, colptr, nzval, result_coloring, compressed_jacobian, seed, z, cz) end function get_nln_nnzj(b::SparseADJacobian, nvar, ncon) @@ -81,25 +74,29 @@ end function sparse_jac_coord!( ℓ!::Function, - b::SparseADJacobian{T, Tag}, + b::SparseADJacobian{Tag}, x::AbstractVector, vals::AbstractVector, -) where {T, Tag} - nvar = length(x) - for icol = 1:(b.ncolors) - b.d .= (b.colors .== icol) - map!(ForwardDiff.Dual{Tag}, b.z, x, b.d) # x + ε * v - ℓ!(b.cz, b.z) # c!(cz, x + ε * v) - ForwardDiff.extract_derivative!(Tag, b.res, b.cz) # ∇c!(cx, x)ᵀv +) where {Tag} + # SparseMatrixColorings.jl requires a SparseMatrixCSC for the decompression + A = SparseMatrixCSC(b.ncon, b.nvar, b.colptr, b.rowval, b.nzval) - # Store in `vals` the nonzeros of each column of the Jacobian computed with color `icol` - for range_vals in b.dcolors[icol] - for k in range_vals - row = b.rowval[k] - vals[k] = b.res[row] - end + groups = column_groups(b.result_coloring) + for (icol, cols) in enumerate(groups) + # Update the seed + b.seed .= false + for col in cols + b.seed[col] = true end + + map!(ForwardDiff.Dual{Tag}, b.z, x, b.seed) # x + ε * v + ℓ!(b.cz, b.z) # c!(cz, x + ε * v) + ForwardDiff.extract_derivative!(Tag, b.compressed_jacobian, b.cz) # ∇c!(cx, x)ᵀv + + # Update the columns of the Jacobian that have the color `icol` + decompress_single_color!(A, b.compressed_jacobian, icol, b.result_coloring) end + vals .= b.nzval return vals end