From 5004c7e851bec441bce8c8fc8d160a7f2083759c Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Fri, 12 Jul 2024 15:10:18 -0400 Subject: [PATCH] Use a symmetric coloring for the computation of sparse Hessians --- src/sparse_hessian.jl | 82 ++++++++++++++++++----------------------- src/sparsity_pattern.jl | 80 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 46 deletions(-) diff --git a/src/sparse_hessian.jl b/src/sparse_hessian.jl index c1c7f478..03ae1d6b 100644 --- a/src/sparse_hessian.jl +++ b/src/sparse_hessian.jl @@ -4,7 +4,7 @@ struct SparseADHessian{Tag, GT, S, T} <: ADNLPModels.ADBackend colptr::Vector{Int} colors::Vector{Int} ncolors::Int - dcolors::Dict{Int, Vector{UnitRange{Int}}} + dcolors::Dict{Int, Vector{Tuple{Int,Int}}} res::S lz::Vector{ForwardDiff.Dual{Tag, T, 1}} glz::Vector{ForwardDiff.Dual{Tag, T, 1}} @@ -28,8 +28,7 @@ function SparseADHessian( T = eltype(S) H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector) - # TODO: use ADTypes.symmetric_coloring instead if you have the right decompression - colors = ADTypes.column_coloring(H, coloring) + colors = ADTypes.symmetric_coloring(H, coloring) ncolors = maximum(colors) d = BitVector(undef, nvar) @@ -39,11 +38,7 @@ function SparseADHessian( colptr = trilH.colptr # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`. - dcolors = Dict(i => UnitRange{Int}[] for i = 1:ncolors) - for (i, color) in enumerate(colors) - range_vals = colptr[i]:(colptr[i + 1] - 1) - push!(dcolors[color], range_vals) - end + dcolors = nnz_colors(trilH, colors, ncolors) # prepare directional derivatives res = similar(x0) @@ -97,7 +92,7 @@ struct SparseReverseADHessian{T, S, Tagf, F, Tagψ, P} <: ADNLPModels.ADBackend colptr::Vector{Int} colors::Vector{Int} ncolors::Int - dcolors::Dict{Int, Vector{UnitRange{Int}}} + dcolors::Dict{Int, Vector{Tuple{Int,Int}}} res::S z::Vector{ForwardDiff.Dual{Tagf, T, 1}} gz::Vector{ForwardDiff.Dual{Tagf, T, 1}} @@ -123,8 +118,7 @@ function SparseReverseADHessian( ) where {T} H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector) - # TODO: use ADTypes.symmetric_coloring instead if you have the right decompression - colors = ADTypes.column_coloring(H, coloring) + colors = ADTypes.symmetric_coloring(H, coloring) ncolors = maximum(colors) d = BitVector(undef, nvar) @@ -134,11 +128,7 @@ function SparseReverseADHessian( colptr = trilH.colptr # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`. - dcolors = Dict(i => UnitRange{Int}[] for i = 1:ncolors) - for (i, color) in enumerate(colors) - range_vals = colptr[i]:(colptr[i + 1] - 1) - push!(dcolors[color], range_vals) - end + dcolors = nnz_colors(trilH, colors, ncolors) # prepare directional derivatives res = similar(x0) @@ -239,17 +229,17 @@ function sparse_hess_coord!( b.longv .= 0 for icol = 1:(b.ncolors) - b.d .= (b.colors .== icol) - b.longv[(ncon + 1):(ncon + nvar)] .= b.d - map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv) - b.∇φ!(b.glz, b.lz) - ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz) - b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar)) - - # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol` - for range_vals in b.dcolors[icol] - for k in range_vals - row = b.rowval[k] + dcolor_icol = b.dcolors[icol] + if !isempty(dcolor_icol) + b.d .= (b.colors .== icol) + b.longv[(ncon + 1):(ncon + nvar)] .= b.d + map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv) + b.∇φ!(b.glz, b.lz) + ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz) + b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar)) + + # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol` + for (row, k) in dcolor_icol vals[k] = b.res[row] end end @@ -268,25 +258,25 @@ function sparse_hess_coord!( nvar = length(x) for icol = 1:(b.ncolors) - b.d .= (b.colors .== icol) - - # objective - map!(ForwardDiff.Dual{Tagf}, b.z, x, b.d) # x + ε * v - b.∇f!(b.gz, b.z) - ForwardDiff.extract_derivative!(Tagf, b.res, b.gz) - b.res .*= obj_weight - - # constraints - map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.d) - b.yψ .= y - b.∇l!(b.gzψ, b.gyψ, b.zψ, b.yψ) - ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ) - b.res .+= b.Hv_temp - - # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol` - for range_vals in b.dcolors[icol] - for k in range_vals - row = b.rowval[k] + dcolor_icol = b.dcolors[icol] + if !isempty(dcolor_icol) + b.d .= (b.colors .== icol) + + # objective + map!(ForwardDiff.Dual{Tagf}, b.z, x, b.d) # x + ε * v + b.∇f!(b.gz, b.z) + ForwardDiff.extract_derivative!(Tagf, b.res, b.gz) + b.res .*= obj_weight + + # constraints + map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.d) + b.yψ .= y + b.∇l!(b.gzψ, b.gyψ, b.zψ, b.yψ) + ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ) + b.res .+= b.Hv_temp + + # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol` + for (row, k) in dcolor_icol vals[k] = b.res[row] end end diff --git a/src/sparsity_pattern.jl b/src/sparsity_pattern.jl index bc3cf3ef..088514a8 100644 --- a/src/sparsity_pattern.jl +++ b/src/sparsity_pattern.jl @@ -51,3 +51,83 @@ function compute_hessian_sparsity( S = ADTypes.hessian_sparsity(lagrangian, x0, detector) return S end + +""" + dcolors = nnz_colors(trilH, colors, ncolors) + +Determine the coefficients in `trilH` that will be computed by a given color. +This function leverages the symmetry of the matrix and also stores the row index for a +given coefficient in the "compressed column". + +# Arguments +- `trilH::SparseMatrixCSC`: A lower triangular sparse matrix in CSC format. +- `colors::Vector{Int}`: A vector where the i-th entry represents the color assigned to the i-th column of the matrix. +- `ncolors::Int`: The number of distinct colors used in the coloring. + +# Output +- `dcolors::Dict{Int, Vector{Tuple{Int, Int}}}`: A dictionary where the keys are the color indices (from 1 to `ncolors`), +and the values are vectors of tuples. Each tuple contains two integers: the first integer is the row index, and the +second integer is the index in `trilH.nzval` where the non-zero coefficient can be found. +""" +function nnz_colors(trilH, colors, ncolors) + # We want to determine the coefficients in `trilH` that will be computed by a given color. + # Because we exploit the symmetry, we also need to store the row index for a given coefficient + # in the "compressed column". + dcolors = Dict(i => Tuple{Int,Int}[] for i=1:ncolors) + + n = LinearAlgebra.checksquare(trilH) + for j in 1:n + for k in trilH.colptr[j]:trilH.colptr[j+1]-1 + i = trilH.rowval[k] + + # Should we use c[i] or c[j] for this coefficient H[i,j]? + ci = colors[i] + cj = colors[j] + + if i == j + # H[i,j] is a coefficient of the diagonal + push!(dcolors[ci], (j,k)) + else # i > j + if is_only_color_in_row(H, i, j, n, colors, ci) + # column i is the only column of its color c[i] with a non-zero coefficient in row j + push!(dcolors[ci], (j,k)) + else + # column j is the only column of its color c[j] with a non-zero coefficient in row i + # it is ensured by the star coloring used in `symmetric_coloring`. + push!(dcolors[cj], (i,k)) + end + end + end + end + + return dcolors +end + +""" + flag = is_only_color_in_row(H, i, j, n, colors, ci) + +This function returns `true` if the column `i` is the only column of color `ci` with a non-zero coefficient in row `j`. +It returns `false` otherwise. +""" +function is_only_color_in_row(H, i, j, n, colors, ci) + column = 1 + flag = true + while flag && column ≤ n + # We want to check that all columns (excpect the i-th one) + # with color ci doesn't have a non-zero coefficient in row j + if (column != i) && (colors[column] == ci) + k = H.colptr[column] + while (k ≤ H.colptr[column+1]-1) && flag + row = H.rowval[k] + if row == j + # We found a coefficient at row j in a column of color ci + flag = false + else # row != j + k += 1 + end + end + end + column += 1 + end + return flag +end