Skip to content

Commit

Permalink
Use a symmetric coloring for the computation of sparse Hessians
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Jul 12, 2024
1 parent 093eb80 commit 5004c7e
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 46 deletions.
82 changes: 36 additions & 46 deletions src/sparse_hessian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ struct SparseADHessian{Tag, GT, S, T} <: ADNLPModels.ADBackend
colptr::Vector{Int}
colors::Vector{Int}
ncolors::Int
dcolors::Dict{Int, Vector{UnitRange{Int}}}
dcolors::Dict{Int, Vector{Tuple{Int,Int}}}
res::S
lz::Vector{ForwardDiff.Dual{Tag, T, 1}}
glz::Vector{ForwardDiff.Dual{Tag, T, 1}}
Expand All @@ -28,8 +28,7 @@ function SparseADHessian(
T = eltype(S)
H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector)

# TODO: use ADTypes.symmetric_coloring instead if you have the right decompression
colors = ADTypes.column_coloring(H, coloring)
colors = ADTypes.symmetric_coloring(H, coloring)
ncolors = maximum(colors)

d = BitVector(undef, nvar)
Expand All @@ -39,11 +38,7 @@ function SparseADHessian(
colptr = trilH.colptr

# The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
dcolors = Dict(i => UnitRange{Int}[] for i = 1:ncolors)
for (i, color) in enumerate(colors)
range_vals = colptr[i]:(colptr[i + 1] - 1)
push!(dcolors[color], range_vals)
end
dcolors = nnz_colors(trilH, colors, ncolors)

# prepare directional derivatives
res = similar(x0)
Expand Down Expand Up @@ -97,7 +92,7 @@ struct SparseReverseADHessian{T, S, Tagf, F, Tagψ, P} <: ADNLPModels.ADBackend
colptr::Vector{Int}
colors::Vector{Int}
ncolors::Int
dcolors::Dict{Int, Vector{UnitRange{Int}}}
dcolors::Dict{Int, Vector{Tuple{Int,Int}}}
res::S
z::Vector{ForwardDiff.Dual{Tagf, T, 1}}
gz::Vector{ForwardDiff.Dual{Tagf, T, 1}}
Expand All @@ -123,8 +118,7 @@ function SparseReverseADHessian(
) where {T}
H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector)

# TODO: use ADTypes.symmetric_coloring instead if you have the right decompression
colors = ADTypes.column_coloring(H, coloring)
colors = ADTypes.symmetric_coloring(H, coloring)
ncolors = maximum(colors)

d = BitVector(undef, nvar)
Expand All @@ -134,11 +128,7 @@ function SparseReverseADHessian(
colptr = trilH.colptr

# The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
dcolors = Dict(i => UnitRange{Int}[] for i = 1:ncolors)
for (i, color) in enumerate(colors)
range_vals = colptr[i]:(colptr[i + 1] - 1)
push!(dcolors[color], range_vals)
end
dcolors = nnz_colors(trilH, colors, ncolors)

# prepare directional derivatives
res = similar(x0)
Expand Down Expand Up @@ -239,17 +229,17 @@ function sparse_hess_coord!(
b.longv .= 0

for icol = 1:(b.ncolors)
b.d .= (b.colors .== icol)
b.longv[(ncon + 1):(ncon + nvar)] .= b.d
map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv)
b.∇φ!(b.glz, b.lz)
ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz)
b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar))

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for range_vals in b.dcolors[icol]
for k in range_vals
row = b.rowval[k]
dcolor_icol = b.dcolors[icol]
if !isempty(dcolor_icol)
b.d .= (b.colors .== icol)
b.longv[(ncon + 1):(ncon + nvar)] .= b.d
map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv)
b.∇φ!(b.glz, b.lz)
ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz)
b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar))

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for (row, k) in dcolor_icol
vals[k] = b.res[row]
end
end
Expand All @@ -268,25 +258,25 @@ function sparse_hess_coord!(
nvar = length(x)

for icol = 1:(b.ncolors)
b.d .= (b.colors .== icol)

# objective
map!(ForwardDiff.Dual{Tagf}, b.z, x, b.d) # x + ε * v
b.∇f!(b.gz, b.z)
ForwardDiff.extract_derivative!(Tagf, b.res, b.gz)
b.res .*= obj_weight

# constraints
map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.d)
b.yψ .= y
b.∇l!(b.gzψ, b.gyψ, b., b.)
ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ)
b.res .+= b.Hv_temp

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for range_vals in b.dcolors[icol]
for k in range_vals
row = b.rowval[k]
dcolor_icol = b.dcolors[icol]
if !isempty(dcolor_icol)
b.d .= (b.colors .== icol)

# objective
map!(ForwardDiff.Dual{Tagf}, b.z, x, b.d) # x + ε * v
b.∇f!(b.gz, b.z)
ForwardDiff.extract_derivative!(Tagf, b.res, b.gz)
b.res .*= obj_weight

# constraints
map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.d)
b.yψ .= y
b.∇l!(b.gzψ, b.gyψ, b.zψ, b.yψ)
ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ)
b.res .+= b.Hv_temp

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for (row, k) in dcolor_icol
vals[k] = b.res[row]
end
end
Expand Down
80 changes: 80 additions & 0 deletions src/sparsity_pattern.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,83 @@ function compute_hessian_sparsity(
S = ADTypes.hessian_sparsity(lagrangian, x0, detector)
return S
end

"""
dcolors = nnz_colors(trilH, colors, ncolors)
Determine the coefficients in `trilH` that will be computed by a given color.
This function leverages the symmetry of the matrix and also stores the row index for a
given coefficient in the "compressed column".
# Arguments
- `trilH::SparseMatrixCSC`: A lower triangular sparse matrix in CSC format.
- `colors::Vector{Int}`: A vector where the i-th entry represents the color assigned to the i-th column of the matrix.
- `ncolors::Int`: The number of distinct colors used in the coloring.
# Output
- `dcolors::Dict{Int, Vector{Tuple{Int, Int}}}`: A dictionary where the keys are the color indices (from 1 to `ncolors`),
and the values are vectors of tuples. Each tuple contains two integers: the first integer is the row index, and the
second integer is the index in `trilH.nzval` where the non-zero coefficient can be found.
"""
function nnz_colors(trilH, colors, ncolors)
# We want to determine the coefficients in `trilH` that will be computed by a given color.
# Because we exploit the symmetry, we also need to store the row index for a given coefficient
# in the "compressed column".
dcolors = Dict(i => Tuple{Int,Int}[] for i=1:ncolors)

n = LinearAlgebra.checksquare(trilH)
for j in 1:n
for k in trilH.colptr[j]:trilH.colptr[j+1]-1
i = trilH.rowval[k]

# Should we use c[i] or c[j] for this coefficient H[i,j]?
ci = colors[i]
cj = colors[j]

if i == j
# H[i,j] is a coefficient of the diagonal
push!(dcolors[ci], (j,k))
else # i > j
if is_only_color_in_row(H, i, j, n, colors, ci)
# column i is the only column of its color c[i] with a non-zero coefficient in row j
push!(dcolors[ci], (j,k))
else
# column j is the only column of its color c[j] with a non-zero coefficient in row i
# it is ensured by the star coloring used in `symmetric_coloring`.
push!(dcolors[cj], (i,k))
end
end
end
end

return dcolors
end

"""
flag = is_only_color_in_row(H, i, j, n, colors, ci)
This function returns `true` if the column `i` is the only column of color `ci` with a non-zero coefficient in row `j`.
It returns `false` otherwise.
"""
function is_only_color_in_row(H, i, j, n, colors, ci)
column = 1
flag = true
while flag && column n
# We want to check that all columns (excpect the i-th one)
# with color ci doesn't have a non-zero coefficient in row j
if (column != i) && (colors[column] == ci)
k = H.colptr[column]
while (k H.colptr[column+1]-1) && flag
row = H.rowval[k]
if row == j
# We found a coefficient at row j in a column of color ci
flag = false
else # row != j
k += 1
end
end
end
column += 1
end
return flag
end

0 comments on commit 5004c7e

Please sign in to comment.