From 84a66d6e944e37d3b16bd2b25e744e966ab1db74 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 09:43:01 +0200 Subject: [PATCH 01/20] =?UTF-8?q?Add=20shared=20Hessian=20tracer=20=C3=A0?= =?UTF-8?q?=20la=20Walther?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/interface.jl | 4 +- src/overloads/hessian_tracer.jl | 44 ++++++---- src/tracers.jl | 69 ++++++++++++++-- test/test_hessian.jl | 142 +++++++++++++++++++++++++------- 4 files changed, 204 insertions(+), 55 deletions(-) diff --git a/src/interface.jl b/src/interface.jl index 9e3f85de..f3e2badf 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -1,6 +1,6 @@ const DEFAULT_CONNECTIVITY_TRACER = ConnectivityTracer{BitSet} const DEFAULT_GRADIENT_TRACER = GradientTracer{BitSet} -const DEFAULT_HESSIAN_TRACER = HessianTracer{BitSet,Set{Tuple{Int,Int}}} +const DEFAULT_HESSIAN_TRACER = HessianTracer{BitSet,Set{Tuple{Int,Int}},false} #==================# # Enumerate inputs # @@ -21,7 +21,7 @@ function trace_input(::Type{T}, x::Real, i::Integer) where {T<:Union{AbstractTra end function trace_input(::Type{T}, xs::AbstractArray, i) where {T<:Union{AbstractTracer,Dual}} indices = reshape(1:length(xs), size(xs)) .+ (i - 1) - return create_tracer.(T, xs, indices) + return create_tracers(T, xs, indices) end #=========================# diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index c15d7ee2..64d347c0 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -7,24 +7,32 @@ return t else g_out, h_out = hessian_tracer_1_to_1_inner( - gradient(t), hessian(t), is_der1_zero, is_secondder_zero + gradient(t), hessian(t), is_der1_zero, is_secondder_zero; shared=isshared(T) ) return T(g_out, h_out) # return tracer end end function hessian_tracer_1_to_1_inner( - sg::G, sh::H, is_der1_zero::Bool, is_secondder_zero::Bool + sg::G, sh::H, is_der1_zero::Bool, is_secondder_zero::Bool; shared::Bool ) where {I<:Integer,G<:AbstractSet{I},H<:AbstractSet{Tuple{I,I}}} sg_out = gradient_tracer_1_to_1_inner(sg, is_der1_zero) - sh_out = if is_der1_zero && is_secondder_zero - myempty(H) - elseif !is_der1_zero && is_secondder_zero - sh - elseif is_der1_zero && !is_secondder_zero - union_product!(myempty(H), sg, sg) + if shared + sh_out = if is_secondder_zero + sh + else + union_product!(sh, sg, sg) + end else - union_product!(copy(sh), sg, sg) + sh_out = if is_der1_zero && is_secondder_zero + myempty(H) + elseif !is_der1_zero && is_secondder_zero + sh + elseif is_der1_zero && !is_secondder_zero + union_product!(myempty(H), sg, sg) + else + union_product!(copy(sh), sg, sg) + end end return sg_out, sh_out # return sets end @@ -65,7 +73,7 @@ end is_secondder_arg1_zero::Bool, is_der1_arg2_zero::Bool, is_secondder_arg2_zero::Bool, - is_der_cross_zero::Bool, + is_der_cross_zero::Bool; ) where {T<:HessianTracer} # TODO: add tests for isempty if tx.isempty && ty.isempty @@ -84,7 +92,8 @@ end is_secondder_arg1_zero, is_der1_arg2_zero, is_secondder_arg2_zero, - is_der_cross_zero, + is_der_cross_zero; + shared=isshared(T), ) return T(g_out, h_out) # return tracer end @@ -99,12 +108,17 @@ function hessian_tracer_2_to_1_inner( is_secondder_arg1_zero::Bool, is_der1_arg2_zero::Bool, is_secondder_arg2_zero::Bool, - is_der_cross_zero::Bool, + is_der_cross_zero::Bool; + shared::Bool, ) where {I<:Integer,G<:AbstractSet{I},H<:AbstractSet{Tuple{I,I}}} sg_out = gradient_tracer_2_to_1_inner(sgx, sgy, is_der1_arg1_zero, is_der1_arg2_zero) - sh_out = myempty(H) - !is_der1_arg1_zero && union!(sh_out, shx) # hessian alpha - !is_der1_arg2_zero && union!(sh_out, shy) # hessian beta + if shared + sh_out = union!(shx, shy) + else + sh_out = myempty(H) + !is_der1_arg1_zero && union!(sh_out, shx) # hessian alpha + !is_der1_arg2_zero && union!(sh_out, shy) # hessian beta + end !is_secondder_arg1_zero && union_product!(sh_out, sgx, sgx) # product alpha !is_secondder_arg2_zero && union_product!(sh_out, sgy, sgy) # product beta !is_der_cross_zero && union_product!(sh_out, sgx, sgy) # cross product 1 diff --git a/src/tracers.jl b/src/tracers.jl index 672d9a42..cdf61a98 100644 --- a/src/tracers.jl +++ b/src/tracers.jl @@ -1,5 +1,7 @@ abstract type AbstractTracer <: Real end +isshared(::Type{<:AbstractTracer}) = false + #===================# # Set operations # #===================# @@ -117,10 +119,12 @@ $(TYPEDEF) For a higher-level interface, refer to [`hessian_pattern`](@ref). +The last type parameter `shared` is a `Bool` indicating whether the Hessian should be shared among all intermediate scalar quantities. + ## Fields $(TYPEDFIELDS) """ -struct HessianTracer{G,H} <: AbstractTracer +struct HessianTracer{G,H,shared} <: AbstractTracer "Sparse representation of non-zero entries in the gradient and the Hessian." gradient::G "Sparse representation of non-zero entries in the Hessian." @@ -128,8 +132,14 @@ struct HessianTracer{G,H} <: AbstractTracer "Indicator whether gradient and Hessian in tracer both contain only zeros." isempty::Bool + function HessianTracer{G,H,shared}( + gradient::G, hessian::H, isempty::Bool=false + ) where {G,H,shared} + return new{G,H,shared}(gradient, hessian, isempty) + end + function HessianTracer{G,H}(gradient::G, hessian::H, isempty::Bool=false) where {G,H} - return new{G,H}(gradient, hessian, isempty) + return new{G,H,false}(gradient, hessian, isempty) end end @@ -141,6 +151,8 @@ gradient(t::HessianTracer) = t.gradient hessian(t::HessianTracer) = t.hessian isemptytracer(t::HessianTracer) = t.isempty +isshared(::Type{HessianTracer{G,H,shared}}) where {G,H,shared} = shared + function Base.show(io::IO, t::HessianTracer) print(io, typeof(t)) if isemptytracer(t) @@ -187,6 +199,7 @@ gradient(d::Dual{P,T}) where {P,T<:GradientTracer} = gradient(tracer(d)) gradient(d::Dual{P,T}) where {P,T<:HessianTracer} = gradient(tracer(d)) hessian(d::Dual{P,T}) where {P,T<:HessianTracer} = hessian(tracer(d)) isemptytracer(d::Dual) = isemptytracer(tracer(d)) +isshared(d::Dual{P,T}) where {P,T<:HessianTracer} = isshared(tracer(d)) Dual{P,T}(d::Dual{P,T}) where {P<:Real,T<:AbstractTracer} = d Dual(primal::P, tracer::T) where {P,T} = Dual{P,T}(primal, tracer) @@ -200,13 +213,16 @@ end #===========# myempty(::Type{ConnectivityTracer{I}}) where {I} = ConnectivityTracer{I}(myempty(I), true) -myempty(::Type{GradientTracer{G}}) where {G} = GradientTracer{G}(myempty(G), true) -myempty(::Type{HessianTracer{G,H}}) where {G,H} = HessianTracer{G,H}(myempty(G), myempty(H), true) +myempty(::Type{GradientTracer{G}}) where {G} = GradientTracer{G}(myempty(G), true) + +function myempty(::Type{HessianTracer{G,H,shared}}) where {G,H,shared} + return HessianTracer{G,H,shared}(myempty(G), myempty(H), true) +end """ - create_tracer(T, index) where {T<:AbstractTracer} + create_tracer(T, index) -Convenience constructor for [`ConnectivityTracer`](@ref), [`GradientTracer`](@ref) and [`HessianTracer`](@ref) from input indices. +Convenience constructor for [`ConnectivityTracer`](@ref), [`GradientTracer`](@ref), [`HessianTracer`](@ref) and [`Dual`](@ref) from a single input and its index """ function create_tracer(::Type{Dual{P,T}}, primal::Real, index::Integer) where {P,T} return Dual(primal, create_tracer(T, primal, index)) @@ -218,8 +234,45 @@ end function create_tracer(::Type{GradientTracer{G}}, ::Real, index::Integer) where {G} return GradientTracer{G}(seed(G, index)) end -function create_tracer(::Type{HessianTracer{G,H}}, ::Real, index::Integer) where {G,H} - return HessianTracer{G,H}(seed(G, index), myempty(H)) +function create_tracer( + ::Type{HessianTracer{G,H,shared}}, ::Real, index::Integer +) where {G,H,shared} + return HessianTracer{G,H,shared}(seed(G, index), myempty(H)) +end + +""" + create_tracers(T, xs, indices) + +Convenience constructor for [`ConnectivityTracer`](@ref), [`GradientTracer`](@ref), [`HessianTracer`](@ref) and [`Dual`](@ref) from multiple inputs and their indices. + +Allows the creation of shared tracer fields (sofar only for the Hessian). +""" +function create_tracers( + ::Type{T}, xs::AbstractArray{<:Real,N}, indices::AbstractArray{<:Integer,N} +) where {T<:Union{AbstractTracer,Dual},N} + return create_tracer.(T, xs, indices) +end + +function create_tracers( + ::Type{HessianTracer{G,H,true}}, + xs::AbstractArray{<:Real,N}, + indices::AbstractArray{<:Integer,N}, +) where {G,H,N} + sh = myempty(H) # shared + return map(indices) do index + HessianTracer{G,H,true}(seed(G, index), sh) + end +end + +function create_tracers( + ::Type{Dual{P,HessianTracer{G,H,true}}}, + xs::AbstractArray{<:Real,N}, + indices::AbstractArray{<:Integer,N}, +) where {P<:Real,G,H,N} + sh = myempty(H) # shared + return map(xs, indices) do x, index + Dual(x, HessianTracer{G,H,true}(seed(G, index), sh)) + end end # Pretty-printing of Dual tracers diff --git a/test/test_hessian.jl b/test/test_hessian.jl index 6075e38d..0c1fb25f 100644 --- a/test/test_hessian.jl +++ b/test/test_hessian.jl @@ -1,15 +1,17 @@ using SparseConnectivityTracer -using SparseConnectivityTracer: Dual, HessianTracer, MissingPrimalError, trace_input, empty +using SparseConnectivityTracer: + Dual, HessianTracer, MissingPrimalError, trace_input, empty, isshared using SparseConnectivityTracer: DuplicateVector, RecursiveSet, SortedVector using ADTypes: hessian_sparsity using SpecialFunctions: erf, beta using Test HESSIAN_TRACERS = ( - HessianTracer{BitSet,Set{Tuple{Int,Int}}}, - HessianTracer{Set{Int},Set{Tuple{Int,Int}}}, - HessianTracer{DuplicateVector{Int},DuplicateVector{Tuple{Int,Int}}}, - HessianTracer{SortedVector{Int},SortedVector{Tuple{Int,Int}}}, + HessianTracer{BitSet,Set{Tuple{Int,Int}},false}, + HessianTracer{BitSet,Set{Tuple{Int,Int}},true}, + HessianTracer{Set{Int},Set{Tuple{Int,Int}},false}, + HessianTracer{DuplicateVector{Int},DuplicateVector{Tuple{Int,Int}},false}, + HessianTracer{SortedVector{Int},SortedVector{Tuple{Int,Int}},false}, # TODO: test on RecursiveSet ) @@ -99,20 +101,38 @@ HESSIAN_TRACERS = ( ] h = hessian_sparsity(x -> copysign(x[1] * x[2], x[3] * x[4]), rand(4), method) - @test h == [ - 0 1 0 0 - 1 0 0 0 - 0 0 0 0 - 0 0 0 0 - ] + if isshared(T) + @test h == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + else + @test h == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 0 + 0 0 0 0 + ] + end h = hessian_sparsity(x -> div(x[1] * x[2], x[3] * x[4]), rand(4), method) - @test h == [ - 0 0 0 0 - 0 0 0 0 - 0 0 0 0 - 0 0 0 0 - ] + if isshared(T) + @test Matrix(h) == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + else + @test h == [ + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + ] + end h = hessian_sparsity(x -> sum(sincosd(x)), 1.0, method) @test h ≈ [1;;] @@ -146,8 +166,30 @@ HESSIAN_TRACERS = ( 0 1 0 0 1 ] + # Shared Hessian + function dead_end(x) + z = x[1] * x[2] + return x[3] * x[4] + end + h = hessian_sparsity(dead_end, rand(4), method) + if isshared(T) + @test h == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + else + @test h == [ + 0 0 0 0 + 0 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + end + # Missing primal errors - @testset "MissingPrimalError on $f" for f in ( + for f in ( iseven, isfinite, isinf, @@ -241,19 +283,59 @@ end f2(x) = ifelse(x[2] < x[3], x[1] * x[2], x[3] * x[4]) h = hessian_sparsity(f2, [1 2 3 4], method) - @test h == [ - 0 1 0 0 - 1 0 0 0 - 0 0 0 0 - 0 0 0 0 - ] + if isshared(T) + @test h == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + else + @test h == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 0 + 0 0 0 0 + ] + end h = hessian_sparsity(f2, [1 3 2 4], method) - @test h == [ - 0 0 0 0 - 0 0 0 0 - 0 0 0 1 - 0 0 1 0 - ] + if isshared(T) + @test h == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + else + @test h == [ + 0 0 0 0 + 0 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + end + + # Shared Hessian + function dead_end(x) + z = x[1] * x[2] + return x[3] * x[4] + end + h = hessian_sparsity(dead_end, rand(4), method) + if isshared(T) + @test h == [ + 0 1 0 0 + 1 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + else + @test h == [ + 0 0 0 0 + 0 0 0 0 + 0 0 0 1 + 0 0 1 0 + ] + end # Code coverage @test hessian_sparsity(typemax, 1, method) ≈ [0;;] From 41db8f2d5adf477278a057624bdbc3d92ba22b79 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 09:51:24 +0200 Subject: [PATCH 02/20] Fix constructors --- test/test_constructors.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_constructors.jl b/test/test_constructors.jl index cb6f479c..10cf69b9 100644 --- a/test/test_constructors.jl +++ b/test/test_constructors.jl @@ -21,10 +21,11 @@ GRADIENT_TRACERS = ( ) HESSIAN_TRACERS = ( - HessianTracer{BitSet,Set{Tuple{Int,Int}}}, - HessianTracer{Set{Int},Set{Tuple{Int,Int}}}, - HessianTracer{DuplicateVector{Int},DuplicateVector{Tuple{Int,Int}}}, - HessianTracer{SortedVector{Int},SortedVector{Tuple{Int,Int}}}, + HessianTracer{BitSet,Set{Tuple{Int,Int}},false}, + HessianTracer{BitSet,Set{Tuple{Int,Int}},true}, + HessianTracer{Set{Int},Set{Tuple{Int,Int}},false}, + HessianTracer{DuplicateVector{Int},DuplicateVector{Tuple{Int,Int}},false}, + HessianTracer{SortedVector{Int},SortedVector{Tuple{Int,Int}},false}, # TODO: test on RecursiveSet ) From 2d1195159498d6c9785e84dbba06a62f19a03e23 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 09:57:55 +0200 Subject: [PATCH 03/20] Fixes --- src/tracers.jl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tracers.jl b/src/tracers.jl index cdf61a98..6ce9cb52 100644 --- a/src/tracers.jl +++ b/src/tracers.jl @@ -137,14 +137,10 @@ struct HessianTracer{G,H,shared} <: AbstractTracer ) where {G,H,shared} return new{G,H,shared}(gradient, hessian, isempty) end - - function HessianTracer{G,H}(gradient::G, hessian::H, isempty::Bool=false) where {G,H} - return new{G,H,false}(gradient, hessian, isempty) - end end -HessianTracer{G,H}(::Real) where {G,H} = myempty(HessianTracer{G,H}) -HessianTracer{G,H}(t::HessianTracer{G,H}) where {G,H} = t +HessianTracer{G,H,shared}(::Real) where {G,H,shared} = myempty(HessianTracer{G,H,shared}) +HessianTracer{G,H,shared}(t::HessianTracer{G,H,shared}) where {G,H,shared} = t HessianTracer(t::HessianTracer) = t gradient(t::HessianTracer) = t.gradient From c7a34136e760e11207fb1088f583030a5cdc1fd6 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 09:59:57 +0200 Subject: [PATCH 04/20] Moar fixes --- src/tracers.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/tracers.jl b/src/tracers.jl index 6ce9cb52..c8fdf590 100644 --- a/src/tracers.jl +++ b/src/tracers.jl @@ -1,7 +1,5 @@ abstract type AbstractTracer <: Real end -isshared(::Type{<:AbstractTracer}) = false - #===================# # Set operations # #===================# @@ -195,7 +193,6 @@ gradient(d::Dual{P,T}) where {P,T<:GradientTracer} = gradient(tracer(d)) gradient(d::Dual{P,T}) where {P,T<:HessianTracer} = gradient(tracer(d)) hessian(d::Dual{P,T}) where {P,T<:HessianTracer} = hessian(tracer(d)) isemptytracer(d::Dual) = isemptytracer(tracer(d)) -isshared(d::Dual{P,T}) where {P,T<:HessianTracer} = isshared(tracer(d)) Dual{P,T}(d::Dual{P,T}) where {P<:Real,T<:AbstractTracer} = d Dual(primal::P, tracer::T) where {P,T} = Dual{P,T}(primal, tracer) From 3a1df201c6864d50b73359473db058df9b8271d2 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:08:27 +0200 Subject: [PATCH 05/20] Typo --- src/overloads/hessian_tracer.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index 64d347c0..246ea0e7 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -73,7 +73,7 @@ end is_secondder_arg1_zero::Bool, is_der1_arg2_zero::Bool, is_secondder_arg2_zero::Bool, - is_der_cross_zero::Bool; + is_der_cross_zero::Bool, ) where {T<:HessianTracer} # TODO: add tests for isempty if tx.isempty && ty.isempty From ff7c2b8f9b90e2ee117f46f81d142f7132224c1d Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:40:31 +0200 Subject: [PATCH 06/20] Overload ifelse --- src/overloads/ifelse_global.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/overloads/ifelse_global.jl b/src/overloads/ifelse_global.jl index f9ad5836..018a18a0 100644 --- a/src/overloads/ifelse_global.jl +++ b/src/overloads/ifelse_global.jl @@ -15,9 +15,13 @@ function output_union(tx::T, ty::T) where {T<:GradientTracer} return T(union(gradient(tx), gradient(ty))) end - function output_union(tx::T, ty::T) where {T<:HessianTracer} + function output_union(tx::T, ty::T) where {G,H,shared,T<:HessianTracer{G,H,shared}} g_out = union(gradient(tx), gradient(ty)) - h_out = union(hessian(tx), hessian(ty)) + h_out = if shared + union!(hessian(tx), hessian(ty)) + else + union(hessian(tx), hessian(ty)) + end return T(g_out, h_out) end From f5b94d37cb22b061fbce1734fe8eb0e616450cb9 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:34:01 +0200 Subject: [PATCH 07/20] Add identical objects test --- benchmark/benchmarks.jl | 6 +++++- test/test_hessian.jl | 24 +++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index e6b36ad6..97541c92 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -17,13 +17,17 @@ for S1 in SET_TYPES S2 = Set{Tuple{Int,Int}} G = GradientTracer{S1} - H = HessianTracer{S1,S2} + H = HessianTracer{S1,S2,false} + H_shared = HessianTracer{S1,S2,true} SUITE["Jacobian"]["Global"][nameof(S1)] = jacbench(TracerSparsityDetector(G, H)) SUITE["Jacobian"]["Local"][nameof(S1)] = jacbench(TracerLocalSparsityDetector(G, H)) SUITE["Hessian"]["Global"][(nameof(S1), nameof(S2))] = hessbench( TracerSparsityDetector(G, H) ) + SUITE["Hessian"]["Global (shared)"][(nameof(S1), nameof(S2))] = hessbench( + TracerSparsityDetector(G, H_shared) + ) SUITE["Hessian"]["Local"][(nameof(S1), nameof(S2))] = hessbench( TracerLocalSparsityDetector(G, H) ) diff --git a/test/test_hessian.jl b/test/test_hessian.jl index 0c1fb25f..dd961a6c 100644 --- a/test/test_hessian.jl +++ b/test/test_hessian.jl @@ -1,6 +1,6 @@ using SparseConnectivityTracer using SparseConnectivityTracer: - Dual, HessianTracer, MissingPrimalError, trace_input, empty, isshared + Dual, HessianTracer, MissingPrimalError, create_tracers, trace_input, empty, isshared using SparseConnectivityTracer: DuplicateVector, RecursiveSet, SortedVector using ADTypes: hessian_sparsity using SpecialFunctions: erf, beta @@ -352,3 +352,25 @@ end @test hessian_sparsity(x -> ℯ^zero(x), 1, method) ≈ [0;;] end end + +@testset "Shared HessianTracer - same objects" begin + H = HessianTracer{BitSet,Set{Tuple{Int,Int}},true} + + function multi_output_for_shared_test(x::AbstractArray) + z = ones(eltype(x), size(x)) + y1 = x[1]^2 * z[1] + y2 = z[2] * x[2]^2 + y3 = x[1] * x[2] + y4 = z[1] * z[2] # entirely new tracer + y = [y1, y2, y3, y4] + return y + end + + x = rand(2) + xt = create_tracers(H, x, eachindex(x)) + yt = multi_output_for_shared_test(xt) + + @test yt[1].hessian === yt[2].hessian + @test yt[1].hessian === yt[3].hessian + @test_broken yt[1].hessian === yt[4].hessian +end From 828a65614568334365784eb1306029eb04bac47f Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:51:50 +0200 Subject: [PATCH 08/20] Add warning --- src/tracers.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tracers.jl b/src/tracers.jl index c8fdf590..536fe5b7 100644 --- a/src/tracers.jl +++ b/src/tracers.jl @@ -117,10 +117,13 @@ $(TYPEDEF) For a higher-level interface, refer to [`hessian_pattern`](@ref). -The last type parameter `shared` is a `Bool` indicating whether the Hessian should be shared among all intermediate scalar quantities. - ## Fields $(TYPEDFIELDS) + +## Internals + +The last type parameter `shared` is a `Bool` indicating whether the `hessian` field of this object should be shared among all intermediate scalar quantities involved in a function. +It is not yet part of the public API, and users should always set it to `false`. """ struct HessianTracer{G,H,shared} <: AbstractTracer "Sparse representation of non-zero entries in the gradient and the Hessian." From 3748c47974e082ad061c1697ad20cb24c61e74cf Mon Sep 17 00:00:00 2001 From: adrhill Date: Thu, 27 Jun 2024 20:17:33 +0200 Subject: [PATCH 09/20] Fixes for patterns introduced in #139 --- src/interface.jl | 17 ++++---- src/overloads/hessian_tracer.jl | 72 +++++++++++++++++++++------------ src/overloads/ifelse_global.jl | 17 +++++--- src/patterns.jl | 68 ++++++++++++++++++++----------- src/tracers.jl | 70 ++++++-------------------------- test/brusselator.jl | 2 +- test/flux.jl | 2 +- test/test_constructors.jl | 2 +- test/test_gradient.jl | 8 ++-- test/test_hessian.jl | 21 +++++----- test/tracers_definitions.jl | 13 +++--- 11 files changed, 150 insertions(+), 142 deletions(-) diff --git a/src/interface.jl b/src/interface.jl index bbd4df71..325e0b86 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -1,6 +1,6 @@ const DEFAULT_GRADIENT_TRACER = GradientTracer{IndexSetGradientPattern{Int,BitSet}} const DEFAULT_HESSIAN_TRACER = HessianTracer{ - IndexSetHessianPattern{Int,BitSet,Set{Tuple{Int,Int}}} + IndexSetHessianPattern{Int,BitSet,Set{Tuple{Int,Int}},false} } #==================# @@ -9,20 +9,19 @@ const DEFAULT_HESSIAN_TRACER = HessianTracer{ """ trace_input(T, x) - trace_input(T, x) - + trace_input(T, xs) Enumerates input indices and constructs the specified type `T` of tracer. Supports [`GradientTracer`](@ref), [`HessianTracer`](@ref) and [`Dual`](@ref). """ -trace_input(::Type{T}, x) where {T<:Union{AbstractTracer,Dual}} = trace_input(T, x, 1) +trace_input(::Type{T}, xs) where {T<:Union{AbstractTracer,Dual}} = trace_input(T, xs, 1) -function trace_input(::Type{T}, x::Real, i::Integer) where {T<:Union{AbstractTracer,Dual}} - return create_tracer(T, x, i) -end function trace_input(::Type{T}, xs::AbstractArray, i) where {T<:Union{AbstractTracer,Dual}} - indices = reshape(1:length(xs), size(xs)) .+ (i - 1) - return create_tracers(T, xs, indices) + is = reshape(1:length(xs), size(xs)) .+ (i - 1) + return create_tracers(T, xs, is) +end +function trace_input(::Type{T}, x::Real, i::Integer) where {T<:Union{AbstractTracer,Dual}} + return only(create_tracers(T, [x], [i])) end #=========================# diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index 054cb432..83dcf3a8 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -12,27 +12,33 @@ end function hessian_tracer_1_to_1_inner( - p::P, is_der1_zero::Bool, is_der2_zero::Bool; shared::Bool -) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH}} + p::P, is_der1_zero::Bool, is_der2_zero::Bool +) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,false}} sg = gradient(p) sh = hessian(p) sg_out = gradient_tracer_1_to_1_inner(sg, is_der1_zero) - if shared - sh_out = if is_secondder_zero - sh - else - union_product!(sh, sg, sg) - end + sh_out = if is_der1_zero && is_der2_zero + myempty(SH) + elseif !is_der1_zero && is_der2_zero + sh + elseif is_der1_zero && is_der2_zero + union_product!(myempty(SH), sg, sg) else - sh_out = if is_der1_zero && is_secondder_zero - myempty(H) - elseif !is_der1_zero && is_secondder_zero - sh - elseif is_der1_zero && !is_secondder_zero - union_product!(myempty(H), sg, sg) - else - union_product!(copy(sh), sg, sg) - end + union_product!(copy(sh), sg, sg) + end + return P(sg_out, sh_out) # return pattern +end + +function hessian_tracer_1_to_1_inner( + p::P, is_der1_zero::Bool, is_der2_zero::Bool +) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,true}} + sg = gradient(p) + sh = hessian(p) + sg_out = gradient_tracer_1_to_1_inner(sg, is_der1_zero) + sh_out = if is_der2_zero + sh + else + union_product!(sh, sg, sg) end return P(sg_out, sh_out) # return pattern end @@ -104,17 +110,33 @@ function hessian_tracer_2_to_1_inner( is_der1_arg2_zero::Bool, is_der2_arg2_zero::Bool, is_der_cross_zero::Bool, -) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH}} +) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,false}} sgx, shx = gradient(px), hessian(px) sgy, shy = gradient(py), hessian(py) sg_out = gradient_tracer_2_to_1_inner(sgx, sgy, is_der1_arg1_zero, is_der1_arg2_zero) - if shared - sh_out = union!(shx, shy) - else - sh_out = myempty(SH) - !is_der1_arg1_zero && union!(sh_out, shx) # hessian alpha - !is_der1_arg2_zero && union!(sh_out, shy) # hessian beta - end + sh_out = myempty(SH) + !is_der1_arg1_zero && union!(sh_out, shx) # hessian alpha + !is_der1_arg2_zero && union!(sh_out, shy) # hessian beta + !is_der2_arg1_zero && union_product!(sh_out, sgx, sgx) # product alpha + !is_der2_arg2_zero && union_product!(sh_out, sgy, sgy) # product beta + !is_der_cross_zero && union_product!(sh_out, sgx, sgy) # cross product 1 + !is_der_cross_zero && union_product!(sh_out, sgy, sgx) # cross product 2 + return P(sg_out, sh_out) # return pattern +end + +function hessian_tracer_2_to_1_inner( + px::P, + py::P, + is_der1_arg1_zero::Bool, + is_der2_arg1_zero::Bool, + is_der1_arg2_zero::Bool, + is_der2_arg2_zero::Bool, + is_der_cross_zero::Bool, +) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,true}} + sgx, shx = gradient(px), hessian(px) + sgy, shy = gradient(py), hessian(py) + sg_out = gradient_tracer_2_to_1_inner(sgx, sgy, is_der1_arg1_zero, is_der1_arg2_zero) + sh_out = union!(shx, shy) !is_der2_arg1_zero && union_product!(sh_out, sgx, sgx) # product alpha !is_der2_arg2_zero && union_product!(sh_out, sgy, sgy) # product beta !is_der_cross_zero && union_product!(sh_out, sgx, sgy) # cross product 1 diff --git a/src/overloads/ifelse_global.jl b/src/overloads/ifelse_global.jl index 0164a0f2..e330c319 100644 --- a/src/overloads/ifelse_global.jl +++ b/src/overloads/ifelse_global.jl @@ -15,13 +15,18 @@ function output_union(px::P, py::P) where {P<:IndexSetGradientPattern} return P(union(set(px), set(py))) # return pattern end - function output_union(px::P, py::P) where {G,H,shared,P<:IndexSetHessianPattern{G,H,shared}} + function output_union( + px::P, py::P + ) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,false}} # non-mutating + g_out = union(gradient(px), gradient(py)) + h_out = union(hessian(px), hessian(py)) + return P(g_out, h_out) # return pattern + end + function output_union( + px::P, py::P + ) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,true}} # mutating g_out = union(gradient(px), gradient(py)) - h_out = if shared - union!(hessian(tx), hessian(ty)) - else - union(hessian(px), hessian(py)) - end + h_out = union!(hessian(px), hessian(py)) return P(g_out, h_out) # return pattern end diff --git a/src/patterns.jl b/src/patterns.jl index a3d9b479..afa7b38b 100644 --- a/src/patterns.jl +++ b/src/patterns.jl @@ -9,11 +9,20 @@ AbstractPattern ├── AbstractGradientPattern: used in GradientTracer │ └── IndexSetGradientPattern └── AbstractHessianPattern: used in HessianTracer - └── IndexSetHessianPattern + ├── IndexSetHessianPattern + └── SharedIndexSetHessianPattern ``` """ abstract type AbstractPattern end +""" + isshared(pattern) + +Indicates whether patterns share memory (mutate). +""" +isshared(::P) where {P<:AbstractPattern} = isshared(P) +isshared(::Type{P}) where {P<:AbstractPattern} = false + """ myempty(T) myempty(tracer) @@ -25,13 +34,11 @@ Constructor for an empty tracer or pattern of type `T` representing a new number myempty """ - seed(T, i) - seed(tracer, i) - seed(pattern, i) + create_patterns(P, xs, is) -Constructor for a tracer or pattern of type `T` that only contains the given index `i`. +Convenience constructor for patterns of type `P` for multiple inputs `xs` and their indices `is`. """ -seed +create_patterns #==========================# # Utilities on AbstractSet # @@ -69,18 +76,17 @@ For use with [`GradientTracer`](@ref). ## Expected interface -* `myempty(::Type{MyPattern})`: return a pattern representing a new number (usually an empty pattern) -* `seed(::Type{MyPattern}, i::Integer)`: return an pattern that only contains the given index `i` -* `gradient(p::MyPattern)`: return non-zero indices `i` for use with `GradientTracer` - -Note that besides their names, the last two functions are usually identical. +* [`myempty`](@ref) +* [`create_patterns`](@ref) +* `gradient(p::MyPattern)`: return non-zero indices `i` in the gradient representation +* [`isshared`](@ref) in case the pattern is shared (mutates). Defaults to false. """ abstract type AbstractGradientPattern <: AbstractPattern end """ $(TYPEDEF) -Vector sparsity pattern represented by an `AbstractSet` of indices ``{i}`` of non-zero values. +Gradient sparsity pattern represented by an `AbstractSet` of indices ``{i}`` of non-zero values. ## Fields $(TYPEDFIELDS) @@ -97,8 +103,9 @@ Base.show(io::IO, p::IndexSetGradientPattern) = Base.show(io, set(p)) function myempty(::Type{IndexSetGradientPattern{I,S}}) where {I,S} return IndexSetGradientPattern{I,S}(myempty(S)) end -function seed(::Type{IndexSetGradientPattern{I,S}}, i) where {I,S} - return IndexSetGradientPattern{I,S}(seed(S, i)) +function create_patterns(::Type{P}, xs, is) where {I,S,P<:IndexSetGradientPattern{I,S}} + sets = seed.(S, is) + return P.(sets) end # Tracer compatibility @@ -118,29 +125,42 @@ For use with [`HessianTracer`](@ref). ## Expected interface -* `myempty(::Type{MyPattern})`: return a pattern representing a new number (usually an empty pattern) -* `seed(::Type{MyPattern}, i::Integer)`: return an pattern that only contains the given index `i` in the first-order representation +* [`myempty`](@ref) +* [`create_patterns`](@ref) * `gradient(p::MyPattern)`: return non-zero indices `i` in the first-order representation * `hessian(p::MyPattern)`: return non-zero indices `(i, j)` in the second-order representation +* [`isshared`](@ref) in case the pattern is shared (mutates). Defaults to false. """ abstract type AbstractHessianPattern <: AbstractPattern end """ - IndexSetHessianPattern(vector::AbstractGradientPattern, mat::AbstractMatrixPattern) +$(TYPEDEF) + +Hessian sparsity pattern represented by: +* an `AbstractSet` of indices ``i`` of non-zero values representing first-order sparsity +* an `AbstractSet` of index tuples ``(i,j)`` of non-zero values representing second-order sparsity -Gradient and Hessian sparsity patterns constructed by combining two AbstractSets. +## Fields + +$(TYPEDFIELDS) """ -struct IndexSetHessianPattern{I<:Integer,SG<:AbstractSet{I},SH<:AbstractSet{Tuple{I,I}}} <: - AbstractHessianPattern +struct IndexSetHessianPattern{ + I<:Integer,SG<:AbstractSet{I},SH<:AbstractSet{Tuple{I,I}},mutating +} <: AbstractHessianPattern gradient::SG hessian::SH end +isshared(::Type{IndexSetHessianPattern{I,SG,SH,true}}) where {I,SG,SH} = true -function myempty(::Type{IndexSetHessianPattern{I,SG,SH}}) where {I,SG,SH} - return IndexSetHessianPattern{I,SG,SH}(myempty(SG), myempty(SH)) +function myempty(::Type{P}) where {I,SG,SH,M,P<:IndexSetHessianPattern{I,SG,SH,M}} + return P(myempty(SG), myempty(SH)) end -function seed(::Type{IndexSetHessianPattern{I,SG,SH}}, index) where {I,SG,SH} - return IndexSetHessianPattern{I,SG,SH}(seed(SG, index), myempty(SH)) +function create_patterns( + ::Type{P}, xs, is +) where {I,SG,SH,M,P<:IndexSetHessianPattern{I,SG,SH,M}} + gradients = seed.(SG, is) + hessian = myempty(SH) + return P.(gradients, Ref(hessian)) end # Tracer compatibility diff --git a/src/tracers.jl b/src/tracers.jl index e74aa44c..0c969672 100644 --- a/src/tracers.jl +++ b/src/tracers.jl @@ -136,76 +136,32 @@ end # Utilities # #===========# -myempty(::T) where {T<:AbstractTracer} = myempty(T) +# isshared(::Type{T}) where {P,T<:GradientTracer{P}} = isshared(P) # no shared AbstractGradientPattern yet +isshared(::Type{T}) where {P,T<:HessianTracer{P}} = isshared(P) -# myempty(::Type{T}) where {P,T<:AbstractTracer{P}} = T(myempty(P), true) # JET complains about this +myempty(::T) where {T<:AbstractTracer} = myempty(T) +# myempty(::Type{T}) where {P,T<:AbstractTracer{P}} = T(myempty(P), true) # JET complains about this myempty(::Type{T}) where {P,T<:GradientTracer{P}} = T(myempty(P), true) myempty(::Type{T}) where {P,T<:HessianTracer{P}} = T(myempty(P), true) -seed(::T, i) where {T<:AbstractTracer} = seed(T, i) - -# seed(::Type{T}, i) where {P,T<:AbstractTracer{P}} = T(seed(P, i)) # JET complains about this -seed(::Type{T}, i) where {P,T<:GradientTracer{P}} = T(seed(P, i)) -seed(::Type{T}, i) where {P,T<:HessianTracer{P}} = T(seed(P, i)) - -""" - create_tracer(T, index) - -Convenience constructor for [`GradientTracer`](@ref) and [`HessianTracer`](@ref) from input indices. -""" -function create_tracer(::Type{T}, ::Real, index::Integer) where {P,T<:AbstractTracer{P}} - return T(seed(P, index)) -end - -function create_tracer(::Type{Dual{P,T}}, primal::Real, index::Integer) where {P,T} - return Dual(primal, create_tracer(T, primal, index)) -end - -function create_tracer(::Type{ConnectivityTracer{I}}, ::Real, index::Integer) where {I} - return ConnectivityTracer{I}(seed(I, index)) -end -function create_tracer(::Type{GradientTracer{G}}, ::Real, index::Integer) where {G} - return GradientTracer{G}(seed(G, index)) -end -function create_tracer( - ::Type{HessianTracer{G,H,shared}}, ::Real, index::Integer -) where {G,H,shared} - return HessianTracer{G,H,shared}(seed(G, index), myempty(H)) -end - """ create_tracers(T, xs, indices) -Convenience constructor for [`ConnectivityTracer`](@ref), [`GradientTracer`](@ref), [`HessianTracer`](@ref) and [`Dual`](@ref) from multiple inputs and their indices. - -Allows the creation of shared tracer fields (sofar only for the Hessian). +Convenience constructor for [`GradientTracer`](@ref), [`HessianTracer`](@ref) and [`Dual`](@ref) +from multiple inputs `xs` and their indices `is`. """ function create_tracers( ::Type{T}, xs::AbstractArray{<:Real,N}, indices::AbstractArray{<:Integer,N} -) where {T<:Union{AbstractTracer,Dual},N} - return create_tracer.(T, xs, indices) +) where {P<:AbstractPattern,T<:AbstractTracer{P},N} + patterns = create_patterns(P, xs, indices) + return T.(patterns) end function create_tracers( - ::Type{HessianTracer{G,H,true}}, - xs::AbstractArray{<:Real,N}, - indices::AbstractArray{<:Integer,N}, -) where {G,H,N} - sh = myempty(H) # shared - return map(indices) do index - HessianTracer{G,H,true}(seed(G, index), sh) - end -end - -function create_tracers( - ::Type{Dual{P,HessianTracer{G,H,true}}}, - xs::AbstractArray{<:Real,N}, - indices::AbstractArray{<:Integer,N}, -) where {P<:Real,G,H,N} - sh = myempty(H) # shared - return map(xs, indices) do x, index - Dual(x, HessianTracer{G,H,true}(seed(G, index), sh)) - end + ::Type{D}, xs::AbstractArray{<:Real,N}, indices::AbstractArray{<:Integer,N} +) where {P,T,D<:Dual{P,T},N} + tracers = create_tracers(T, xs, indices) + return D.(xs, tracers) end # Pretty-printing of Dual tracers diff --git a/test/brusselator.jl b/test/brusselator.jl index c2cae04d..51e897f1 100644 --- a/test/brusselator.jl +++ b/test/brusselator.jl @@ -6,7 +6,7 @@ using SparseConnectivityTracer: DuplicateVector, RecursiveSet, SortedVector using SparseConnectivityTracerBenchmarks.ODE: Brusselator! using Test -# Load definitions of GRADIENT_TRACERS and HESSIAN_TRACERS +# Load definitions of GRADIENT_TRACERS, GRADIENT_PATTERNS, HESSIAN_TRACERS and HESSIAN_PATTERNS include("tracers_definitions.jl") function test_brusselator(method::AbstractSparsityDetector) diff --git a/test/flux.jl b/test/flux.jl index d28f7bde..d6db1b3d 100644 --- a/test/flux.jl +++ b/test/flux.jl @@ -6,7 +6,7 @@ using SparseConnectivityTracer using SparseConnectivityTracer: DuplicateVector, RecursiveSet, SortedVector using Test -# Load definitions of GRADIENT_TRACERS and HESSIAN_TRACERS +# Load definitions of GRADIENT_TRACERS, GRADIENT_PATTERNS, HESSIAN_TRACERS and HESSIAN_PATTERNS include("tracers_definitions.jl") const INPUT_FLUX = reshape( diff --git a/test/test_constructors.jl b/test/test_constructors.jl index 1bf9ed31..cc8b906c 100644 --- a/test/test_constructors.jl +++ b/test/test_constructors.jl @@ -4,7 +4,7 @@ using SparseConnectivityTracer: primal, tracer, isemptytracer using SparseConnectivityTracer: myempty, name using Test -# Load definitions of GRADIENT_TRACERS and HESSIAN_TRACERS +# Load definitions of GRADIENT_TRACERS, GRADIENT_PATTERNS, HESSIAN_TRACERS and HESSIAN_PATTERNS include("tracers_definitions.jl") function test_nested_duals(::Type{T}) where {T<:AbstractTracer} diff --git a/test/test_gradient.jl b/test/test_gradient.jl index 795e8518..ee6dcf49 100644 --- a/test/test_gradient.jl +++ b/test/test_gradient.jl @@ -6,7 +6,7 @@ using SpecialFunctions: erf, beta using NNlib: NNlib using Test -# Load definitions of GRADIENT_TRACERS and HESSIAN_TRACERS +# Load definitions of GRADIENT_TRACERS, GRADIENT_PATTERNS, HESSIAN_TRACERS and HESSIAN_PATTERNS include("tracers_definitions.jl") NNLIB_ACTIVATIONS_S = ( @@ -39,7 +39,8 @@ NNLIB_ACTIVATIONS_F = ( NNLIB_ACTIVATIONS = union(NNLIB_ACTIVATIONS_S, NNLIB_ACTIVATIONS_F) @testset "Jacobian Global" begin - @testset "$T" for T in GRADIENT_TRACERS + @testset "$P" for P in GRADIENT_PATTERNS + T = GradientTracer{P} method = TracerSparsityDetector(; gradient_tracer_type=T) f(x) = [x[1]^2, 2 * x[1] * x[2]^2, sin(x[3])] @@ -130,7 +131,8 @@ NNLIB_ACTIVATIONS = union(NNLIB_ACTIVATIONS_S, NNLIB_ACTIVATIONS_F) end @testset "Jacobian Local" begin - @testset "$T" for T in GRADIENT_TRACERS + @testset "$P" for P in GRADIENT_PATTERNS + T = GradientTracer{P} method = TracerLocalSparsityDetector(; gradient_tracer_type=T) # Multiplication diff --git a/test/test_hessian.jl b/test/test_hessian.jl index 9fcbed43..af0da267 100644 --- a/test/test_hessian.jl +++ b/test/test_hessian.jl @@ -1,14 +1,16 @@ using SparseConnectivityTracer -using SparseConnectivityTracer: Dual, HessianTracer, MissingPrimalError, trace_input, empty +using SparseConnectivityTracer: Dual, HessianTracer, MissingPrimalError +using SparseConnectivityTracer: trace_input, create_tracers, pattern, isshared using ADTypes: hessian_sparsity using SpecialFunctions: erf, beta using Test -# Load definitions of GRADIENT_TRACERS and HESSIAN_TRACERS +# Load definitions of GRADIENT_TRACERS, GRADIENT_PATTERNS, HESSIAN_TRACERS and HESSIAN_PATTERNS include("tracers_definitions.jl") @testset "Global Hessian" begin - @testset "$T" for T in HESSIAN_TRACERS + @testset "$P" for P in HESSIAN_PATTERNS + T = HessianTracer{P} method = TracerSparsityDetector(; hessian_tracer_type=T) @test hessian_sparsity(identity, rand(), method) ≈ [0;;] @@ -243,7 +245,8 @@ include("tracers_definitions.jl") end @testset "Local Hessian" begin - @testset "$T" for T in HESSIAN_TRACERS + @testset "$P" for P in HESSIAN_PATTERNS + T = HessianTracer{P} method = TracerLocalSparsityDetector(; hessian_tracer_type=T) f1(x) = x[1] + x[2] * x[3] + 1 / x[4] + x[2] * max(x[1], x[5]) @@ -337,8 +340,8 @@ end end end -@testset "Shared HessianTracer - same objects" begin - H = HessianTracer{BitSet,Set{Tuple{Int,Int}},true} +@testset "Shared IndexSetHessianPattern - same objects" begin + H = HessianTracer{IndexSetHessianPattern{Int,BitSet,Set{Tuple{Int,Int}},true}} function multi_output_for_shared_test(x::AbstractArray) z = ones(eltype(x), size(x)) @@ -354,7 +357,7 @@ end xt = create_tracers(H, x, eachindex(x)) yt = multi_output_for_shared_test(xt) - @test yt[1].hessian === yt[2].hessian - @test yt[1].hessian === yt[3].hessian - @test_broken yt[1].hessian === yt[4].hessian + @test pattern(yt[1]).hessian === pattern(yt[2]).hessian + @test pattern(yt[1]).hessian === pattern(yt[3]).hessian + @test_broken pattern(yt[1]).hessian === pattern(yt[4]).hessian end diff --git a/test/tracers_definitions.jl b/test/tracers_definitions.jl index 671f3e2b..6b49ed14 100644 --- a/test/tracers_definitions.jl +++ b/test/tracers_definitions.jl @@ -2,7 +2,7 @@ using SparseConnectivityTracer: AbstractTracer, GradientTracer, HessianTracer, D using SparseConnectivityTracer: IndexSetGradientPattern, IndexSetHessianPattern using SparseConnectivityTracer: DuplicateVector, RecursiveSet, SortedVector -VECTOR_PATTERNS = ( +GRADIENT_PATTERNS = ( IndexSetGradientPattern{Int,BitSet}, IndexSetGradientPattern{Int,Set{Int}}, IndexSetGradientPattern{Int,DuplicateVector{Int}}, @@ -10,12 +10,13 @@ VECTOR_PATTERNS = ( ) HESSIAN_PATTERNS = ( - IndexSetHessianPattern{Int,BitSet,Set{Tuple{Int,Int}}}, - IndexSetHessianPattern{Int,Set{Int},Set{Tuple{Int,Int}}}, - IndexSetHessianPattern{Int,DuplicateVector{Int},DuplicateVector{Tuple{Int,Int}}}, - IndexSetHessianPattern{Int,SortedVector{Int},SortedVector{Tuple{Int,Int}}}, + IndexSetHessianPattern{Int,BitSet,Set{Tuple{Int,Int}},true}, + IndexSetHessianPattern{Int,BitSet,Set{Tuple{Int,Int}},false}, + IndexSetHessianPattern{Int,Set{Int},Set{Tuple{Int,Int}},false}, + IndexSetHessianPattern{Int,DuplicateVector{Int},DuplicateVector{Tuple{Int,Int}},false}, + IndexSetHessianPattern{Int,SortedVector{Int},SortedVector{Tuple{Int,Int}},false}, # TODO: test on RecursiveSet ) -GRADIENT_TRACERS = (GradientTracer{P} for P in VECTOR_PATTERNS) +GRADIENT_TRACERS = (GradientTracer{P} for P in GRADIENT_PATTERNS) HESSIAN_TRACERS = (HessianTracer{P} for P in HESSIAN_PATTERNS) From b640c6f5df30e69ca441171c7f3917f04f9eb5b6 Mon Sep 17 00:00:00 2001 From: adrhill Date: Thu, 27 Jun 2024 20:23:00 +0200 Subject: [PATCH 10/20] Fix docstrings --- src/patterns.jl | 8 +++++--- src/tracers.jl | 5 ----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/patterns.jl b/src/patterns.jl index afa7b38b..8d0c0c0c 100644 --- a/src/patterns.jl +++ b/src/patterns.jl @@ -9,8 +9,7 @@ AbstractPattern ├── AbstractGradientPattern: used in GradientTracer │ └── IndexSetGradientPattern └── AbstractHessianPattern: used in HessianTracer - ├── IndexSetHessianPattern - └── SharedIndexSetHessianPattern + └── IndexSetHessianPattern ``` """ abstract type AbstractPattern end @@ -141,8 +140,11 @@ Hessian sparsity pattern represented by: * an `AbstractSet` of index tuples ``(i,j)`` of non-zero values representing second-order sparsity ## Fields - $(TYPEDFIELDS) + +## Internals + +The last type parameter `shared` is a `Bool` indicating whether the `hessian` field of this object should be shared among all intermediate scalar quantities involved in a function. """ struct IndexSetHessianPattern{ I<:Integer,SG<:AbstractSet{I},SH<:AbstractSet{Tuple{I,I}},mutating diff --git a/src/tracers.jl b/src/tracers.jl index 0c969672..047bf0df 100644 --- a/src/tracers.jl +++ b/src/tracers.jl @@ -53,11 +53,6 @@ $(TYPEDEF) ## Fields $(TYPEDFIELDS) - -## Internals - -The last type parameter `shared` is a `Bool` indicating whether the `hessian` field of this object should be shared among all intermediate scalar quantities involved in a function. -It is not yet part of the public API, and users should always set it to `false`. """ struct HessianTracer{P<:AbstractHessianPattern} <: AbstractTracer{P} "Sparse representation of non-zero entries in the gradient and the Hessian." From 05c91fe0d43fb53b3794f6a783cc10c681ebf198 Mon Sep 17 00:00:00 2001 From: adrhill Date: Thu, 27 Jun 2024 20:29:38 +0200 Subject: [PATCH 11/20] Fix benchmarks --- benchmark/bench_jogger.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/bench_jogger.jl b/benchmark/bench_jogger.jl index 25e153ca..2c710fbe 100644 --- a/benchmark/bench_jogger.jl +++ b/benchmark/bench_jogger.jl @@ -21,7 +21,7 @@ for S1 in SET_TYPES S2 = Set{Tuple{Int,Int}} PG = IndexSetGradientPattern{Int,S1} - PH = IndexSetHessianPattern{Int,S1,S2} + PH = IndexSetHessianPattern{Int,S1,S2,false} G = GradientTracer{PG} H = HessianTracer{PH} From bf42093d9b5fd4768fa69cc3c2229726380767e1 Mon Sep 17 00:00:00 2001 From: adrhill Date: Fri, 28 Jun 2024 16:42:20 +0200 Subject: [PATCH 12/20] Include feedback from code review --- src/overloads/hessian_tracer.jl | 2 +- src/patterns.jl | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index 83dcf3a8..9622633d 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -21,7 +21,7 @@ function hessian_tracer_1_to_1_inner( myempty(SH) elseif !is_der1_zero && is_der2_zero sh - elseif is_der1_zero && is_der2_zero + elseif is_der1_zero && !is_der2_zero union_product!(myempty(SH), sg, sg) else union_product!(copy(sh), sg, sg) diff --git a/src/patterns.jl b/src/patterns.jl index 8d0c0c0c..87532fc3 100644 --- a/src/patterns.jl +++ b/src/patterns.jl @@ -17,7 +17,7 @@ abstract type AbstractPattern end """ isshared(pattern) -Indicates whether patterns share memory (mutate). +Indicates whether patterns share memory, allowing operators to mutate their arguments. """ isshared(::P) where {P<:AbstractPattern} = isshared(P) isshared(::Type{P}) where {P<:AbstractPattern} = false @@ -55,8 +55,8 @@ product(a::AbstractSet{I}, b::AbstractSet{I}) where {I<:Integer} = Set((i, j) for i in a, j in b) function union_product!( - hessian::SH, gradient_x::SG, gradient_y::SG -) where {I<:Integer,SG<:AbstractSet{I},SH<:AbstractSet{Tuple{I,I}}} + hessian::H, gradient_x::G, gradient_y::G +) where {I<:Integer,G<:AbstractSet{I},H<:AbstractSet{Tuple{I,I}}} hxy = product(gradient_x, gradient_y) return union!(hessian, hxy) end @@ -147,21 +147,21 @@ $(TYPEDFIELDS) The last type parameter `shared` is a `Bool` indicating whether the `hessian` field of this object should be shared among all intermediate scalar quantities involved in a function. """ struct IndexSetHessianPattern{ - I<:Integer,SG<:AbstractSet{I},SH<:AbstractSet{Tuple{I,I}},mutating + I<:Integer,G<:AbstractSet{I},H<:AbstractSet{Tuple{I,I}},shared } <: AbstractHessianPattern - gradient::SG - hessian::SH + gradient::G + hessian::H end -isshared(::Type{IndexSetHessianPattern{I,SG,SH,true}}) where {I,SG,SH} = true +isshared(::Type{IndexSetHessianPattern{I,G,H,true}}) where {I,G,H} = true -function myempty(::Type{P}) where {I,SG,SH,M,P<:IndexSetHessianPattern{I,SG,SH,M}} - return P(myempty(SG), myempty(SH)) +function myempty(::Type{P}) where {I,G,H,S,P<:IndexSetHessianPattern{I,G,H,S}} + return P(myempty(G), myempty(H)) end function create_patterns( ::Type{P}, xs, is -) where {I,SG,SH,M,P<:IndexSetHessianPattern{I,SG,SH,M}} - gradients = seed.(SG, is) - hessian = myempty(SH) +) where {I,G,H,S,P<:IndexSetHessianPattern{I,G,H,S}} + gradients = seed.(G, is) + hessian = myempty(H) return P.(gradients, Ref(hessian)) end From bb80539e7b035ec14abc52bb59b81ac2f015c72a Mon Sep 17 00:00:00 2001 From: adrhill Date: Fri, 28 Jun 2024 17:19:10 +0200 Subject: [PATCH 13/20] Add notes and test for `hessian` field sharing --- src/overloads/hessian_tracer.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index 9622633d..865403b9 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -29,6 +29,7 @@ function hessian_tracer_1_to_1_inner( return P(sg_out, sh_out) # return pattern end +# NOTE: mutates its argument p and should arguably be called `hessian_tracer_1_to_1_inner!` function hessian_tracer_1_to_1_inner( p::P, is_der1_zero::Bool, is_der2_zero::Bool ) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,true}} @@ -124,6 +125,7 @@ function hessian_tracer_2_to_1_inner( return P(sg_out, sh_out) # return pattern end +# NOTE: mutates arguments px and py and should arguably be called `hessian_tracer_1_to_1_inner!` function hessian_tracer_2_to_1_inner( px::P, py::P, @@ -135,6 +137,8 @@ function hessian_tracer_2_to_1_inner( ) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,true}} sgx, shx = gradient(px), hessian(px) sgy, shy = gradient(py), hessian(py) + shx !== shy && error("Expected shared Hessians, got $shx, $shy.") + sg_out = gradient_tracer_2_to_1_inner(sgx, sgy, is_der1_arg1_zero, is_der1_arg2_zero) sh_out = union!(shx, shy) !is_der2_arg1_zero && union_product!(sh_out, sgx, sgx) # product alpha From 6e48a2b59f858423b48b64cf511c3a8dcdaad530 Mon Sep 17 00:00:00 2001 From: adrhill Date: Fri, 28 Jun 2024 17:29:46 +0200 Subject: [PATCH 14/20] Clarify connection between mutation and sharing --- src/patterns.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/patterns.jl b/src/patterns.jl index 87532fc3..a7686762 100644 --- a/src/patterns.jl +++ b/src/patterns.jl @@ -17,7 +17,11 @@ abstract type AbstractPattern end """ isshared(pattern) -Indicates whether patterns share memory, allowing operators to mutate their arguments. +Indicates whether patterns share memory and allow operators to mutate their `AbstractTracer` arguments. +If `false`, operators are prohibited from mutating `AbstractTracer` arguments. + +## Note +In practice, this memory sharing is limited to second-order information in `AbstractHessianPattern`. """ isshared(::P) where {P<:AbstractPattern} = isshared(P) isshared(::Type{P}) where {P<:AbstractPattern} = false @@ -162,6 +166,8 @@ function create_patterns( ) where {I,G,H,S,P<:IndexSetHessianPattern{I,G,H,S}} gradients = seed.(G, is) hessian = myempty(H) + # Even if `shared=false`, sharing a single reference to `hessian` is allowed upon initialization, + # since mutation is prohibited when `isshared` is false. return P.(gradients, Ref(hessian)) end From c5e4f51af8c710804cfc63d36fbdcb176772d4ae Mon Sep 17 00:00:00 2001 From: adrhill Date: Fri, 28 Jun 2024 17:32:56 +0200 Subject: [PATCH 15/20] More clarifications --- src/patterns.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/patterns.jl b/src/patterns.jl index a7686762..3a305e47 100644 --- a/src/patterns.jl +++ b/src/patterns.jl @@ -17,11 +17,12 @@ abstract type AbstractPattern end """ isshared(pattern) -Indicates whether patterns share memory and allow operators to mutate their `AbstractTracer` arguments. -If `false`, operators are prohibited from mutating `AbstractTracer` arguments. +Indicates whether patterns **always** share memory and whether operators are **allowed** to mutate their `AbstractTracer` arguments. + +If `false`, patterns **can** share memory and operators are **prohibited** from mutating `AbstractTracer` arguments. ## Note -In practice, this memory sharing is limited to second-order information in `AbstractHessianPattern`. +In practice, memory sharing is limited to second-order information in `AbstractHessianPattern`. """ isshared(::P) where {P<:AbstractPattern} = isshared(P) isshared(::Type{P}) where {P<:AbstractPattern} = false From 4ba5613ea848f6c6c4e402f89aa72beb9f6cad19 Mon Sep 17 00:00:00 2001 From: adrhill Date: Fri, 28 Jun 2024 17:51:47 +0200 Subject: [PATCH 16/20] More comments --- src/overloads/hessian_tracer.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index 865403b9..bba1d274 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -23,19 +23,20 @@ function hessian_tracer_1_to_1_inner( sh elseif is_der1_zero && !is_der2_zero union_product!(myempty(SH), sg, sg) - else + else # !is_der1_zero && !is_der2_zero union_product!(copy(sh), sg, sg) end return P(sg_out, sh_out) # return pattern end -# NOTE: mutates its argument p and should arguably be called `hessian_tracer_1_to_1_inner!` +# NOTE: mutates argument p and should arguably be called `hessian_tracer_1_to_1_inner!` function hessian_tracer_1_to_1_inner( p::P, is_der1_zero::Bool, is_der2_zero::Bool ) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,true}} sg = gradient(p) sh = hessian(p) sg_out = gradient_tracer_1_to_1_inner(sg, is_der1_zero) + # shared Hessian patterns can't remove second-order information, only add to it. sh_out = if is_der2_zero sh else From ec185ef410af7fbeb1281408025dbfa4805668cf Mon Sep 17 00:00:00 2001 From: adrhill Date: Fri, 26 Jul 2024 17:08:41 +0200 Subject: [PATCH 17/20] Skip redundant union --- src/overloads/hessian_tracer.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index bba1d274..b98c8398 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -138,10 +138,11 @@ function hessian_tracer_2_to_1_inner( ) where {I,SG,SH,P<:IndexSetHessianPattern{I,SG,SH,true}} sgx, shx = gradient(px), hessian(px) sgy, shy = gradient(py), hessian(py) - shx !== shy && error("Expected shared Hessians, got $shx, $shy.") + shx !== shy && error("Expected shared Hessians, got $shx, $shy.") + sh_out = shx # union of shx and shy can be skipped since they are the same object sg_out = gradient_tracer_2_to_1_inner(sgx, sgy, is_der1_arg1_zero, is_der1_arg2_zero) - sh_out = union!(shx, shy) + !is_der2_arg1_zero && union_product!(sh_out, sgx, sgx) # product alpha !is_der2_arg2_zero && union_product!(sh_out, sgy, sgy) # product beta !is_der_cross_zero && union_product!(sh_out, sgx, sgy) # cross product 1 From 10eb7a56e6bab920d9b1076e150431c3cb14ff4c Mon Sep 17 00:00:00 2001 From: adrhill Date: Fri, 26 Jul 2024 17:17:25 +0200 Subject: [PATCH 18/20] Benchmark shared global Hessian-tracing --- benchmark/bench_jogger.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/benchmark/bench_jogger.jl b/benchmark/bench_jogger.jl index 2c710fbe..36831f7f 100644 --- a/benchmark/bench_jogger.jl +++ b/benchmark/bench_jogger.jl @@ -20,9 +20,10 @@ suite["OptimizationProblems"] = optbench([:britgas]) for S1 in SET_TYPES S2 = Set{Tuple{Int,Int}} + # Non-shared tracers + shared = false PG = IndexSetGradientPattern{Int,S1} - PH = IndexSetHessianPattern{Int,S1,S2,false} - + PH = IndexSetHessianPattern{Int,S1,S2,shared} G = GradientTracer{PG} H = HessianTracer{PH} @@ -34,4 +35,15 @@ for S1 in SET_TYPES suite["Hessian"]["Local"][(nameof(S1), nameof(S2))] = hessbench( TracerLocalSparsityDetector(G, H) ) + + # Shared tracers + shared = true + PG = IndexSetGradientPattern{Int,S1} + PH = IndexSetHessianPattern{Int,S1,S2,shared} + G = GradientTracer{PG} + H = HessianTracer{PH} + + suite["Hessian"]["Global (shared)"][(nameof(S1), nameof(S2))] = hessbench( + TracerSparsityDetector(G, H) + ) end From a15463aeb973bc5ea6005ab45291c69b23ef69f2 Mon Sep 17 00:00:00 2001 From: adrhill Date: Tue, 30 Jul 2024 22:00:47 +0200 Subject: [PATCH 19/20] Add missing `is_der1_zero && is_der2_zero` test --- test/test_hessian.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_hessian.jl b/test/test_hessian.jl index af0da267..d0ba2818 100644 --- a/test/test_hessian.jl +++ b/test/test_hessian.jl @@ -20,6 +20,7 @@ include("tracers_definitions.jl") @test hessian_sparsity(x -> x * 1, rand(), method) ≈ [0;;] # Code coverage + @test hessian_sparsity(sign, 1, method) ≈ [0;;] @test hessian_sparsity(typemax, 1, method) ≈ [0;;] @test hessian_sparsity(x -> x^(2//3), 1, method) ≈ [1;;] @test hessian_sparsity(x -> (2//3)^x, 1, method) ≈ [1;;] @@ -324,6 +325,7 @@ end end # Code coverage + @test hessian_sparsity(sign, 1, method) ≈ [0;;] @test hessian_sparsity(typemax, 1, method) ≈ [0;;] @test hessian_sparsity(x -> x^(2//3), 1, method) ≈ [1;;] @test hessian_sparsity(x -> (2//3)^x, 1, method) ≈ [1;;] From df501838bfa3a32b08ef460d88d2d7f8bc28e0a4 Mon Sep 17 00:00:00 2001 From: adrhill Date: Tue, 30 Jul 2024 22:01:07 +0200 Subject: [PATCH 20/20] Add comment for `is_der1_zero && !is_der2_zero` --- src/overloads/hessian_tracer.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/overloads/hessian_tracer.jl b/src/overloads/hessian_tracer.jl index b98c8398..2e22bb08 100644 --- a/src/overloads/hessian_tracer.jl +++ b/src/overloads/hessian_tracer.jl @@ -22,6 +22,9 @@ function hessian_tracer_1_to_1_inner( elseif !is_der1_zero && is_der2_zero sh elseif is_der1_zero && !is_der2_zero + # TODO: this branch of the code currently isn't tested. + # Covering it would require a scalar 1-to-1 function with local overloads, + # such that ∂f/∂x == 0 and ∂²f/∂x² != 0. union_product!(myempty(SH), sg, sg) else # !is_der1_zero && !is_der2_zero union_product!(copy(sh), sg, sg)