#15 and #4 are addressed here #25

Merged: 18 commits, Nov 23, 2023
2 changes: 1 addition & 1 deletion Project.toml
@@ -22,4 +22,4 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["LinearAlgebra", "CUDA", "Test"]
test = ["LinearAlgebra", "CUDA", "Test"]
14 changes: 7 additions & 7 deletions src/FluxNLPModels.jl
@@ -5,12 +5,12 @@ using Flux: onehotbatch, onecold
export AbstractFluxNLPModel, FluxNLPModel
export reset_minibatch_train!, reset_minibatch_test!
export minibatch_next_train!, minibatch_next_test!
export accuracy, set_vars!, local_loss
export accuracy, set_vars!, local_loss, update_type!

abstract type AbstractFluxNLPModel{T, S} <: AbstractNLPModel{T, S} end

"""
FluxNLPModel{T, S, C <: Flux.Chain} <: AbstractNLPModel{T, S}
FluxNLPModel{T, S, C} <: AbstractNLPModel{T, S}

Data structure that provides the interface between neural networks defined with [Flux.jl](https://fluxml.ai/) and [NLPModels](https://github.com/JuliaSmoothOptimizers/NLPModels.jl).
A FluxNLPModel has fields
@@ -27,9 +27,9 @@ A FluxNLPModel has fields
- `current_minibatch_test` is the current test minibatch; it is not used in practice;
- `w` is the vector of weights/variables;
"""
mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPModel{T, S}
mutable struct FluxNLPModel{T, S, F <: Function} <: AbstractFluxNLPModel{T, S}
meta::NLPModelMeta{T, S}
chain::C
chain
counters::Counters
loss_f::F
size_minibatch::Int
@@ -40,7 +40,7 @@ mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPM
rebuild # used to rebuild the chain from the flat vector of weights
current_training_minibatch_status
current_test_minibatch_status
w::S
w
end

"""
@@ -52,14 +52,14 @@ The other data required are: an iterator over the training dataset `data_train`,
Suppose `(xtrn,ytrn) = Fluxnlp.data_train`
"""
function FluxNLPModel(
chain_ANN::T,
chain_ANN,
data_train,
data_test;
current_training_minibatch = [],
current_test_minibatch = [],
size_minibatch::Int = 100,
loss_f::F = Flux.mse, #Flux.crossentropy,
) where {T <: Chain, F <: Function}
) where {F <: Function}
x0, rebuild = Flux.destructure(chain_ANN)
n = length(x0)
meta = NLPModelMeta(n, x0 = x0)
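For context, a minimal sketch of how the relaxed constructor could be used; the chain, data, and loss below are illustrative assumptions, not part of this diff.

```julia
using Flux, FluxNLPModels

# Any model accepted by Flux.destructure should now work, since the
# `chain_ANN::T where {T <: Chain}` restriction has been dropped.
chain = Chain(Dense(4, 8, relu), Dense(8, 2))

# Toy (features, targets) minibatches; in practice these would be DataLoaders.
data_train = [(rand(Float32, 4, 16), rand(Float32, 2, 16))]
data_test = [(rand(Float32, 4, 8), rand(Float32, 2, 8))]

nlp = FluxNLPModel(chain, data_train, data_test; loss_f = Flux.mse)
```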
61 changes: 42 additions & 19 deletions src/FluxNLPModels_methods.jl
@@ -1,20 +1,25 @@
"""
f = obj(nlp, w)

Evaluate `f(w)`, the objective function of `nlp` at `w`.

Evaluate `f(w)`, the objective function of `nlp` at `w`.
If the element type of `w` differs from the precision currently used by `nlp`, the model (`nlp.w` and `nlp.chain`) is converted to the precision of `w` first.
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables.

- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows `w` to be supplied in a different precision than the model.
# Output
- `f_w`: the value of the objective at `w`.

"""
function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{T}) where {T, S}
increment!(nlp, :neval_obj)
set_vars!(nlp, w)
function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, S, V}
x, y = nlp.current_training_minibatch

eltype(nlp.w) == V || update_type!(nlp, w) #Check if the type has changed
if eltype(x) != V
x = V.(x)
end

set_vars!(nlp, w)
increment!(nlp, :neval_obj)
return nlp.loss_f(nlp.chain(x), y)
end
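As an illustration of the new behaviour, a sketch of calling `obj` at two precisions (assuming an `nlp` built as in the sketch above; variable names are illustrative):

```julia
using NLPModels

w32 = copy(nlp.w)     # weights in the model's current precision (Float32 here)
f32 = obj(nlp, w32)   # objective evaluated in Float32

w16 = Float16.(w32)   # request a lower-precision evaluation
f16 = obj(nlp, w16)   # update_type! converts nlp.chain and nlp.w to Float16 first
```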

@@ -25,21 +30,30 @@ Evaluate `∇f(w)`, the gradient of the objective function at `w` in place.

# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables;
- `g::AbstractVector{T}`: the gradient vector.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows `w` to be supplied in a different precision than the model.
- `g::AbstractVector{U}`: the gradient vector.

# Output
- `g`: the gradient at point `w`.

"""
function NLPModels.grad!(
nlp::AbstractFluxNLPModel{T, S},
w::AbstractVector{T},
g::AbstractVector{T},
) where {T, S}
w::AbstractVector{V},
g::AbstractVector{U},
) where {T, S, V, U}
@lencheck nlp.meta.nvar w g
increment!(nlp, :neval_grad)
x, y = nlp.current_training_minibatch

if (eltype(nlp.w) != V) # convert the model if the precision of w has changed
update_type!(nlp, w)
g = V.(g)
if eltype(x) != V
x = V.(x)
end
end

increment!(nlp, :neval_grad)
g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]
return g
end
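Continuing the sketch, `grad!` with weights in a new precision (illustrative):

```julia
g16 = similar(w16)     # preallocated gradient buffer, same precision as w16
grad!(nlp, w16, g16)   # converts the model to Float16 if needed, then fills the gradient
eltype(g16)            # Float16
```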
@@ -51,24 +65,33 @@ Evaluate both `f(w)`, the objective function of `nlp` at `w`, and `∇f(w)`, the

# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables;
- `g::AbstractVector{T}`: the gradient vector.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows `w` to be supplied in a different precision than the model.
- `g::AbstractVector{U}`: the gradient vector.

# Output
- `f_w`, `g`: the objective value and the gradient at the point `w`.

"""
function NLPModels.objgrad!(
nlp::AbstractFluxNLPModel{T, S},
w::AbstractVector{T},
g::AbstractVector{T},
) where {T, S}
w::AbstractVector{V},
g::AbstractVector{U},
) where {T, S, V, U}
@lencheck nlp.meta.nvar w g
x, y = nlp.current_training_minibatch

if (eltype(nlp.w) != V) # convert the model if the precision of w has changed
update_type!(nlp, w)
g = V.(g)
if eltype(x) != V
x = V.(x)
end
end

increment!(nlp, :neval_obj)
increment!(nlp, :neval_grad)
set_vars!(nlp, w)

x, y = nlp.current_training_minibatch
f_w = nlp.loss_f(nlp.chain(x), y)
g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]

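And `objgrad!` in the same setting (illustrative):

```julia
w64 = Float64.(nlp.w)
g64 = similar(w64)
f64, g64 = objgrad!(nlp, w64, g64)   # one call: objective value and gradient at w64
```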
35 changes: 33 additions & 2 deletions src/utils.jl
@@ -1,14 +1,45 @@
"""
update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}

Sets the variables and rebuilds the chain using the element type of the given weights `w`.
"""
function update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
nlp.chain = update_type(nlp.chain, V)
nlp.w, nlp.rebuild = Flux.destructure(nlp.chain)
end

# Define a separate method for updating the type of the chain
function update_type(chain::Chain, ::Type{Float16})
return f16(chain)
end

function update_type(chain::Chain, ::Type{Float32})
return f32(chain)
end

function update_type(chain::Chain, ::Type{Float64})
return f64(chain)
end

# Throw an error for unsupported types
function update_type(chain::Chain, ::Type)
error("The package only supports Float16, Float32, and Float64")
end
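A sketch of the dispatch above (illustrative; note that only `update_type!` is exported, so `update_type` is qualified here):

```julia
chain16 = FluxNLPModels.update_type(nlp.chain, Float16)   # delegates to Flux.f16
chain64 = FluxNLPModels.update_type(nlp.chain, Float64)   # delegates to Flux.f64
# FluxNLPModels.update_type(nlp.chain, BigFloat)          # would throw: only Float16/32/64 are supported

update_type!(nlp, Float16.(nlp.w))   # converts the chain, then re-destructures it into nlp.w
```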

"""
set_vars!(model::AbstractFluxNLPModel{T,S}, new_w::AbstractVector{T}) where {T<:Number, S}

Sets the variables and rebuilds the chain.
"""
function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S}
function set_vars!(
nlp::AbstractFluxNLPModel{T, S},
new_w::AbstractVector{V},
) where {T <: Number, S, V}
nlp.w .= new_w
nlp.chain = nlp.rebuild(nlp.w)
end

function local_loss(nlp::AbstractFluxNLPModel{T, S}, x, y, w::AbstractVector{T}) where {T, S}
function local_loss(nlp::AbstractFluxNLPModel{T, S}, x, y, w::AbstractVector{V}) where {T, S, V}
# increment!(nlp, :neval_obj) #TODO not sure
nlp.chain = nlp.rebuild(w)
return nlp.loss_f(nlp.chain(x), y)
Expand Down
86 changes: 81 additions & 5 deletions test/runtests.jl
@@ -9,13 +9,13 @@ using MLDatasets
using LinearAlgebra

# Helper functions
function getdata(args)
function getdata(args; T = Float32)
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # download datasets without having to manually confirm the download

# Loading Dataset

xtrain, ytrain = MLDatasets.MNIST(Tx = Float32, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = Float32, split = :test)[:]
xtrain, ytrain = MLDatasets.MNIST(Tx = T, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = T, split = :test)[:]

# Reshape Data in order to flatten each image into a linear array
xtrain = Flux.flatten(xtrain)
@@ -32,7 +32,7 @@ end
end

function build_model(; imgsize = (28, 28, 1), nclasses = 10)
return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses))
return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses), softmax)
end

@kwdef mutable struct Args
@@ -67,7 +67,6 @@ device = cpu

@test DNNLPModel.w == old_w
@test obj_x1 == obj_x1_2
println(norm(grad_x1 - grad_x1_2))
@test norm(grad_x1 - grad_x1_2) ≈ 0.0

@test x1 == DNNLPModel.w
@@ -113,3 +112,80 @@ end
@test minibatch_next_test!(nlp) # should return true
@test minibatch_next_test!(nlp) # should return true
end

@testset "Multiple precision test" begin
# Create test and train dataloaders
train_data, test_data = getdata(args)

# Construct model in Float32
DN = build_model() |> device
nlp = FluxNLPModel(DN, train_data, test_data)

x1 = copy(nlp.w)
obj_x1 = obj(nlp, x1)
grad_x1 = NLPModels.grad(nlp, x1)
@test typeof(obj_x1) == Float32
@test eltype(grad_x1) == Float32

# change to Float16
x2 = Float16.(x1)
obj_x2 = obj(nlp, x2)
grad_x2 = NLPModels.grad(nlp, x2)
# test grad again after changing the type, using the grad! method
grad!(nlp, x2, grad_x2)
@test typeof(obj_x2) == Float16
@test eltype(grad_x2) == Float16

# change to Float64
x3 = Float64.(x1)
obj_x3 = obj(nlp, x3)
grad_x3 = NLPModels.grad(nlp, x3)
@test typeof(obj_x3) == Float64
@test eltype(grad_x3) == Float64

# change to Float16 with objgrad!
x3_2 = Float16.(x1)
grad_x3_2 = similar(x3_2)
obj_x3_2, grad_x3_2 = NLPModels.objgrad!(nlp, x3_2, grad_x3_2)
@test typeof(obj_x3_2) == Float16
@test eltype(grad_x3_2) == Float16

# change to Float64 with grad!
x3_3 = Float64.(x1)
grad_x3_3 = similar(x3_3)
grad_x3_3 = grad!(nlp, x3_3, grad_x3_3)
@test eltype(grad_x3_3) == Float64

# Construct model in Float16
train_data_f16, test_data_f16 = getdata(args, T = Float16)
DN_f16 = build_model() |> f16
nlp_f16 = FluxNLPModel(DN_f16, train_data_f16, test_data_f16)

x4 = copy(nlp_f16.w)
obj_x4 = obj(nlp_f16, x4)
grad_x4 = NLPModels.grad(nlp_f16, x4)

@test typeof(obj_x4) == Float16
@test eltype(grad_x4) == Float16

# change to Float32 from Float16
x5 = Float32.(x4)
obj_x5 = obj(nlp_f16, x5)
grad_x5 = NLPModels.grad(nlp_f16, x5)
@test typeof(obj_x5) == Float32
@test eltype(grad_x5) == Float32

# change to Float64 from Float16
x6 = Float64.(x4)
obj_x6 = obj(nlp_f16, x6)
grad_x6 = NLPModels.grad(nlp_f16, x6)
@test typeof(obj_x6) == Float64
@test eltype(grad_x6) == Float64

# change to BigFloat from Float32
# expected to throw an error
# Note we do not support BigFloat in FluxNLPModels yet!
x7 = BigFloat.(x5)
@test_throws Exception obj(nlp_f16, x7)

end