Commit

Merge pull request #25 from Farhad-phd/main
#15 and #4 are addressed here
farhadrclass authored Nov 23, 2023
2 parents c91b63c + 716304d commit ef8af7b
Showing 5 changed files with 164 additions and 34 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -22,4 +22,4 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["LinearAlgebra", "CUDA", "Test"]
test = ["LinearAlgebra", "CUDA", "Test"]
14 changes: 7 additions & 7 deletions src/FluxNLPModels.jl
@@ -5,12 +5,12 @@ using Flux: onehotbatch, onecold
export AbstractFluxNLPModel, FluxNLPModel
export reset_minibatch_train!, reset_minibatch_test!
export minibatch_next_train!, minibatch_next_test!
export accuracy, set_vars!, local_loss
export accuracy, set_vars!, local_loss, update_type!

abstract type AbstractFluxNLPModel{T, S} <: AbstractNLPModel{T, S} end

"""
FluxNLPModel{T, S, C <: Flux.Chain} <: AbstractNLPModel{T, S}
FluxNLPModel{T, S, C} <: AbstractNLPModel{T, S}
Data structure that provides the interface between neural networks defined with [Flux.jl](https://fluxml.ai/) and [NLPModels](https://github.com/JuliaSmoothOptimizers/NLPModels.jl).
A FluxNLPModel has fields
@@ -27,9 +27,9 @@ A FluxNLPModel has fields
- `current_minibatch_test` is the current test minibatch; it is not used in practice;
- `w` is the vector of weights/variables;
"""
mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPModel{T, S}
mutable struct FluxNLPModel{T, S, F <: Function} <: AbstractFluxNLPModel{T, S}
meta::NLPModelMeta{T, S}
chain::C
chain
counters::Counters
loss_f::F
size_minibatch::Int
@@ -40,7 +40,7 @@ mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPM
rebuild # used to rebuild the chain from the flat weight vector
current_training_minibatch_status
current_test_minibatch_status
w::S
w
end

"""
@@ -52,14 +52,14 @@ The other data required are: an iterator over the training dataset `data_train`,
Suppose `(xtrn,ytrn) = Fluxnlp.data_train`
"""
function FluxNLPModel(
chain_ANN::T,
chain_ANN,
data_train,
data_test;
current_training_minibatch = [],
current_test_minibatch = [],
size_minibatch::Int = 100,
loss_f::F = Flux.mse, #Flux.crossentropy,
) where {T <: Chain, F <: Function}
) where {F <: Function}
x0, rebuild = Flux.destructure(chain_ANN)
n = length(x0)
meta = NLPModelMeta(n, x0 = x0)
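For context, here is a minimal usage sketch of the updated, type-flexible constructor. The chain, data shapes, and loss below are illustrative assumptions mirroring the test setup further down; they are not part of this diff.

using Flux, FluxNLPModels

chain = Chain(Dense(28 * 28, 32, relu), Dense(32, 10))   # any Flux chain
x = rand(Float32, 28 * 28, 100)                          # dummy flattened images
y = Flux.onehotbatch(rand(0:9, 100), 0:9)                # dummy one-hot labels
loader = Flux.DataLoader((x, y), batchsize = 10)

nlp = FluxNLPModel(chain, loader, loader; loss_f = Flux.logitcrossentropy)
w0 = copy(nlp.w)   # flat weight vector produced by Flux.destructure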
61 changes: 42 additions & 19 deletions src/FluxNLPModels_methods.jl
@@ -1,20 +1,25 @@
"""
f = obj(nlp, w)
Evaluate `f(w)`, the objective function of `nlp` at `w`.
Evaluate the objective function `f(w)` of the nonlinear programming (NLP) problem at the point `w`.
If the precision of `w` differs from the precision expected by `nlp`, the type of `nlp.w` is updated to match the precision of `w`.
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows the weights to be supplied in a precision different from the model's.
# Output
- `f_w`: the value of the objective function at `w`.
"""
function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{T}) where {T, S}
increment!(nlp, :neval_obj)
set_vars!(nlp, w)
function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, S, V}
x, y = nlp.current_training_minibatch

eltype(nlp.w) == V || update_type!(nlp, w) #Check if the type has changed
if eltype(x) != V
x = V.(x)
end

set_vars!(nlp, w)
increment!(nlp, :neval_obj)
return nlp.loss_f(nlp.chain(x), y)
end
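A hedged sketch of what the precision handling above enables, continuing the illustrative `nlp` from the constructor example (the return types assume the loss preserves the element type of its inputs):

fw32 = obj(nlp, nlp.w)      # Float32 weights -> Float32 objective
w16  = Float16.(nlp.w)
fw16 = obj(nlp, w16)        # update_type! rebuilds the chain in Float16 first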

@@ -25,21 +30,30 @@ Evaluate `∇f(w)`, the gradient of the objective function at `w` in place.
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables;
- `g::AbstractVector{T}`: the gradient vector.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows the weights to be supplied in a precision different from the model's.
- `g::AbstractVector{U}`: the gradient vector.
# Output
- `g`: the gradient at point `w`.
"""
function NLPModels.grad!(
nlp::AbstractFluxNLPModel{T, S},
w::AbstractVector{T},
g::AbstractVector{T},
) where {T, S}
w::AbstractVector{V},
g::AbstractVector{U},
) where {T, S, V, U}
@lencheck nlp.meta.nvar w g
increment!(nlp, :neval_grad)
x, y = nlp.current_training_minibatch

if (eltype(nlp.w) != V) # the precision of w differs from the model's; convert
update_type!(nlp, w)
g = V.(g)
if eltype(x) != V
x = V.(x)
end
end

increment!(nlp, :neval_grad)
g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]
return g
end
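For illustration, an in-place gradient evaluation at a promoted precision, under the same assumptions as the sketches above (capturing the return value, as the tests below do):

w64 = Float64.(nlp.w)
g64 = similar(w64)
g64 = grad!(nlp, w64, g64)   # chain and minibatch are converted to Float64 first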
@@ -51,24 +65,33 @@ Evaluate both `f(w)`, the objective function of `nlp` at `w`, and `∇f(w)`, the
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables;
- `g::AbstractVector{T}`: the gradient vector.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows the weights to be supplied in a precision different from the model's.
- `g::AbstractVector{U}`: the gradient vector.
# Output
- `f_w`, `g`: the value of the objective function and the gradient at the point `w`.
"""
function NLPModels.objgrad!(
nlp::AbstractFluxNLPModel{T, S},
w::AbstractVector{T},
g::AbstractVector{T},
) where {T, S}
w::AbstractVector{V},
g::AbstractVector{U},
) where {T, S, V, U}
@lencheck nlp.meta.nvar w g
x, y = nlp.current_training_minibatch

if (eltype(nlp.w) != V) # the precision of w differs from the model's; convert
update_type!(nlp, w)
g = V.(g)
if eltype(x) != V
x = V.(x)
end
end

increment!(nlp, :neval_obj)
increment!(nlp, :neval_grad)
set_vars!(nlp, w)

x, y = nlp.current_training_minibatch
f_w = nlp.loss_f(nlp.chain(x), y)
g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]

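And the combined call, reusing the illustrative buffers from the previous sketch:

fw64, g64 = objgrad!(nlp, w64, g64)   # objective value and gradient in one pass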
35 changes: 33 additions & 2 deletions src/utils.jl
@@ -1,14 +1,45 @@
"""
update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
Sets the variables and rebuilds the chain using the element type of the weights `w`.
"""
function update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
nlp.chain = update_type(nlp.chain, V)
nlp.w, nlp.rebuild = Flux.destructure(nlp.chain)
end

# Define a separate method for updating the type of the chain
function update_type(chain::Chain, ::Type{Float16})
return f16(chain)
end

function update_type(chain::Chain, ::Type{Float32})
return f32(chain)
end

function update_type(chain::Chain, ::Type{Float64})
return f64(chain)
end

# Throw an error for unsupported types
function update_type(chain::Chain, ::Type)
error("The package only supports Float16, Float32, and Float64")
end
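As an illustration of the dispatch above (a sketch only; `update_type` is internal, so it is qualified with the module name here):

c   = Chain(Dense(4, 2))                        # Float32 parameters by default
c16 = FluxNLPModels.update_type(c, Float16)     # equivalent to f16(c)
eltype(Flux.destructure(c16)[1])                # Float16
# FluxNLPModels.update_type(c, BigFloat) would throw: only Float16/32/64 are supported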

"""
set_vars!(model::AbstractFluxNLPModel{T,S}, new_w::AbstractVector{T}) where {T<:Number, S}
Sets the variables and rebuilds the chain.
"""
function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S}
function set_vars!(
nlp::AbstractFluxNLPModel{T, S},
new_w::AbstractVector{V},
) where {T <: Number, S, V}
nlp.w .= new_w
nlp.chain = nlp.rebuild(nlp.w)
end

function local_loss(nlp::AbstractFluxNLPModel{T, S}, x, y, w::AbstractVector{T}) where {T, S}
function local_loss(nlp::AbstractFluxNLPModel{T, S}, x, y, w::AbstractVector{V}) where {T, S, V}
# increment!(nlp, :neval_obj) #TODO not sure
nlp.chain = nlp.rebuild(w)
return nlp.loss_f(nlp.chain(x), y)
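For clarity, a sketch of how the methods above use `local_loss`: the flat weight vector is passed through `nlp.rebuild` so that Flux's `gradient` can differentiate the loss with respect to the weights (illustrative, reusing the `nlp` from the earlier sketches):

x, y = nlp.current_training_minibatch
g = gradient(w -> local_loss(nlp, x, y, w), nlp.w)[1]   # the same pattern grad! uses internally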
86 changes: 81 additions & 5 deletions test/runtests.jl
@@ -9,13 +9,13 @@ using MLDatasets
using LinearAlgebra

# Helper functions
function getdata(args)
function getdata(args; T = Float32)
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # download datasets without having to manually confirm the download

# Loading Dataset

xtrain, ytrain = MLDatasets.MNIST(Tx = Float32, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = Float32, split = :test)[:]
xtrain, ytrain = MLDatasets.MNIST(Tx = T, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = T, split = :test)[:]

# Reshape Data in order to flatten each image into a linear array
xtrain = Flux.flatten(xtrain)
@@ -32,7 +32,7 @@ function getdata(args)
end

function build_model(; imgsize = (28, 28, 1), nclasses = 10)
return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses))
return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses), softmax)
end

@kwdef mutable struct Args
@@ -67,7 +67,6 @@ device = cpu

@test DNNLPModel.w == old_w
@test obj_x1 == obj_x1_2
println(norm(grad_x1 - grad_x1_2))
@test norm(grad_x1 - grad_x1_2) ≈ 0.0

@test x1 == DNNLPModel.w
@@ -113,3 +112,80 @@ end
@test minibatch_next_test!(nlp) # should return true
@test minibatch_next_test!(nlp) # should return true
end

@testset "Multiple precision test" begin
# Create test and train dataloaders
train_data, test_data = getdata(args)

# Construct model in Float32
DN = build_model() |> device
nlp = FluxNLPModel(DN, train_data, test_data)

x1 = copy(nlp.w)
obj_x1 = obj(nlp, x1)
grad_x1 = NLPModels.grad(nlp, x1)
@test typeof(obj_x1) == Float32
@test eltype(grad_x1) == Float32

# change to Float16
x2 = Float16.(x1)
obj_x2 = obj(nlp, x2)
grad_x2 = NLPModels.grad(nlp, x2)
# test grad! again after changing the type
grad!(nlp, x2, grad_x2)
@test typeof(obj_x2) == Float16
@test eltype(grad_x2) == Float16

# change to Float64
x3 = Float64.(x1)
obj_x3 = obj(nlp, x3)
grad_x3 = NLPModels.grad(nlp, x3)
@test typeof(obj_x3) == Float64
@test eltype(grad_x3) == Float64

# change to Float16 with objgrad!
x3_2 = Float16.(x1)
grad_x3_2 = similar(x3_2)
obj_x3_2, grad_x3_2 = NLPModels.objgrad!(nlp, x3_2, grad_x3_2)
@test typeof(obj_x3_2) == Float16
@test eltype(grad_x3_2) == Float16

# change to Float64 with grad!
x3_3 = Float64.(x1)
grad_x3_3 = similar(x3_3)
grad_x3_3 = grad!(nlp, x3_3, grad_x3_3)
@test eltype(grad_x3_3) == Float64

# Construct model in Float16
train_data_f16, test_data_f16 = getdata(args, T = Float16)
DN_f16 = build_model() |> f16
nlp_f16 = FluxNLPModel(DN_f16, train_data_f16, test_data_f16)

x4 = copy(nlp_f16.w)
obj_x4 = obj(nlp_f16, x4)
grad_x4 = NLPModels.grad(nlp_f16, x4)

@test typeof(obj_x4) == Float16
@test eltype(grad_x4) == Float16

# change to Float32 from Float16
x5 = Float32.(x4)
obj_x5 = obj(nlp_f16, x5)
grad_x5 = NLPModels.grad(nlp_f16, x5)
@test typeof(obj_x5) == Float32
@test eltype(grad_x5) == Float32

# change to Float64 from Float16
x6 = Float64.(x4)
obj_x6 = obj(nlp_f16, x6)
grad_x6 = NLPModels.grad(nlp_f16, x6)
@test typeof(obj_x6) == Float64
@test eltype(grad_x6) == Float64

# change to BigFloat from Float32
# expected to throw an error
# Note we do not support BigFloat in FluxNLPModels yet!
x7 = BigFloat.(x5)
@test_throws Exception obj(nlp_f16, x7)

end
