diff --git a/Project.toml b/Project.toml
index 5d8275f..83ea529 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,9 +10,11 @@ NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
+Flux = "0.13.16"
 MLDatasets = "0.7.9"
 NLPModels = "0.20.0"
-julia = "^1.9.0"
+Zygote = "0.6.49"
+julia = "^1.6.0"
 
 [extras]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
@@ -20,4 +22,4 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["LinearAlgebra", "CUDA", "Test"]
+test = ["LinearAlgebra","CUDA" ,"Test"]
diff --git a/src/FluxNLPModels.jl b/src/FluxNLPModels.jl
index bd360d4..9ffe11c 100644
--- a/src/FluxNLPModels.jl
+++ b/src/FluxNLPModels.jl
@@ -5,7 +5,7 @@ using Flux: onehotbatch, onecold, @epochs
 export AbstractFluxNLPModel, FluxNLPModel
 export reset_minibatch_train!, reset_minibatch_test!
 export minibatch_next_train!, minibatch_next_test!
-export accuracy, set_vars!, local_loss, update_type!
+export accuracy, set_vars!, local_loss
 
 abstract type AbstractFluxNLPModel{T, S} <: AbstractNLPModel{T, S} end
 
@@ -58,7 +58,7 @@ function FluxNLPModel(
   current_training_minibatch = [],
   current_test_minibatch = [],
   size_minibatch::Int = 100,
-  loss_f::F = Flux.crossentropy,#Flux.mse, #
+  loss_f::F = Flux.mse, #Flux.crossentropy,
 ) where {T <: Chain, F <: Function}
   x0, rebuild = Flux.destructure(chain_ANN)
   n = length(x0)
diff --git a/src/FluxNLPModels_methods.jl b/src/FluxNLPModels_methods.jl
index 60f9625..9d50597 100644
--- a/src/FluxNLPModels_methods.jl
+++ b/src/FluxNLPModels_methods.jl
@@ -1,28 +1,20 @@
 """
     f = obj(nlp, w)
 
-Evaluate `f(w)`, the objective function of `nlp` at `w`. if `w` and `nlp` precision different, we advance to match the the type of `w`
+Evaluate `f(w)`, the objective function of `nlp` at `w`.
 
 # Arguments
 - `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
-- `w::AbstractVector{V}`: is the vector of weights/variables. The reason for V here is to allow different precision type for weight and models
+- `w::AbstractVector{T}`: is the vector of weights/variables.
 
 # Output
 - `f_w`: the new objective function.
 
 """
-function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
-  x, y = nlp.current_training_minibatch
-
-  if (T != V) # we check if the types are the same,
-    update_type!(nlp, w)
-    if eltype(x) != V #TODO check if the user have changed the typed ?
-      x = V.(x)
-    end
-  end
-
-  set_vars!(nlp, w)
+function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{T}) where {T, S}
   increment!(nlp, :neval_obj)
+  set_vars!(nlp, w)
+  x, y = nlp.current_training_minibatch
   return nlp.loss_f(nlp.chain(x), y)
 end
 
@@ -42,21 +34,12 @@ Evaluate `∇f(w)`, the gradient of the objective function at `w` in place.
 """
 function NLPModels.grad!(
   nlp::AbstractFluxNLPModel{T, S},
-  w::AbstractVector{V},
+  w::AbstractVector{T},
   g::AbstractVector{T},
-) where {T, V, S}
+) where {T, S}
   @lencheck nlp.meta.nvar w g
-  x, y = nlp.current_training_minibatch
-
-  if (T != V) # we check if the types are the same,
-    update_type!(nlp, w)
-    g = V.(g)
-    if eltype(x) != V #TODO check if the user have changed the typed ?
-      x = V.(x)
-    end
-  end
-
   increment!(nlp, :neval_grad)
+  x, y = nlp.current_training_minibatch
   g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]
   return g
 end
@@ -77,19 +60,10 @@ Evaluate both `f(w)`, the objective function of `nlp` at `w`, and `∇f(w)`, the
 """
 function NLPModels.objgrad!(
   nlp::AbstractFluxNLPModel{T, S},
-  w::AbstractVector{V},
+  w::AbstractVector{T},
   g::AbstractVector{T},
-) where {T,V, S}
+) where {T, S}
   @lencheck nlp.meta.nvar w g
-
-  if (T != V) # we check if the types are the same,
-    update_type!(nlp, w)
-    g = V.(g)
-    if eltype(x) != V #TODO check if the user have changed the typed ?
-      x = V.(x)
-    end
-  end
-
   increment!(nlp, :neval_obj)
   increment!(nlp, :neval_grad)
   set_vars!(nlp, w)
diff --git a/src/utils.jl b/src/utils.jl
index 3979175..71ee317 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,25 +1,3 @@
-"""
-    update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T,V, S}
-
-
-Sets the vaiables and rebuild the chain to a sepecific type defined by weigths
-"""
-function update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
-  if V == Float16
-    Local_chain = f16(nlp.chain)
-  elseif V == Float64
-    Local_chain = f64(nlp.chain)
-  elseif V == Float32
-    Local_chain = f32(nlp.chain)
-  else
-    error("The package only support Float16, Float32 and Float64")
-  end
-
-  # this is same for all the cases
-  nlp.chain = Local_chain
-
-, nlp.rebuild = Flux.destructure(nlp.chain)
-end
-
 """
     set_vars!(model::AbstractFluxNLPModel{T,S}, new_w::AbstractVector{T}) where {T<:Number, S}
diff --git a/test/runtests.jl b/test/runtests.jl
index 23f8131..bd0793b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -9,13 +9,13 @@ using MLDatasets
 using LinearAlgebra
 
 # Helper functions
-function getdata(args; T = Float32)
+function getdata(args)
   ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # download datasets without having to manually confirm the download
 
   # Loading Dataset
-  xtrain, ytrain = MLDatasets.MNIST(Tx = T, split = :train)[:]
-  xtest, ytest = MLDatasets.MNIST(Tx = T, split = :test)[:]
+  xtrain, ytrain = MLDatasets.MNIST(Tx = Float32, split = :train)[:]
+  xtest, ytest = MLDatasets.MNIST(Tx = Float32, split = :test)[:]
 
   # Reshape Data in order to flatten each image into a linear array
   xtrain = Flux.flatten(xtrain)
@@ -32,7 +32,7 @@ function getdata(args; T = Float32)
 end
 
 function build_model(; imgsize = (28, 28, 1), nclasses = 10)
-  return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses), softmax)
+  return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses))
 end
 
 @kwdef mutable struct Args
@@ -46,126 +46,70 @@ args = Args() # collect options in a struct for convenience
 
 device = cpu
 
-# @testset "FluxNLPModels tests" begin
+@testset "FluxNLPModels tests" begin
 
-# # Create test and train dataloaders
-# train_data, test_data = getdata(args)
-
-# # Construct model
-# DN = build_model() |> device
-# DNNLPModel = FluxNLPModel(DN, train_data, test_data)
-
-# old_w, rebuild = Flux.destructure(DN)
-
-# x1 = copy(DNNLPModel.w)
+  # Create test and train dataloaders
+  train_data, test_data = getdata(args)
 
-# obj_x1 = obj(DNNLPModel, x1)
-# grad_x1 = NLPModels.grad(DNNLPModel, x1)
+  # Construct model
+  DN = build_model() |> device
+  DNNLPModel = FluxNLPModel(DN, train_data, test_data)
 
-# grad_x1_2 = similar(x1)
-# obj_x1_2, grad_x1_2 = NLPModels.objgrad!(DNNLPModel, x1, grad_x1_2)
+  old_w, rebuild = Flux.destructure(DN)
 
-# @test DNNLPModel.w == old_w
-# @test obj_x1 == obj_x1_2
-# # println(norm(grad_x1 - grad_x1_2))
-# @test norm(grad_x1 - grad_x1_2) ≈ 0.0
+  x1 = copy(DNNLPModel.w)
 
-# @test x1 == DNNLPModel.w
-# @test Flux.params(DNNLPModel.chain)[1][1] == x1[1]
-# @test Flux.params(DNNLPModel.chain)[1][2] == x1[2]
+  obj_x1 = obj(DNNLPModel, x1)
+  grad_x1 = NLPModels.grad(DNNLPModel, x1)
 
-# @test_throws Exception FluxNLPModel(DN, [], test_data) # if the train data is empty
-# @test_throws Exception FluxNLPModel(DN, train_data, []) # if the test data is empty
-# @test_throws Exception FluxNLPModel(DN, [], []) # if the both data is empty
+  grad_x1_2 = similar(x1)
+  obj_x1_2, grad_x1_2 = NLPModels.objgrad!(DNNLPModel, x1, grad_x1_2)
 
-# # Testing if the value of the first batch was passed it
-# DNNLPModel_2 = FluxNLPModel(
-# DN,
-# train_data,
-# test_data,
-# current_training_minibatch = first(train_data),
-# current_test_minibatch = first(test_data),
-# )
+  @test DNNLPModel.w == old_w
+  @test obj_x1 == obj_x1_2
+  println(norm(grad_x1 - grad_x1_2))
+  @test norm(grad_x1 - grad_x1_2) ≈ 0.0
 
-# #checking if we can call accuracy
-# train_acc = FluxNLPModels.accuracy(DNNLPModel_2; data_loader = train_data) # accuracy on train data
-# test_acc = FluxNLPModels.accuracy(DNNLPModel_2) # on the test data
+  @test x1 == DNNLPModel.w
+  @test Flux.params(DNNLPModel.chain)[1][1] == x1[1]
+  @test Flux.params(DNNLPModel.chain)[1][2] == x1[2]
 
-# @test train_acc >= 0.0
-# @test train_acc <= 1.0
-# end
+  @test_throws Exception FluxNLPModel(DN, [], test_data) # if the train data is empty
+  @test_throws Exception FluxNLPModel(DN, train_data, []) # if the test data is empty
+  @test_throws Exception FluxNLPModel(DN, [], []) # if the both data is empty
 
-# @testset "minibatch tests" begin
-# # Create test and train dataloaders
-# train_data, test_data = getdata(args)
+  # Testing if the value of the first batch was passed it
+  DNNLPModel_2 = FluxNLPModel(
+    DN,
+    train_data,
+    test_data,
+    current_training_minibatch = first(train_data),
+    current_test_minibatch = first(test_data),
+  )
 
-# # Construct model
-# DN = build_model() |> device
-# nlp = FluxNLPModel(DN, train_data, test_data)
-# reset_minibatch_train!(nlp)
-# @test nlp.current_training_minibatch_status === nothing
-# buffer_minibatch = deepcopy(nlp.current_training_minibatch)
-# @test minibatch_next_train!(nlp) # should return true
-# @test minibatch_next_train!(nlp) # should return true
-# @test !isequal(nlp.current_training_minibatch, buffer_minibatch)
+  #checking if we can call accuracy
+  train_acc = FluxNLPModels.accuracy(DNNLPModel_2; data_loader = train_data) # accuracy on train data
+  test_acc = FluxNLPModels.accuracy(DNNLPModel_2) # on the test data
 
-# reset_minibatch_test!(nlp)
-# @test minibatch_next_test!(nlp) # should return true
-# @test minibatch_next_test!(nlp) # should return true
-# end
+  @test train_acc >= 0.0
+  @test train_acc <= 1.0
+end
 
-@testset "Multiple precision test" begin
+@testset "minibatch tests" begin
   # Create test and train dataloaders
   train_data, test_data = getdata(args)
 
-  # Construct model in Float32
+  # Construct model
   DN = build_model() |> device
   nlp = FluxNLPModel(DN, train_data, test_data)
-
-
-  x1 = copy(nlp.w)
-  obj_x1 = obj(nlp, x1)
-  grad_x1 = NLPModels.grad(nlp, x1)
-  @test typeof(obj_x1) == Float32
-  @test eltype(grad_x1) == Float32
-
-  # change to Float16
-  x2 = Float16.(x1)
-  obj_x2 = obj(nlp, x2)
-  grad_x2 = NLPModels.grad(nlp, x2)
-  @test typeof(obj_x2) == Float16
-  @test eltype(grad_x2) == Float16
-
-  # # change to Float64
-  # x3 = Float64.(x1)
-  # obj_x3 = obj(nlp, x3)
-  # grad_x3 = NLPModels.grad(nlp, x3)
-  # @test typeof(obj_x3) == Float64
-  # @test eltype(grad_x3) == Float64
-
-  # # Construct model in Float16
-  # train_data_f16, test_data_f16 = getdata(args, T = Float16)
-  # DN_f16 = build_model() |> f16
-  # nlp_f16 = FluxNLPModel(DN_f16, train_data_f16, test_data_f16)
-
-  # x4 = copy(nlp_f16.w)
-  # obj_x4 = obj(nlp_f16, x4)
-  # grad_x4 = NLPModels.grad(nlp_f16, x4)
-
-  # @test typeof(obj_x4) == Float16
-  # @test eltype(grad_x4) == Float16
-
-  # # change to Float32 from Float16
-  # x5 = Float32.(x4)
-  # obj_x5 = obj(nlp_f16, x5)
-  # grad_x5 = NLPModels.grad(nlp_f16, x5)
-  # @test typeof(obj_x5) == Float32
-  # @test eltype(grad_x5) == Float32
-
-  # # change to Float64 from Float16
-  # x6 = Float64.(x4)
-  # obj_x6 = obj(nlp_f16, x6)
-  # grad_x6 = NLPModels.grad(nlp_f16, x6)
-  # @test typeof(obj_x6) == Float64
-  # @test eltype(grad_x6) == Float64
+  reset_minibatch_train!(nlp)
+  @test nlp.current_training_minibatch_status === nothing
+  buffer_minibatch = deepcopy(nlp.current_training_minibatch)
+  @test minibatch_next_train!(nlp) # should return true
+  @test minibatch_next_train!(nlp) # should return true
+  @test !isequal(nlp.current_training_minibatch, buffer_minibatch)
+
+  reset_minibatch_test!(nlp)
+  @test minibatch_next_test!(nlp) # should return true
+  @test minibatch_next_test!(nlp) # should return true
 end
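
For context, the API exercised by the updated test suite above can be sketched as follows. This is an illustrative usage sketch, not part of the patch: it assumes the `Args`, `getdata` and `build_model` helpers defined in `test/runtests.jl` above, and it simply mirrors the calls made in the "FluxNLPModels tests" and "minibatch tests" testsets.

using Flux, NLPModels, FluxNLPModels

args = Args()                                   # options struct from test/runtests.jl
train_data, test_data = getdata(args)           # MNIST DataLoaders in Float32
DN = build_model()                              # Chain(Dense(784, 32, relu), Dense(32, 10))
nlp = FluxNLPModel(DN, train_data, test_data)   # default loss_f is Flux.mse after this patch

w = copy(nlp.w)                          # flattened Float32 weight vector
fw = obj(nlp, w)                         # objective on the current training minibatch
gw = grad(nlp, w)                        # gradient of the same minibatch loss
fw2, gw2 = objgrad!(nlp, w, similar(w))  # fused objective/gradient, gradient in place

reset_minibatch_train!(nlp)              # rewind to the first training minibatch
minibatch_next_train!(nlp)               # advance to the next minibatch (returns true on success)
acc = FluxNLPModels.accuracy(nlp)        # classification accuracy on the test data

Note that with this change `obj`, `grad!` and `objgrad!` constrain `w` to `AbstractVector{T}`, i.e. the weight vector must have the same element type as the wrapped model; the `update_type!` conversion path, and the Float16/Float64 round-trips of the old "Multiple precision test", are removed.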