Commit

Merge pull request #25 from Farhad-phd/main
#15 and #4 are addressed here
farhadrclass authored Nov 23, 2023
2 parents c91b63c + 716304d commit ef8af7b
Showing 5 changed files with 164 additions and 34 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -22,4 +22,4 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["LinearAlgebra", "CUDA", "Test"]
test = ["LinearAlgebra", "CUDA", "Test"]
14 changes: 7 additions & 7 deletions src/FluxNLPModels.jl
@@ -5,12 +5,12 @@ using Flux: onehotbatch, onecold
export AbstractFluxNLPModel, FluxNLPModel
export reset_minibatch_train!, reset_minibatch_test!
export minibatch_next_train!, minibatch_next_test!
export accuracy, set_vars!, local_loss
export accuracy, set_vars!, local_loss, update_type!

abstract type AbstractFluxNLPModel{T, S} <: AbstractNLPModel{T, S} end

"""
FluxNLPModel{T, S, C <: Flux.Chain} <: AbstractNLPModel{T, S}
FluxNLPModel{T, S, C} <: AbstractNLPModel{T, S}
Data structure that provides the interface between neural networks defined with [Flux.jl](https://fluxml.ai/) and [NLPModels](https://github.com/JuliaSmoothOptimizers/NLPModels.jl).
A FluxNLPModel has fields
@@ -27,9 +27,9 @@ A FluxNLPModel has fields
- `current_minibatch_test` is the current test minibatch; it is not used in practice;
- `w` is the vector of weights/variables;
"""
mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPModel{T, S}
mutable struct FluxNLPModel{T, S, F <: Function} <: AbstractFluxNLPModel{T, S}
meta::NLPModelMeta{T, S}
chain::C
chain
counters::Counters
loss_f::F
size_minibatch::Int
@@ -40,7 +40,7 @@ mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPM
rebuild # used to rebuild the chain from the flat weight vector
current_training_minibatch_status
current_test_minibatch_status
w::S
w
end

"""
@@ -52,14 +52,14 @@ The other data required are: an iterator over the training dataset `data_train`,
Suppose `(xtrn,ytrn) = Fluxnlp.data_train`
"""
function FluxNLPModel(
chain_ANN::T,
chain_ANN,
data_train,
data_test;
current_training_minibatch = [],
current_test_minibatch = [],
size_minibatch::Int = 100,
loss_f::F = Flux.mse, #Flux.crossentropy,
) where {T <: Chain, F <: Function}
) where {F <: Function}
x0, rebuild = Flux.destructure(chain_ANN)
n = length(x0)
meta = NLPModelMeta(n, x0 = x0)
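For context, here is a minimal usage sketch of the updated, type-flexible constructor. The chain, data shapes, and loss below are illustrative assumptions mirroring the test setup further down; they are not part of this diff.

using Flux, FluxNLPModels

chain = Chain(Dense(28 * 28, 32, relu), Dense(32, 10))   # any Flux chain
x = rand(Float32, 28 * 28, 100)                          # dummy flattened images
y = Flux.onehotbatch(rand(0:9, 100), 0:9)                # dummy one-hot labels
loader = Flux.DataLoader((x, y), batchsize = 10)

nlp = FluxNLPModel(chain, loader, loader; loss_f = Flux.logitcrossentropy)
w0 = copy(nlp.w)   # flat weight vector produced by Flux.destructure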
61 changes: 42 additions & 19 deletions src/FluxNLPModels_methods.jl
@@ -1,20 +1,25 @@
"""
f = obj(nlp, w)
Evaluate `f(w)`, the objective function of `nlp` at `w`.
Evaluate the objective function `f(w)` of the nonlinear programming (NLP) problem at the point `w`.
If the precision of `w` differs from the precision expected by `nlp`, the type of `nlp.w` is updated to match the precision of `w`.
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows the weights to be supplied in a precision different from the model's.
# Output
- `f_w`: the value of the objective function at `w`.
"""
function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{T}) where {T, S}
increment!(nlp, :neval_obj)
set_vars!(nlp, w)
function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, S, V}
x, y = nlp.current_training_minibatch

eltype(nlp.w) == V || update_type!(nlp, w) #Check if the type has changed
if eltype(x) != V
x = V.(x)
end

set_vars!(nlp, w)
increment!(nlp, :neval_obj)
return nlp.loss_f(nlp.chain(x), y)
end
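A hedged sketch of what the precision handling above enables, continuing the illustrative `nlp` from the constructor example (the return types assume the loss preserves the element type of its inputs):

fw32 = obj(nlp, nlp.w)      # Float32 weights -> Float32 objective
w16  = Float16.(nlp.w)
fw16 = obj(nlp, w16)        # update_type! rebuilds the chain in Float16 first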

@@ -25,21 +30,30 @@ Evaluate `∇f(w)`, the gradient of the objective function at `w` in place.
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables;
- `g::AbstractVector{T}`: the gradient vector.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows the weights to be supplied in a precision different from the model's.
- `g::AbstractVector{U}`: the gradient vector.
# Output
- `g`: the gradient at point `w`.
"""
function NLPModels.grad!(
nlp::AbstractFluxNLPModel{T, S},
w::AbstractVector{T},
g::AbstractVector{T},
) where {T, S}
w::AbstractVector{V},
g::AbstractVector{U},
) where {T, S, V, U}
@lencheck nlp.meta.nvar w g
increment!(nlp, :neval_grad)
x, y = nlp.current_training_minibatch

if (eltype(nlp.w) != V) # the precision of w differs from the model's; convert
update_type!(nlp, w)
g = V.(g)
if eltype(x) != V
x = V.(x)
end
end

increment!(nlp, :neval_grad)
g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]
return g
end
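For illustration, an in-place gradient evaluation at a promoted precision, under the same assumptions as the sketches above (capturing the return value, as the tests below do):

w64 = Float64.(nlp.w)
g64 = similar(w64)
g64 = grad!(nlp, w64, g64)   # chain and minibatch are converted to Float64 first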
@@ -51,24 +65,33 @@ Evaluate both `f(w)`, the objective function of `nlp` at `w`, and `∇f(w)`, the
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{T}`: is the vector of weights/variables;
- `g::AbstractVector{T}`: the gradient vector.
- `w::AbstractVector{V}`: the vector of weights/variables. The type parameter `V` allows the weights to be supplied in a precision different from the model's.
- `g::AbstractVector{U}`: the gradient vector.
# Output
- `f_w`, `g`: the value of the objective function and the gradient at the point `w`.
"""
function NLPModels.objgrad!(
nlp::AbstractFluxNLPModel{T, S},
w::AbstractVector{T},
g::AbstractVector{T},
) where {T, S}
w::AbstractVector{V},
g::AbstractVector{U},
) where {T, S, V, U}
@lencheck nlp.meta.nvar w g
x, y = nlp.current_training_minibatch

if (eltype(nlp.w) != V) # the precision of w differs from the model's; convert
update_type!(nlp, w)
g = V.(g)
if eltype(x) != V
x = V.(x)
end
end

increment!(nlp, :neval_obj)
increment!(nlp, :neval_grad)
set_vars!(nlp, w)

x, y = nlp.current_training_minibatch
f_w = nlp.loss_f(nlp.chain(x), y)
g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]

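And the combined call, reusing the illustrative buffers from the previous sketch:

fw64, g64 = objgrad!(nlp, w64, g64)   # objective value and gradient in one pass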
35 changes: 33 additions & 2 deletions src/utils.jl
@@ -1,14 +1,45 @@
"""
update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
Sets the variables and rebuilds the chain using the element type of the weights `w`.
"""
function update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
nlp.chain = update_type(nlp.chain, V)
nlp.w, nlp.rebuild = Flux.destructure(nlp.chain)
end

# Define a separate method for updating the type of the chain
function update_type(chain::Chain, ::Type{Float16})
return f16(chain)
end

function update_type(chain::Chain, ::Type{Float32})
return f32(chain)
end

function update_type(chain::Chain, ::Type{Float64})
return f64(chain)
end

# Throw an error for unsupported types
function update_type(chain::Chain, ::Type)
error("The package only supports Float16, Float32, and Float64")
end
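As an illustration of the dispatch above (a sketch only; `update_type` is internal, so it is qualified with the module name here):

c   = Chain(Dense(4, 2))                        # Float32 parameters by default
c16 = FluxNLPModels.update_type(c, Float16)     # equivalent to f16(c)
eltype(Flux.destructure(c16)[1])                # Float16
# FluxNLPModels.update_type(c, BigFloat) would throw: only Float16/32/64 are supported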

"""
set_vars!(model::AbstractFluxNLPModel{T,S}, new_w::AbstractVector{T}) where {T<:Number, S}
Sets the variables and rebuilds the chain.
"""
function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S}
function set_vars!(
nlp::AbstractFluxNLPModel{T, S},
new_w::AbstractVector{V},
) where {T <: Number, S, V}
nlp.w .= new_w
nlp.chain = nlp.rebuild(nlp.w)
end

function local_loss(nlp::AbstractFluxNLPModel{T, S}, x, y, w::AbstractVector{T}) where {T, S}
function local_loss(nlp::AbstractFluxNLPModel{T, S}, x, y, w::AbstractVector{V}) where {T, S, V}
# increment!(nlp, :neval_obj) #TODO not sure
nlp.chain = nlp.rebuild(w)
return nlp.loss_f(nlp.chain(x), y)
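For clarity, a sketch of how the methods above use `local_loss`: the flat weight vector is passed through `nlp.rebuild` so that Flux's `gradient` can differentiate the loss with respect to the weights (illustrative, reusing the `nlp` from the earlier sketches):

x, y = nlp.current_training_minibatch
g = gradient(w -> local_loss(nlp, x, y, w), nlp.w)[1]   # the same pattern grad! uses internally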
86 changes: 81 additions & 5 deletions test/runtests.jl
@@ -9,13 +9,13 @@ using MLDatasets
using LinearAlgebra

# Helper functions
function getdata(args)
function getdata(args; T = Float32)
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # download datasets without having to manually confirm the download

# Loading Dataset

xtrain, ytrain = MLDatasets.MNIST(Tx = Float32, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = Float32, split = :test)[:]
xtrain, ytrain = MLDatasets.MNIST(Tx = T, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = T, split = :test)[:]

# Reshape Data in order to flatten each image into a linear array
xtrain = Flux.flatten(xtrain)
@@ -32,7 +32,7 @@ function getdata(args)
end

function build_model(; imgsize = (28, 28, 1), nclasses = 10)
return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses))
return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses), softmax)
end

@kwdef mutable struct Args
@@ -67,7 +67,6 @@ device = cpu

@test DNNLPModel.w == old_w
@test obj_x1 == obj_x1_2
println(norm(grad_x1 - grad_x1_2))
@test norm(grad_x1 - grad_x1_2) ≈ 0.0

@test x1 == DNNLPModel.w
@@ -113,3 +112,80 @@ end
@test minibatch_next_test!(nlp) # should return true
@test minibatch_next_test!(nlp) # should return true
end

@testset "Multiple precision test" begin
# Create test and train dataloaders
train_data, test_data = getdata(args)

# Construct model in Float32
DN = build_model() |> device
nlp = FluxNLPModel(DN, train_data, test_data)

x1 = copy(nlp.w)
obj_x1 = obj(nlp, x1)
grad_x1 = NLPModels.grad(nlp, x1)
@test typeof(obj_x1) == Float32
@test eltype(grad_x1) == Float32

# change to Float16
x2 = Float16.(x1)
obj_x2 = obj(nlp, x2)
grad_x2 = NLPModels.grad(nlp, x2)
# test grad! again after changing the type
grad!(nlp, x2, grad_x2)
@test typeof(obj_x2) == Float16
@test eltype(grad_x2) == Float16

# change to Float64
x3 = Float64.(x1)
obj_x3 = obj(nlp, x3)
grad_x3 = NLPModels.grad(nlp, x3)
@test typeof(obj_x3) == Float64
@test eltype(grad_x3) == Float64

# change to Float16 with objgrad!
x3_2 = Float16.(x1)
grad_x3_2 = similar(x3_2)
obj_x3_2, grad_x3_2 = NLPModels.objgrad!(nlp, x3_2, grad_x3_2)
@test typeof(obj_x3_2) == Float16
@test eltype(grad_x3_2) == Float16

# change to Float64 with grad!
x3_3 = Float64.(x1)
grad_x3_3 = similar(x3_3)
grad_x3_3 = grad!(nlp, x3_3, grad_x3_3)
@test eltype(grad_x3_3) == Float64

# Construct model in Float16
train_data_f16, test_data_f16 = getdata(args, T = Float16)
DN_f16 = build_model() |> f16
nlp_f16 = FluxNLPModel(DN_f16, train_data_f16, test_data_f16)

x4 = copy(nlp_f16.w)
obj_x4 = obj(nlp_f16, x4)
grad_x4 = NLPModels.grad(nlp_f16, x4)

@test typeof(obj_x4) == Float16
@test eltype(grad_x4) == Float16

# change to Float32 from Float16
x5 = Float32.(x4)
obj_x5 = obj(nlp_f16, x5)
grad_x5 = NLPModels.grad(nlp_f16, x5)
@test typeof(obj_x5) == Float32
@test eltype(grad_x5) == Float32

# change to Float64 from Float16
x6 = Float64.(x4)
obj_x6 = obj(nlp_f16, x6)
grad_x6 = NLPModels.grad(nlp_f16, x6)
@test typeof(obj_x6) == Float64
@test eltype(grad_x6) == Float64

# change to BigFloat from Float32
# expected to throw an error
# Note we do not support BigFloat in FluxNLPModels yet!
x7 = BigFloat.(x5)
@test_throws Exception obj(nlp_f16, x7)

end
