
Commit

Using the Light Interface (#51)
tlienart authored Feb 7, 2020
1 parent f199300 commit 4b5a703
Showing 18 changed files with 210 additions and 154 deletions.
15 changes: 8 additions & 7 deletions Project.toml
@@ -1,35 +1,36 @@
name = "MLJLinearModels"
uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
authors = ["Thibaut Lienart <[email protected]>"]
version = "0.2.4"
version = "0.3.0"

[deps]
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
DocStringExtensions = "^0.8"
IterativeSolvers = "^0.8"
LinearMaps = "^2.5"
MLJBase = "^0.9"
Optim = "^0.19"
LinearMaps = "^2.6"
MLJModelInterface = "^0.1"
Optim = "^0.20"
Parameters = "^0.12"
Tables = "^0.2"
julia = "^1.0.0"
julia = "^1"

[extras]
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["DelimitedFiles", "PyCall", "Test", "Random", "RDatasets", "RCall"]
test = ["DelimitedFiles", "PyCall", "Test", "Random", "RDatasets", "RCall", "MLJBase"]
6 changes: 3 additions & 3 deletions src/MLJLinearModels.jl
Expand Up @@ -6,12 +6,12 @@ import LinearMaps: LinearMap
import IterativeSolvers: cg
import Optim

import MLJBase
import MLJModelInterface

import Base.+, Base.-, Base.*, Base./, Base.convert

const AVR = AbstractVector{<:Real}

const MMI = MLJModelInterface
const AVR = AbstractVector{<:Real}
const Option{T} = Union{Nothing,T}

include("scratchspace.jl")
Expand Down
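The point of this hunk is that the package now talks to MLJ only through the lightweight MLJModelInterface (aliased `MMI`), with MLJBase demoted to a test dependency. As a rough, hypothetical sketch of what glue code written against that alias looks like — the `FooRegressor` model, its field, and the toy ridge solve are made up for illustration; only `MMI.Deterministic`, `MMI.fit`, `MMI.matrix` and `MMI.predict` are actual MLJModelInterface names:

```julia
using LinearAlgebra
import MLJModelInterface
const MMI = MLJModelInterface

# Hypothetical model type, for illustration only (not part of MLJLinearModels).
mutable struct FooRegressor <: MMI.Deterministic
    lambda::Float64
end
FooRegressor(; lambda=1.0) = FooRegressor(lambda)

function MMI.fit(model::FooRegressor, verbosity::Int, X, y)
    Xmat      = MMI.matrix(X)                              # table -> plain matrix
    fitresult = (Xmat'Xmat + model.lambda * I) \ (Xmat'y)  # toy ridge solve
    return fitresult, nothing, NamedTuple()
end

MMI.predict(::FooRegressor, fitresult, Xnew) = MMI.matrix(Xnew) * fitresult
```

Nothing in such glue code requires loading MLJBase, which is exactly what the Project.toml change encodes.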
12 changes: 8 additions & 4 deletions src/fit/analytical.jl
Expand Up @@ -3,14 +3,17 @@
"""
$SIGNATURES
Fit a least-squares regression either with no penalty (OLS) or with an L2 penalty (Ridge).
Fit a least-squares regression either with no penalty (OLS) or with an L2
penalty (Ridge).
## Complexity
Assuming `n` dominates `p`,
* non-iterative (full solve): O(np²) - dominated by the construction of the Hessian X'X.
* iterative (conjugate gradient): O(κnp) - with κ the number of CG steps (κ ≤ p).
* non-iterative (full solve): O(np²) - dominated by the construction of the
Hessian X'X.
* iterative (conjugate gradient): O(κnp) - with κ the number of CG steps
(κ ≤ p).
"""
function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y)
# full solve
Expand All @@ -34,7 +37,8 @@ function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
max_cg_steps = min(solver.max_inner, p)
# Form the Hessian map, cost of application H*v is O(np)
Hm = LinearMap(Hv!(glr, X, y), p; ismutating=true, isposdef=true, issymmetric=true)
Hm = LinearMap(Hv!(glr, X, y), p;
ismutating=true, isposdef=true, issymmetric=true)
b = X'y
glr.fit_intercept && (b = vcat(b, sum(y)))
return cg(Hm, b; maxiter=max_cg_steps)
Expand Down
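To make the complexity claims in the docstring concrete, here is a standalone sketch (not the package's `_fit`) of the two ridge strategies for the no-intercept case; `X`, `y` and `lambda` are assumed given:

```julia
using LinearAlgebra, LinearMaps, IterativeSolvers

# Full solve: forms X'X explicitly, hence O(np²) for n×p data.
ridge_full(X, y, lambda) = (X'X + lambda * I) \ (X'y)

# CG solve: only needs H*v = X'(Xv) + λv, each application costs O(np),
# and at most p CG steps are taken, hence O(κnp) with κ ≤ p.
function ridge_cg(X, y, lambda; maxiter=size(X, 2))
    p  = size(X, 2)
    Hm = LinearMap(v -> X' * (X * v) .+ lambda .* v, p;
                   issymmetric=true, isposdef=true)
    return cg(Hm, X'y; maxiter=maxiter)
end
```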
9 changes: 5 additions & 4 deletions src/fit/default.jl
Expand Up @@ -2,8 +2,9 @@ export fit

# Default solvers

# TODO: in the future, have cases where if the things are too big, take another default.
# also should check if p > n in which case should do dual stuff (or other appropriate alternative)
# TODO: in the future, have cases where if the things are too big, take another
# default. Also should check if p > n in which case should do dual stuff (or
# other appropriate alternative)

# Linear, Ridge
_solver(::GLR{L2Loss,<:L2R}, np::NTuple{2,Int}) = Analytical()
Expand All @@ -21,8 +22,8 @@ end
# Robust, Quantile
_solver(::GLR{<:RobustLoss,<:L2R}, np::NTuple{2,Int}) = LBFGS()

# Fallback NOTE: should revisit bc with non-smooth, wouldn't work probably PGD/PSGD
# depending on how much data there is
# Fallback NOTE: should revisit bc with non-smooth, wouldn't work probably
# PGD/PSGD depending on how much data there is
_solver(::GLR, np::NTuple{2,Int}) = @error "Not yet implemented."


Expand Down
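For reference, this is how the dispatch above is expected to resolve; `_solver` is internal (hence the module qualification) and `RidgeRegression` is assumed to be one of the package's convenience constructors not shown in this diff:

```julia
using MLJLinearModels

# Illustrative only: internal function, results as suggested by the methods above.
MLJLinearModels._solver(RidgeRegression(1.0), (1_000, 10))  # expected: Analytical()
MLJLinearModels._solver(HuberRegression(0.5), (1_000, 10))  # expected: LBFGS()
```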
6 changes: 4 additions & 2 deletions src/fit/iwls.jl
Expand Up @@ -16,7 +16,8 @@ function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y) where {ρ}
# update the weights and retrieve the application function
# Mθv! corresponds to the current application of (X'WX + λI) on v
Mθv! = _Mv!(ω, θ)
Mm = LinearMap(Mθv!, p; ismutating=true, isposdef=true, issymmetric=true)
Mm = LinearMap(Mθv!, p;
ismutating=true, isposdef=true, issymmetric=true)
Wy = ω .* y
b = X'Wy
if glr.fit_intercept
Expand All @@ -30,6 +31,7 @@ function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y) where {ρ}
copyto!(θ_, θ)
k += 1
end
tol ≤ solver.tol || @warn "IWLS did not converge in $(solver.max_iter) iterations."
tol ≤ solver.tol ||
    @warn "IWLS did not converge in $(solver.max_iter) iterations."
return θ
end
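A minimal usage sketch of the solver above (not part of this commit); the data is synthetic and the keyword values are arbitrary:

```julia
using MLJLinearModels, Random

Random.seed!(0)
X = randn(200, 5)
y = X * randn(5) .+ 0.1 .* randn(200)

glr   = HuberRegression(0.5; lambda=1.0)                    # robust loss + L2 penalty
theta = fit(glr, X, y; solver=IWLSCG(max_iter=200, tol=1e-6))
```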
44 changes: 25 additions & 19 deletions src/fit/newton.jl
Expand Up @@ -9,10 +9,11 @@ Fit a GLR using Newton's method.
## Complexity
Assuming `n` dominates `p`, O(κnp²), dominated by the construction of the Hessian at each step with
κ the number of Newton steps.
Assuming `n` dominates `p`, O(κnp²), dominated by the construction of the
Hessian at each step with κ the number of Newton steps.
"""
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R}, solver::Newton, X, y)
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
solver::Newton, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
θ₀ = zeros(p)
_fgh! = fgh!(glr, X, y)
Expand All @@ -24,20 +25,21 @@ end
"""
$SIGNATURES
Fit a GLR using Newton's method combined with an iterative solver (conjugate gradient) to solve
the Newton steps (∇²f)⁻¹∇f.
Fit a GLR using Newton's method combined with an iterative solver (conjugate
gradient) to solve the Newton steps (∇²f)⁻¹∇f.
## Complexity
Assuming `n` dominates `p`, O(κ₁κ₂np), dominated by the application of the Hessian at each step
where κ₁ is the number of Newton steps and κ₂ is the average number of CG steps per Newton step
(which is at most p).
Assuming `n` dominates `p`, O(κ₁κ₂np), dominated by the application of the
Hessian at each step where κ₁ is the number of Newton steps and κ₂ is the
average number of CG steps per Newton step (which is at most p).
"""
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R}, solver::NewtonCG, X, y)
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
solver::NewtonCG, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
θ₀ = zeros(p)
_f = objective(glr, X, y)
_fg! = (g, θ) -> fgh!(glr, X, y)(0.0, g, nothing, θ) # XXX: Optim.jl/issues/738
_fg! = (g, θ) -> fgh!(glr, X, y)(0.0, g, nothing, θ) # Optim.jl/issues/738
_Hv! = Hv!(glr, X, y)
opt = Optim.TwiceDifferentiableHV(_f, _fg!, _Hv!, θ₀)
res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion())
Expand All @@ -51,10 +53,11 @@ Fit a GLR using LBFGS.
## Complexity
Assuming `n` dominates `p`, O(κnp), dominated by the computation of the gradient at each step with
κ the number of LBFGS steps.
Assuming `n` dominates `p`, O(κnp), dominated by the computation of the
gradient at each step with κ the number of LBFGS steps.
"""
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R}, solver::LBFGS, X, y)
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
solver::LBFGS, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
θ₀ = zeros(p)
_fg! = (f, g, θ) -> fgh!(glr, X, y)(f, g, nothing, θ)
Expand All @@ -69,13 +72,15 @@ end
"""
$SIGNATURES
Fit a multiclass GLR using Newton's method with an iterative solver (conjugate gradient).
Fit a multiclass GLR using Newton's method with an iterative solver (conjugate
gradient).
## Complexity
Assuming `n` dominates `p`, O(κ₁κ₂npc), where `c` is the number of classes. The computations are
dominated by the application of the Hessian at each step with κ₁ the number of Newton steps and κ₂
the average number of CG steps per Newton step.
Assuming `n` dominates `p`, O(κ₁κ₂npc), where `c` is the number of classes. The
computations are dominated by the application of the Hessian at each step with
κ₁ the number of Newton steps and κ₂ the average number of CG steps per Newton
step.
"""
function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::NewtonCG, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
Expand All @@ -96,8 +101,9 @@ Fit a multiclass GLR using LBFGS.
## Complexity
Assuming `n` dominates `p`, O(κnpc), with `c` the number of classes, dominated by the computation
of the gradient at each step with κ the number of LBFGS steps.
Assuming `n` dominates `p`, O(κnpc), with `c` the number of classes, dominated
by the computation of the gradient at each step with κ the number of LBFGS
steps.
"""
function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::LBFGS, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
Expand Down
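The three smooth solvers documented above can be compared on the same problem; a usage sketch with synthetic data, assuming ±1 labels for the binary logistic loss:

```julia
using MLJLinearModels, Random

Random.seed!(1)
X = randn(500, 10)
y = ifelse.(X * randn(10) .> 0, 1, -1)   # ±1 labels for the binary logistic loss

glr = LogisticRegression(1.0)            # logistic loss + L2 penalty
theta_newton = fit(glr, X, y; solver=Newton())    # O(κnp²) per the docstring
theta_ncg    = fit(glr, X, y; solver=NewtonCG())  # O(κ₁κ₂np)
theta_lbfgs  = fit(glr, X, y; solver=LBFGS())     # O(κnp)
```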
9 changes: 6 additions & 3 deletions src/fit/proxgrad.jl
Expand Up @@ -22,7 +22,8 @@ function _fit(glr::GLR, solver::ProxGrad, X, y)
_f = smooth_objective(glr, X, y; c=c)
_fg! = smooth_fg!(glr, X, y)
_prox! = prox!(glr)
bt_cond = θ̂ -> _f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄))/(2η)
bt_cond = θ̂ ->
_f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄)) / (2η)
# loop-related
k, tol = 1, Inf
while k ≤ solver.max_iter && tol > solver.tol
Expand All @@ -46,7 +47,8 @@ function _fit(glr::GLR, solver::ProxGrad, X, y)
inner += 1
end
if inner == solver.max_inner
@warn "No appropriate stepsize found via backtracking; interrupting."
@warn "No appropriate stepsize found via backtracking; " *
"interrupting."
break
end
# update caches
Expand All @@ -59,6 +61,7 @@ function _fit(glr::GLR, solver::ProxGrad, X, y)
# update niter
k += 1
end
tol ≤ solver.tol || @warn "Proximal GD did not converge in $(solver.max_iter) iterations."
tol ≤ solver.tol || @warn "Proximal GD did not converge in " *
                          "$(solver.max_iter) iterations."
return θ
end
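For the non-smooth L1 part, the proximal-gradient solver above is the one in play; a usage sketch with synthetic data and arbitrary hyperparameters, assuming the `ISTA` convenience constructor defined in solvers.jl is exported:

```julia
using MLJLinearModels, Random

Random.seed!(2)
X = randn(300, 20)
y = X[:, 1:3] * [2.0, -1.0, 0.5] .+ 0.05 .* randn(300)

enr   = ElasticNetRegression(0.1, 0.5)    # λ|θ|₂²/2 with λ=0.1, γ|θ|₁ with γ=0.5
theta = fit(enr, X, y; solver=ISTA(max_iter=2_000, tol=1e-6))
```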
4 changes: 2 additions & 2 deletions src/fit/solvers.jl
Expand Up @@ -38,7 +38,7 @@ struct LBFGS <: Solver end
@with_kw struct ProxGrad <: Solver
accel::Bool = false # use Nesterov style acceleration (see also FISTA)
max_iter::Int = 1000 # max number of overall iterations
tol::Float64 = 1e-4 # tolerance over relative change of θ i.e. norm(θ-θ_)/norm(θ)
tol::Float64 = 1e-4 # tol relative change of θ i.e. norm(θ-θ_)/norm(θ)
max_inner::Int = 100 # β^max_inner should be > 1e-10
beta::Float64 = 0.8 # in (0, 1); shrinkage in the backtracking step
end
Expand All @@ -53,7 +53,7 @@ ISTA(; kwa...) = ProxGrad(;accel = false, kwa...)
max_inner::Int = 200
tol::Float64 = 1e-4
damping::Float64 = 1.0 # should be between 0 and 1, 1 = trust iterates
threshold::Float64 = 1e-6 # threshold for the residuals used for instance in quantile reg
threshold::Float64 = 1e-6 # thresh for residuals; used eg in quantile reg
end

# ===================== admm.jl
Expand Down
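Since these solver structs are declared with `Parameters.@with_kw`, any field can be overridden by keyword; a small sketch (the second struct's name is not visible in this hunk but is the `IWLSCG` used in iwls.jl, and the field names are taken from the definitions above):

```julia
using MLJLinearModels

ProxGrad(accel=true, tol=1e-6, beta=0.5)   # accelerated (FISTA-style) variant
IWLSCG(damping=0.5, threshold=1e-8)        # damped IWLS, e.g. for quantile regression
```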
34 changes: 21 additions & 13 deletions src/glr/constructors.jl
Expand Up @@ -6,14 +6,14 @@ export GeneralizedLinearRegression, GLR,
RobustRegression, HuberRegression, QuantileRegression

"""
GeneralizedLinearRegression{L<:Loss, P<:Penalty}
GeneralizedLinearRegression{L<:Loss, P<:Penalty}
Generalized Linear Regression (GLR) model with objective function:
``L(y, Xθ) + P(θ)``
where `L` is a loss function, `P` a penalty, `y` is the vector of observed response, `X` is
the feature matrix and `θ` the vector of parameters.
where `L` is a loss function, `P` a penalty, `y` is the vector of observed
response, `X` is the feature matrix and `θ` the vector of parameters.
Special cases include:
Expand Down Expand Up @@ -74,8 +74,10 @@ $SIGNATURES
Objective function: ``|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁``.
"""
function ElasticNetRegression(λ::Real=1.0, γ::Real=1.0; lambda::Real=λ, gamma::Real=γ,
fit_intercept::Bool=true, penalize_intercept::Bool=false)
function ElasticNetRegression(λ::Real=1.0, γ::Real=1.0;
lambda::Real=λ, gamma::Real=γ,
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
check_pos.((lambda, gamma))
GLR(penalty=lambda*L2Penalty()+gamma*L1Penalty(),
fit_intercept=fit_intercept,
Expand Down Expand Up @@ -108,10 +110,11 @@ end
"""
$SIGNATURES
Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is either the logistic loss in the
binary case or the multinomial loss otherwise.
Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is either the
logistic loss in the binary case or the multinomial loss otherwise.
"""
function LogisticRegression(λ::Real=1.0, γ::Real=0.0; lambda::Real=λ, gamma::Real=γ,
function LogisticRegression(λ::Real=1.0, γ::Real=0.0;
lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
multi_class::Bool=false, fit_intercept::Bool=true,
penalize_intercept::Bool=false)
Expand All @@ -131,11 +134,13 @@ MultinomialRegression(a...; kwa...) = LogisticRegression(a...; multi_class=true,
"""
$SIGNATURES
Objective function: ``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁`` where ρ is a given function on the residuals.
Objective function: ``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁`` where ρ is a given function
on the residuals.
"""
function RobustRegression(ρ::RobustRho=HuberRho(0.1), λ::Real=1.0, γ::Real=0.0;
rho::RobustRho=ρ, lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en, fit_intercept::Bool=true,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
penalty = _l1l2en(lambda, gamma, penalty, "Robust regression")
GLR(loss=RobustLoss(rho),
Expand All @@ -151,12 +156,14 @@ Huber Regression with objective:
``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``
Where `ρ` is the Huber function `ρ(r) = r²/2` if `|r|≤δ` and `ρ(r)=δ(|r|-δ/2)` otherwise.
Where `ρ` is the Huber function `ρ(r) = r²/2` if `|r|≤δ` and
`ρ(r)=δ(|r|-δ/2)` otherwise.
"""
function HuberRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
fit_intercept::Bool=true, penalize_intercept::Bool=false)
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
return RobustRegression(HuberRho(delta), lambda, gamma;
penalty=penalty, fit_intercept=fit_intercept,
penalize_intercept=penalize_intercept)
Expand All @@ -174,7 +181,8 @@ Where `ρ` is the check function `ρ(r) = r(δ - 1(r < 0))`.
function QuantileRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
fit_intercept::Bool=true, penalize_intercept::Bool=false)
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
return RobustRegression(QuantileRho(delta), lambda, gamma;
penalty=penalty, fit_intercept=fit_intercept,
penalize_intercept=penalize_intercept)
Expand Down
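Each convenience constructor above just assembles a `GLR`; as a sketch, and assuming the penalty types `L1Penalty`/`L2Penalty` are accessible to the caller, the elastic-net case is essentially:

```julia
using MLJLinearModels

enr = ElasticNetRegression(0.1, 0.5)
# ... which builds the same model as
glr = GLR(penalty=0.1 * L2Penalty() + 0.5 * L1Penalty(),
          fit_intercept=true,
          penalize_intercept=false)
```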

1 comment on commit 4b5a703

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/9014

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.0 -m "<description of version>" 4b5a70308aeee649a32da25e4a5262676497d185
git push origin v0.3.0
