
Commit

Using the Light Interface (#51)
tlienart authored Feb 7, 2020
1 parent f199300 commit 4b5a703
Showing 18 changed files with 210 additions and 154 deletions.
15 changes: 8 additions & 7 deletions Project.toml
@@ -1,35 +1,36 @@
name = "MLJLinearModels"
uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
authors = ["Thibaut Lienart <[email protected]>"]
version = "0.2.4"
version = "0.3.0"

[deps]
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
DocStringExtensions = "^0.8"
IterativeSolvers = "^0.8"
LinearMaps = "^2.5"
MLJBase = "^0.9"
Optim = "^0.19"
LinearMaps = "^2.6"
MLJModelInterface = "^0.1"
Optim = "^0.20"
Parameters = "^0.12"
Tables = "^0.2"
julia = "^1.0.0"
julia = "^1"

[extras]
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["DelimitedFiles", "PyCall", "Test", "Random", "RDatasets", "RCall"]
test = ["DelimitedFiles", "PyCall", "Test", "Random", "RDatasets", "RCall", "MLJBase"]
6 changes: 3 additions & 3 deletions src/MLJLinearModels.jl
Expand Up @@ -6,12 +6,12 @@ import LinearMaps: LinearMap
import IterativeSolvers: cg
import Optim

import MLJBase
import MLJModelInterface

import Base.+, Base.-, Base.*, Base./, Base.convert

const AVR = AbstractVector{<:Real}

const MMI = MLJModelInterface
const AVR = AbstractVector{<:Real}
const Option{T} = Union{Nothing,T}

include("scratchspace.jl")
Expand Down
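The point of this hunk is that the package now talks to MLJ only through the lightweight MLJModelInterface (aliased `MMI`), with MLJBase demoted to a test dependency. As a rough, hypothetical sketch of what glue code written against that alias looks like — the `FooRegressor` model, its field, and the toy ridge solve are made up for illustration; only `MMI.Deterministic`, `MMI.fit`, `MMI.matrix` and `MMI.predict` are actual MLJModelInterface names:

```julia
using LinearAlgebra
import MLJModelInterface
const MMI = MLJModelInterface

# Hypothetical model type, for illustration only (not part of MLJLinearModels).
mutable struct FooRegressor <: MMI.Deterministic
    lambda::Float64
end
FooRegressor(; lambda=1.0) = FooRegressor(lambda)

function MMI.fit(model::FooRegressor, verbosity::Int, X, y)
    Xmat      = MMI.matrix(X)                              # table -> plain matrix
    fitresult = (Xmat'Xmat + model.lambda * I) \ (Xmat'y)  # toy ridge solve
    return fitresult, nothing, NamedTuple()
end

MMI.predict(::FooRegressor, fitresult, Xnew) = MMI.matrix(Xnew) * fitresult
```

Nothing in such glue code requires loading MLJBase, which is exactly what the Project.toml change encodes.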
12 changes: 8 additions & 4 deletions src/fit/analytical.jl
Expand Up @@ -3,14 +3,17 @@
"""
$SIGNATURES
Fit a least-squares regression either with no penalty (OLS) or with an L2 penalty (Ridge).
Fit a least-squares regression either with no penalty (OLS) or with an L2
penalty (Ridge).
## Complexity
Assuming `n` dominates `p`,
* non-iterative (full solve): O(np²) - dominated by the construction of the Hessian X'X.
* iterative (conjugate gradient): O(κnp) - with κ the number of CG steps (κ ≤ p).
* non-iterative (full solve): O(np²) - dominated by the construction of the
Hessian X'X.
* iterative (conjugate gradient): O(κnp) - with κ the number of CG steps
(κ ≤ p).
"""
function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y)
# full solve
Expand All @@ -34,7 +37,8 @@ function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
max_cg_steps = min(solver.max_inner, p)
# Form the Hessian map, cost of application H*v is O(np)
Hm = LinearMap(Hv!(glr, X, y), p; ismutating=true, isposdef=true, issymmetric=true)
Hm = LinearMap(Hv!(glr, X, y), p;
ismutating=true, isposdef=true, issymmetric=true)
b = X'y
glr.fit_intercept && (b = vcat(b, sum(y)))
return cg(Hm, b; maxiter=max_cg_steps)
Expand Down
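To make the complexity claims in the docstring concrete, here is a standalone sketch (not the package's `_fit`) of the two ridge strategies for the no-intercept case; `X`, `y` and `lambda` are assumed given:

```julia
using LinearAlgebra, LinearMaps, IterativeSolvers

# Full solve: forms X'X explicitly, hence O(np²) for n×p data.
ridge_full(X, y, lambda) = (X'X + lambda * I) \ (X'y)

# CG solve: only needs H*v = X'(Xv) + λv, each application costs O(np),
# and at most p CG steps are taken, hence O(κnp) with κ ≤ p.
function ridge_cg(X, y, lambda; maxiter=size(X, 2))
    p  = size(X, 2)
    Hm = LinearMap(v -> X' * (X * v) .+ lambda .* v, p;
                   issymmetric=true, isposdef=true)
    return cg(Hm, X'y; maxiter=maxiter)
end
```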
9 changes: 5 additions & 4 deletions src/fit/default.jl
Expand Up @@ -2,8 +2,9 @@ export fit

# Default solvers

# TODO: in the future, have cases where if the things are too big, take another default.
# also should check if p > n in which case should do dual stuff (or other appropriate alternative)
# TODO: in the future, have cases where if the things are too big, take another
# default. Also should check if p > n in which case should do dual stuff (or
# other appropriate alternative)

# Linear, Ridge
_solver(::GLR{L2Loss,<:L2R}, np::NTuple{2,Int}) = Analytical()
Expand All @@ -21,8 +22,8 @@ end
# Robust, Quantile
_solver(::GLR{<:RobustLoss,<:L2R}, np::NTuple{2,Int}) = LBFGS()

# Fallback NOTE: should revisit bc with non-smooth, wouldn't work probably PGD/PSGD
# depending on how much data there is
# Fallback NOTE: should revisit bc with non-smooth, wouldn't work probably
# PGD/PSGD depending on how much data there is
_solver(::GLR, np::NTuple{2,Int}) = @error "Not yet implemented."


Expand Down
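For reference, this is how the dispatch above is expected to resolve; `_solver` is internal (hence the module qualification) and `RidgeRegression` is assumed to be one of the package's convenience constructors not shown in this diff:

```julia
using MLJLinearModels

# Illustrative only: internal function, results as suggested by the methods above.
MLJLinearModels._solver(RidgeRegression(1.0), (1_000, 10))  # expected: Analytical()
MLJLinearModels._solver(HuberRegression(0.5), (1_000, 10))  # expected: LBFGS()
```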
6 changes: 4 additions & 2 deletions src/fit/iwls.jl
Expand Up @@ -16,7 +16,8 @@ function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y) where {ρ}
# update the weights and retrieve the application function
# Mθv! corresponds to the current application of (X'WX + λI) on v
Mθv! = _Mv!(ω, θ)
Mm = LinearMap(Mθv!, p; ismutating=true, isposdef=true, issymmetric=true)
Mm = LinearMap(Mθv!, p;
ismutating=true, isposdef=true, issymmetric=true)
Wy = ω .* y
b = X'Wy
if glr.fit_intercept
Expand All @@ -30,6 +31,7 @@ function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y) where {ρ}
copyto!(θ_, θ)
k += 1
end
tol ≤ solver.tol || @warn "IWLS did not converge in $(solver.max_iter) iterations."
tol ≤ solver.tol ||
    @warn "IWLS did not converge in $(solver.max_iter) iterations."
return θ
end
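A minimal usage sketch of the solver above (not part of this commit); the data is synthetic and the keyword values are arbitrary:

```julia
using MLJLinearModels, Random

Random.seed!(0)
X = randn(200, 5)
y = X * randn(5) .+ 0.1 .* randn(200)

glr   = HuberRegression(0.5; lambda=1.0)                    # robust loss + L2 penalty
theta = fit(glr, X, y; solver=IWLSCG(max_iter=200, tol=1e-6))
```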
44 changes: 25 additions & 19 deletions src/fit/newton.jl
Expand Up @@ -9,10 +9,11 @@ Fit a GLR using Newton's method.
## Complexity
Assuming `n` dominates `p`, O(κnp²), dominated by the construction of the Hessian at each step with
κ the number of Newton steps.
Assuming `n` dominates `p`, O(κnp²), dominated by the construction of the
Hessian at each step with κ the number of Newton steps.
"""
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R}, solver::Newton, X, y)
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
solver::Newton, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
θ₀ = zeros(p)
_fgh! = fgh!(glr, X, y)
Expand All @@ -24,20 +25,21 @@ end
"""
$SIGNATURES
Fit a GLR using Newton's method combined with an iterative solver (conjugate gradient) to solve
the Newton steps (∇²f)⁻¹∇f.
Fit a GLR using Newton's method combined with an iterative solver (conjugate
gradient) to solve the Newton steps (∇²f)⁻¹∇f.
## Complexity
Assuming `n` dominates `p`, O(κ₁κ₂np), dominated by the application of the Hessian at each step
where κ₁ is the number of Newton steps and κ₂ is the average number of CG steps per Newton step
(which is at most p).
Assuming `n` dominates `p`, O(κ₁κ₂np), dominated by the application of the
Hessian at each step where κ₁ is the number of Newton steps and κ₂ is the
average number of CG steps per Newton step (which is at most p).
"""
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R}, solver::NewtonCG, X, y)
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
solver::NewtonCG, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
θ₀ = zeros(p)
_f = objective(glr, X, y)
_fg! = (g, θ) -> fgh!(glr, X, y)(0.0, g, nothing, θ) # XXX: Optim.jl/issues/738
_fg! = (g, θ) -> fgh!(glr, X, y)(0.0, g, nothing, θ) # Optim.jl/issues/738
_Hv! = Hv!(glr, X, y)
opt = Optim.TwiceDifferentiableHV(_f, _fg!, _Hv!, θ₀)
res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion())
Expand All @@ -51,10 +53,11 @@ Fit a GLR using LBFGS.
## Complexity
Assuming `n` dominates `p`, O(κnp), dominated by the computation of the gradient at each step with
κ the number of LBFGS steps.
Assuming `n` dominates `p`, O(κnp), dominated by the computation of the
gradient at each step with κ the number of LBFGS steps.
"""
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R}, solver::LBFGS, X, y)
function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
solver::LBFGS, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
θ₀ = zeros(p)
_fg! = (f, g, θ) -> fgh!(glr, X, y)(f, g, nothing, θ)
Expand All @@ -69,13 +72,15 @@ end
"""
$SIGNATURES
Fit a multiclass GLR using Newton's method with an iterative solver (conjugate gradient).
Fit a multiclass GLR using Newton's method with an iterative solver (conjugate
gradient).
## Complexity
Assuming `n` dominates `p`, O(κ₁κ₂npc), where `c` is the number of classes. The computations are
dominated by the application of the Hessian at each step with κ₁ the number of Newton steps and κ₂
the average number of CG steps per Newton step.
Assuming `n` dominates `p`, O(κ₁κ₂npc), where `c` is the number of classes. The
computations are dominated by the application of the Hessian at each step with
κ₁ the number of Newton steps and κ₂ the average number of CG steps per Newton
step.
"""
function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::NewtonCG, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
Expand All @@ -96,8 +101,9 @@ Fit a multiclass GLR using LBFGS.
## Complexity
Assuming `n` dominates `p`, O(κnpc), with `c` the number of classes, dominated by the computation
of the gradient at each step with κ the number of LBFGS steps.
Assuming `n` dominates `p`, O(κnpc), with `c` the number of classes, dominated
by the computation of the gradient at each step with κ the number of LBFGS
steps.
"""
function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::LBFGS, X, y)
p = size(X, 2) + Int(glr.fit_intercept)
Expand Down
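The three smooth solvers documented above can be compared on the same problem; a usage sketch with synthetic data, assuming ±1 labels for the binary logistic loss:

```julia
using MLJLinearModels, Random

Random.seed!(1)
X = randn(500, 10)
y = ifelse.(X * randn(10) .> 0, 1, -1)   # ±1 labels for the binary logistic loss

glr = LogisticRegression(1.0)            # logistic loss + L2 penalty
theta_newton = fit(glr, X, y; solver=Newton())    # O(κnp²) per the docstring
theta_ncg    = fit(glr, X, y; solver=NewtonCG())  # O(κ₁κ₂np)
theta_lbfgs  = fit(glr, X, y; solver=LBFGS())     # O(κnp)
```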
9 changes: 6 additions & 3 deletions src/fit/proxgrad.jl
Expand Up @@ -22,7 +22,8 @@ function _fit(glr::GLR, solver::ProxGrad, X, y)
_f = smooth_objective(glr, X, y; c=c)
_fg! = smooth_fg!(glr, X, y)
_prox! = prox!(glr)
bt_cond = θ̂ -> _f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄))/(2η)
bt_cond = θ̂ ->
_f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄)) / (2η)
# loop-related
k, tol = 1, Inf
while k ≤ solver.max_iter && tol > solver.tol
Expand All @@ -46,7 +47,8 @@ function _fit(glr::GLR, solver::ProxGrad, X, y)
inner += 1
end
if inner == solver.max_inner
@warn "No appropriate stepsize found via backtracking; interrupting."
@warn "No appropriate stepsize found via backtracking; " *
"interrupting."
break
end
# update caches
Expand All @@ -59,6 +61,7 @@ function _fit(glr::GLR, solver::ProxGrad, X, y)
# update niter
k += 1
end
tol ≤ solver.tol || @warn "Proximal GD did not converge in $(solver.max_iter) iterations."
tol ≤ solver.tol || @warn "Proximal GD did not converge in " *
                          "$(solver.max_iter) iterations."
return θ
end
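For the non-smooth L1 part, the proximal-gradient solver above is the one in play; a usage sketch with synthetic data and arbitrary hyperparameters, assuming the `ISTA` convenience constructor defined in solvers.jl is exported:

```julia
using MLJLinearModels, Random

Random.seed!(2)
X = randn(300, 20)
y = X[:, 1:3] * [2.0, -1.0, 0.5] .+ 0.05 .* randn(300)

enr   = ElasticNetRegression(0.1, 0.5)    # λ|θ|₂²/2 with λ=0.1, γ|θ|₁ with γ=0.5
theta = fit(enr, X, y; solver=ISTA(max_iter=2_000, tol=1e-6))
```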
4 changes: 2 additions & 2 deletions src/fit/solvers.jl
Expand Up @@ -38,7 +38,7 @@ struct LBFGS <: Solver end
@with_kw struct ProxGrad <: Solver
accel::Bool = false # use Nesterov style acceleration (see also FISTA)
max_iter::Int = 1000 # max number of overall iterations
tol::Float64 = 1e-4 # tolerance over relative change of θ i.e. norm(θ-θ_)/norm(θ)
tol::Float64 = 1e-4 # tol relative change of θ i.e. norm(θ-θ_)/norm(θ)
max_inner::Int = 100 # β^max_inner should be > 1e-10
beta::Float64 = 0.8 # in (0, 1); shrinkage in the backtracking step
end
Expand All @@ -53,7 +53,7 @@ ISTA(; kwa...) = ProxGrad(;accel = false, kwa...)
max_inner::Int = 200
tol::Float64 = 1e-4
damping::Float64 = 1.0 # should be between 0 and 1, 1 = trust iterates
threshold::Float64 = 1e-6 # threshold for the residuals used for instance in quantile reg
threshold::Float64 = 1e-6 # thresh for residuals; used eg in quantile reg
end

# ===================== admm.jl
Expand Down
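Since these solver structs are declared with `Parameters.@with_kw`, any field can be overridden by keyword; a small sketch (the second struct's name is not visible in this hunk but is the `IWLSCG` used in iwls.jl, and the field names are taken from the definitions above):

```julia
using MLJLinearModels

ProxGrad(accel=true, tol=1e-6, beta=0.5)   # accelerated (FISTA-style) variant
IWLSCG(damping=0.5, threshold=1e-8)        # damped IWLS, e.g. for quantile regression
```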
34 changes: 21 additions & 13 deletions src/glr/constructors.jl
Expand Up @@ -6,14 +6,14 @@ export GeneralizedLinearRegression, GLR,
RobustRegression, HuberRegression, QuantileRegression

"""
GeneralizedLinearRegression{L<:Loss, P<:Penalty}
GeneralizedLinearRegression{L<:Loss, P<:Penalty}
Generalized Linear Regression (GLR) model with objective function:
``L(y, Xθ) + P(θ)``
where `L` is a loss function, `P` a penalty, `y` is the vector of observed response, `X` is
the feature matrix and `θ` the vector of parameters.
where `L` is a loss function, `P` a penalty, `y` is the vector of observed
response, `X` is the feature matrix and `θ` the vector of parameters.
Special cases include:
Expand Down Expand Up @@ -74,8 +74,10 @@ $SIGNATURES
Objective function: ``|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁``.
"""
function ElasticNetRegression(λ::Real=1.0, γ::Real=1.0; lambda::Real=λ, gamma::Real=γ,
fit_intercept::Bool=true, penalize_intercept::Bool=false)
function ElasticNetRegression(λ::Real=1.0, γ::Real=1.0;
lambda::Real=λ, gamma::Real=γ,
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
check_pos.((lambda, gamma))
GLR(penalty=lambda*L2Penalty()+gamma*L1Penalty(),
fit_intercept=fit_intercept,
Expand Down Expand Up @@ -108,10 +110,11 @@ end
"""
$SIGNATURES
Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is either the logistic loss in the
binary case or the multinomial loss otherwise.
Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is either the
logistic loss in the binary case or the multinomial loss otherwise.
"""
function LogisticRegression(λ::Real=1.0, γ::Real=0.0; lambda::Real=λ, gamma::Real=γ,
function LogisticRegression(λ::Real=1.0, γ::Real=0.0;
lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
multi_class::Bool=false, fit_intercept::Bool=true,
penalize_intercept::Bool=false)
Expand All @@ -131,11 +134,13 @@ MultinomialRegression(a...; kwa...) = LogisticRegression(a...; multi_class=true,
"""
$SIGNATURES
Objective function: ``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁`` where ρ is a given function on the residuals.
Objective function: ``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁`` where ρ is a given function
on the residuals.
"""
function RobustRegression(ρ::RobustRho=HuberRho(0.1), λ::Real=1.0, γ::Real=0.0;
rho::RobustRho=ρ, lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en, fit_intercept::Bool=true,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
penalty = _l1l2en(lambda, gamma, penalty, "Robust regression")
GLR(loss=RobustLoss(rho),
Expand All @@ -151,12 +156,14 @@ Huber Regression with objective:
``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``
Where `ρ` is the Huber function `ρ(r) = r²/2` if `|r|≤δ` and `ρ(r)=δ(|r|-δ/2)` otherwise.
Where `ρ` is the Huber function `ρ(r) = r²/2` if `|r|≤δ` and
`ρ(r)=δ(|r|-δ/2)` otherwise.
"""
function HuberRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
fit_intercept::Bool=true, penalize_intercept::Bool=false)
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
return RobustRegression(HuberRho(delta), lambda, gamma;
penalty=penalty, fit_intercept=fit_intercept,
penalize_intercept=penalize_intercept)
Expand All @@ -174,7 +181,8 @@ Where `ρ` is the check function `ρ(r) = r(δ - 1(r < 0))`.
function QuantileRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
penalty::Symbol=iszero(gamma) ? :l2 : :en,
fit_intercept::Bool=true, penalize_intercept::Bool=false)
fit_intercept::Bool=true,
penalize_intercept::Bool=false)
return RobustRegression(QuantileRho(delta), lambda, gamma;
penalty=penalty, fit_intercept=fit_intercept,
penalize_intercept=penalize_intercept)
Expand Down
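Each convenience constructor above just assembles a `GLR`; as a sketch, and assuming the penalty types `L1Penalty`/`L2Penalty` are accessible to the caller, the elastic-net case is essentially:

```julia
using MLJLinearModels

enr = ElasticNetRegression(0.1, 0.5)
# ... which builds the same model as
glr = GLR(penalty=0.1 * L2Penalty() + 0.5 * L1Penalty(),
          fit_intercept=true,
          penalize_intercept=false)
```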

1 comment on commit 4b5a703

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/9014

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.0 -m "<description of version>" 4b5a70308aeee649a32da25e4a5262676497d185
git push origin v0.3.0
