Skip to content

Commit

Permalink
concrete linear reg
Browse files Browse the repository at this point in the history
  • Loading branch information
fkiraly committed Jan 3, 2024
1 parent 26b9ab0 commit 2af082d
Show file tree
Hide file tree
Showing 2 changed files with 395 additions and 0 deletions.
12 changes: 12 additions & 0 deletions skpro/regression/linear/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Linear regression models."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

from skpro.regression.linear._sklearn import (
ARDRegression,
BayesianRidge,
)

__all__ = [
"ARDRegression",
"BayesianRidge",
]
383 changes: 383 additions & 0 deletions skpro/regression/linear/_sklearn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,383 @@
"""Adapters to sklearnn linear regressors with probabilistic components."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
# based on sktime pipelines

__author__ = ["fkiraly"]

from skpro.regression.adapters.sklearn import SklearnProbaReg
from skpro.regression.base._delegate import _DelegatedProbaRegressor


class _DelegateWithFittedParamForwarding(_DelegatedProbaRegressor):
    """Common base class for delegates with attribute forwarding.

    Assumes that the delegate has an attribute ``estimator_``,
    from which fitted attributes listed in the class attribute
    ``FITTED_PARAMS_TO_FORWARD`` are forwarded to ``self`` after fitting.
    """

    # attribute for _DelegatedProbaRegressor, which then delegates
    # all non-overridden methods are same as of getattr(self, _delegate_name)
    # see further details in _DelegatedRegressor docstring
    _delegate_name = "_estimator"
    # _estimator, not estimator_, because we do not want to expose it as
    # fitted params - fitted params are instead forwarded

    def _fit(self, X, y):
        """Fit regressor to training data.

        Writes to self:
            Sets fitted model attributes ending in "_".

        Parameters
        ----------
        X : pandas DataFrame
            feature instances to fit regressor to
        y : pandas DataFrame, must be same length as X
            labels to fit regressor to

        Returns
        -------
        self : reference to self
        """
        estimator = self._get_delegate()
        estimator.fit(X=X, y=y)

        inner = estimator.estimator_
        for attr in self.FITTED_PARAMS_TO_FORWARD:
            # some fitted attributes are conditional on hyper-parameters,
            # e.g., sklearn Bayesian linear models set scores_ only if
            # compute_score=True - skip attributes the fitted estimator
            # does not have instead of raising AttributeError
            if hasattr(inner, attr):
                setattr(self, attr, getattr(inner, attr))

        return self


class ARDRegression(_DelegateWithFittedParamForwarding):
    """ARD regression, direct adapter to sklearn ARDRegression.

    Fit the weights of a regression model, using an ARD prior. The weights of
    the regression model are assumed to be in Gaussian distributions.
    Also estimate the parameters lambda (precisions of the distributions of the
    weights) and alpha (precision of the distribution of the noise).
    The estimation is done by an iterative procedures (Evidence Maximization)

    Read more in the :ref:`User Guide <bayesian_regression>`.

    Parameters
    ----------
    max_iter : int, default=None
        Maximum number of iterations. If `None`, it corresponds to `max_iter=300`.
    tol : float, default=1e-3
        Stop the algorithm if w has converged.
    alpha_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the alpha parameter.
    alpha_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the alpha parameter.
    lambda_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the lambda parameter.
    lambda_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the lambda parameter.
    compute_score : bool, default=False
        If True, compute the objective function at each step of the model.
    threshold_lambda : float, default=10 000
        Threshold for removing (pruning) weights with high precision from
        the computation.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).
    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.
    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Coefficients of the regression model (mean of distribution)
    alpha_ : float
        estimated precision of the noise.
    lambda_ : array-like of shape (n_features,)
        estimated precisions of the weights.
    sigma_ : array-like of shape (n_features, n_features)
        estimated variance-covariance matrix of the weights
    scores_ : float
        if computed, value of the objective function (to be maximized)
    n_iter_ : int
        The actual number of iterations to reach the stopping criterion.
    intercept_ : float
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.
    X_offset_ : float
        If `fit_intercept=True`, offset subtracted for centering data to a
        zero mean. Set to np.zeros(n_features) otherwise.
    """

    def __init__(
        self,
        max_iter=None,
        tol=1e-3,
        alpha_1=1e-6,
        alpha_2=1e-6,
        lambda_1=1e-6,
        lambda_2=1e-6,
        compute_score=False,
        threshold_lambda=10000.0,
        fit_intercept=True,
        copy_X=True,
        verbose=False,
    ):
        # sklearn import is deferred to construction time,
        # consistent with the sibling BayesianRidge adapter below
        from sklearn.linear_model import ARDRegression

        skl_estimator = ARDRegression(
            max_iter=max_iter,
            tol=tol,
            alpha_1=alpha_1,
            alpha_2=alpha_2,
            lambda_1=lambda_1,
            lambda_2=lambda_2,
            compute_score=compute_score,
            threshold_lambda=threshold_lambda,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            verbose=verbose,
        )

        skpro_est = SklearnProbaReg(skl_estimator)
        self._estimator = skpro_est.clone()

        super().__init__()

    # fitted attributes of the inner sklearn estimator forwarded to self
    # by _DelegateWithFittedParamForwarding._fit
    FITTED_PARAMS_TO_FORWARD = [
        "coef_",
        "alpha_",
        "lambda_",
        "sigma_",
        "scores_",
        "n_iter_",
        "intercept_",
        "X_offset_",
    ]

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        param1 = {}
        param2 = {
            "max_iter": 300,
            "tol": 2e-3,
            "alpha_1": 2e-6,
            "alpha_2": 2e-6,
            "lambda_1": 2e-6,
            "lambda_2": 2e-6,
            "compute_score": True,
            "threshold_lambda": 15000.0,
            "fit_intercept": False,
        }
        return [param1, param2]


class BayesianRidge(_DelegateWithFittedParamForwarding):
    """Bayesian ridge regression, direct adapter to sklearn BayesianRidge.

    Fit a Bayesian ridge model. See the Notes section for details on this
    implementation and the optimization of the regularization parameters
    lambda (precision of the weights) and alpha (precision of the noise).

    Read more in the :ref:`User Guide <bayesian_regression>`.

    Parameters
    ----------
    max_iter : int, default=None
        Maximum number of iterations over the complete dataset before
        stopping independently of any early stopping criterion. If `None`, it
        corresponds to `max_iter=300`.
    tol : float, default=1e-3
        Stop the algorithm if w has converged.
    alpha_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the alpha parameter.
    alpha_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the alpha parameter.
    lambda_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the lambda parameter.
    lambda_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the lambda parameter.
    alpha_init : float, default=None
        Initial value for alpha (precision of the noise).
        If not set, alpha_init is 1/Var(y).
    lambda_init : float, default=None
        Initial value for lambda (precision of the weights).
        If not set, lambda_init is 1.
    compute_score : bool, default=False
        If True, compute the log marginal likelihood at each iteration of the
        optimization.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
        The intercept is not treated as a probabilistic parameter
        and thus has no associated variance. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).
    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.
    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Coefficients of the regression model (mean of distribution)
    intercept_ : float
        Independent term in decision function. Set to 0.0 if
        `fit_intercept = False`.
    alpha_ : float
        Estimated precision of the noise.
    lambda_ : float
        Estimated precision of the weights.
    sigma_ : array-like of shape (n_features, n_features)
        Estimated variance-covariance matrix of the weights
    scores_ : array-like of shape (n_iter_+1,)
        If computed_score is True, value of the log marginal likelihood (to be
        maximized) at each iteration of the optimization. The array starts
        with the value of the log marginal likelihood obtained for the initial
        values of alpha and lambda and ends with the value obtained for the
        estimated alpha and lambda.
    n_iter_ : int
        The actual number of iterations to reach the stopping criterion.
    X_offset_ : ndarray of shape (n_features,)
        If `fit_intercept=True`, offset subtracted for centering data to a
        zero mean. Set to np.zeros(n_features) otherwise.
    """

    def __init__(
        self,
        max_iter=None,
        tol=1e-3,
        alpha_1=1e-6,
        alpha_2=1e-6,
        lambda_1=1e-6,
        lambda_2=1e-6,
        alpha_init=None,
        lambda_init=None,
        compute_score=False,
        fit_intercept=True,
        copy_X=True,
        verbose=False,
    ):
        # sklearn import is deferred to construction time,
        # consistent with the sibling ARDRegression adapter above
        from sklearn.linear_model import BayesianRidge

        skl_estimator = BayesianRidge(
            max_iter=max_iter,
            tol=tol,
            alpha_1=alpha_1,
            alpha_2=alpha_2,
            lambda_1=lambda_1,
            lambda_2=lambda_2,
            alpha_init=alpha_init,
            lambda_init=lambda_init,
            compute_score=compute_score,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            verbose=verbose,
        )

        skpro_est = SklearnProbaReg(skl_estimator)
        self._estimator = skpro_est.clone()

        super().__init__()

    # fitted attributes of the inner sklearn estimator forwarded to self
    # by _DelegateWithFittedParamForwarding._fit
    FITTED_PARAMS_TO_FORWARD = [
        "coef_",
        "alpha_",
        "lambda_",
        "sigma_",
        "scores_",
        "n_iter_",
        "intercept_",
        "X_offset_",
    ]

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        param1 = {}
        # note: BayesianRidge has no threshold_lambda parameter
        # (that parameter belongs to ARDRegression only), so it must
        # not appear here - passing it would raise TypeError on construction
        param2 = {
            "max_iter": 300,
            "tol": 2e-3,
            "alpha_1": 2e-6,
            "alpha_2": 2e-6,
            "lambda_1": 2e-6,
            "lambda_2": 2e-6,
            "alpha_init": 1.0,
            "lambda_init": 2.0,
            "compute_score": True,
            "fit_intercept": False,
        }
        return [param1, param2]

0 comments on commit 2af082d

Please sign in to comment.