-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
395 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
"""Linear regression models.""" | ||
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file) | ||
|
||
from skpro.regression.linear._sklearn import ( | ||
ARDRegression, | ||
BayesianRidge, | ||
) | ||
|
||
__all__ = [ | ||
"ARDRegression", | ||
"BayesianRidge", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,383 @@ | ||
"""Adapters to sklearnn linear regressors with probabilistic components.""" | ||
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file) | ||
# based on sktime pipelines | ||
|
||
__author__ = ["fkiraly"] | ||
|
||
from skpro.regression.adapters.sklearn import SklearnProbaReg | ||
from skpro.regression.base._delegate import _DelegatedProbaRegressor | ||
|
||
|
||
class _DelegateWithFittedParamForwarding(_DelegatedProbaRegressor):
    """Common base class for delegates with attribute forwarding.

    Assumes that the delegate has an attribute ``estimator_``,
    from which fitted attributes are forwarded to self.
    """

    # attribute for _DelegatedProbaRegressor, which then delegates
    # all non-overridden methods to getattr(self, _delegate_name).
    # Named _estimator rather than estimator_ on purpose: it must not be
    # exposed as a fitted param, those are forwarded explicitly instead.
    # See further details in the _DelegatedRegressor docstring.
    _delegate_name = "_estimator"

    def _fit(self, X, y):
        """Fit regressor to training data.

        Writes to self:
            Sets fitted model attributes ending in "_".

        Parameters
        ----------
        X : pandas DataFrame
            feature instances to fit regressor to
        y : pandas DataFrame, must be same length as X
            labels to fit regressor to

        Returns
        -------
        self : reference to self
        """
        delegate = self._get_delegate()
        delegate.fit(X=X, y=y)

        # copy the named fitted attributes from the inner sklearn
        # estimator onto self, so they are visible as fitted params
        inner = delegate.estimator_
        for attr_name in self.FITTED_PARAMS_TO_FORWARD:
            setattr(self, attr_name, getattr(inner, attr_name))

        return self
|
||
|
||
class ARDRegression(_DelegateWithFittedParamForwarding):
    """ARD regression, direct adapter to sklearn ARDRegression.

    Fit the weights of a regression model, using an ARD prior. The weights of
    the regression model are assumed to be in Gaussian distributions.
    Also estimate the parameters lambda (precisions of the distributions of the
    weights) and alpha (precision of the distribution of the noise).
    The estimation is done by an iterative procedures (Evidence Maximization)

    Read more in the :ref:`User Guide <bayesian_regression>`.

    Parameters
    ----------
    max_iter : int, default=None
        Maximum number of iterations. If `None`, it corresponds to `max_iter=300`.
    tol : float, default=1e-3
        Stop the algorithm if w has converged.
    alpha_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the alpha parameter.
    alpha_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the alpha parameter.
    lambda_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the lambda parameter.
    lambda_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the lambda parameter.
    compute_score : bool, default=False
        If True, compute the objective function at each step of the model.
    threshold_lambda : float, default=10 000
        Threshold for removing (pruning) weights with high precision from
        the computation.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).
    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.
    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Coefficients of the regression model (mean of distribution)
    alpha_ : float
        estimated precision of the noise.
    lambda_ : array-like of shape (n_features,)
        estimated precisions of the weights.
    sigma_ : array-like of shape (n_features, n_features)
        estimated variance-covariance matrix of the weights
    scores_ : float
        if computed, value of the objective function (to be maximized)
    n_iter_ : int
        The actual number of iterations to reach the stopping criterion.
    intercept_ : float
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.
    X_offset_ : float
        If `fit_intercept=True`, offset subtracted for centering data to a
        zero mean. Set to np.zeros(n_features) otherwise.
    """

    # fitted attributes of the wrapped sklearn estimator that
    # _DelegateWithFittedParamForwarding._fit copies onto self
    FITTED_PARAMS_TO_FORWARD = [
        "coef_",
        "alpha_",
        "lambda_",
        "sigma_",
        "scores_",
        "n_iter_",
        "intercept_",
        "X_offset_",
    ]

    def __init__(
        self,
        max_iter=None,
        tol=1e-3,
        alpha_1=1e-6,
        alpha_2=1e-6,
        lambda_1=1e-6,
        lambda_2=1e-6,
        compute_score=False,
        threshold_lambda=10000.0,
        fit_intercept=True,
        copy_X=True,
        verbose=False,
    ):
        # local import, so sklearn is only required when constructing
        from sklearn.linear_model import ARDRegression as _SklearnARD

        sklearn_model = _SklearnARD(
            max_iter=max_iter,
            tol=tol,
            alpha_1=alpha_1,
            alpha_2=alpha_2,
            lambda_1=lambda_1,
            lambda_2=lambda_2,
            compute_score=compute_score,
            threshold_lambda=threshold_lambda,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            verbose=verbose,
        )

        # wrap in the skpro sklearn adapter; clone to decouple
        # the delegate from the construction-time instance
        self._estimator = SklearnProbaReg(sklearn_model).clone()

        super().__init__()

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        # default parameters, and a set of non-default parameters
        params_default = {}
        params_nondefault = {
            "max_iter": 300,
            "tol": 2e-3,
            "alpha_1": 2e-6,
            "alpha_2": 2e-6,
            "lambda_1": 2e-6,
            "lambda_2": 2e-6,
            "compute_score": True,
            "threshold_lambda": 15000.0,
            "fit_intercept": False,
        }
        return [params_default, params_nondefault]
|
||
|
||
class BayesianRidge(_DelegateWithFittedParamForwarding):
    """Bayesian ridge regression, direct adapter to sklearn BayesianRidge.

    Fit a Bayesian ridge model. See the Notes section for details on this
    implementation and the optimization of the regularization parameters
    lambda (precision of the weights) and alpha (precision of the noise).

    Read more in the :ref:`User Guide <bayesian_regression>`.

    Parameters
    ----------
    max_iter : int, default=None
        Maximum number of iterations over the complete dataset before
        stopping independently of any early stopping criterion. If `None`, it
        corresponds to `max_iter=300`.
    tol : float, default=1e-3
        Stop the algorithm if w has converged.
    alpha_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the alpha parameter.
    alpha_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the alpha parameter.
    lambda_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the lambda parameter.
    lambda_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the lambda parameter.
    alpha_init : float, default=None
        Initial value for alpha (precision of the noise).
        If not set, alpha_init is 1/Var(y).
    lambda_init : float, default=None
        Initial value for lambda (precision of the weights).
        If not set, lambda_init is 1.
    compute_score : bool, default=False
        If True, compute the log marginal likelihood at each iteration of the
        optimization.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
        The intercept is not treated as a probabilistic parameter
        and thus has no associated variance. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).
    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.
    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Coefficients of the regression model (mean of distribution)
    intercept_ : float
        Independent term in decision function. Set to 0.0 if
        `fit_intercept = False`.
    alpha_ : float
        Estimated precision of the noise.
    lambda_ : float
        Estimated precision of the weights.
    sigma_ : array-like of shape (n_features, n_features)
        Estimated variance-covariance matrix of the weights
    scores_ : array-like of shape (n_iter_+1,)
        If computed_score is True, value of the log marginal likelihood (to be
        maximized) at each iteration of the optimization. The array starts
        with the value of the log marginal likelihood obtained for the initial
        values of alpha and lambda and ends with the value obtained for the
        estimated alpha and lambda.
    n_iter_ : int
        The actual number of iterations to reach the stopping criterion.
    X_offset_ : ndarray of shape (n_features,)
        If `fit_intercept=True`, offset subtracted for centering data to a
        zero mean. Set to np.zeros(n_features) otherwise.
    """

    def __init__(
        self,
        max_iter=None,
        tol=1e-3,
        alpha_1=1e-6,
        alpha_2=1e-6,
        lambda_1=1e-6,
        lambda_2=1e-6,
        alpha_init=None,
        lambda_init=None,
        compute_score=False,
        fit_intercept=True,
        copy_X=True,
        verbose=False,
    ):
        # NOTE(review): constructor params are not stored as self attributes
        # here; confirm whether the skbase get_params machinery requires
        # e.g. self.max_iter = max_iter for parameter inspection/cloning
        # local import, so sklearn is only required when constructing
        from sklearn.linear_model import BayesianRidge

        skl_estimator = BayesianRidge(
            max_iter=max_iter,
            tol=tol,
            alpha_1=alpha_1,
            alpha_2=alpha_2,
            lambda_1=lambda_1,
            lambda_2=lambda_2,
            alpha_init=alpha_init,
            lambda_init=lambda_init,
            compute_score=compute_score,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            verbose=verbose,
        )

        # wrap in the skpro sklearn adapter; clone to decouple
        # the delegate from the construction-time instance
        skpro_est = SklearnProbaReg(skl_estimator)
        self._estimator = skpro_est.clone()

        super().__init__()

    # fitted attributes of the wrapped sklearn estimator that
    # _DelegateWithFittedParamForwarding._fit copies onto self
    FITTED_PARAMS_TO_FORWARD = [
        "coef_",
        "alpha_",
        "lambda_",
        "sigma_",
        "scores_",
        "n_iter_",
        "intercept_",
        "X_offset_",
    ]

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        param1 = {}
        # bug fix: param2 previously contained "threshold_lambda", which is an
        # ARDRegression-only parameter (copy-paste error) — BayesianRidge does
        # not accept it, so BayesianRidge(**param2) raised TypeError.
        # alpha_init / lambda_init are exercised instead, as they are the
        # parameters specific to BayesianRidge.
        param2 = {
            "max_iter": 300,
            "tol": 2e-3,
            "alpha_1": 2e-6,
            "alpha_2": 2e-6,
            "lambda_1": 2e-6,
            "lambda_2": 2e-6,
            "alpha_init": 1.0,
            "lambda_init": 1e-3,
            "compute_score": True,
            "fit_intercept": False,
        }
        return [param1, param2]