Update readme and user guide

alvaromc317 · Aug 14, 2024 · 9631bcd · 9631bcd
1 parent d94b1db
commit 9631bcd
Show file tree

Hide file tree

Showing 4 changed files with 1,001 additions and 581 deletions.
diff --git a/.virtual_documents/Untitled.ipynb b/.virtual_documents/Untitled.ipynb
@@ -0,0 +1 @@
+
diff --git a/.virtual_documents/user_guide.ipynb b/.virtual_documents/user_guide.ipynb
@@ -0,0 +1,241 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error
+from asgl import Regressor
+
+# Generate synthetic regression data
+X, y = make_regression(n_samples=1000, n_features=50, n_informative=25, bias=10, noise=5, random_state=42)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=250)
+
+# Create a Regressor object configured for linear regression with Lasso penalization
+model = Regressor(model='lm', penalization='lasso', lambda1=0.1)
+model.fit(X_train, y_train)
+
+# Make predictions on the test data
+predictions = model.predict(X_test)
+
+# Evaluate the model's performance using mean squared error
+mse = mean_squared_error(predictions, y_test)
+print(f"Mean Squared Error: {mse}")
+
+
+
+
+
+import numpy as np
+from sklearn.model_selection import RandomizedSearchCV
+
+# Generate synthetic regression data
+X, y = make_regression(n_samples=1000, n_features=50, n_informative=25, bias=10, noise=5, random_state=42)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=250)
+
+# Define the group structure
+group_index = np.random.randint(1, 5, size=50)
+
+# Create a Regressor object configured for quantile regression with Adaptive Sparse Group Lasso penalization
+model = Regressor(model='qr', penalization='asgl', quantile=0.5)
+
+# Define the parameter grid for RandomizedSearchCV
+param_grid = {'lambda1': [1e-4, 1e-3, 1e-2, 1e-1, 1], 'alpha': [0, 0.2, 0.4, 0.6, 0.8, 1]}
+rscv = RandomizedSearchCV(model, param_grid, scoring='neg_median_absolute_error')
+rscv.fit(X_train, y_train, **{'group_index': group_index})
+
+
+rscv.best_params_
+
+
+rscv.best_score_
+
+
+
+
+
+# Generate custom weights
+custom_individual_weights = np.random.rand(X_train.shape[1])
+custom_group_weights = np.random.rand(len(np.unique(group_index)))
+
+# Create a Regressor object with custom weights
+model = Regressor(model='lm', penalization='asgl', individual_weights=custom_individual_weights, group_weights=custom_group_weights)
+
+# Fit the model
+model.fit(X_train, y_train, group_index=group_index)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+lm_model = Regressor(model='lm', penalization=None)
+lm_model.fit(X=X, y=y)
+
+coef = lm_model.coef_
+print(np.round(coef, 1))
+
+
+
+
+
+
+
+
+qr_model = Regressor(model='qr', penalization=None, quantile=0.5)
+qr_model.fit(X=X, y=y)
+
+coef = qr_model.coef_
+print(np.round(coef, 1))
+
+
+
+
+
+
+
+
+
+
+
+lasso_model = Regressor(model='lm', penalization='lasso',lambda1=0.1)
+lasso_model.fit(X=X, y=y)
+coef = lasso_model.coef_
+print(np.round(coef, 1))
+
+
+
+
+
+group_index = np.random.randint(1, 5, size=50)
+group_lasso_model = Regressor(model='lm', penalization='gl',lambda1=0.1)
+group_lasso_model.fit(X=X, y=y, group_index=group_index)
+coef = group_lasso_model.coef_
+print(np.round(coef, 1))
+
+
+
+
+
+sgl_model = Regressor(model='lm', penalization='sgl',lambda1=0.1, alpha=0.5)
+sgl_model.fit(X=X, y=y, group_index=group_index)
+coef = sgl_model.coef_
+print(np.round(coef, 1))
+
+
+
+
+
+individual_weights = np.repeat(0.5, 50)
+alasso_model = Regressor(model='lm', penalization='alasso',lambda1=0.1, individual_weights=individual_weights)
+alasso_model.fit(X=X, y=y)
+coef = alasso_model.coef_
+
+
+
+
+
+group_weights=np.repeat(1.5, len(np.unique(group_index)))
+agl_model = Regressor(model='lm', penalization='agl',lambda1=0.1, group_weights=group_weights)
+agl_model.fit(X=X, y=y, group_index=group_index)
+coef = agl_model.coef_
+
+
+
+
+
+
+
+
+
+
+
+asgl_model = Regressor(model='lm', penalization='asgl',lambda1=0.1, alpha=0.5, individual_weights=individual_weights, group_weights=group_weights)
+asgl_model.fit(X=X, y=y, group_index=group_index)
+coef = asgl_model.coef_
+
+
+
+
+
+
+
+
+
+
+
+asgl_model = Regressor(model='lm', penalization='asgl',lambda1=0.1, alpha=0.5, weight_technique='pca_pct', individual_power_weight=1, group_power_weight=1, variability_pct=0.9)
+asgl_model.fit(X=X, y=y, group_index=group_index)
+coef = asgl_model.coef_
+
+
+print(f"Let's see what the individual weights look like:\n{np.round(asgl_model.individual_weights, 2)}")
+
+
+
+
+
+asgl_model = Regressor(model='lm', penalization='asgl',lambda1=0.1, alpha=0.5, weight_technique='pls_pct', individual_power_weight=1, group_power_weight=1, variability_pct=0.9)
+asgl_model.fit(X=X, y=y, group_index=group_index)
+coef = asgl_model.coef_
+
+
+
+
+
+asgl_model = Regressor(model='lm', penalization='asgl',lambda1=0.1, alpha=0.5, weight_technique='pca_1', individual_power_weight=1, group_power_weight=1)
+asgl_model.fit(X=X, y=y, group_index=group_index)
+coef = asgl_model.coef_
+
+
+
+
+
+asgl_model = Regressor(model='lm', penalization='asgl',lambda1=0.1, alpha=0.5, weight_technique='lasso', lambda1_weights=1e-2, individual_power_weight=1, group_power_weight=1)
+asgl_model.fit(X=X, y=y, group_index=group_index)
+coef = asgl_model.coef_
+
+
+
+
+
+asgl_model = Regressor(model='lm', penalization='asgl',lambda1=0.1, alpha=0.5, weight_technique='unpenalized', individual_power_weight=1, group_power_weight=1)
+asgl_model.fit(X=X, y=y, group_index=group_index)
+coef = asgl_model.coef_
+
+
+
+
+
+
diff --git a/README.md b/README.md
@@ -89,7 +89,7 @@ For users currently utilizing the `ASGL` class, we recommend switching
 to the `Regressor` class to ensure continued support and access to the
 latest functionalities.
 
-## Main parameters:
+## Key features:
 
 The `Regressor` class includes the following list of parameters:
 
@@ -212,9 +212,11 @@ genes are grouped into genetic pathways.
 
 For scenarios where the regressors have a known grouped structure, this
 information can be passed to the `Regressor` class during model fitting
-using the group_index parameter. The following example demonstrates this
-with a synthetic group_index. The model will be optimized using
-scikit-learn’s `RandomizedSearchCV` function.
+using the `group_index` parameter. This parameter is an array where each
+element indicates the group at which the associated variable belongs.
+The following example demonstrates this with a synthetic group_index.
+The model will be optimized using scikit-learn’s `RandomizedSearchCV`
+function.
 
 ``` python
 import numpy as np
@@ -240,6 +242,40 @@ This example demonstrates how to fit a quantile regression model with
 Adaptive Sparse Group Lasso penalization, utilizing scikit-learn’s
 `RandomizedSearchCV` to optimize the model’s hyperparameters.
 
+### Example 3: Customizing weights
+
+The `asgl` package offers several built-in methods for estimating
+adaptive weights, controlled via the `weight_technique` parameter. For
+more details onto the inners of each of these alternatives, refer to the
+[associated research
+paper](https://link.springer.com/article/10.1007/s11634-020-00413-8) or
+to the user guide. However, for users requiring extensive customization,
+the package allows for the direct specification of custom weights
+through the `individual_weights` and `group_weights` parameters. This
+allows the users to implement their own weight computation techniques
+and use them within the `asgl` framework.
+
+When using custom weights, ensure that the length of
+`individual_weights` matches the number of variables, and the length of
+`group_weights` matches the number of groups. Below is an example
+demonstrating how to fit a model with custom individual and group
+weights:
+
+``` python
+import numpy as np
+from asgl import Regressor
+
+# Generate custom weights
+custom_individual_weights = np.random.rand(X_train.shape[1])
+custom_group_weights = np.random.rand(len(np.unique(group_index)))
+
+# Create a Regressor object with custom weights
+model = Regressor(model='lm', penalization='asgl', individual_weights=custom_individual_weights, group_weights=custom_group_weights)
+
+# Fit the model
+model.fit(X_train, y_train, group_index=group_index)
+```
+
 ## Contributions
 
 Contributions are welcome! Please submit a pull request or open an issue