From e98a21c4086416cac415ff580ae877445692f86d Mon Sep 17 00:00:00 2001
From: Holger Roth <6304754+holgerroth@users.noreply.github.com>
Date: Thu, 13 Feb 2025 17:45:16 -0500
Subject: [PATCH 1/2] Enhance lightning api (#3225)

Fixes # .

### Description
Enhance the Lightning client API to warn when there are unexpected or missing keys while loading back the global state dictionary. Skip updating the fit loop to support customized Lightning trainers, such as NeMo & BioNeMo.

### Types of changes
- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing functionality to change).
- [ ] New tests added to cover the changes.
- [ ] Quick tests passed locally by running `./runtest.sh`.
- [ ] In-line docstrings updated.
- [ ] Documentation updated.
---
 nvflare/app_opt/lightning/api.py | 38 +++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/nvflare/app_opt/lightning/api.py b/nvflare/app_opt/lightning/api.py
index 4e674e5915..45629a6b42 100644
--- a/nvflare/app_opt/lightning/api.py
+++ b/nvflare/app_opt/lightning/api.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from typing import Dict
 
 import pytorch_lightning as pl
@@ -29,7 +30,9 @@
 FL_META_KEY = "__fl_meta__"
 
 
-def patch(trainer: pl.Trainer, restore_state: bool = True, load_state_dict_strict: bool = True):
+def patch(
+    trainer: pl.Trainer, restore_state: bool = True, load_state_dict_strict: bool = True, update_fit_loop: bool = True
+):
     """Patches the PyTorch Lightning Trainer for usage with NVFlare.
 
     Args:
@@ -39,6 +42,8 @@ def patch(trainer: pl.Trainer, restore_state: bool = True, load_state_dict_stric
         load_state_dict_strict: exposes `strict` argument of `torch.nn.Module.load_state_dict()`
             used to load the received model. Defaults to `True`.
             See https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.load_state_dict for details.
+        update_fit_loop: whether to increase `trainer.fit_loop.max_epochs` and `trainer.fit_loop.epoch_loop.max_steps` each FL round.
+            Defaults to `True` which is suitable for most PyTorch Lightning applications.
 
     Example:
 
@@ -75,7 +80,9 @@ def __init__(self):
 
     callbacks = []
     if not any(isinstance(cb, FLCallback) for cb in callbacks):
-        fl_callback = FLCallback(rank=trainer.global_rank, load_state_dict_strict=load_state_dict_strict)
+        fl_callback = FLCallback(
+            rank=trainer.global_rank, load_state_dict_strict=load_state_dict_strict, update_fit_loop=update_fit_loop
+        )
         callbacks.append(fl_callback)
 
     if restore_state and not any(isinstance(cb, RestoreState) for cb in callbacks):
@@ -85,7 +92,7 @@
 
 
 class FLCallback(Callback):
-    def __init__(self, rank: int = 0, load_state_dict_strict: bool = True):
+    def __init__(self, rank: int = 0, load_state_dict_strict: bool = True, update_fit_loop: bool = True):
         """FL callback for lightning API.
 
         Args:
@@ -93,6 +100,8 @@ def __init__(self, rank: int = 0, load_state_dict_strict: bool = True):
             load_state_dict_strict: exposes `strict` argument of `torch.nn.Module.load_state_dict()`
                 used to load the received model. Defaults to `True`.
                 See https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.load_state_dict for details.
+            update_fit_loop: whether to increase `trainer.fit_loop.max_epochs` and `trainer.fit_loop.epoch_loop.max_steps` each FL round.
+                Defaults to `True` which is suitable for most PyTorch Lightning applications.
         """
         super(FLCallback, self).__init__()
         init(rank=str(rank))
@@ -108,6 +117,9 @@ def __init__(self, rank: int = 0, load_state_dict_strict: bool = True):
         self._is_evaluation = False
         self._is_submit_model = False
         self._load_state_dict_strict = load_state_dict_strict
+        self._update_fit_loop = update_fit_loop
+
+        self.logger = logging.getLogger(self.__class__.__name__)
 
     def reset_state(self, trainer):
         """Resets the state.
@@ -130,10 +142,12 @@ def reset_state(self, trainer):
 
         # for next round
         trainer.num_sanity_val_steps = 0  # Turn off sanity validation steps in following rounds of FL
-        if self.total_local_epochs and self.max_epochs_per_round is not None:
-            trainer.fit_loop.max_epochs = self.max_epochs_per_round + self.total_local_epochs
-        if self.total_local_steps and self.max_steps_per_round is not None:
-            trainer.fit_loop.epoch_loop.max_steps = self.max_steps_per_round + self.total_local_steps
+
+        if self._update_fit_loop:
+            if self.total_local_epochs and self.max_epochs_per_round is not None:
+                trainer.fit_loop.max_epochs = self.max_epochs_per_round + self.total_local_epochs
+            if self.total_local_steps and self.max_steps_per_round is not None:
+                trainer.fit_loop.epoch_loop.max_steps = self.max_steps_per_round + self.total_local_steps
 
         # resets attributes
         self.metrics = None
@@ -184,7 +198,15 @@ def _receive_and_update_model(self, trainer, pl_module):
         model = self._receive_model(trainer)
         if model:
             if model.params:
-                pl_module.load_state_dict(model.params, strict=self._load_state_dict_strict)
+                missing_keys, unexpected_keys = pl_module.load_state_dict(
+                    model.params, strict=self._load_state_dict_strict
+                )
+                if len(missing_keys) > 0:
+                    self.logger.warning(f"There were missing keys when loading the global state_dict: {missing_keys}")
+                if len(unexpected_keys) > 0:
+                    self.logger.warning(
+                        f"There were unexpected keys when loading the global state_dict: {unexpected_keys}"
+                    )
             if model.current_round is not None:
                 self.current_round = model.current_round

From 583daeb9c60a242440aca93d385055272cbf419e Mon Sep 17 00:00:00 2001
From: Ziyue Xu
Date: Thu, 13 Feb 2025 20:09:48 -0500
Subject: [PATCH 2/2] Consolidate xgboost examples (#3214)

Fixes # .

### Description
Combine three XGBoost example folders into one, reorganize, and migrate from job templates to the Job API.

### Types of changes
- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing functionality to change).
- [ ] New tests added to cover the changes.
- [ ] Quick tests passed locally by running `./runtest.sh`.
- [ ] In-line docstrings updated.
- [ ] Documentation updated.
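For context on the API change in PATCH 1/2 above, a minimal client-side sketch of how the patched trainer might be used. This is an assumption-laden illustration, not part of the patch: it assumes the usual `nvflare.client.lightning` entry point re-exports this `patch()`, and `LitModel` is a toy placeholder module.

```python
# Hedged sketch: calling the enhanced patch() API from PATCH 1/2.
import pytorch_lightning as pl
import torch
import nvflare.client.lightning as flare  # assumption: re-exports patch() from this diff


class LitModel(pl.LightningModule):  # toy placeholder model
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(28, 1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.binary_cross_entropy_with_logits(self.layer(x).squeeze(-1), y)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=1e-3)


trainer = pl.Trainer(max_epochs=1)
# For customized trainers (e.g. NeMo / BioNeMo) that manage their own fit loop,
# skip the per-round fit-loop extension; with non-strict loading, missing or
# unexpected keys in the received global state_dict are now logged as warnings.
flare.patch(trainer, load_state_dict_strict=False, update_fit_loop=False)
```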
---
 examples/advanced/vertical_xgboost/README.md | 103 -------
 .../figs/vertical_xgboost_graph.png | Bin 10860 -> 0 bytes
 .../advanced/vertical_xgboost/prepare_data.sh | 23 --
 .../vertical_xgboost/requirements.txt | 7 -
 examples/advanced/xgboost/README.md | 252 +++--------------
 examples/advanced/xgboost/fedxgb/README.md | 212 ++++++++++++++
 .../{tree-based => fedxgb}/figs/20_client.png | Bin
 .../{tree-based => fedxgb}/figs/5_client.png | Bin
 .../figs/Centralized.png | Bin
 .../xgboost/fedxgb/figs/histogram.png | Bin 0 -> 21014 bytes
 .../fedxgb}/figs/vertical_fl.png | Bin
 .../xgboost/fedxgb/figs/vertical_xgb.png | Bin 0 -> 12621 bytes
 .../notebooks}/data_job_setup.ipynb | 0
 .../notebooks}/xgboost_histogram_higgs.ipynb | 0
 .../notebooks}/xgboost_tree_higgs.ipynb | 0
 .../advanced/xgboost/fedxgb/prepare_data.sh | 37 +++
 .../fedxgb/run_experiment_centralized.sh | 15 +
 .../run_experiment_horizontal_histogram.sh | 5 +
 .../fedxgb/run_experiment_horizontal_tree.sh | 12 +
 .../xgboost/fedxgb/run_experiment_vertical.sh | 3 +
 .../src}/higgs_data_loader.py | 0
 .../psi => xgboost/fedxgb/src}/local_psi.py | 0
 .../fedxgb/src}/vertical_data_loader.py | 0
 .../utils/baseline_centralized.py | 10 +-
 .../utils/prepare_data_horizontal.py} | 0
 .../fedxgb/utils/prepare_data_vertical.py} | 0
 .../xgboost/fedxgb/xgb_fl_job_horizontal.py | 260 ++++++++++++++++++
 .../xgboost/fedxgb/xgb_fl_job_vertical.py | 107 +++++++
 .../xgboost/fedxgb/xgb_fl_job_vertical_psi.py | 70 +++++
 .../fedxgb_secure}/.gitignore | 0
 .../fedxgb_secure}/README.md | 0
 .../fedxgb_secure}/figs/tree.base.png | Bin
 .../figs/tree.vert.secure.0.png | Bin
 .../figs/tree.vert.secure.1.png | Bin
 .../figs/tree.vert.secure.2.png | Bin
 .../fedxgb_secure}/prepare_data.sh | 0
 .../fedxgb_secure}/prepare_flare_job.sh | 0
 .../fedxgb_secure}/project.yml | 0
 .../fedxgb_secure}/run_training_flare.sh | 0
 .../fedxgb_secure}/run_training_standalone.sh | 0
 .../train_standalone/train_base.py | 2 +-
 .../train_standalone/train_federated.py | 2 +-
 .../fedxgb_secure}/utils/prepare_data_base.py | 0
 .../utils/prepare_data_horizontal.py | 0
 .../utils/prepare_data_traintest_split.py | 0
 .../utils/prepare_data_vertical.py | 0
 .../xgboost/histogram-based/README.md | 77 ------
 .../base/app/config/config_fed_client.json | 50 ----
 .../base/app/config/config_fed_server.json | 23 --
 .../histogram-based/jobs/base/meta.json | 10 -
 .../base_v2/app/config/config_fed_client.json | 39 ---
 .../base_v2/app/config/config_fed_server.json | 37 ---
 .../base_v2/app/custom/higgs_data_loader.py | 77 ------
 .../histogram-based/jobs/base_v2/meta.json | 10 -
 .../xgboost/histogram-based/prepare_data.sh | 5 -
 .../xgboost/histogram-based/requirements.txt | 9 -
 .../run_experiment_centralized.sh | 9 -
 .../run_experiment_simulator.sh | 9 -
 examples/advanced/xgboost/prepare_data.sh | 25 --
 .../advanced/xgboost/prepare_job_config.sh | 26 --
 .../requirements.txt | 10 +-
 .../advanced/xgboost/tree-based/README.md | 101 -------
 .../app/config/config_fed_client.json | 41 ---
 .../app/config/config_fed_server.json | 48 ----
 .../app/custom/higgs_data_loader.py | 77 ------
 .../tree-based/jobs/bagging_base/meta.json | 9 -
 .../app/config/config_fed_client.json | 39 ---
 .../app/config/config_fed_server.json | 38 ---
 .../app/custom/higgs_data_loader.py | 77 ------
 .../tree-based/jobs/cyclic_base/meta.json | 9 -
 .../xgboost/tree-based/plot-requirements.txt | 2 -
 .../xgboost/tree-based/prepare_data.sh | 5 -
 .../xgboost/tree-based/requirements.txt | 9 -
 .../tree-based/run_experiment_centralized.sh | 13 -
 .../tree-based/run_experiment_simulator.sh | 22 --
 .../utils/plot_tensorboard_events.py | 136 ---------
 .../xgboost/utils/prepare_job_config.py | 239 ----------------
 .../app_opt/psi/dh_psi/dh_psi_task_handler.py | 2 +
 78 files changed, 780 insertions(+), 1623 deletions(-)
 delete mode 100644 examples/advanced/vertical_xgboost/README.md
 delete mode 100644 examples/advanced/vertical_xgboost/figs/vertical_xgboost_graph.png
 delete mode 100755 examples/advanced/vertical_xgboost/prepare_data.sh
 delete mode 100644 examples/advanced/vertical_xgboost/requirements.txt
 create mode 100644 examples/advanced/xgboost/fedxgb/README.md
 rename examples/advanced/xgboost/{tree-based => fedxgb}/figs/20_client.png (100%)
 rename examples/advanced/xgboost/{tree-based => fedxgb}/figs/5_client.png (100%)
 rename examples/advanced/xgboost/{tree-based => fedxgb}/figs/Centralized.png (100%)
 create mode 100644 examples/advanced/xgboost/fedxgb/figs/histogram.png
 rename examples/advanced/{vertical_xgboost => xgboost/fedxgb}/figs/vertical_fl.png (100%)
 create mode 100644 examples/advanced/xgboost/fedxgb/figs/vertical_xgb.png
 rename examples/advanced/xgboost/{ => fedxgb/notebooks}/data_job_setup.ipynb (100%)
 rename examples/advanced/xgboost/{histogram-based => fedxgb/notebooks}/xgboost_histogram_higgs.ipynb (100%)
 rename examples/advanced/xgboost/{tree-based => fedxgb/notebooks}/xgboost_tree_higgs.ipynb (100%)
 create mode 100755 examples/advanced/xgboost/fedxgb/prepare_data.sh
 create mode 100755 examples/advanced/xgboost/fedxgb/run_experiment_centralized.sh
 create mode 100755 examples/advanced/xgboost/fedxgb/run_experiment_horizontal_histogram.sh
 create mode 100755 examples/advanced/xgboost/fedxgb/run_experiment_horizontal_tree.sh
 create mode 100755 examples/advanced/xgboost/fedxgb/run_experiment_vertical.sh
 rename examples/advanced/xgboost/{histogram-based/jobs/base/app/custom => fedxgb/src}/higgs_data_loader.py (100%)
 rename examples/advanced/{vertical_xgboost/code/psi => xgboost/fedxgb/src}/local_psi.py (100%)
 rename examples/advanced/{vertical_xgboost/code/vertical_xgb => xgboost/fedxgb/src}/vertical_data_loader.py (100%)
 rename examples/advanced/xgboost/{ => fedxgb}/utils/baseline_centralized.py (94%)
 rename examples/advanced/xgboost/{utils/prepare_data_split.py => fedxgb/utils/prepare_data_horizontal.py} (100%)
 rename examples/advanced/{vertical_xgboost/utils/prepare_data.py => xgboost/fedxgb/utils/prepare_data_vertical.py} (100%)
 create mode 100644 examples/advanced/xgboost/fedxgb/xgb_fl_job_horizontal.py
 create mode 100644 examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical.py
 create mode 100644 examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical_psi.py
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/.gitignore (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/README.md (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/figs/tree.base.png (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/figs/tree.vert.secure.0.png (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/figs/tree.vert.secure.1.png (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/figs/tree.vert.secure.2.png (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/prepare_data.sh (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/prepare_flare_job.sh (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/project.yml (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/run_training_flare.sh (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/run_training_standalone.sh (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/train_standalone/train_base.py (98%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/train_standalone/train_federated.py (98%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/utils/prepare_data_base.py (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/utils/prepare_data_horizontal.py (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/utils/prepare_data_traintest_split.py (100%)
 rename examples/advanced/{xgboost_secure => xgboost/fedxgb_secure}/utils/prepare_data_vertical.py (100%)
 delete mode 100644 examples/advanced/xgboost/histogram-based/README.md
 delete mode 100755 examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_client.json
 delete mode 100755 examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_server.json
 delete mode 100644 examples/advanced/xgboost/histogram-based/jobs/base/meta.json
 delete mode 100755 examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_client.json
 delete mode 100755 examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_server.json
 delete mode 100644 examples/advanced/xgboost/histogram-based/jobs/base_v2/app/custom/higgs_data_loader.py
 delete mode 100644 examples/advanced/xgboost/histogram-based/jobs/base_v2/meta.json
 delete mode 100755 examples/advanced/xgboost/histogram-based/prepare_data.sh
 delete mode 100644 examples/advanced/xgboost/histogram-based/requirements.txt
 delete mode 100755 examples/advanced/xgboost/histogram-based/run_experiment_centralized.sh
 delete mode 100755 examples/advanced/xgboost/histogram-based/run_experiment_simulator.sh
 delete mode 100755 examples/advanced/xgboost/prepare_data.sh
 delete mode 100755 examples/advanced/xgboost/prepare_job_config.sh
 rename examples/advanced/{xgboost_secure => xgboost}/requirements.txt (90%)
 delete mode 100644 examples/advanced/xgboost/tree-based/README.md
 delete mode 100755 examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_client.json
 delete mode 100755 examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_server.json
 delete mode 100644 examples/advanced/xgboost/tree-based/jobs/bagging_base/app/custom/higgs_data_loader.py
 delete mode 100644 examples/advanced/xgboost/tree-based/jobs/bagging_base/meta.json
 delete mode 100755 examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_client.json
 delete mode 100755 examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_server.json
 delete mode 100644 examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/custom/higgs_data_loader.py
 delete mode 100644 examples/advanced/xgboost/tree-based/jobs/cyclic_base/meta.json
 delete mode 100644 examples/advanced/xgboost/tree-based/plot-requirements.txt
 delete mode 100755 examples/advanced/xgboost/tree-based/prepare_data.sh
 delete mode 100644 examples/advanced/xgboost/tree-based/requirements.txt
 delete mode 100755 examples/advanced/xgboost/tree-based/run_experiment_centralized.sh
 delete mode 100755 examples/advanced/xgboost/tree-based/run_experiment_simulator.sh
 delete mode 100644 examples/advanced/xgboost/tree-based/utils/plot_tensorboard_events.py
 delete mode 100644 examples/advanced/xgboost/utils/prepare_job_config.py

diff --git a/examples/advanced/vertical_xgboost/README.md b/examples/advanced/vertical_xgboost/README.md
deleted file mode 100644
index bddf82f2a6..0000000000
--- a/examples/advanced/vertical_xgboost/README.md
+++ /dev/null
@@ -1,103 +0,0 @@
-# Vertical Federated XGBoost
-This example shows how to use vertical federated learning with [NVIDIA FLARE](https://nvflare.readthedocs.io/en/main/index.html) on tabular data.
-Here we use the optimized gradient boosting library [XGBoost](https://github.com/dmlc/xgboost) and leverage its federated learning support.
-
-Before starting please make sure you set up a [virtual environment](../../README.md#set-up-a-virtual-environment) and install the additional requirements:
-```
-python3 -m pip install -r requirements.txt
-```
-
-## Preparing HIGGS Data
-In this example we showcase a binary classification task based on the [HIGGS dataset](https://mlphysics.ics.uci.edu/data/higgs/), which contains 11 million instances, each with 28 features and 1 class label.
-
-### Download and Store Dataset
-First download the dataset from the HIGGS link above, which is a single zipped `.csv` file.
-By default, we assume the dataset is downloaded, uncompressed, and stored in `DATASET_ROOT/HIGGS.csv`.
-
-### Vertical Data Splits
-In vertical federated learning, sites share overlapping data samples (rows), but contain different features (columns).
-In order to achieve this, we split the HIGGS dataset both horizontally and vertically. As a result, each site has an overlapping subset of the rows and a subset of the 29 columns. Since the first column of HIGGS is the class label, we give site-1 the label column for simplicity's sake.
-
-vertical fl diagram
-
-Run the following command to prepare the data splits:
-```
-./prepare_data.sh DATASET_ROOT
-```
-> **_NOTE:_** make sure to put the correct path for `DATASET_ROOT`.
-
-### Private Set Intersection (PSI)
-Since not every site will have the same set of data samples (rows), we can use PSI to compare encrypted versions of the sites' datasets in order to jointly compute the intersection based on common IDs. In this example, the HIGGS dataset does not contain unique identifiers so we add a temporary `uid_{idx}` to each instance and give each site a portion of the HIGGS dataset that includes a common overlap. Afterwards the identifiers are dropped since they are only used for matching, and training is then done on the intersected data. To learn more about our PSI protocol implementation, see our [psi example](../psi/README.md).
-
-> **_NOTE:_** The uid can be a composition of multiple variables with a transformation, however in this example we use indices for simplicity. PSI can also be used for computing the intersection of overlapping features, but here we give each site unique features.
-
-Create the psi job using the predefined psi_csv template:
-```
-nvflare job create -j ./jobs/vertical_xgb_psi -w psi_csv -sd ./code/psi -force
-```
-
-Run the psi job to calculate the dataset intersection of the clients at `psi/intersection.txt` inside the psi workspace:
-```
-nvflare simulator ./jobs/vertical_xgb_psi -w /tmp/nvflare/vertical_xgb_psi -n 2 -t 2
-```
-
-## Vertical XGBoost Federated Learning with FLARE
-
-This Vertical XGBoost example leverages the recently added [vertical federated learning support](https://github.com/dmlc/xgboost/issues/8424) in the XGBoost open-source library. This allows for the distributed XGBoost algorithm to operate in a federated manner on vertically split data.
-
-For integrating with FLARE, we can use the predefined `XGBFedController` to run the federated server and control the workflow.
-
-Next, we can use `FedXGBHistogramExecutor` and set XGBoost training parameters in `config_fed_client.json`, or define new training logic by overwriting the `xgb_train()` method.
-
-Lastly, we must subclass `XGBDataLoader` and implement the `load_data()` method. For vertical federated learning, it is important when creating the `xgb.Dmatrix` to set `data_split_mode=1` for column mode, and to specify the presence of a label column `?format=csv&label_column=0` for the csv file. To support PSI, the dataloader can also read in the dataset based on the calculated intersection, and split the data into training and validation.
-
-> **_NOTE:_** For secure mode, make sure to provide the required certificates for the federated communicator.
-
-## Run the Example
-Create the vertical xgboost job using the predefined vertical_xgb template:
-```
-nvflare job create -j ./jobs/vertical_xgb -w vertical_xgb -sd ./code/vertical_xgb -force
-```
-
-Run the vertical xgboost job:
-```
-nvflare simulator ./jobs/vertical_xgb -w /tmp/nvflare/vertical_xgb -n 2 -t 2
-```
-
-The model will be saved to `test.model.json`.
-
-(Feel free to modify the scripts and jobs as desired to change arguments such as number of clients, dataset sizes, training params, etc.)
-
-### GPU Support
-By default, CPU based training is used.
-
-In order to enable GPU accelerated training, first ensure that your machine has CUDA installed and has at least one GPU.
-In `config_fed_client.json` set `"use_gpus": true` and `"tree_method": "hist"` in `xgb_params`.
-Then, in `FedXGBHistogramExecutor` we can use the `device` parameter to map each rank to a GPU device ordinal in `xgb_params`.
-If using multiple GPUs, we can map each rank to a different GPU device, however you can also map each rank to the same GPU device if using a single GPU.
-
-We can create a GPU enabled job using the job CLI:
-```
-nvflare job create -j ./jobs/vertical_xgb_gpu -w vertical_xgb \
--f config_fed_client.conf \
--f config_fed_server.conf use_gpus=true \
--sd ./code/vertical_xgb \
--force
-```
-
-This job can be run:
-```
-nvflare simulator ./jobs/vertical_xgb_gpu -w /tmp/nvflare/vertical_xgb_gpu -n 2 -t 2
-```
-
-## Results
-Model accuracy can be visualized in tensorboard:
-```
-tensorboard --logdir /tmp/nvflare/vertical_xgb/server/simulate_job/tb_events
-```
-
-An example training (pink) and validation (orange) AUC graph from running vertical XGBoost on HIGGS:
-(Used an intersection of 50000 samples across 5 clients each with different features,
-and ran for ~50 rounds due to early stopping.)
-
-![Vertical XGBoost graph](./figs/vertical_xgboost_graph.png)
diff --git a/examples/advanced/vertical_xgboost/figs/vertical_xgboost_graph.png b/examples/advanced/vertical_xgboost/figs/vertical_xgboost_graph.png
deleted file mode 100644
index 56e7f2c03ca96243fe6ada04928db0039cd49d66..0000000000000000000000000000000000000000
GIT binary patch
[binary PNG patch data omitted]
diff --git a/examples/advanced/xgboost/README.md b/examples/advanced/xgboost/README.md
--- a/examples/advanced/xgboost/README.md
+++ b/examples/advanced/xgboost/README.md
-> **_NOTE:_** If the dataset is downloaded in another place,
-> make sure to modify the corresponding `DATASET_PATH` inside `prepare_data.sh`.
-
-### Data Split
-Since HIGGS dataset is already randomly recorded,
-data split will be specified by the continuous index ranges for each client,
-rather than a vector of random instance indices.
-We provide four options to split the dataset to simulate the non-uniformity in data quantity:
-
-1. uniform: all clients has the same amount of data
-2. linear: the amount of data is linearly correlated with the client ID (1 to M)
-3. square: the amount of data is correlated with the client ID in a squared fashion (1^2 to M^2)
-4. exponential: the amount of data is correlated with the client ID in an exponential fashion (exp(1) to exp(M))
-
-The choice of data split depends on dataset and the number of participants.
-
-For a large dataset like HIGGS, if the number of clients is small (e.g. 5),
-each client will still have sufficient data to train on with uniform split,
-and hence exponential would be used to observe the performance drop caused by non-uniform data split.
-If the number of clients is large (e.g. 20), exponential split will be too aggressive, and linear/square should be used.
-
-Data splits used in this example can be generated with
-```
-bash prepare_data.sh
-```
-
-This will generate data splits for three client sizes: 2, 5 and 20, and 3 split conditions: uniform, square, and exponential.
-If you want to customize for your experiments, please check `utils/prepare_data_split.py`.
-
-> **_NOTE:_** The generated train config files will be stored in the folder `/tmp/nvflare/xgboost_higgs_dataset/`,
-> and will be used by jobs by specifying the path within `config_fed_client.json`
-
-
-## HIGGS job configs preparation under various training schemes
-
-Please follow the [Installation](../../getting_started/README.md) instructions to install NVFlare.
-
-We then prepare the NVFlare job configs for different settings by running
+This example demonstrates how to use NVFlare to train an XGBoost model in a federated learning setting.
+Several potential variations of federated XGBoost are illustrated, including:
+- non-secure horizontal collaboration with histogram-based and tree-based mechanisms.
+- non-secure vertical collaboration with a histogram-based mechanism.
+- secure horizontal and vertical collaboration with a histogram-based mechanism and homomorphic encryption.
+
+To run the examples and notebooks, please make sure you set up a virtual environment and JupyterLab, following [the example root readme](../../README.md),
+and install the additional requirements:
 ```
-bash prepare_job_config.sh
+python3 -m pip install -r requirements.txt
 ```
-This script modifies settings from base job configuration
-(`./tree-based/jobs/bagging_base` or `./tree-based/jobs/cyclic_base`
-or `./histogram-based/jobs/base`),
-and copies the correct data split file generated in the data preparation step.
-
-> **_NOTE:_** To customize your own job configs, you can just edit from the generated ones.
-> Or check the code in `./utils/prepare_job_config.py`.
-
-The script will generate a total of 10 different configs in `tree-based/jobs` for tree-based algorithm:
+## XGBoost
+XGBoost is a machine learning algorithm that uses decision/regression trees to perform classification and regression tasks,
+mapping a vector of feature values to its label prediction. It is especially powerful for tabular data, so even in the age of LLMs
+it is still widely used for many tabular data use cases. It is also preferred for its explainability and efficiency.
-
-- tree-based cyclic training with uniform data split for 5 clients
-- tree-based cyclic training with non-uniform data split for 5 clients
-- tree-based bagging training with uniform data split and uniform shrinkage for 5 clients
-- tree-based bagging training with non-uniform data split and uniform shrinkage for 5 clients
-- tree-based bagging training with non-uniform data split and scaled shrinkage for 5 clients
-- tree-based cyclic training with uniform data split for 20 clients
-- tree-based cyclic training with non-uniform data split for 20 clients
-- tree-based bagging training with uniform data split and uniform shrinkage for 20 clients
-- tree-based bagging training with non-uniform data split and uniform shrinkage for 20 clients
-- tree-based bagging training with non-uniform data split and scaled shrinkage for 20 clients
+
+In these examples, we use [DMLC XGBoost](https://github.com/dmlc/xgboost), which is an optimized distributed gradient boosting library.
+It offers advanced features like GPU-accelerated capabilities and distributed/federated learning support.
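+For orientation, a minimal *centralized* sketch of the library's Python API on toy data (all values here are illustrative only; the federated examples below wrap this same kind of training loop):
+```
+import numpy as np
+import xgboost as xgb
+
+# Toy HIGGS-shaped data: 100 samples, 28 features, binary labels.
+X = np.random.rand(100, 28)
+y = np.random.randint(0, 2, size=100)
+
+dtrain = xgb.DMatrix(X, label=y)
+params = {"objective": "binary:logistic", "eval_metric": "auc", "tree_method": "hist"}
+bst = xgb.train(params, dtrain, num_boost_round=10)  # one boosted tree per round
+print(bst.predict(dtrain)[:5])  # predicted probabilities
+```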
+## Data
+We use two datasets: [HIGGS](https://mlphysics.ics.uci.edu/data/higgs/) and [creditcardfraud](https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud)
+to perform the experiments; both are binary classification tasks, but of significantly different scales:
+the HIGGS dataset contains 11 million instances, each with 28 attributes, while the creditcardfraud dataset contains 284,807 instances, each with 30 attributes.
-The script will also generate 2 configs in `histogram-based/jobs` for histogram-base algorithm:
+We use the HIGGS dataset to compare the performance of different federated learning settings because of its large scale,
+and the creditcardfraud dataset to demonstrate secure federated learning with homomorphic encryption for computation efficiency.
+Please note that the websites may experience occasional downtime.
-
-- histogram-based training with uniform data split for 2 clients
-- histogram-based training with uniform data split for 5 clients
+First download the datasets from the links above: a single zipped `HIGGS.csv.gz` file and a single `creditcard.csv` file.
+By default, we assume the datasets are downloaded, uncompressed, and stored in `DATASET_ROOT/HIGGS.csv` and `DATASET_ROOT/creditcard.csv`.
+Each row corresponds to a data sample, and each column corresponds to a feature.
-
-## Run experiments for tree-based and histogram-based settings
-After you run the two scripts `prepare_data.sh` and `prepare_job_config.sh`,
-please go to sub-folder [tree-based](tree-based) for running tree-based algorithms,
-and sub-folder [histogram-based](histogram-based) for running histogram-based algorithms.
+## Collaboration Modes and Data Split
+Essentially there are two collaboration modes: horizontal and vertical:
+- In the horizontal case, each participant has access to the same features (columns) of different data samples (rows).
+In this case, everyone holds equal status as "label owner".
+- In the vertical case, each client has access to different features (columns) of the same data samples (rows).
+We assume that only one client is the "label owner" (also called the "active party").
+To simulate the above two collaboration modes, we split the two datasets both horizontally and vertically, and
+we give site-1 the label column for simplicity.
-
-## GPU support
-By default, CPU based training is used.
-
-If the CUDA is installed on the site, tree construction and prediction can be
-accelerated using GPUs.
-
-To enable GPU accelerated training, in `config_fed_client.json` set `"use_gpus": true` and `"tree_method": "hist"`.
-Then, in `FedXGBHistogramExecutor` we use the `device` parameter to map each rank to a GPU device ordinal in `xgb_params`.
-For a single GPU, assuming it has enough memory, we can map each rank to the same device with `params["device"] = f"cuda:0"`.
-
-### Multi GPU support
-
-Multiple GPUs can be supported by running one NVFlare client for each GPU.
-
-In the `xgb_params`, we can set the `device` parameter to map each rank to a corresponding GPU device ordinal in with `params["device"] = f"cuda:{self.rank}"`
-
-Assuming there are 2 physical client sites, each with 2 GPUs (id 0 and 1).
-We can start 4 NVFlare client processes (site-1a, site-1b, site-2a, site-2b), one for each GPU.
-The job layout looks like this,
-::
-
-    xgb_multi_gpu_job
-    ├── app_server
-    │   └── config
-    │       └── config_fed_server.json
-    ├── app_site1_gpu0
-    │   └── config
-    │       └── config_fed_client.json
-    ├── app_site1_gpu1
-    │   └── config
-    │       └── config_fed_client.json
-    ├── app_site2_gpu0
-    │   └── config
-    │       └── config_fed_client.json
-    ├── app_site2_gpu1
-    │   └── config
-    │       └── config_fed_client.json
-    └── meta.json
-
-Each app is deployed to its own client site. Here is the `meta.json`,
-::
-
-    {
-        "name": "xgb_multi_gpu_job",
-        "resource_spec": {
-            "site-1a": {
-                "num_of_gpus": 1,
-                "mem_per_gpu_in_GiB": 1
-            },
-            "site-1b": {
-                "num_of_gpus": 1,
-                "mem_per_gpu_in_GiB": 1
-            },
-            "site-2a": {
-                "num_of_gpus": 1,
-                "mem_per_gpu_in_GiB": 1
-            },
-            "site-2b": {
-                "num_of_gpus": 1,
-                "mem_per_gpu_in_GiB": 1
-            }
-        },
-        "deploy_map": {
-            "app_server": [
-                "server"
-            ],
-            "app_site1_gpu0": [
-                "site-1a"
-            ],
-            "app_site1_gpu1": [
-                "site-1b"
-            ],
-            "app_site2_gpu0": [
-                "site-2a"
-            ],
-            "app_site2_gpu1": [
-                "site-2b"
-            ]
-        },
-        "min_clients": 4
-    }
-
-For federated XGBoost, all clients must participate in the training. Therefore,
-`min_clients` must equal to the number of clients.
-
+## Federated Training of XGBoost
+Continue with this example for two scenarios:
+### [Federated XGBoost without Encryption](./fedxgb/README.md)
+This example includes instructions on running federated XGBoost without encryption under histogram-based and tree-based horizontal
+collaboration, and histogram-based vertical collaboration.
+
+### [Secure Federated XGBoost with Homomorphic Encryption](./fedxgb_secure/README.md)
+This example includes instructions on running secure federated XGBoost with homomorphic encryption under
+histogram-based horizontal and vertical collaboration. Note that tree-based collaboration does not have security concerns
+that can be handled by encryption.
\ No newline at end of file
diff --git a/examples/advanced/xgboost/fedxgb/README.md b/examples/advanced/xgboost/fedxgb/README.md
new file mode 100644
index 0000000000..0eb70f7a38
--- /dev/null
+++ b/examples/advanced/xgboost/fedxgb/README.md
@@ -0,0 +1,212 @@
+# Federated XGBoost
+Several mechanisms have been proposed for training an XGBoost model in a federated learning setting.
+In these examples, we illustrate the use of NVFlare to carry out *horizontal* federated learning using two approaches, histogram-based collaboration and tree-based collaboration,
+and *vertical* federated learning using histogram-based collaboration.
+
+## Horizontal Federated XGBoost
+Under the horizontal setting, each participant joining the federated learning holds part of
+the whole data samples / instances / records, while each sample has all the features.
+
+### Histogram-based Collaboration
+The histogram-based federated XGBoost approach leverages the NVFlare integration of the [federated learning support](https://github.com/dmlc/xgboost/issues/7778) in the XGBoost open-source library,
+which allows the existing *distributed* XGBoost training algorithm to operate in a federated manner,
+with the federated clients acting as the distinct workers in the distributed XGBoost algorithm.
+
+In distributed XGBoost, the individual workers share and aggregate gradient information about their respective portions of the training data,
+as required to optimize tree node splitting when building the successive boosted trees.
+
+The shared information is in the form of quantile sketches of feature values as well as corresponding sample gradient and sample Hessian histograms.
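+As a hedged illustration of what such a histogram contains (logistic loss assumed; this is a toy sketch, not the library's internal implementation):
+```
+import numpy as np
+
+def grad_hess_histogram(feature, preds, labels, bin_edges):
+    """Accumulate first/second-order gradients of one feature into quantile bins."""
+    g = preds - labels            # gradient of logistic loss w.r.t. the raw score
+    h = preds * (1.0 - preds)     # Hessian of logistic loss
+    bins = np.digitize(feature, bin_edges)
+    n_bins = len(bin_edges) + 1
+    g_hist = np.bincount(bins, weights=g, minlength=n_bins)
+    h_hist = np.bincount(bins, weights=h, minlength=n_bins)
+    return g_hist, h_hist  # what one worker would contribute for this feature
+```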
+
+Under federated histogram-based collaboration, precisely the same information is exchanged among the clients.
+The main differences are that the data is partitioned across the workers according to client data ownership, rather than being arbitrarily partitionable, and that all communication goes through an aggregating federated [gRPC](https://grpc.io) server instead of direct client-to-client communication.
+Histograms from different clients, in particular, are aggregated on the server and then communicated back to the clients.
+
+### Tree-based Collaboration
+Under tree-based collaboration, individual trees are independently trained on each client's local data without aggregating the global sample gradient histogram information.
+Trained trees are collected and passed to the server / other clients for aggregation and / or further boosting rounds.
+Under this setting, we can further distinguish between two types of tree-based collaboration: cyclic and bagging.
+
+#### Cyclic Training
+"Cyclic XGBoost" is one way of performing tree-based federated boosting with
+multiple sites: at each round of tree boosting, instead of relying on the whole
+data statistics collected from all clients, the boosting relies on only one client's
+local data. The resulting tree sequence is then forwarded to the next client for
+the next round's boosting. Such a training scheme has been proposed in the literature [1][2].
+
+#### Bagging Aggregation
+
+"Bagging XGBoost" is another way of performing tree-based federated boosting with multiple sites: at each round of tree boosting, all sites start from the same "global model" and boost a number of trees (in the current example, one tree) based on their local data. The resulting trees are then sent to the server. A bagging aggregation scheme is applied to all the submitted trees to update the global model, which is further distributed to all clients for the next round's boosting.
+
+This scheme bears a certain similarity to the [Random Forest mode](https://xgboost.readthedocs.io/en/stable/tutorials/rf.html) of XGBoost, where a `num_parallel_tree` forest is boosted based on random row/column splits, rather than a single tree. Under the federated learning setting, such a split is fixed to clients rather than random, and no column subsampling is applied.
+
+In addition to the basic uniform shrinkage setting where all clients have the same learning rate, based on our research, we enabled scaled shrinkage across clients for weighted aggregation according to each client's data size, which is shown to significantly improve the model's performance on non-uniform quantity splits over HIGGS data.
+
+
+Specifically, the global model is updated by aggregating the trees from all clients as a forest, and the global model is then broadcast back to all clients for local prediction and further training.
+
+The XGBoost Booster API is leveraged to create in-memory Booster objects that persist across rounds to cache predictions from trees added in previous rounds and retain other data structures needed for training.
+
+## Vertical Federated XGBoost
+Under the vertical setting, each participant joining the federated learning holds
+part of the whole feature set, while each site has all the overlapping instances.
+
+### Private Set Intersection (PSI)
+Since not every site will have the same set of data samples (rows), we first use PSI to compare encrypted versions of the sites' datasets in order to jointly compute the intersection based on common IDs. In the following example, we add a `uid_{idx}` to each instance and give each site
+a portion of the dataset that includes a common overlap. After PSI, the identifiers are dropped since they are only used for matching, and training is then done on the intersected data. To learn more about our PSI protocol implementation, see our [psi example](../../psi/README.md).
+> **_NOTE:_** The uid can be a composition of multiple variables with a transformation, however in this example we use indices for simplicity. PSI can also be used for computing the intersection of overlapping features, but here we give each site unique features.
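+A toy, plaintext stand-in for what PSI computes (the real protocol compares the ID sets in encrypted form, so only the intersection itself is revealed):
+```
+site1_uids = {"uid_0", "uid_1", "uid_2", "uid_5"}
+site2_uids = {"uid_1", "uid_2", "uid_3", "uid_4"}
+
+# Rows both sites keep for training; identifiers are dropped afterwards.
+intersection = sorted(site1_uids & site2_uids)
+print(intersection)  # ['uid_1', 'uid_2']
+```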
+
+### Histogram-based Collaboration
+Similar to its horizontal counterpart, under vertical collaboration, histogram-based collaboration
+aggregates the gradient information from each site and updates the global model accordingly, resulting in
+the same model as centralized / histogram-based horizontal training.
+We leverage the [vertical federated learning support](https://github.com/dmlc/xgboost/issues/8424) in the XGBoost open-source library. This allows the distributed XGBoost algorithm to operate in a federated manner on vertically split data.
+
+## Data Preparation
+Assuming the HIGGS data has been downloaded following [the instructions](../README.md), we further split the data
+horizontally and vertically for federated learning.
+
+In horizontal settings, each party holds different data samples with the same set of features.
+To simulate this, we split the HIGGS data by rows and assign each party a subset of the data samples.
+In vertical settings, each party holds different features of the same data samples, and usually the population
+on each site will not fully overlap. To simulate this, we split the HIGGS data by both columns and rows; each site
+will have different features with overlapping data samples.
+More details are provided in the following sub-sections.
+
+
+Data splits used in this example can be generated with
+```
+DATASET_ROOT=~/.cache/dataset/HIGGS
+bash prepare_data.sh ${DATASET_ROOT}
+```
+Please modify the path according to your own dataset location.
+The generated horizontal train config files and vertical data files will be stored in the
+folder `/tmp/nvflare/dataset/`; this output path can be changed in the script `prepare_data.sh`.
+
+### Horizontal Data Split
+Since the HIGGS dataset is already randomly ordered,
+the horizontal data split is specified by continuous index ranges for each client,
+rather than a vector of random instance indices.
+We provide four options to split the dataset to simulate the non-uniformity in data quantity (see the sketch below):
+
+1. uniform: all clients have the same amount of data
+2. linear: the amount of data is linearly correlated with the client ID (1 to M)
+3. square: the amount of data is correlated with the client ID in a squared fashion (1^2 to M^2)
+4. exponential: the amount of data is correlated with the client ID in an exponential fashion (exp(1) to exp(M))
+
+The choice of data split depends on the dataset and the number of participants.
+
+For a large dataset like HIGGS, if the number of clients is small (e.g. 5),
+each client will still have sufficient data to train on with a uniform split,
+and hence the exponential split would be used to observe the performance drop caused by non-uniform data splits.
+If the number of clients is large (e.g. 20), the exponential split will be too aggressive, and linear/square should be used.
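+A sketch of the per-client proportions these four modes produce (this helper is hypothetical; see `utils/prepare_data_horizontal.py` for the actual script):
+```
+import numpy as np
+
+def split_proportions(num_clients, mode):
+    ranks = np.arange(1, num_clients + 1)
+    weights = {
+        "uniform": np.ones(num_clients),
+        "linear": ranks,
+        "square": ranks**2,
+        "exponential": np.exp(ranks),
+    }[mode]
+    return weights / weights.sum()
+
+print(split_proportions(5, "exponential"))  # most data lands on client 5
+```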
+
+In this example, we generate data splits with three client sizes: 2, 5 and 20, under three split conditions: uniform, square, and exponential.
+
+### Vertical Data Split
+For the vertical case, we simulate a realistic 2-client scenario where participants share overlapping data samples (rows) with different features (columns).
+We split the HIGGS dataset both horizontally and vertically. As a result, each site has an overlapping subset of the rows and a subset of the 29 columns. Since the first column of HIGGS is the class label, we give site-1 the label column for simplicity's sake.
+vertical fl diagram
+
+PSI will be performed first to identify and match the overlapping samples; training is then done on the intersected data.
+
+
+## Experiments
+We first run centralized training to get the baseline performance, then run the federated XGBoost training using the NVFlare Simulator via the [JobAPI](https://nvflare.readthedocs.io/en/main/programming_guide/fed_job_api.html).
+
+### Centralized Baselines
+For centralized training, we train the XGBoost model on the whole dataset, as well as on subsets with different subsample rates
+and parallel tree settings.
+```
+bash run_experiment_centralized.sh ${DATASET_ROOT}
+```
+The results by default will be stored in the folder `/tmp/nvflare/workspace/centralized/`.
+
+![Centralized validation curve](./figs/Centralized.png)
+
+As shown, including multiple trees in a single round may not yield significant performance gains,
+and can even make the accuracy worse if the subsample rate is too low (e.g. 0.05).
+
+### Horizontal Experiments
+The following cases will be covered:
+- Histogram-based collaboration based on uniform data split for 2 / 5 clients
+- Tree-based collaboration with cyclic training based on uniform / exponential / square data split for 5 / 20 clients
+- Tree-based collaboration with bagging training based on uniform / exponential / square data split for 5 / 20 clients w/ and w/o scaled learning rate
+
+Histogram-based experiments can be run with:
+```
+bash run_experiment_horizontal_histogram.sh
+```
+
+> **_NOTE:_** "histogram_v2" implements fault-tolerant XGBoost training by using
+> NVFlare as the communicator rather than relying on XGBoost MPI; for more information, please refer to this [TechBlog](https://developer.nvidia.com/blog/federated-xgboost-made-practical-and-productive-with-nvidia-flare/).
+
+The model accuracy curve during training can be visualized in TensorBoard,
+recorded in the simulator folder under `/tmp/nvflare/workspace/works/`.
+As expected, we can observe that all histogram-based experiments result in curves identical to centralized training:
+![Horizontal Histogram XGBoost Graph](./figs/histogram.png)
+
+Tree-based experiments can be run with:
+```
+bash run_experiment_horizontal_tree.sh
+```
+The resulting validation AUC curves are shown below:
+
+![5 clients validation curve](./figs/5_client.png)
+![20 clients validation curve](./figs/20_client.png)
+
+As illustrated, we can make the following observations:
+- cyclic training performs well under a uniform split (the purple curve), however under non-uniform splits it shows a significant performance drop (the brown curve)
+- bagging training performs better than cyclic under both uniform and non-uniform data splits (orange vs. purple, red/green vs. brown)
+- with uniform shrinkage, bagging shows a significant performance drop under non-uniform splits (green vs. orange)
+- data-size-dependent shrinkage is able to recover the performance drop above (red vs. green), and achieves performance comparable to or better than the uniform data split (red vs. orange)
+- bagging under a uniform data split (orange), and bagging with data-size-dependent shrinkage under a non-uniform data split (red), can achieve performance comparable to or better than the centralized training baseline (blue)
+
+Regarding model size, centralized training and cyclic training produce a model consisting of `num_round` trees,
+while the bagging models consist of `num_round * num_client` trees, since in each round
+bagging training boosts a forest consisting of individually trained trees from each client.
+
+### Vertical Experiments
+
+Create the psi job using the predefined psi_csv template:
+```
+nvflare job create -j ./jobs/vertical_xgb_psi -w psi_csv -sd ./code/psi -force
+```
+
+Run the psi job to calculate the dataset intersection of the clients at `psi/intersection.txt` inside the psi workspace:
+```
+nvflare simulator ./jobs/vertical_xgb_psi -w /tmp/nvflare/vertical_xgb_psi -n 2 -t 2
+```
+
+Create the vertical xgboost job using the predefined vertical_xgb template:
+```
+nvflare job create -j ./jobs/vertical_xgb -w vertical_xgb -sd ./code/vertical_xgb -force
+```
+
+Run the vertical xgboost job:
+```
+nvflare simulator ./jobs/vertical_xgb -w /tmp/nvflare/vertical_xgb -n 2 -t 2
+```
+
+Model accuracy can be visualized in TensorBoard:
+```
+tensorboard --logdir /tmp/nvflare/vertical_xgb/server/simulate_job/tb_events
+```
+
+An example validation AUC graph (red) from running vertical XGBoost on HIGGS, compared with the centralized baseline (blue), is shown below.
+Since in this case we only used ~50k samples, the performance is worse than centralized training on the full dataset.
+
+![Vertical XGBoost graph](./figs/vertical_xgb.png)
+
+## GPU Support
+By default, CPU-based training is used.
+
+In order to enable GPU-accelerated training, first ensure that your machine has CUDA installed and has at least one GPU.
+In `XGBFedController` set `"use_gpus": true`.
+Then, in `FedXGBHistogramExecutor` we can use the `device` parameter to map each rank to a GPU device ordinal in `xgb_params`.
+If using multiple GPUs, we can map each rank to a different GPU device; you can also map each rank to the same GPU device if using a single GPU.
+
+
+## Reference
+[1] Zhao, L. et al., "InPrivate Digging: Enabling Tree-based Distributed Data Mining with Differential Privacy," IEEE INFOCOM 2018 - IEEE Conference on Computer Communications, 2018, pp. 2087-2095.
+
+[2] Yamamoto, F. et al., "New Approaches to Federated XGBoost Learning for Privacy-Preserving Data Analysis," ICONIP 2020 - International Conference on Neural Information Processing, 2020, Lecture Notes in Computer Science, vol. 12533.
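+Referring back to the GPU Support section above, a hedged sketch of the rank-to-device mapping it describes (the `device` value follows XGBoost's own parameter convention; the exact executor wiring may differ):
+```
+rank = 0  # each client's rank is provided by the runtime in a real job
+xgb_params = {
+    "objective": "binary:logistic",
+    "eval_metric": "auc",
+    "tree_method": "hist",
+    "device": f"cuda:{rank}",  # one GPU per rank; use "cuda:0" to share a single GPU
+}
+```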
diff --git a/examples/advanced/xgboost/tree-based/figs/20_client.png b/examples/advanced/xgboost/fedxgb/figs/20_client.png
similarity index 100%
rename from examples/advanced/xgboost/tree-based/figs/20_client.png
rename to examples/advanced/xgboost/fedxgb/figs/20_client.png
diff --git a/examples/advanced/xgboost/tree-based/figs/5_client.png b/examples/advanced/xgboost/fedxgb/figs/5_client.png
similarity index 100%
rename from examples/advanced/xgboost/tree-based/figs/5_client.png
rename to examples/advanced/xgboost/fedxgb/figs/5_client.png
diff --git a/examples/advanced/xgboost/tree-based/figs/Centralized.png b/examples/advanced/xgboost/fedxgb/figs/Centralized.png
similarity index 100%
rename from examples/advanced/xgboost/tree-based/figs/Centralized.png
rename to examples/advanced/xgboost/fedxgb/figs/Centralized.png
diff --git a/examples/advanced/xgboost/fedxgb/figs/histogram.png b/examples/advanced/xgboost/fedxgb/figs/histogram.png
new file mode 100644
index 0000000000000000000000000000000000000000..fb95949a7ed9f005ee40079df0bb0efb2150690d
GIT binary patch
[binary PNG patch data omitted]
diff --git a/examples/advanced/vertical_xgboost/figs/vertical_fl.png b/examples/advanced/xgboost/fedxgb/figs/vertical_fl.png
similarity index 100%
rename from examples/advanced/vertical_xgboost/figs/vertical_fl.png
rename to examples/advanced/xgboost/fedxgb/figs/vertical_fl.png
diff --git a/examples/advanced/xgboost/fedxgb/figs/vertical_xgb.png b/examples/advanced/xgboost/fedxgb/figs/vertical_xgb.png
new file mode 100644
index 0000000000000000000000000000000000000000..b5d9e0a2d177be7c61a4e44c9a9145c8820bbe14
GIT binary patch
[binary image data for figs/vertical_xgb.png omitted]
zfPeu110j=punD21Ju95TZm4o=2EkPV6x~~r70K@sZe_=I+UnA2lf}~6jydu}~wA5{ctX#LEbgwT}=R+FC#Kpt6C}C2R z-N6WNOoD0D!CtA(zq(kN0e6+}8reI$!+p{;GO0v<{GIjH^{2z4#p$D!mwfAlm`7_I zX3f#@=O-r2zL!c%U4B+s`hL}cxBANKwTZ^!wpWVwhYgj)j5ZBz8MoM}p_uR9CjVf) z=5ZjO>;KB=vS;r}qdmNK`;GhVa3e=Z(2BqKlkSYTko9ct4@X1ZG(!%t+CoT&1ae;YiLR*zZucw%wCj!l#ppjWB27+ zZ7FerBfYRIo2lQX8~~O&ZGG)udN1)kiftW-tD0obcna{$FD=A=9@J(2{VC=mpqvQh z{4>2G+bx%93TOR)FFet+fdnPwwZQ_XARo(k4T<>vbq_(6ykb0#)$1DBv(!8tkGXZf z(fNk?iW2qeGFM%_Ngl$A&ZgNXM$Z{*YEpQv47b44Gk0zlE4e@DH@gfQ-gY~q<%@&sqkAtLe(a?=;5w-Go<+fSD=H~X3s7O- zyDArSY_4A{U(A*a8YC8J++~=tRY9R7xkW~N%r%#@8GCnDh>T8?iBd%PEZPXgPLHgH2FJC$)=^&fXpK)w}DlL%N?X!yQbwu9@(` z9l@OGJV)51d+S}R9^Ov%vuBhm6Aft))ip)VHIExy$eP?rGA}YmzgvIT+@?AaicLLx zuQ>Bk!AJT^k4{p;4wE=_jHA@q89SA1MMhk0b)5SeV=+&d;Q9Z zsO62XS8089tZr@YCuG53Rpw7vB>B~&kqX6wBem@Jpuu!~L<-xRT9|ahN!7H5` z?eZes4fkXG#q(rM`9=HHik`1nU3H9&qs4u9neZ8QCO>N@-uil)_&dw=wG*O2xUiVqG{-vdX;^8_sj-#U^ z9{&UPmKpQhU9F7n$DVUs(zxm5Y7^6N2ZUVb2>BUEsRX>$A5rl)bEYbCB^Nd*Y9#Pe z@p~8dBqt;5gJ2jh{@I>SoU4~6?`RP;RxE|BslQPBWoINV)@ z`rcY{w-RANK0(jcj__|k3a~rP)aCD6Eq?v_^d$-S9ytua*wMzkKCNSO~V?H{AY`09l&wb^n z>Yiq0#W{@Dv_RMZo-8wW;nCc4TTS?#{YXEXQ{ct%BAX`Nhl~#br6x7uH^R=m|4D~Sl zZnxTtn`W^<3uTIQb~(MA=96D0DrwRktq~`6h_EjfX-W+5+9xYmur00U=H+b`n;k1} zw6vM(_6rLOcYgWuF;&2>+_Ho0ImB>JrZ|tN--=*b_x|ti8ZLl^N{rr-UDqpVu5lQ3 zKHA?jC0H*_eu))h1sqgq)h#0<6P)exyEYe|=bu_M z9f12N!{W7%A7#e;_r`0N-+bup?+?vWh?{}%307xW??DEf{g$udwcfBdwZ0zrXM2H! zRw3@XEnwW&uQ?%+bpCWzWTUj$c44}>B_b<(KA(u0!M+=hiu<7xv z3h#fOtyfEw)i6AaUT%q5oiu>aeTr&oXeR?t8?v>xgG_b$^@?fa^ZGJhUZEtKa~1x_ zijRx7tCZkrSA1&f6aT~A%Y1xPsEuA(au4sE&iVQI%sL!5E6%sM~e?MSkljGd{+7oin{>S5!IZv}~_3q)eO^-!IvkgC4;kNC= zhmS|>na(fV=V!QAZMYgJ=A!=fU|L+gAIOPv{#!MR8UwDUpYK0acA{AVM4NWoDZHsk z(sOML`{j!=5&?Jb-ra3m{bz47aPoWa`)=E6G#Y&dRswUn_(bI=UDXZ?DtoiV%Xm-V zjJidm!Hrgh$*P(R3t6Lb&N|qDz}CEqU7DzN^ufZS=UpyZ>I)ZuSP))%GCKUs(BHqI zH|KXM2Hi3nCrM^DIY_US>2<7@DOT6o~qDt6W8{mt3KqfMW@rSW(7 zFm`4lD~TK#Om0wo0@rU6pxQ{2%sX!8iPPPi{I>j;!htXlM%G@swY7Dgf&wQnFfd2f zB@J{Fx^wu^3GTPzUmOSO;HUG4L$q(`SU4=tim^MK2=zDoWFRH zBt||2W;6{PcVlNq4ud)SHA^z?(3 z#|$pGP>{NNdwU~N$fnupj#==hzT!pi{|ma`t+cufcOmM+&G2>$X`n3oI6ZvQg~uJ+_3`w1&W3>>2KTP4EG}@e z4=bqO)u!BDnpdeI*GkFD%STL>nl}76sV*zKY3jQwP`~@buEATVtiOB_g&OtDf3uh1 zIrf3o*ks)5hA733)7Y|X8-w8Zd%qXOFFLMeG1OJyu^;J9sSwf-_ANc2d5x* zvOgQ#-h$zl3AZ;%suZg&;MbhAv$d6Z`0$#oogFAfqv=ncS_-U|{DiVGXJVikrK<1t z3WgE0o@v7xGzEwQ$yr)jg8WTW9qBF*G#tRdlmLTh6%WvfdUA!6Gv7c`FnXre|KTAH zrh{9>;rm8pp;HEE5E>GH{-Q7GS@UICHpX{19hy4a59_|APfO3ycXxLemXw5=mVvgU z*N>hWPGXvylR>3lSDPC*&?G4-Zv1^MBhxJ(dzSLTg$P8$ftz>Rk?@@f&vjdz>ZaYC z$}sNO0sg~pn9QHEvmq3$@+@p@m|iHPcymH#KSrv_FIjt+r?PL=^I3m2-<)apTB}eZSe0i|6 z+_3a2H#ZFzOqFR+w=0CiD6&m9=Z-nlSwdoBN<_ z8z!8H-sh!w*yw)vB4T#xGR$|zTrlk(4;@PqNgC%}OL=R_jOH-2~kt9GyMX)g@=mPBu2F!*ECOgJeG9)Hrp@oFvQflFFrGO zDbHQQ9ltw#2DUeM?Ov{D<*w3?yImmz{FZMp5L`t1#EjWaVj z`n`#2zvE0TroNH{2W5;_?>B?X2OpjHd8KEk%yZK=Y`HPGJC(Zu;O!jx%R2US;a%=AM{{C#7aPly7 ziYqZnsO*)4fg06LAl!)BfMXFhN^G`Exq^Qh<83qBrh*7d0X59=K-ocl%0Dfg6rK5Pan-Ccm7PIo zW@cuig@D-^R#>sRjMKcxG6RBYTn7f@Ur!&NGofV-gu&W`+=8lQU_r`4xixTv)u%_cxCkRG=Pc8o(|G?A%{=Q~J zuLlWgc0k{ZhClYkB)hAt>(j)yDK9hfR!QKP*Tp$JW#A3HpPtC zf9hZkjD{Qm>Emoxz`?cHOQPW(%Y!+&xo1G3mbS6E3a=cn=9qVkJ$|V79W$qtq1O*G zBdF$)Sgf?GD=#9O3cJlut&CIxh`QmVud5pdpvhwXpOiaAI&fdx#ve__d{}!=QdCp~ z&$Pfq#{+o%^Zf&TK8L%a#xBR%KzLNs3D{DDe)V3WmfXWoGp*Z7M^{(6ct8fUe9#p0 z`m$fW!jFlG0hD&h(X!gmWou3e)Iik?5gO%u%GL<-4t+EKFwwPIToBk40sH)DUoXw9fEv{PtJ5M;dB4u?B{@z!W|OIa*1UT)D)JUH?D1% z3Jx3+XKqXxhoPaNd4&Ofzc5be&|Gmz3C6@Eqc^8k&C-(fAFJ&kuQ&D~Sm-u{;FE53 z3Pag<-@jW>N{+>x$=4cTzh%Tl3x}4Z!nwV*bw^Y4G3em%^{*Wt`-yt`lamvK*uc7i 
zj*k59KX_0#{akFyuj06?5@%Y-Xv`G?h>{X2JpKs%zkC}@A4KUV!Z0z#fX zJ^#yHBw3Xms%-z@piHTrY`&I>ty>}oYdYRA;-UC}bY4cw)H}p|`=$j#YTVI9pC_he3JwmdK_VR4 zJ5)B#&gD}ioMWJCox=~gCnctzpPzr{D)k-CL{Qy>GqejH8Sr`E@NndlC&b2e zu6UN8q8ASwoEa?OM6hu!-$uBTk`~5>JM2n{UG}RJv3)+Fwqf96& zkECH3%@mHHWj$a|l#KmJ{A_fb=mnw=pRyuD-DW?LGKzYhgI)l%G@$4)d2rbCNkn#v znl$*(qzO8FobOHpyhc+^RYT7VF!5Hh138+Kj>?1MD}LxKQ%(pAy5$@_?^!^xjH=XW z+9KlUfECRi^T5>3;ut1zhMNt)DG8{sk!`Q*>lq`+nZxDtq`hjLE%g{x?K_g)+E1!E zm4;Q?sN&0?JbAJ))O11<2pt!sr}#cYTcn})@_CBtmO%y48sUCrNP?y}`T3~ER=jzaI<;{g?i1lX?7(e@<0!SZRoS10N8hSD*oNQ7QpPGWRD{; zFG04?^4lVk_%lZZATifdI==HsYGm8vQgVU<_wN1HxKeU-Zh1KmqNe{>8*+)+g(W`n z$3+Q0p-lhXyReN)^|Aup?KdzwT3W%N#eW1QKl>?$ii>QwFtF`025nnbRYh$U@aIb7 z{tU_A*91qIL@KHdAME#m(8X(~5=s^liD3hN_VD3DNUlhoTV=^acE1Ih2OIs`8W2b4 zw!ZxE465afj|>_d-c3WZq|hV=H{K~OrVbZE+AAl@rKs!2eNP8M`X1)QZo z%Cg?UB+v}Yb61g*!y{vshz3a%dc#yZaWl2UVUd_G%SOy$)uq>Cg=RxwU!a5lG&utT zy$z#jR^jL@OzbTY5s|cyj|c%(n4ZCqr>yf0&&%jE=#!cQq6CFT*46c`xr>*7+(E?q z!LH`Y9cj#}uAbgZnp0y7@UpWpt;Po2gQEo2WW3RWDJv@^u{-{I zoRQp1_$bu*?}!(G6Hv(ZLQjXuqjBgdo0@1Y7z@<=$%G|$YQhbb*q_2Zi;r&sB{{f# zdWYuk3g?XBS~ni9i9(xQt5>LALNkuI@Pf!$bgsI$$1H-w%0{O z7>e~ugX7}jvR%A9pb@{Mdgsph`L7vWiCBy5{l=0Vr#H%MXo94hW}hH&A#%mx+6Rqt zq54JzHO!o{ni?s>F2}}N! zs-v6gg9lfb{C{g=;KO@w0u;gwC1a|$-pwV!pH90RI-{pjAQ~E2gZK99XswO|#3(f+! z|CGXKeV&#!9ID6B?z<*Ab8{9!r-^7HDt4(G7xaD8kq~Nt3_j+)kTJug%K!hAcGUk3 zOlBSvFa^oywLtTcC`{(DzNt4JXw3euXmJ_?V-9yh44c-4o2G4`2twa64&*&TS_LS_ z|D(jS=l~Q#JyVP*sy!Go(AhdcNZFW#FGshusxa~OhD}f4)c1?oz*xbUx z?BROTg}-I^{|k#ixBGQ*agGk><`28F7IPl+)gRTv4ZgSGVCT0gdeVizf8bljFCKW3 zb6XV^wB$_sM0_JT)4bXcrz+qry&@NFl!EDn%=;AvU=2WFF=+Rz2!sM{M^jsy)%|M2 ztM7~1L(7-r72y%_t%Ec%Z`X({kJLiYot`T{NNcmNFknge%#tW_^V9!9A-?2BKmws! zMn*-24A{1h)wyZjxzqcw91_s-pwf;!;2Ta-{KDQJ)^QunmOe6Yg+amsc{r4;-61tC zP1C^OSNu>I$D+Fk0v_yHcNJitg8FPQUS72z@fdn}3CYJXk%r0OH1m0XPEn_-1~j0k zNa!wd392n9VnE|_^ol{o6JeYj8;iyzAT$5=Qi)F3_5Lz$=T9J~&_4=k__j=@!8O94 zqPm;xSI{nXWdll9h5s2!pOIqGyQPCTkJM2tiOR6M1pVG45+3BCZd;q1El)4Yu7qG^ z0EGO{f=#Jj)XaIesOOX#5XeF&5Rrc)5CEhb8?EG)K;NG#=L@;a zoZVd;k18y@ij=wtvP&`;3?_|@D_)W3(V8)!Wke2yn;S*TP3eAifFAN(?k_U`C1~ON>G8 zXY2Isd%?Xq=i>=8L?4kk)oqG}KkO&k75Baj8JgCMyJ0DHo_AI{>zJjI{ z)k8c#s;T@$h(YD0*29R^3~}iFfrH4*%q*Io42Z3)-Zih#fX$O6U&cv9Pu_jQwQeC15{ta&ibYN8Y)bT2|9VN*o5o z8hrs8#1-Z(XO2MS{kLlfA|7H@A524<3&VNrXxDpRo?k;m>wCN@AX5I@AyuS93`M;S>YOej;mIUu(bEPaEu%!`+-1S`jaRrhUCK$2yh#aX_gh)!;9?uJIm6jl60WjxSBW8`PU2}Lbn+f1e5AYQ$C zrI8eJothZx63~DTI{_($SexeJ;$m43k7}cJZFGF|Eq}vOJy5q%QjmzxoH;YLYhWrW zj^K-?)8qA9Ah3uX|Ng$eJL+_=!XT7vxyGPWFI~c)K|^fwtc9iJCWP{AXFO}U0T=Kh zA)nt@0n|mf%zm0KD#)aCC$iVz=jS&cc&%$tWkd1CtceU&>-v*8(|evCLFgbQOP=hM z1@5FRbl>N_2iFO@g3`*$#9Q0jIH><#uN*Zv{qy18^+=wIA#A*=il0$3z}UU4DLgz} z;?JJF+3b&J{3t}GZ)}`!i2Ip{{Q;WGdf=%jxRNTsP$jm}j0@bVAX}e)4GSfx#H@)4 zY!Uk_l|4_>)1xJi7d5V4y($G_B2tu~t6{M|(SVTH8pm46qN|x|xu5bQXlVR&vQBmeipvp!MBpw^t{DI?m0_nS(72 zF>oNn0I~#*jX&%7hetOv{kD|2jBC#`2OhbU?dCLkZ?mhiYnS!lY<$C*7#T$YYhvTz zKwK@!!fx!(mGc`uF5U!h1_;$r1(`m?pwKFSy$WR5FI`>G(;%-ijs=844*jWQ$v}xO zU%sp)77f!}xL^ql$6DzBtm5)RdY4jCBAV~Xii0%v^3^M_`b`6dIHNFH4ewhSeMhb# zdCX6H(=__BY&gnBjqQKZ8IVl?Hwvt08dMz&P(;wJA?0Fr4vI@l9kl-}xr^A3yv1{y z|EC449IH-GK;#DG|Ebac5V__hhSkIb;|)6{=PnR1Ab40+|Ny3M$C3 zG`z*!d6cB!CVh-Nq9D?V`|yJsZI$`j^71lV{}n70P$|yD+}E#_fA@@npV0aEz{3wD ze6WZ#AMZ`ZA#4oJxbXGsV7=ln;~Nne^kbkdYHBgRB#)tcdapNIdKk_UL2@M}T%eSP zhK5GY?+o?!nxFmvZyf9`otPJo)z@?^%(qvM_T5ro^jdcg)>&39E*&Gv$$r+t77UTPJMO1vUO z{DjZ}k_RaWinT+a%g9$v&cu(l)$bpOXqk0EW(6Bux%0FP$aBMij=U+zeu}0j<$nv< zt{)f}$N@`drFG9I*iI}6eY2nz$$`X@wxA5M&yA#3d%>uaZiT!)7H~HfXk>koB^M8U zH0gig+O=zR5`MK;O#^)z8fBSBT$(k|@xwPO2jEUmKLRmD2Da(@kiM-gClcx0 
zEgo)XnZSe~is&8=0T`=rTTn+zJxE=S+oAwb!0>X~<9Mhs%$#`}kQm(Hf6{J~gYl}W zi74^K3+Q?N+ZCs}Q7Z63%hWU$Sq9L^_K61HO)zFzfUOKF5k>oQ*>PI14yT-s4mIcX z>n;E)K%JRsevFF*g7J-xT9;YGA!Yo*lN1CKA@^T*3X}u|1ZZGOc9nnJeTF~vLPqAO zY(rvdd6^_k=H96d7%FpymI2UCTFuo++SuBD1bhp)Y2zcwO{wC6Gbm_fpGo)}`sx)s zltnt|;DP{y*wch&z9QWWKpTk3b7QOP=4fE%ix=S_w$XrQ1^ok*Awk%Vk2n+r%xOIy zKK>c0h?Z;rQCu1w#kcSPs*eaL6&*~i&}X9ncn{hUIE*boLJe*@N%u#C=TRE++U~5SU(u3Iv4#3Ss=yr$OK=F$RePu7Qx}3LR9* zRj2nB0{1)5Lkl?_fD?op__^T1lgh_1gaI8TJ~?pP#bUs&C@f5G^v+QZ=oI7>ihng8 z_YV%xtgN^n9<3vsj|Tz+s2W6`XDzdlhRBsGcui14R~?@2;zpy!wtoEnf+^MEbOA*_ z2gHatu`NQR_JAVH2w_VtQZ<_UeG&2NT0*qjgiUy)idJa&iS@V@3oKJm`9H4+tWF_mAKim7DxVL`vFDa!(sVuAoK(-z52sR{lqj2nn09&LAp9R{W&{eA#gzoKH|XY3E1Uh zV`rD)O#C646L7Ft5E&QOp{*hQ^TmwA{>F3zlaqNadoIp0Oy1GtveySNkpP77^_Mgq z0h9WAZxU|z>iW>#?&UXn4o+5s<0;oHi>%UCu2pT-o^@7{&k0@fGf6NV*Yd{e0 ze|(WIaXb=V;vk>2xN|7sB2NU{WW5{#07^+=t&}Q`{GLGVI2)<+1Mr~#s`!t;G4S{I d2 str: + return f"higgs_{args.site_num}_{args.training_algo}_{args.split_method}_split_{args.lr_mode}_lr" + + +def _get_data_path(args) -> str: + return f"{args.data_root}_{args.data_split_mode}/{args.site_num}_{args.split_method}" + + +def _read_json(filename): + if not os.path.isfile(filename): + raise ValueError(f"{filename} does not exist!") + with open(filename, "r") as f: + return json.load(f) + + +def _get_lr_scale_from_split_json(data_split: dict): + split = {} + total_data_num = 0 + for k, v in data_split["data_index"].items(): + if k == "valid": + continue + data_num = int(v["end"] - v["start"]) + total_data_num += data_num + split[k] = data_num + + lr_scales = {} + for k in split: + lr_scales[k] = split[k] / total_data_num + + return lr_scales + + +def main(): + args = define_parser() + job_name = _get_job_name(args) + dataset_path = _get_data_path(args) + + site_num = args.site_num + job = FedJob(name=job_name, min_clients=site_num) + + # Define the controller workflow and send to server + if args.training_algo == "histogram": + from nvflare.app_opt.xgboost.histogram_based.controller import XGBFedController + + controller = XGBFedController() + from nvflare.app_opt.xgboost.histogram_based.executor import FedXGBHistogramExecutor + + executor = FedXGBHistogramExecutor( + data_loader_id="dataloader", + num_rounds=args.round_num, + early_stopping_rounds=2, + metrics_writer_id="metrics_writer", + xgb_params={ + "max_depth": 8, + "eta": 0.1, + "objective": "binary:logistic", + "eval_metric": "auc", + "tree_method": "hist", + "nthread": 16, + }, + ) + # Add tensorboard receiver to server + tb_receiver = TBAnalyticsReceiver( + tb_folder="tb_events", + ) + job.to_server(tb_receiver, id="tb_receiver") + elif args.training_algo == "histogram_v2": + from nvflare.app_opt.xgboost.histogram_based_v2.fed_controller import XGBFedController + + controller = XGBFedController( + num_rounds=args.round_num, + data_split_mode=0, + secure_training=False, + xgb_options={"early_stopping_rounds": 2, "use_gpus": False}, + xgb_params={ + "max_depth": 8, + "eta": 0.1, + "objective": "binary:logistic", + "eval_metric": "auc", + "tree_method": "hist", + "nthread": 16, + }, + ) + from nvflare.app_opt.xgboost.histogram_based_v2.fed_executor import FedXGBHistogramExecutor + + executor = FedXGBHistogramExecutor( + data_loader_id="dataloader", + metrics_writer_id="metrics_writer", + ) + # Add tensorboard receiver to server + tb_receiver = TBAnalyticsReceiver( + tb_folder="tb_events", + ) + job.to_server(tb_receiver, id="tb_receiver") + elif args.training_algo == "bagging": + from nvflare.app_common.workflows.scatter_and_gather import 
+
+        controller = ScatterAndGather(
+            min_clients=args.site_num,
+            num_rounds=args.round_num,
+            start_round=0,
+            aggregator_id="aggregator",
+            persistor_id="persistor",
+            shareable_generator_id="shareable_generator",
+            wait_time_after_min_received=0,
+            train_timeout=0,
+            allow_empty_global_weights=True,
+            task_check_period=0.01,
+            persist_every_n_rounds=0,
+            snapshot_every_n_rounds=0,
+        )
+        from nvflare.app_opt.xgboost.tree_based.model_persistor import XGBModelPersistor
+
+        persistor = XGBModelPersistor(save_name="xgboost_model.json")
+        from nvflare.app_opt.xgboost.tree_based.shareable_generator import XGBModelShareableGenerator
+
+        shareable_generator = XGBModelShareableGenerator()
+        from nvflare.app_opt.xgboost.tree_based.bagging_aggregator import XGBBaggingAggregator
+
+        aggregator = XGBBaggingAggregator()
+        job.to_server(persistor, id="persistor")
+        job.to_server(shareable_generator, id="shareable_generator")
+        job.to_server(aggregator, id="aggregator")
+    elif args.training_algo == "cyclic":
+        from nvflare.app_common.workflows.cyclic_ctl import CyclicController
+
+        controller = CyclicController(
+            num_rounds=int(args.round_num / args.site_num),
+            task_assignment_timeout=60,
+            persistor_id="persistor",
+            shareable_generator_id="shareable_generator",
+            task_check_period=0.01,
+            persist_every_n_rounds=0,
+            snapshot_every_n_rounds=0,
+        )
+        from nvflare.app_opt.xgboost.tree_based.model_persistor import XGBModelPersistor
+
+        persistor = XGBModelPersistor(save_name="xgboost_model.json", load_as_dict=False)
+        from nvflare.app_opt.xgboost.tree_based.shareable_generator import XGBModelShareableGenerator
+
+        shareable_generator = XGBModelShareableGenerator()
+        job.to_server(persistor, id="persistor")
+        job.to_server(shareable_generator, id="shareable_generator")
+    # send controller to server
+    job.to_server(controller, id="xgb_controller")
+
+    # Add executor and other components to clients
+    for site_id in range(1, site_num + 1):
+        if args.training_algo in ["bagging", "cyclic"]:
+            lr_scale = 1
+            num_client_bagging = 1
+            if args.training_algo == "bagging":
+                num_client_bagging = args.site_num
+                if args.lr_mode == "scaled":
+                    data_split = _read_json(f"{dataset_path}/data_site-{site_id}.json")
+                    lr_scales = _get_lr_scale_from_split_json(data_split)
+                    lr_scale = lr_scales[f"site-{site_id}"]
+            from nvflare.app_opt.xgboost.tree_based.executor import FedXGBTreeExecutor
+
+            executor = FedXGBTreeExecutor(
+                data_loader_id="dataloader",
+                training_mode=args.training_algo,
+                num_client_bagging=num_client_bagging,
+                num_local_parallel_tree=1,
+                local_subsample=1,
+                local_model_path="model.json",
+                global_model_path="model_global.json",
+                learning_rate=0.1,
+                objective="binary:logistic",
+                max_depth=8,
+                eval_metric="auc",
+                tree_method="hist",
+                nthread=16,
+                lr_scale=lr_scale,
+                lr_mode=args.lr_mode,
+            )
+        job.to(executor, f"site-{site_id}")
+
+        dataloader = HIGGSDataLoader(data_split_filename=f"{dataset_path}/data_site-{site_id}.json")
+        job.to(dataloader, f"site-{site_id}", id="dataloader")
+
+        if args.training_algo in ["histogram", "histogram_v2"]:
+            metrics_writer = TBWriter(event_type="analytix_log_stats")
+            job.to(metrics_writer, f"site-{site_id}", id="metrics_writer")
+
+            event_to_fed = ConvertToFedEvent(
+                events_to_convert=["analytix_log_stats"],
+                fed_event_prefix="fed.",
+            )
+            job.to(event_to_fed, f"site-{site_id}", id="event_to_fed")
+
+    # Export job config and run the job
+    job.export_job("/tmp/nvflare/workspace/jobs/")
+    job.simulator_run(f"/tmp/nvflare/workspace/works/{job_name}")
+
+
+if __name__ == "__main__":
+    main()
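
As a hypothetical invocation of the horizontal job script above (the script's file name is not preserved in this patch, so `xgb_fl_job_horizontal.py` is assumed; argument names follow the script's parser, and the values are illustrative):

```
python3 xgb_fl_job_horizontal.py --site_num 5 --training_algo bagging --split_method exponential --lr_mode scaled --round_num 100
```

The script exports the job configuration to `/tmp/nvflare/workspace/jobs/` and then launches it with the NVFlare simulator.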
diff --git a/examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical.py b/examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical.py
new file mode 100644
index 0000000000..717188f681
--- /dev/null
+++ b/examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical.py
@@ -0,0 +1,107 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+from src.vertical_data_loader import VerticalDataLoader
+
+from nvflare.app_common.widgets.convert_to_fed_event import ConvertToFedEvent
+from nvflare.app_opt.tracking.tb.tb_receiver import TBAnalyticsReceiver
+from nvflare.app_opt.tracking.tb.tb_writer import TBWriter
+from nvflare.app_opt.xgboost.histogram_based_v2.fed_controller import XGBFedController
+from nvflare.app_opt.xgboost.histogram_based_v2.fed_executor import FedXGBHistogramExecutor
+from nvflare.job_config.api import FedJob
+
+
+def define_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--data_split_path",
+        type=str,
+        default="/tmp/nvflare/dataset/xgboost_higgs_vertical/{SITE_NAME}/higgs.data.csv",
+        help="Path to data split files for each site",
+    )
+    parser.add_argument(
+        "--psi_path",
+        type=str,
+        default="/tmp/nvflare/workspace/works/vertical_xgb_psi/{SITE_NAME}/simulate_job/{SITE_NAME}/psi/intersection.txt",
+        help="Path to psi files for each site",
+    )
+    parser.add_argument("--site_num", type=int, default=2, help="Total number of sites")
+    parser.add_argument("--round_num", type=int, default=100, help="Total number of training rounds")
+    return parser.parse_args()
+
+
+def main():
+    args = define_parser()
+    data_split_path = args.data_split_path
+    psi_path = args.psi_path
+    site_num = args.site_num
+    round_num = args.round_num
+    job_name = "xgboost_vertical"
+    job = FedJob(name=job_name, min_clients=site_num)
+
+    # Define the controller workflow and send to server
+    controller = XGBFedController(
+        num_rounds=round_num,
+        data_split_mode=1,
+        secure_training=False,
+        xgb_options={"early_stopping_rounds": 3, "use_gpus": False},
+        xgb_params={
+            "max_depth": 8,
+            "eta": 0.1,
+            "objective": "binary:logistic",
+            "eval_metric": "auc",
+            "tree_method": "hist",
+            "nthread": 16,
+        },
+    )
+    job.to_server(controller, id="xgb_controller")
+
+    # Add tensorboard receiver to server
+    tb_receiver = TBAnalyticsReceiver(
+        tb_folder="tb_events",
+    )
+    job.to_server(tb_receiver, id="tb_receiver")
+
+    # Define the executor and send to clients
+    executor = FedXGBHistogramExecutor(
+        data_loader_id="dataloader",
+        metrics_writer_id="metrics_writer",
+        in_process=True,
+        model_file_name="test.model.json",
+    )
+    job.to_clients(executor, id="xgb_hist_executor", tasks=["config", "start"])
+
+    dataloader = VerticalDataLoader(
+        data_split_path=data_split_path, psi_path=psi_path, id_col="uid", label_owner="site-1", train_proportion=0.8
+    )
+    job.to_clients(dataloader, id="dataloader")
+
+    metrics_writer = TBWriter(event_type="analytix_log_stats")
+    job.to_clients(metrics_writer, id="metrics_writer")
+
+    event_to_fed = ConvertToFedEvent(
+        events_to_convert=["analytix_log_stats"],
+        fed_event_prefix="fed.",
+    )
+    job.to_clients(event_to_fed, id="event_to_fed")
+
+    # Export job config and run the job
+    job.export_job("/tmp/nvflare/workspace/jobs/")
+    job.simulator_run(f"/tmp/nvflare/workspace/works/{job_name}", n_clients=site_num)
+
+
+if __name__ == "__main__":
+    main()
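
A usage sketch for the two vertical jobs (file names as in the surrounding diff headers; `xgb_fl_job_vertical_psi.py` is added just below). The PSI job must run first so that the intersection files consumed via `--psi_path` exist:

```
python3 xgb_fl_job_vertical_psi.py --site_num 2
python3 xgb_fl_job_vertical.py --site_num 2 --round_num 100
```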
diff --git a/examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical_psi.py b/examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical_psi.py
new file mode 100644
index 0000000000..fc4954e772
--- /dev/null
+++ b/examples/advanced/xgboost/fedxgb/xgb_fl_job_vertical_psi.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+from src.local_psi import LocalPSI
+
+from nvflare.app_common.psi.dh_psi.dh_psi_controller import DhPSIController
+from nvflare.app_common.psi.file_psi_writer import FilePSIWriter
+from nvflare.app_common.psi.psi_executor import PSIExecutor
+from nvflare.app_opt.psi.dh_psi.dh_psi_task_handler import DhPSITaskHandler
+from nvflare.job_config.api import FedJob
+
+
+def define_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--data_split_path",
+        type=str,
+        default="/tmp/nvflare/dataset/xgboost_higgs_vertical/site-x/higgs.data.csv",
+        help="Path to data split files for each site",
+    )
+    parser.add_argument("--site_num", type=int, default=2, help="Total number of sites")
+    parser.add_argument("--psi_path", type=str, default="psi/intersection.txt", help="PSI output path")
+    return parser.parse_args()
+
+
+def main():
+    args = define_parser()
+    data_split_path = args.data_split_path
+    psi_path = args.psi_path
+    site_num = args.site_num
+    job_name = "xgboost_vertical_psi"
+    job = FedJob(name=job_name, min_clients=site_num)
+
+    # Define the controller workflow and send to server
+    controller = DhPSIController()
+    job.to_server(controller)
+
+    # Define the executor and other components for each site
+    executor = PSIExecutor(psi_algo_id="dh_psi")
+    job.to_clients(executor, id="psi_executor", tasks=["PSI"])
+
+    local_psi = LocalPSI(psi_writer_id="psi_writer", data_split_path=data_split_path, id_col="uid")
+    job.to_clients(local_psi, id="local_psi")
+
+    task_handler = DhPSITaskHandler(local_psi_id="local_psi")
+    job.to_clients(task_handler, id="dh_psi")
+
+    psi_writer = FilePSIWriter(output_path=psi_path)
+    job.to_clients(psi_writer, id="psi_writer")
+
+    # Export job config and run the job
+    job.export_job("/tmp/nvflare/workspace/jobs/")
+    job.simulator_run(f"/tmp/nvflare/workspace/works/{job_name}", n_clients=site_num)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/advanced/xgboost_secure/.gitignore b/examples/advanced/xgboost/fedxgb_secure/.gitignore
similarity index 100%
rename from examples/advanced/xgboost_secure/.gitignore
rename to examples/advanced/xgboost/fedxgb_secure/.gitignore
diff
--git a/examples/advanced/xgboost_secure/README.md b/examples/advanced/xgboost/fedxgb_secure/README.md similarity index 100% rename from examples/advanced/xgboost_secure/README.md rename to examples/advanced/xgboost/fedxgb_secure/README.md diff --git a/examples/advanced/xgboost_secure/figs/tree.base.png b/examples/advanced/xgboost/fedxgb_secure/figs/tree.base.png similarity index 100% rename from examples/advanced/xgboost_secure/figs/tree.base.png rename to examples/advanced/xgboost/fedxgb_secure/figs/tree.base.png diff --git a/examples/advanced/xgboost_secure/figs/tree.vert.secure.0.png b/examples/advanced/xgboost/fedxgb_secure/figs/tree.vert.secure.0.png similarity index 100% rename from examples/advanced/xgboost_secure/figs/tree.vert.secure.0.png rename to examples/advanced/xgboost/fedxgb_secure/figs/tree.vert.secure.0.png diff --git a/examples/advanced/xgboost_secure/figs/tree.vert.secure.1.png b/examples/advanced/xgboost/fedxgb_secure/figs/tree.vert.secure.1.png similarity index 100% rename from examples/advanced/xgboost_secure/figs/tree.vert.secure.1.png rename to examples/advanced/xgboost/fedxgb_secure/figs/tree.vert.secure.1.png diff --git a/examples/advanced/xgboost_secure/figs/tree.vert.secure.2.png b/examples/advanced/xgboost/fedxgb_secure/figs/tree.vert.secure.2.png similarity index 100% rename from examples/advanced/xgboost_secure/figs/tree.vert.secure.2.png rename to examples/advanced/xgboost/fedxgb_secure/figs/tree.vert.secure.2.png diff --git a/examples/advanced/xgboost_secure/prepare_data.sh b/examples/advanced/xgboost/fedxgb_secure/prepare_data.sh similarity index 100% rename from examples/advanced/xgboost_secure/prepare_data.sh rename to examples/advanced/xgboost/fedxgb_secure/prepare_data.sh diff --git a/examples/advanced/xgboost_secure/prepare_flare_job.sh b/examples/advanced/xgboost/fedxgb_secure/prepare_flare_job.sh similarity index 100% rename from examples/advanced/xgboost_secure/prepare_flare_job.sh rename to examples/advanced/xgboost/fedxgb_secure/prepare_flare_job.sh diff --git a/examples/advanced/xgboost_secure/project.yml b/examples/advanced/xgboost/fedxgb_secure/project.yml similarity index 100% rename from examples/advanced/xgboost_secure/project.yml rename to examples/advanced/xgboost/fedxgb_secure/project.yml diff --git a/examples/advanced/xgboost_secure/run_training_flare.sh b/examples/advanced/xgboost/fedxgb_secure/run_training_flare.sh similarity index 100% rename from examples/advanced/xgboost_secure/run_training_flare.sh rename to examples/advanced/xgboost/fedxgb_secure/run_training_flare.sh diff --git a/examples/advanced/xgboost_secure/run_training_standalone.sh b/examples/advanced/xgboost/fedxgb_secure/run_training_standalone.sh similarity index 100% rename from examples/advanced/xgboost_secure/run_training_standalone.sh rename to examples/advanced/xgboost/fedxgb_secure/run_training_standalone.sh diff --git a/examples/advanced/xgboost_secure/train_standalone/train_base.py b/examples/advanced/xgboost/fedxgb_secure/train_standalone/train_base.py similarity index 98% rename from examples/advanced/xgboost_secure/train_standalone/train_base.py rename to examples/advanced/xgboost/fedxgb_secure/train_standalone/train_base.py index 58db56b94c..a8762b9e29 100644 --- a/examples/advanced/xgboost_secure/train_standalone/train_base.py +++ b/examples/advanced/xgboost/fedxgb_secure/train_standalone/train_base.py @@ -38,7 +38,7 @@ def train_base_args_parser(): parser.add_argument( "--out_path", type=str, - 
default="/tmp/nvflare/xgboost_secure/train_standalone/base", + default="/tmp/nvflare/fedxgb_secure/train_standalone/base", help="Output path for the data split file", ) return parser diff --git a/examples/advanced/xgboost_secure/train_standalone/train_federated.py b/examples/advanced/xgboost/fedxgb_secure/train_standalone/train_federated.py similarity index 98% rename from examples/advanced/xgboost_secure/train_standalone/train_federated.py rename to examples/advanced/xgboost/fedxgb_secure/train_standalone/train_federated.py index 808e88fa17..f4aad83054 100644 --- a/examples/advanced/xgboost_secure/train_standalone/train_federated.py +++ b/examples/advanced/xgboost/fedxgb_secure/train_standalone/train_federated.py @@ -48,7 +48,7 @@ def train_federated_args_parser(): parser.add_argument( "--out_path", type=str, - default="/tmp/nvflare/xgboost_secure/train_standalone/federated", + default="/tmp/nvflare/fedxgb_secure/train_standalone/federated", help="Output path for the data split file", ) return parser diff --git a/examples/advanced/xgboost_secure/utils/prepare_data_base.py b/examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_base.py similarity index 100% rename from examples/advanced/xgboost_secure/utils/prepare_data_base.py rename to examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_base.py diff --git a/examples/advanced/xgboost_secure/utils/prepare_data_horizontal.py b/examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_horizontal.py similarity index 100% rename from examples/advanced/xgboost_secure/utils/prepare_data_horizontal.py rename to examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_horizontal.py diff --git a/examples/advanced/xgboost_secure/utils/prepare_data_traintest_split.py b/examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_traintest_split.py similarity index 100% rename from examples/advanced/xgboost_secure/utils/prepare_data_traintest_split.py rename to examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_traintest_split.py diff --git a/examples/advanced/xgboost_secure/utils/prepare_data_vertical.py b/examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_vertical.py similarity index 100% rename from examples/advanced/xgboost_secure/utils/prepare_data_vertical.py rename to examples/advanced/xgboost/fedxgb_secure/utils/prepare_data_vertical.py diff --git a/examples/advanced/xgboost/histogram-based/README.md b/examples/advanced/xgboost/histogram-based/README.md deleted file mode 100644 index 8c89f95eff..0000000000 --- a/examples/advanced/xgboost/histogram-based/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# Histogram-based Federated Learning for XGBoost - -## Run automated experiments -Please make sure to finish the [preparation steps](../README.md) before running the following steps. -To run this example with NVFlare, follow the steps below or this [notebook](./xgboost_histogram_higgs.ipynb) for an interactive experience. - -### Environment Preparation - -Switch to this directory and install additional requirements (suggest to do this inside virtual environment): -``` -python3 -m pip install -r requirements.txt -``` - -### Run centralized experiments -``` -bash run_experiment_centralized.sh -``` - -### Run federated experiments with simulator locally -Next, we will use the NVFlare simulator to run FL training automatically. 
-``` -nvflare simulator jobs/higgs_2_histogram_v2_uniform_split_uniform_lr \ - -w /tmp/nvflare/xgboost_v2_workspace -n 2 -t 2 -``` - -Model accuracy can be visualized in tensorboard: -``` -tensorboard --logdir /tmp/nvflare/xgboost_v2_workspace/simulate_job/tb_events -``` - -### Run federated experiments in real world - -To run in a federated setting, follow [Real-World FL](https://nvflare.readthedocs.io/en/main/real_world_fl.html) to -start the overseer, FL servers and FL clients. - -You need to download the HIGGS data on each client site. -You will also need to install XGBoost on each client site and server site. - -You can still generate the data splits and job configs using the scripts provided. - -You will need to copy the generated data split file into each client site. -You might also need to modify the `data_path` in the `data_site-XXX.json` -inside the `/tmp/nvflare/xgboost_higgs_dataset` folder, -since each site might save the HIGGS dataset in different places. - -Then, you can use the admin client to submit the job via the `submit_job` command. - -## Customization - -The provided XGBoost executor can be customized using boost parameters -provided in the `xgb_params` argument. - -If the parameter change alone is not sufficient and code changes are required, -a custom executor can be implemented to make calls to xgboost library directly. - -The custom executor can inherit the base class `FedXGBHistogramExecutor` and -overwrite the `xgb_train()` method. - -To use a different dataset, you can inherit the base class `XGBDataLoader` and -implement the `load_data()` method. - -## Loose integration - -We can use the NVFlare controller/executor just to launch the external xgboost -federated server and client. - -### Run federated experiments with simulator locally -Next, we will use the NVFlare simulator to run FL training automatically. 
-``` -nvflare simulator jobs/higgs_2_histogram_uniform_split_uniform_lr \ - -w /tmp/nvflare/xgboost_workspace -n 2 -t 2 -``` - -Model accuracy can be visualized in tensorboard: -``` -tensorboard --logdir /tmp/nvflare/xgboost_workspace/simulate_job/tb_events -``` diff --git a/examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_client.json b/examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_client.json deleted file mode 100755 index a3fe316d90..0000000000 --- a/examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_client.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "format_version": 2, - "num_rounds": 100, - "executors": [ - { - "tasks": [ - "train" - ], - "executor": { - "id": "Executor", - "path": "nvflare.app_opt.xgboost.histogram_based.executor.FedXGBHistogramExecutor", - "args": { - "data_loader_id": "dataloader", - "num_rounds": "{num_rounds}", - "early_stopping_rounds": 2, - "metrics_writer_id": "metrics_writer", - "xgb_params": { - "max_depth": 8, - "eta": 0.1, - "objective": "binary:logistic", - "eval_metric": "auc", - "tree_method": "hist", - "nthread": 16 - } - } - } - } - ], - "task_result_filters": [], - "task_data_filters": [], - "components": [ - { - "id": "dataloader", - "path": "higgs_data_loader.HIGGSDataLoader", - "args": { - "data_split_filename": "data_split.json" - } - }, - { - "id": "metrics_writer", - "path": "nvflare.app_opt.tracking.tb.tb_writer.TBWriter", - "args": {"event_type": "analytix_log_stats"} - }, - { - "id": "event_to_fed", - "path": "nvflare.app_common.widgets.convert_to_fed_event.ConvertToFedEvent", - "args": {"events_to_convert": ["analytix_log_stats"], "fed_event_prefix": "fed."} - } - ] -} diff --git a/examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_server.json b/examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_server.json deleted file mode 100755 index 9814f32e2c..0000000000 --- a/examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_server.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "format_version": 2, - "task_data_filters": [], - "task_result_filters": [], - "components": [ - { - "id": "tb_receiver", - "path": "nvflare.app_opt.tracking.tb.tb_receiver.TBAnalyticsReceiver", - "args": { - "tb_folder": "tb_events" - } - } - ], - "workflows": [ - { - "id": "xgb_controller", - "path": "nvflare.app_opt.xgboost.histogram_based.controller.XGBFedController", - "args": { - "train_timeout": 30000 - } - } - ] -} \ No newline at end of file diff --git a/examples/advanced/xgboost/histogram-based/jobs/base/meta.json b/examples/advanced/xgboost/histogram-based/jobs/base/meta.json deleted file mode 100644 index 68fc7c42e0..0000000000 --- a/examples/advanced/xgboost/histogram-based/jobs/base/meta.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "xgboost_histogram_based", - "resource_spec": {}, - "deploy_map": { - "app": [ - "@ALL" - ] - }, - "min_clients": 2 -} diff --git a/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_client.json b/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_client.json deleted file mode 100755 index a23a960c3d..0000000000 --- a/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_client.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "format_version": 2, - "executors": [ - { - "tasks": [ - "config", "start" - ], - "executor": { - "id": "Executor", - "path": "nvflare.app_opt.xgboost.histogram_based_v2.fed_executor.FedXGBHistogramExecutor", 
- "args": { - "data_loader_id": "dataloader", - "metrics_writer_id": "metrics_writer" - } - } - } - ], - "task_result_filters": [], - "task_data_filters": [], - "components": [ - { - "id": "dataloader", - "path": "higgs_data_loader.HIGGSDataLoader", - "args": { - "data_split_filename": "data_split.json" - } - }, - { - "id": "metrics_writer", - "path": "nvflare.app_opt.tracking.tb.tb_writer.TBWriter", - "args": {"event_type": "analytix_log_stats"} - }, - { - "id": "event_to_fed", - "path": "nvflare.app_common.widgets.convert_to_fed_event.ConvertToFedEvent", - "args": {"events_to_convert": ["analytix_log_stats"], "fed_event_prefix": "fed."} - } - ] -} diff --git a/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_server.json b/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_server.json deleted file mode 100755 index d0dd1e3908..0000000000 --- a/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_server.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "format_version": 2, - "num_rounds": 100, - "task_data_filters": [], - "task_result_filters": [], - "components": [ - { - "id": "tb_receiver", - "path": "nvflare.app_opt.tracking.tb.tb_receiver.TBAnalyticsReceiver", - "args": { - "tb_folder": "tb_events" - } - } - ], - "workflows": [ - { - "id": "xgb_controller", - "path": "nvflare.app_opt.xgboost.histogram_based_v2.fed_controller.XGBFedController", - "args": { - "num_rounds": "{num_rounds}", - "data_split_mode": 0, - "secure_training": false, - "xgb_params": { - "max_depth": 8, - "eta": 0.1, - "objective": "binary:logistic", - "eval_metric": "auc", - "tree_method": "hist", - "nthread": 16 - }, - "xgb_options": { - "early_stopping_rounds": 2 - } - } - } - ] -} \ No newline at end of file diff --git a/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/custom/higgs_data_loader.py b/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/custom/higgs_data_loader.py deleted file mode 100644 index 6623e35fa3..0000000000 --- a/examples/advanced/xgboost/histogram-based/jobs/base_v2/app/custom/higgs_data_loader.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json - -import pandas as pd -import xgboost as xgb - -from nvflare.app_opt.xgboost.data_loader import XGBDataLoader - - -def _read_higgs_with_pandas(data_path, start: int, end: int): - data_size = end - start - data = pd.read_csv(data_path, header=None, skiprows=start, nrows=data_size) - data_num = data.shape[0] - - # split to feature and label - x = data.iloc[:, 1:].copy() - y = data.iloc[:, 0].copy() - - return x, y, data_num - - -class HIGGSDataLoader(XGBDataLoader): - def __init__(self, data_split_filename): - """Reads HIGGS dataset and return XGB data matrix. 
- - Args: - data_split_filename: file name to data splits - """ - self.data_split_filename = data_split_filename - - def load_data(self): - with open(self.data_split_filename, "r") as file: - data_split = json.load(file) - - data_path = data_split["data_path"] - data_index = data_split["data_index"] - - # check if site_id and "valid" in the mapping dict - if self.client_id not in data_index.keys(): - raise ValueError( - f"Data does not contain Client {self.client_id} split", - ) - - if "valid" not in data_index.keys(): - raise ValueError( - "Data does not contain Validation split", - ) - - site_index = data_index[self.client_id] - valid_index = data_index["valid"] - - # training - x_train, y_train, total_train_data_num = _read_higgs_with_pandas( - data_path=data_path, start=site_index["start"], end=site_index["end"] - ) - dmat_train = xgb.DMatrix(x_train, label=y_train) - - # validation - x_valid, y_valid, total_valid_data_num = _read_higgs_with_pandas( - data_path=data_path, start=valid_index["start"], end=valid_index["end"] - ) - dmat_valid = xgb.DMatrix(x_valid, label=y_valid, data_split_mode=self.data_split_mode) - - return dmat_train, dmat_valid diff --git a/examples/advanced/xgboost/histogram-based/jobs/base_v2/meta.json b/examples/advanced/xgboost/histogram-based/jobs/base_v2/meta.json deleted file mode 100644 index 6d82211a16..0000000000 --- a/examples/advanced/xgboost/histogram-based/jobs/base_v2/meta.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "xgboost_histogram_based_v2", - "resource_spec": {}, - "deploy_map": { - "app": [ - "@ALL" - ] - }, - "min_clients": 2 -} diff --git a/examples/advanced/xgboost/histogram-based/prepare_data.sh b/examples/advanced/xgboost/histogram-based/prepare_data.sh deleted file mode 100755 index f7bdf9e68d..0000000000 --- a/examples/advanced/xgboost/histogram-based/prepare_data.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_DIR="$( dirname -- "$0"; )"; - -bash "${SCRIPT_DIR}"/../prepare_data.sh diff --git a/examples/advanced/xgboost/histogram-based/requirements.txt b/examples/advanced/xgboost/histogram-based/requirements.txt deleted file mode 100644 index d79a5bef89..0000000000 --- a/examples/advanced/xgboost/histogram-based/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -nvflare~=2.5.0rc -pandas -scikit-learn -torch -tensorboard -matplotlib -shap -# require xgboost 2.2 version, for now need to install a nightly build -https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/federated-secure/xgboost-2.2.0.dev0%2B4601688195708f7c31fcceeb0e0ac735e7311e61-py3-none-manylinux_2_28_x86_64.whl diff --git a/examples/advanced/xgboost/histogram-based/run_experiment_centralized.sh b/examples/advanced/xgboost/histogram-based/run_experiment_centralized.sh deleted file mode 100755 index 7a71f2d0a8..0000000000 --- a/examples/advanced/xgboost/histogram-based/run_experiment_centralized.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash -DATASET_PATH="$HOME/dataset/HIGGS.csv" - -if [ ! 
-f "${DATASET_PATH}" ] -then - echo "Please check if you saved HIGGS dataset in ${DATASET_PATH}" - exit 1 -fi -python3 ../utils/baseline_centralized.py --num_parallel_tree 1 --train_in_one_session --data_path "${DATASET_PATH}" diff --git a/examples/advanced/xgboost/histogram-based/run_experiment_simulator.sh b/examples/advanced/xgboost/histogram-based/run_experiment_simulator.sh deleted file mode 100755 index eb6861c326..0000000000 --- a/examples/advanced/xgboost/histogram-based/run_experiment_simulator.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -n=2 -study=histogram_uniform_split_uniform_lr -nvflare simulator jobs/higgs_${n}_${study} -w ${PWD}/workspaces/xgboost_workspace_${n}_${study} -n ${n} -t ${n} - -n=5 -study=histogram_uniform_split_uniform_lr -nvflare simulator jobs/higgs_${n}_${study} -w ${PWD}/workspaces/xgboost_workspace_${n}_${study} -n ${n} -t ${n} diff --git a/examples/advanced/xgboost/prepare_data.sh b/examples/advanced/xgboost/prepare_data.sh deleted file mode 100755 index f1a2e28675..0000000000 --- a/examples/advanced/xgboost/prepare_data.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -DATASET_PATH="$HOME/dataset/HIGGS.csv" -OUTPUT_PATH="/tmp/nvflare/xgboost_higgs_dataset" -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -if [ ! -f "${DATASET_PATH}" ] -then - echo "Please check if you saved HIGGS dataset in ${DATASET_PATH}" -fi - -echo "Generated HIGGS data splits, reading from ${DATASET_PATH}" -for site_num in 2 5 20; -do - for split_mode in uniform exponential square; - do - python3 ${SCRIPT_DIR}/utils/prepare_data_split.py \ - --data_path "${DATASET_PATH}" \ - --site_num ${site_num} \ - --size_total 11000000 \ - --size_valid 1000000 \ - --split_method ${split_mode} \ - --out_path "${OUTPUT_PATH}/${site_num}_${split_mode}" - done -done -echo "Data splits are generated in ${OUTPUT_PATH}" diff --git a/examples/advanced/xgboost/prepare_job_config.sh b/examples/advanced/xgboost/prepare_job_config.sh deleted file mode 100755 index f839b46242..0000000000 --- a/examples/advanced/xgboost/prepare_job_config.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -TREE_METHOD="hist" - -prepare_job_config() { - python3 utils/prepare_job_config.py --site_num "$1" --training_algo "$2" --split_method "$3" \ - --lr_mode "$4" --nthread 16 --tree_method "$5" -} - -echo "Generating job configs" -prepare_job_config 5 bagging exponential scaled $TREE_METHOD -prepare_job_config 5 bagging exponential uniform $TREE_METHOD -prepare_job_config 5 bagging uniform uniform $TREE_METHOD -prepare_job_config 5 cyclic exponential uniform $TREE_METHOD -prepare_job_config 5 cyclic uniform uniform $TREE_METHOD - -prepare_job_config 20 bagging square scaled $TREE_METHOD -prepare_job_config 20 bagging square uniform $TREE_METHOD -prepare_job_config 20 bagging uniform uniform $TREE_METHOD -prepare_job_config 20 cyclic square uniform $TREE_METHOD -prepare_job_config 20 cyclic uniform uniform $TREE_METHOD - -prepare_job_config 2 histogram uniform uniform $TREE_METHOD -prepare_job_config 5 histogram uniform uniform $TREE_METHOD -prepare_job_config 2 histogram_v2 uniform uniform $TREE_METHOD -prepare_job_config 5 histogram_v2 uniform uniform $TREE_METHOD -echo "Job configs generated" diff --git a/examples/advanced/xgboost_secure/requirements.txt b/examples/advanced/xgboost/requirements.txt similarity index 90% rename from examples/advanced/xgboost_secure/requirements.txt rename to examples/advanced/xgboost/requirements.txt index 2d9890c2c6..95cbefd2e9 100644 
--- a/examples/advanced/xgboost_secure/requirements.txt
+++ b/examples/advanced/xgboost/requirements.txt
@@ -1,10 +1,12 @@
-nvflare~=2.5.0rc
-ipcl_python @ git+https://github.com/intel/pailliercryptolib_python.git@development
-# require xgboost 2.2 version, for now need to install a nightly build
-https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/federated-secure/xgboost-2.2.0.dev0%2B4601688195708f7c31fcceeb0e0ac735e7311e61-py3-none-manylinux_2_28_x86_64.whl
+nvflare~=2.5.0
+openmined.psi==1.1.1
 pandas
+torch
 scikit-learn
 shap
 matplotlib
 tensorboard
 tenseal
+# requires XGBoost 2.2; for now, install a nightly build
+https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/federated-secure/xgboost-2.2.0.dev0%2B4601688195708f7c31fcceeb0e0ac735e7311e61-py3-none-manylinux_2_28_x86_64.whl
+ipcl_python @ git+https://github.com/intel/pailliercryptolib_python.git@development
diff --git a/examples/advanced/xgboost/tree-based/README.md b/examples/advanced/xgboost/tree-based/README.md
deleted file mode 100644
index ddcb545d09..0000000000
--- a/examples/advanced/xgboost/tree-based/README.md
+++ /dev/null
@@ -1,101 +0,0 @@
-# Tree-based Federated Learning for XGBoost
-
-You can also follow along in this [notebook](./xgboost_tree_higgs.ipynb) for an interactive experience.
-
-## Cyclic Training
-
-"Cyclic XGBoost" is one way of performing tree-based federated boosting across multiple sites: at each round of tree boosting, instead of relying on data statistics collected from all clients, the boosting relies on only one client's local data. The resulting tree sequence is then forwarded to the next client for the next round of boosting. Such training schemes have been proposed in the literature [1] [2].
-
-## Bagging Aggregation
-
-"Bagging XGBoost" is another way of performing tree-based federated boosting across multiple sites: at each round of tree boosting, all sites start from the same "global model" and boost a number of trees (in the current example, one tree) on their local data. The resulting trees are then sent to the server. A bagging aggregation scheme is applied to all submitted trees to update the global model, which is then distributed to all clients for the next round of boosting.
-
-This scheme bears a certain similarity to the [Random Forest mode](https://xgboost.readthedocs.io/en/stable/tutorials/rf.html) of XGBoost, where a forest of `num_parallel_tree` trees, rather than a single tree, is boosted on random row/column splits. In the federated learning setting, the split is fixed to the clients rather than random, and no column subsampling is applied.
-
-In addition to the basic uniform shrinkage setting, where all clients use the same learning rate, our research enabled scaled shrinkage, which weights each client's learning rate by its data size (see the sketch below). This is shown to significantly improve the model's performance on non-uniform quantity splits of the HIGGS data.
-
-## Run automated experiments
-Please make sure to finish the [preparation steps](../README.md) before running the following steps.
-To run all experiments in this example with NVFlare, follow the steps below. To try out a single experiment, follow this [notebook](./xgboost_tree_higgs.ipynb).
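-
-Returning to the scaled shrinkage described above: as a minimal sketch (not the exact
-executor implementation), the per-client scale factor can be derived from the split
-file's row ranges, mirroring the `_get_lr_scale_from_split_json` helper in
-`utils/prepare_job_config.py`:
-
-```python
-def get_lr_scales(data_index: dict) -> dict:
-    """Per-site learning-rate scales: each site's share of the training rows.
-
-    data_index maps site names (plus "valid") to {"start": ..., "end": ...} row ranges.
-    """
-    sizes = {k: v["end"] - v["start"] for k, v in data_index.items() if k != "valid"}
-    total = sum(sizes.values())
-    # A uniform split yields 1/num_clients for every site; non-uniform splits
-    # give larger sites proportionally larger shrinkage.
-    return {site: n / total for site, n in sizes.items()}
-```
-
-How the executor applies this scale is controlled by the `lr_mode` setting in the job configs.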
-
-### Environment Preparation
-
-Switch to this directory and install the additional requirements (we suggest doing this inside a virtual environment):
-```
-python3 -m pip install -r requirements.txt
-```
-
-### Run federated experiments with the simulator locally
-Next, we will use the NVFlare simulator to run FL training for all the different experiment configurations.
-```
-bash run_experiment_simulator.sh
-```
-
-### Run centralized experiments
-For comparison, we train baseline models in a centralized manner with the same number of training rounds.
-```
-bash run_experiment_centralized.sh
-```
-This will train several models with and without random forest settings. The results are shown below.
-
-![Centralized validation curve](./figs/Centralized.png)
-
-As shown, the random forest setting may not yield a significant performance gain,
-and can even hurt accuracy if the subsample rate is too low (e.g. 0.05).
-
-### Results comparison for 5-client and 20-client experiments under various training settings
-
-Let's now summarize the results of the federated learning experiments run above. We compare the AUC scores of
-the model on a standalone validation set consisting of the first 1 million instances of the HIGGS dataset.
-
-We provide a script for plotting the TensorBoard records; to use it, run
-```
-python3 ./utils/plot_tensorboard_events.py
-```
-
-> **_NOTE:_** You need to install the packages in [./plot-requirements.txt](./plot-requirements.txt) before plotting.
-
-
-The resulting validation AUC curves (no smoothing) are shown below:
-
-![5 clients validation curve](./figs/5_client.png)
-![20 clients validation curve](./figs/20_client.png)
-
-As illustrated, we can make the following observations:
-- cyclic training performs well under a uniform split (the purple curve); however, under a non-uniform split it suffers a significant performance drop (the brown curve)
-- bagging training performs better than cyclic training under both uniform and non-uniform data splits (orange vs. purple, red/green vs. brown)
-- with uniform shrinkage, bagging suffers a significant performance drop under a non-uniform split (green vs. orange)
-- data-size-dependent shrinkage recovers the performance drop above (red vs. green) and achieves performance comparable to or better than the uniform data split (red vs. orange)
-- bagging under a uniform data split (orange), and bagging with data-size-dependent shrinkage under a non-uniform data split (red), achieve performance comparable to or better than the centralized training baseline (blue)
-
-Regarding model size, centralized and cyclic training produce a model consisting of `num_round` trees,
-while the bagging models consist of `num_round * num_client` trees, since in each round
-bagging training boosts a forest of individually trained trees, one from each client.
-
-### Run federated experiments in the real world
-
-To run in a federated setting, follow [Real-World FL](https://nvflare.readthedocs.io/en/main/real_world_fl.html) to
-start the overseer, FL servers and FL clients.
-
-You need to download the HIGGS data on each client site,
-and install XGBoost on each client and server site.
-
-You can still generate the data splits and job configs using the scripts provided.
-
-You will need to copy the generated data split file to each client site.
-You might also need to modify the `data_path` in the `data_site-XXX.json` files
-inside the `/tmp/nvflare/xgboost_higgs_dataset` folder,
-since each site might store the HIGGS dataset in a different place.
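-
-As a minimal illustration of what a generated split file contains (the path and row
-ranges below are made up; the real files are produced by `utils/prepare_data_split.py`),
-the structure matches what `HIGGSDataLoader` reads:
-
-```python
-import json
-
-# Hypothetical 2-site uniform split: 1M validation rows, 10M training rows.
-data_split = {
-    "data_path": "/path/to/HIGGS.csv",  # adjust to each site's local path
-    "data_index": {
-        "valid": {"start": 0, "end": 1000000},
-        "site-1": {"start": 1000000, "end": 6000000},
-        "site-2": {"start": 6000000, "end": 11000000},
-    },
-}
-with open("data_site-1.json", "w") as f:
-    json.dump(data_split, f, indent=4)
-```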
-
-Then you can use the admin client to submit the job via the `submit_job` command.
-
-## Customization
-
-To use another dataset, inherit the base class `XGBDataLoader` and
-implement its `load_data()` method.
-
-
-## Reference
-[1] Zhao, L. et al., "InPrivate Digging: Enabling Tree-based Distributed Data Mining with Differential Privacy," IEEE INFOCOM 2018 - IEEE Conference on Computer Communications, 2018, pp. 2087-2095
-
-[2] Yamamoto, F. et al., "New Approaches to Federated XGBoost Learning for Privacy-Preserving Data Analysis," ICONIP 2020 - International Conference on Neural Information Processing, 2020, Lecture Notes in Computer Science, vol 12533
diff --git a/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_client.json b/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_client.json
deleted file mode 100755
index ef0f19875b..0000000000
--- a/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_client.json
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-  "format_version": 2,
-
-  "executors": [
-    {
-      "tasks": [
-        "train"
-      ],
-      "executor": {
-        "id": "Executor",
-        "path": "nvflare.app_opt.xgboost.tree_based.executor.FedXGBTreeExecutor",
-        "args": {
-          "data_loader_id": "dataloader",
-          "training_mode": "bagging",
-          "num_client_bagging": 5,
-          "num_local_parallel_tree": 1,
-          "local_subsample": 1,
-          "local_model_path": "model.json",
-          "global_model_path": "model_global.json",
-          "learning_rate": 0.1,
-          "objective": "binary:logistic",
-          "max_depth": 8,
-          "eval_metric": "auc",
-          "tree_method": "hist",
-          "nthread": 16
-        }
-      }
-    }
-  ],
-  "task_result_filters": [],
-  "task_data_filters": [],
-  "components": [
-    {
-      "id": "dataloader",
-      "path": "higgs_data_loader.HIGGSDataLoader",
-      "args": {
-        "data_split_filename": "data_split.json"
-      }
-    }
-  ]
-}
diff --git a/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_server.json b/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_server.json
deleted file mode 100755
index cfd7b83b54..0000000000
--- a/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/config/config_fed_server.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
-  "format_version": 2,
-  "num_rounds": 101,
-
-  "task_data_filters": [],
-  "task_result_filters": [],
-
-  "components": [
-    {
-      "id": "persistor",
-      "path": "nvflare.app_opt.xgboost.tree_based.model_persistor.XGBModelPersistor",
-      "args": {
-        "save_name": "xgboost_model.json"
-      }
-    },
-    {
-      "id": "shareable_generator",
-      "path": "nvflare.app_opt.xgboost.tree_based.shareable_generator.XGBModelShareableGenerator",
-      "args": {}
-    },
-    {
-      "id": "aggregator",
-      "path": "nvflare.app_opt.xgboost.tree_based.bagging_aggregator.XGBBaggingAggregator",
-      "args": {}
-    }
-  ],
-  "workflows": [
-    {
-      "id": "scatter_and_gather",
-      "path": "nvflare.app_common.workflows.scatter_and_gather.ScatterAndGather",
-      "args": {
-        "min_clients": 5,
-        "num_rounds": "{num_rounds}",
-        "start_round": 0,
-        "wait_time_after_min_received": 0,
-        "aggregator_id": "aggregator",
-        "persistor_id": "persistor",
-        "shareable_generator_id": "shareable_generator",
-        "train_task_name": "train",
-        "train_timeout": 0,
-        "allow_empty_global_weights": true,
-        "task_check_period": 0.01,
-        "persist_every_n_rounds": 0,
-        "snapshot_every_n_rounds": 0
-      }
-    }
-  ]
-}
diff --git a/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/custom/higgs_data_loader.py b/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/custom/higgs_data_loader.py
deleted file mode
100644 index 124268cfce..0000000000 --- a/examples/advanced/xgboost/tree-based/jobs/bagging_base/app/custom/higgs_data_loader.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json - -import pandas as pd -import xgboost as xgb - -from nvflare.app_opt.xgboost.data_loader import XGBDataLoader - - -def _read_higgs_with_pandas(data_path, start: int, end: int): - data_size = end - start - data = pd.read_csv(data_path, header=None, skiprows=start, nrows=data_size) - data_num = data.shape[0] - - # split to feature and label - x = data.iloc[:, 1:].copy() - y = data.iloc[:, 0].copy() - - return x, y, data_num - - -class HIGGSDataLoader(XGBDataLoader): - def __init__(self, data_split_filename): - """Reads HIGGS dataset and return XGB data matrix. - - Args: - data_split_filename: file name to data splits - """ - self.data_split_filename = data_split_filename - - def load_data(self): - with open(self.data_split_filename, "r") as file: - data_split = json.load(file) - - data_path = data_split["data_path"] - data_index = data_split["data_index"] - - # check if site_id and "valid" in the mapping dict - if self.client_id not in data_index.keys(): - raise ValueError( - f"Data does not contain Client {self.client_id} split", - ) - - if "valid" not in data_index.keys(): - raise ValueError( - "Data does not contain Validation split", - ) - - site_index = data_index[self.client_id] - valid_index = data_index["valid"] - - # training - x_train, y_train, total_train_data_num = _read_higgs_with_pandas( - data_path=data_path, start=site_index["start"], end=site_index["end"] - ) - dmat_train = xgb.DMatrix(x_train, label=y_train) - - # validation - x_valid, y_valid, total_valid_data_num = _read_higgs_with_pandas( - data_path=data_path, start=valid_index["start"], end=valid_index["end"] - ) - dmat_valid = xgb.DMatrix(x_valid, label=y_valid, data_split_mode=self.data_split_mode) - - return dmat_train, dmat_valid diff --git a/examples/advanced/xgboost/tree-based/jobs/bagging_base/meta.json b/examples/advanced/xgboost/tree-based/jobs/bagging_base/meta.json deleted file mode 100644 index aa7ac49fd6..0000000000 --- a/examples/advanced/xgboost/tree-based/jobs/bagging_base/meta.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "xgboost_tree_bagging", - "resource_spec": {}, - "deploy_map": { - "app": [ - "@ALL" - ] - } -} diff --git a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_client.json b/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_client.json deleted file mode 100755 index d63a3ea551..0000000000 --- a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_client.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "format_version": 2, - - "executors": [ - { - "tasks": [ - "train" - ], - "executor": { - "id": "Executor", - "path": "nvflare.app_opt.xgboost.tree_based.executor.FedXGBTreeExecutor", - "args": { - "data_loader_id": "dataloader", - 
"training_mode": "cyclic", - "num_client_bagging": 1, - "local_model_path": "model.json", - "global_model_path": "model_global.json", - "learning_rate": 0.1, - "objective": "binary:logistic", - "max_depth": 8, - "eval_metric": "auc", - "tree_method": "hist", - "nthread": 16 - } - } - } - ], - "task_result_filters": [], - "task_data_filters": [], - "components": [ - { - "id": "dataloader", - "path": "higgs_data_loader.HIGGSDataLoader", - "args": { - "data_split_filename": "data_split.json" - } - } - ] -} diff --git a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_server.json b/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_server.json deleted file mode 100755 index 93a8e3cf4b..0000000000 --- a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/config/config_fed_server.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "format_version": 2, - "num_rounds": 20, - "task_data_filters": [], - "task_result_filters": [], - - "components": [ - { - "id": "persistor", - "path": "nvflare.app_opt.xgboost.tree_based.model_persistor.XGBModelPersistor", - "args": { - "save_name": "xgboost_model.json", - "load_as_dict": false - } - }, - { - "id": "shareable_generator", - "path": "nvflare.app_opt.xgboost.tree_based.shareable_generator.XGBModelShareableGenerator", - "args": {} - } - ], - "workflows": [ - { - "id": "cyclic_ctl", - "path": "nvflare.app_common.workflows.cyclic_ctl.CyclicController", - "args": { - "num_rounds": "{num_rounds}", - "task_assignment_timeout": 60, - "persistor_id": "persistor", - "shareable_generator_id": "shareable_generator", - "task_name": "train", - "task_check_period": 0.01, - "persist_every_n_rounds": 0, - "snapshot_every_n_rounds": 0 - } - } - ] -} diff --git a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/custom/higgs_data_loader.py b/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/custom/higgs_data_loader.py deleted file mode 100644 index 124268cfce..0000000000 --- a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/app/custom/higgs_data_loader.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json - -import pandas as pd -import xgboost as xgb - -from nvflare.app_opt.xgboost.data_loader import XGBDataLoader - - -def _read_higgs_with_pandas(data_path, start: int, end: int): - data_size = end - start - data = pd.read_csv(data_path, header=None, skiprows=start, nrows=data_size) - data_num = data.shape[0] - - # split to feature and label - x = data.iloc[:, 1:].copy() - y = data.iloc[:, 0].copy() - - return x, y, data_num - - -class HIGGSDataLoader(XGBDataLoader): - def __init__(self, data_split_filename): - """Reads HIGGS dataset and return XGB data matrix. 
- - Args: - data_split_filename: file name to data splits - """ - self.data_split_filename = data_split_filename - - def load_data(self): - with open(self.data_split_filename, "r") as file: - data_split = json.load(file) - - data_path = data_split["data_path"] - data_index = data_split["data_index"] - - # check if site_id and "valid" in the mapping dict - if self.client_id not in data_index.keys(): - raise ValueError( - f"Data does not contain Client {self.client_id} split", - ) - - if "valid" not in data_index.keys(): - raise ValueError( - "Data does not contain Validation split", - ) - - site_index = data_index[self.client_id] - valid_index = data_index["valid"] - - # training - x_train, y_train, total_train_data_num = _read_higgs_with_pandas( - data_path=data_path, start=site_index["start"], end=site_index["end"] - ) - dmat_train = xgb.DMatrix(x_train, label=y_train) - - # validation - x_valid, y_valid, total_valid_data_num = _read_higgs_with_pandas( - data_path=data_path, start=valid_index["start"], end=valid_index["end"] - ) - dmat_valid = xgb.DMatrix(x_valid, label=y_valid, data_split_mode=self.data_split_mode) - - return dmat_train, dmat_valid diff --git a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/meta.json b/examples/advanced/xgboost/tree-based/jobs/cyclic_base/meta.json deleted file mode 100644 index 58450dbfdd..0000000000 --- a/examples/advanced/xgboost/tree-based/jobs/cyclic_base/meta.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "xgboost_tree_cyclic", - "resource_spec": {}, - "deploy_map": { - "app": [ - "@ALL" - ] - } -} diff --git a/examples/advanced/xgboost/tree-based/plot-requirements.txt b/examples/advanced/xgboost/tree-based/plot-requirements.txt deleted file mode 100644 index 7262e63060..0000000000 --- a/examples/advanced/xgboost/tree-based/plot-requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -tensorflow -seaborn diff --git a/examples/advanced/xgboost/tree-based/prepare_data.sh b/examples/advanced/xgboost/tree-based/prepare_data.sh deleted file mode 100755 index f7bdf9e68d..0000000000 --- a/examples/advanced/xgboost/tree-based/prepare_data.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_DIR="$( dirname -- "$0"; )"; - -bash "${SCRIPT_DIR}"/../prepare_data.sh diff --git a/examples/advanced/xgboost/tree-based/requirements.txt b/examples/advanced/xgboost/tree-based/requirements.txt deleted file mode 100644 index d79a5bef89..0000000000 --- a/examples/advanced/xgboost/tree-based/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -nvflare~=2.5.0rc -pandas -scikit-learn -torch -tensorboard -matplotlib -shap -# require xgboost 2.2 version, for now need to install a nightly build -https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/federated-secure/xgboost-2.2.0.dev0%2B4601688195708f7c31fcceeb0e0ac735e7311e61-py3-none-manylinux_2_28_x86_64.whl diff --git a/examples/advanced/xgboost/tree-based/run_experiment_centralized.sh b/examples/advanced/xgboost/tree-based/run_experiment_centralized.sh deleted file mode 100755 index 83cfa81162..0000000000 --- a/examples/advanced/xgboost/tree-based/run_experiment_centralized.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash -DATASET_PATH="$HOME/dataset/HIGGS.csv" - -if [ ! 
-f "${DATASET_PATH}" ] -then - echo "Please check if you saved HIGGS dataset in ${DATASET_PATH}" -fi - -python3 ../utils/baseline_centralized.py --num_parallel_tree 1 --data_path "${DATASET_PATH}" -python3 ../utils/baseline_centralized.py --num_parallel_tree 5 --subsample 0.8 --data_path "${DATASET_PATH}" -python3 ../utils/baseline_centralized.py --num_parallel_tree 5 --subsample 0.2 --data_path "${DATASET_PATH}" -python3 ../utils/baseline_centralized.py --num_parallel_tree 20 --subsample 0.05 --data_path "${DATASET_PATH}" -python3 ../utils/baseline_centralized.py --num_parallel_tree 20 --subsample 0.8 --data_path "${DATASET_PATH}" diff --git a/examples/advanced/xgboost/tree-based/run_experiment_simulator.sh b/examples/advanced/xgboost/tree-based/run_experiment_simulator.sh deleted file mode 100755 index 05b2a050e7..0000000000 --- a/examples/advanced/xgboost/tree-based/run_experiment_simulator.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -n=5 -for study in bagging_uniform_split_uniform_lr \ - bagging_exponential_split_uniform_lr \ - bagging_exponential_split_scaled_lr \ - cyclic_uniform_split_uniform_lr \ - cyclic_exponential_split_uniform_lr -do - nvflare simulator jobs/higgs_${n}_${study} -w ${PWD}/workspaces/xgboost_workspace_${n}_${study} -n ${n} -t ${n} -done - - -n=20 -for study in bagging_uniform_split_uniform_lr \ - bagging_square_split_uniform_lr \ - bagging_square_split_scaled_lr \ - cyclic_uniform_split_uniform_lr \ - cyclic_square_split_uniform_lr -do - nvflare simulator jobs/higgs_${n}_${study} -w ${PWD}/workspaces/xgboost_workspace_${n}_${study} -n ${n} -t ${n} -done diff --git a/examples/advanced/xgboost/tree-based/utils/plot_tensorboard_events.py b/examples/advanced/xgboost/tree-based/utils/plot_tensorboard_events.py deleted file mode 100644 index bc6953f274..0000000000 --- a/examples/advanced/xgboost/tree-based/utils/plot_tensorboard_events.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import glob -import os - -import matplotlib.pyplot as plt -import seaborn as sns -import tensorflow as tf - -# simulator workspace -client_results_root = "./workspaces/xgboost_workspace_" -client_num_list = [5, 20] -client_pre = "app_site-" -centralized_path = "./workspaces/centralized_1_1/events.*" - -# bagging and cyclic need different handle -experiments_bagging = { - 5: { - "5_bagging_uniform_split_uniform_lr": {"tag": "AUC"}, - "5_bagging_exponential_split_uniform_lr": {"tag": "AUC"}, - "5_bagging_exponential_split_scaled_lr": {"tag": "AUC"}, - }, - 20: { - "20_bagging_uniform_split_uniform_lr": {"tag": "AUC"}, - "20_bagging_square_split_uniform_lr": {"tag": "AUC"}, - "20_bagging_square_split_scaled_lr": {"tag": "AUC"}, - }, -} -experiments_cyclic = { - 5: { - "5_cyclic_uniform_split_uniform_lr": {"tag": "AUC"}, - "5_cyclic_exponential_split_uniform_lr": {"tag": "AUC"}, - }, - 20: { - "20_cyclic_uniform_split_uniform_lr": {"tag": "AUC"}, - "20_cyclic_square_split_uniform_lr": {"tag": "AUC"}, - }, -} - -weight = 0.0 - - -def smooth(scalars, weight): # Weight between 0 and 1 - last = scalars[0] # First value in the plot (first timestep) - smoothed = list() - for point in scalars: - smoothed_val = last * weight + (1 - weight) * point # Calculate smoothed value - smoothed.append(smoothed_val) # Save it - last = smoothed_val # Anchor the last smoothed value - return smoothed - - -def read_eventfile(filepath, tags=["AUC"]): - data = {} - for summary in tf.compat.v1.train.summary_iterator(filepath): - for v in summary.summary.value: - if v.tag in tags: - if v.tag in data.keys(): - data[v.tag].append([summary.step, v.simple_value]) - else: - data[v.tag] = [[summary.step, v.simple_value]] - return data - - -def add_eventdata(data, config, filepath, tag="AUC"): - event_data = read_eventfile(filepath, tags=[tag]) - assert len(event_data[tag]) > 0, f"No data for key {tag}" - - metric = [] - for e in event_data[tag]: - # print(e) - data["Config"].append(config) - data["Round"].append(e[0]) - metric.append(e[1]) - - metric = smooth(metric, weight) - for entry in metric: - data["AUC"].append(entry) - - print(f"added {len(event_data[tag])} entries for {tag}") - - -def main(): - plt.figure() - - for client_num in client_num_list: - plt.figure - plt.title(f"{client_num} client experiments") - # add event files - data = {"Config": [], "Round": [], "AUC": []} - # add centralized result - eventfile = glob.glob(centralized_path, recursive=True) - assert len(eventfile) == 1, "No unique event file found!" + eventfile - eventfile = eventfile[0] - print("adding", eventfile) - add_eventdata(data, "centralized", eventfile, tag="AUC") - # pick first client for bagging experiments - site = 1 - for config, exp in experiments_bagging[client_num].items(): - record_path = os.path.join(client_results_root + config, "simulate_job", client_pre + str(site), "events.*") - eventfile = glob.glob(record_path, recursive=True) - assert len(eventfile) == 1, "No unique event file found!" - eventfile = eventfile[0] - print("adding", eventfile) - add_eventdata(data, config, eventfile, tag=exp["tag"]) - - # Combine all clients' records for cyclic experiments - for site in range(1, client_num + 1): - for config, exp in experiments_cyclic[client_num].items(): - record_path = os.path.join( - client_results_root + config, "simulate_job", client_pre + str(site), "events.*" - ) - eventfile = glob.glob(record_path, recursive=True) - assert len(eventfile) == 1, f"No unique event file found under {record_path}!" 
- eventfile = eventfile[0] - print("adding", eventfile) - add_eventdata(data, config, eventfile, tag=exp["tag"]) - - sns.lineplot(x="Round", y="AUC", hue="Config", data=data) - plt.show() - - -if __name__ == "__main__": - main() diff --git a/examples/advanced/xgboost/utils/prepare_job_config.py b/examples/advanced/xgboost/utils/prepare_job_config.py deleted file mode 100644 index c7339391ab..0000000000 --- a/examples/advanced/xgboost/utils/prepare_job_config.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json -import os -import pathlib -import shutil - -from nvflare.apis.fl_constant import JobConstants - -SCRIPT_PATH = pathlib.Path(os.path.realpath(__file__)) -XGB_EXAMPLE_ROOT = SCRIPT_PATH.parent.parent.absolute() -JOB_CONFIGS_ROOT = "jobs" -ALGO_DIR_MAP = { - "bagging": "tree-based", - "cyclic": "tree-based", - "histogram": "histogram-based", - "histogram_v2": "histogram-based", -} -BASE_JOB_MAP = {"bagging": "bagging_base", "cyclic": "cyclic_base", "histogram": "base", "histogram_v2": "base_v2"} - - -def job_config_args_parser(): - parser = argparse.ArgumentParser(description="generate train configs for HIGGS dataset") - parser.add_argument( - "--data_root", - type=str, - default="/tmp/nvflare/xgboost_higgs_dataset", - help="Path to dataset config files for each site", - ) - parser.add_argument("--site_num", type=int, default=5, help="Total number of sites") - parser.add_argument("--site_name_prefix", type=str, default="site-", help="Site name prefix") - parser.add_argument("--round_num", type=int, default=100, help="Total number of training rounds") - parser.add_argument( - "--training_algo", type=str, default="bagging", choices=list(ALGO_DIR_MAP.keys()), help="Training algorithm" - ) - parser.add_argument("--split_method", type=str, default="uniform", help="How to split the dataset") - parser.add_argument("--lr_mode", type=str, default="uniform", help="Whether to use uniform or scaled shrinkage") - parser.add_argument("--nthread", type=int, default=16, help="nthread for xgboost") - parser.add_argument( - "--tree_method", type=str, default="hist", help="tree_method for xgboost - use hist for best perf" - ) - parser.add_argument("--data_split_mode", type=int, default=0, help="dataset split mode, 0 or 1") - parser.add_argument("--secure_training", type=bool, default=False, help="histogram_v2 secure training or not") - return parser - - -def _read_json(filename): - if not os.path.isfile(filename): - raise ValueError(f"{filename} does not exist!") - with open(filename, "r") as f: - return json.load(f) - - -def _write_json(data, filename): - with open(filename, "w") as f: - json.dump(data, f, indent=4) - - -def _get_job_name(args) -> str: - return ( - "higgs_" - + str(args.site_num) - + "_" - + args.training_algo - + "_" - + args.split_method - + "_split" - + "_" - + args.lr_mode - + "_lr" - ) - - -def _get_data_split_name(args, site_name: str) -> str: - return 
os.path.join(args.data_root, f"{args.site_num}_{args.split_method}", f"data_{site_name}.json") - - -def _get_src_job_dir(training_algo): - return XGB_EXAMPLE_ROOT / ALGO_DIR_MAP[training_algo] / JOB_CONFIGS_ROOT / BASE_JOB_MAP[training_algo] - - -def _gen_deploy_map(num_sites: int, site_name_prefix: str) -> dict: - deploy_map = {"app_server": ["server"]} - for i in range(1, num_sites + 1): - deploy_map[f"app_{site_name_prefix}{i}"] = [f"{site_name_prefix}{i}"] - return deploy_map - - -def _update_meta(meta: dict, args): - name = _get_job_name(args) - meta["name"] = name - meta["deploy_map"] = _gen_deploy_map(args.site_num, args.site_name_prefix) - meta["min_clients"] = args.site_num - - -def _get_lr_scale_from_split_json(data_split: dict): - split = {} - total_data_num = 0 - for k, v in data_split["data_index"].items(): - if k == "valid": - continue - data_num = int(v["end"] - v["start"]) - total_data_num += data_num - split[k] = data_num - - lr_scales = {} - for k in split: - lr_scales[k] = split[k] / total_data_num - - return lr_scales - - -def _update_client_config(config: dict, args, lr_scale, site_name: str): - data_split_name = _get_data_split_name(args, site_name) - if args.training_algo == "bagging" or args.training_algo == "cyclic": - # update client config - config["executors"][0]["executor"]["args"]["lr_scale"] = lr_scale - config["executors"][0]["executor"]["args"]["lr_mode"] = args.lr_mode - config["executors"][0]["executor"]["args"]["nthread"] = args.nthread - config["executors"][0]["executor"]["args"]["tree_method"] = args.tree_method - config["executors"][0]["executor"]["args"]["training_mode"] = args.training_algo - num_client_bagging = 1 - if args.training_algo == "bagging": - num_client_bagging = args.site_num - config["executors"][0]["executor"]["args"]["num_client_bagging"] = num_client_bagging - elif args.training_algo == "histogram": - config["num_rounds"] = args.round_num - config["executors"][0]["executor"]["args"]["xgb_params"]["nthread"] = args.nthread - config["executors"][0]["executor"]["args"]["xgb_params"]["tree_method"] = args.tree_method - config["components"][0]["args"]["data_split_filename"] = data_split_name - - -def _update_server_config(config: dict, args): - if args.training_algo == "bagging": - config["num_rounds"] = args.round_num + 1 - config["workflows"][0]["args"]["min_clients"] = args.site_num - elif args.training_algo == "cyclic": - config["num_rounds"] = int(args.round_num / args.site_num) - elif args.training_algo == "histogram_v2": - config["num_rounds"] = args.round_num - config["workflows"][0]["args"]["xgb_params"]["nthread"] = args.nthread - config["workflows"][0]["args"]["xgb_params"]["tree_method"] = args.tree_method - config["workflows"][0]["args"]["data_split_mode"] = args.data_split_mode - config["workflows"][0]["args"]["secure_training"] = args.secure_training - - -def _copy_custom_files(src_job_path, src_app_name, dst_job_path, dst_app_name): - dst_path = dst_job_path / dst_app_name / "custom" - os.makedirs(dst_path, exist_ok=True) - src_path = src_job_path / src_app_name / "custom" - if os.path.isdir(src_path): - shutil.copytree(src_path, dst_path, dirs_exist_ok=True) - - -def create_server_app(src_job_path, src_app_name, dst_job_path, site_name, args): - dst_app_name = f"app_{site_name}" - server_config = _read_json(src_job_path / src_app_name / "config" / JobConstants.SERVER_JOB_CONFIG) - dst_config_path = dst_job_path / dst_app_name / "config" - - # make target config folders - if not os.path.exists(dst_config_path): - 
os.makedirs(dst_config_path)
-
-    _update_server_config(server_config, args)
-    server_config_filename = dst_config_path / JobConstants.SERVER_JOB_CONFIG
-    _write_json(server_config, server_config_filename)
-
-
-def create_client_app(src_job_path, src_app_name, dst_job_path, site_name, args):
-    dst_app_name = f"app_{site_name}"
-    client_config = _read_json(src_job_path / src_app_name / "config" / JobConstants.CLIENT_JOB_CONFIG)
-    dst_config_path = dst_job_path / dst_app_name / "config"
-
-    # make target config folders
-    if not os.path.exists(dst_config_path):
-        os.makedirs(dst_config_path)
-
-    # get lr scale
-    data_split_name = _get_data_split_name(args, site_name)
-    data_split = _read_json(data_split_name)
-    lr_scales = _get_lr_scale_from_split_json(data_split)
-
-    # adjust file contents according to each job's specs
-    _update_client_config(client_config, args, lr_scales[site_name], site_name)
-    client_config_filename = dst_config_path / JobConstants.CLIENT_JOB_CONFIG
-    _write_json(client_config, client_config_filename)
-
-    # copy custom file
-    _copy_custom_files(src_job_path, src_app_name, dst_job_path, dst_app_name)
-
-
-def main():
-    parser = job_config_args_parser()
-    args = parser.parse_args()
-    job_name = _get_job_name(args)
-    src_job_path = _get_src_job_dir(args.training_algo)
-
-    # create a new job
-    dst_job_path = XGB_EXAMPLE_ROOT / ALGO_DIR_MAP[args.training_algo] / JOB_CONFIGS_ROOT / job_name
-    if not os.path.exists(dst_job_path):
-        os.makedirs(dst_job_path)
-
-    # update meta
-    meta_config_dst = dst_job_path / JobConstants.META_FILE
-    meta_config = _read_json(src_job_path / JobConstants.META_FILE)
-    _update_meta(meta_config, args)
-    _write_json(meta_config, meta_config_dst)
-
-    # create server side app
-    create_server_app(
-        src_job_path=src_job_path, src_app_name="app", dst_job_path=dst_job_path, site_name="server", args=args
-    )
-
-    # create client side app
-    for i in range(1, args.site_num + 1):
-        create_client_app(
-            src_job_path=src_job_path,
-            src_app_name="app",
-            dst_job_path=dst_job_path,
-            site_name=f"{args.site_name_prefix}{i}",
-            args=args,
-        )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/nvflare/app_opt/psi/dh_psi/dh_psi_task_handler.py b/nvflare/app_opt/psi/dh_psi/dh_psi_task_handler.py
index cc433954ea..5d84224534 100644
--- a/nvflare/app_opt/psi/dh_psi/dh_psi_task_handler.py
+++ b/nvflare/app_opt/psi/dh_psi/dh_psi_task_handler.py
@@ -49,6 +49,8 @@ def __init__(self, local_psi_id: str):
         self.local_psi_handler: Optional[PSI] = None
         self.client_name = None
         self.items = None
+        # needed by the JobAPI: store the constructor argument as an attribute
+        self.local_psi_id = local_psi_id
 
     def initialize(self, fl_ctx: FLContext):
         super().initialize(fl_ctx)
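
A note on the `dh_psi_task_handler.py` change above: the comment suggests the Job API
recovers a component's constructor arguments from same-named attributes when it
generates job configs. A minimal sketch of that assumed convention follows (the class
and the `batch_size` argument are hypothetical, not part of NVFlare):

```python
class MyTaskHandler:
    def __init__(self, local_psi_id: str, batch_size: int = 1000):
        # Keep every constructor argument as a same-named attribute so a
        # config generator can rediscover the arguments by introspection.
        self.local_psi_id = local_psi_id
        self.batch_size = batch_size  # hypothetical extra argument
```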