Merge branch '2.5' into 2.5_split
chesterxgchen authored Oct 10, 2024
2 parents 7205a22 + e970777 commit 49cb1dd
Showing 190 changed files with 4,769 additions and 3,381 deletions.
53 changes: 26 additions & 27 deletions .github/workflows/deploy.yml
@@ -1,27 +1,34 @@
-name: Deploy to GitHub Pages
+name: Deploy to gh-pages

 on:
-  # Trigger the workflow every time you push to the `main` branch
-  # Using a different branch name? Replace `main` with your branch’s name
+  # Trigger the workflow if any web/** files are modified
   push:
-    branches: [ main ]
-  # Allows you to run this workflow manually from the Actions tab on GitHub.
+    branches:
+      - "main"
+      - "2.5"
+    paths:
+      - 'web/**'
   workflow_dispatch:

 env:
   site_path: ./web
+  version_path: /

 # Allow this job to clone the repo and create a page deployment
 permissions:
-  contents: read
+  contents: write
   pages: write
   id-token: write

 jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-      - name: Checkout your repository using git
+      - name: Update version_path for non-main branches
+        if: ${{ github.ref_type == 'branch' && github.ref_name != 'main'}}
+        run: echo version_path=/version/${{ github.ref_name }}/ >> $GITHUB_ENV
+
+      - name: Checkout your repository
         uses: actions/checkout@v4

       - name: Setup Node
@@ -31,28 +38,20 @@ jobs:
         cache: npm
         cache-dependency-path: "${{ env.site_path }}/package-lock.json"

-      - name: Install
-        shell: "bash"
-        working-directory: ${{ env.site_path }}
+      - name: Install dependencies
         run: npm install

-      - name: Build
-        shell: "bash"
-        working-directory: ${{ env.site_path }}
+      - name: Build project
         run: npm run build
         env:
           PUBLIC_GH_BRANCH: ${{ github.ref_name }}
+        working-directory: ${{ env.site_path }}

-      - name: Upload Pages Artifact
-        uses: actions/upload-pages-artifact@v3
+      - name: Deploy
+        uses: JamesIves/github-pages-deploy-action@v4
         with:
-          path: "${{ env.site_path }}/dist/"
-
-  deploy:
-    needs: build
-    runs-on: ubuntu-latest
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
-    steps:
-      - name: Deploy to GitHub Pages
-        id: deployment
-        uses: actions/deploy-pages@v4
+          branch: gh-pages
+          folder: ${{ env.site_path }}/dist
+          target-folder: ${{ env.version_path }}
+          clean-exclude: version
10 changes: 8 additions & 2 deletions ci/run_integration.sh
@@ -45,8 +45,14 @@ remove_pipenv() {

 integration_test_tf() {
     echo "Run TF integration test..."
-    # not using pipenv because we need tensorflow package from the container
-    python -m pip install -e .[dev]
+    # since running directly in container, point python to python3.12
+    ln -sfn /usr/bin/python3.12 /usr/bin/python
+    ln -sfn /usr/bin/python3.12 /usr/bin/python3
+    # somehow the base container has blinker which should be removed
+    apt remove -y python3-blinker python-blinker-doc || true
+    # pipenv does not work with TensorFlow so using pip
+    python3.12 -m pip install -e .[dev]
+    python3.12 -m pip install tensorflow[and-cuda]
     export PYTHONPATH=$PWD
     testFolder="tests/integration_test"
     clean_up_snapshot_and_job
…
4 changes: 4 additions & 0 deletions docker/Dockerfile
@@ -1,3 +1,7 @@
+# For Running NVIDIA FLARE in a Docker container, see
+# https://nvflare.readthedocs.io/en/main/quickstart.html#containerized-deployment-with-docker
+# This Dockerfile is primarily for building Docker images to publish for dashboard.
+
 FROM python:3.8
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install zip
 COPY nvflare /opt/NVFlare/nvflare
…
4 changes: 0 additions & 4 deletions docs/examples/federated_statistics_overview.rst
@@ -177,7 +177,3 @@ Summary

 We provided federated statistics operators that can easily aggregate and visualize the local statistics for different data sites and features.
 We hope this feature will make it easier to perform federated data analysis. For more details, please look at :github_nvflare_link:`Federated Statistics (Github) <examples/advanced/federated-statistics/README.md>`

-Previous Versions of Federated XGBoost
---------------------------------------
-
-- `Federated XGBoost for 2.2 <https://github.com/NVIDIA/NVFlare/tree/2.2/examples/xgboost>`_
8 changes: 4 additions & 4 deletions docs/examples/hello_world_examples.rst
@@ -5,11 +5,11 @@ Hello World examples can be run from the :github_nvflare_link:`hello_world noteb…

 .. toctree::

-   Deep Learning to Federated Learning (GitHub) <https://github.com/NVIDIA/NVFlare/blob/main/examples/hello-world/ml-to-fl>
-   Step-by-Step Examples (GitHub) <https://github.com/NVIDIA/NVFlare/blob/main/examples/hello-world/step-by-step>
+   :github_nvflare_link:`Deep Learning to Federated Learning (GitHub) <examples/hello-world/ml-to-fl>`
+   :github_nvflare_link:`Step-by-Step Examples (GitHub) <examples/hello-world/step-by-step>`
    hello_fedavg_numpy
    hello_cross_val
-   Hello Cyclic Weight Transfer (GitHub) <https://github.com/NVIDIA/NVFlare/blob/main/examples/hello-world/hello-cyclic>
+   :github_nvflare_link:`Hello Cyclic Weight Transfer (GitHub) <examples/hello-world/hello-cyclic>`
    hello_pt_job_api
    hello_tf_job_api
-   Hello Client Controlled Workflow (GitHub) <https://github.com/NVIDIA/NVFlare/blob/main/examples/hello-world/hello-ccwf>
+   :github_nvflare_link:`Hello Client Controlled Workflow (GitHub) <examples/hello-world/hello-ccwf>`
6 changes: 3 additions & 3 deletions docs/examples/medical_image_analysis.rst
@@ -4,6 +4,6 @@ Medical Image Analysis

 .. toctree::

-   Hello MONAI Bundle (GitHub) <https://github.com/NVIDIA/NVFlare/tree/main/integration/monai/examples/spleen_ct_segmentation_sim>
-   Differential Privacy for BraTS18 Segmentation (GitHub) <https://github.com/NVIDIA/NVFlare/tree/main/examples/advanced/brats18>
-   Prostate Segmentation from Multi-source Data (GitHub) <https://github.com/NVIDIA/NVFlare/tree/main/examples/advanced/prostate>
+   :github_nvflare_link:`Hello MONAI Bundle (GitHub) <integration/monai/examples/spleen_ct_segmentation_sim>`
+   :github_nvflare_link:`Differential Privacy for BraTS18 Segmentation (GitHub) <examples/advanced/brats18>`
+   :github_nvflare_link:`Prostate Segmentation from Multi-source Data (GitHub) <examples/advanced/prostate>`
6 changes: 3 additions & 3 deletions docs/examples/tutorial_notebooks.rst
@@ -4,6 +4,6 @@ Tutorial Notebooks

 .. toctree::

-   FL Simulator Notebook (GitHub) <https://github.com/NVIDIA/NVFlare/blob/main/examples/tutorials/flare_simulator.ipynb>
-   Hello FLARE API Notbook (GitHub) <https://github.com/NVIDIA/NVFlare/blob/main/examples/tutorials/flare_api.ipynb>
-   NVFLARE in POC Mode (GitHub) <https://github.com/NVIDIA/NVFlare/blob/main/examples/tutorials/setup_poc.ipynb>
+   :github_nvflare_link:`FL Simulator Notebook (GitHub) <examples/tutorials/flare_simulator.ipynb>`
+   :github_nvflare_link:`Hello FLARE API Notebook (GitHub) <examples/tutorials/flare_api.ipynb>`
+   :github_nvflare_link:`NVFLARE POC Mode in detail Notebook (GitHub) <examples/tutorials/setup_poc.ipynb>`
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -10,7 +10,7 @@ NVIDIA FLARE

    fl_introduction
    flare_overview
    whats_new
-   getting_started
+   Getting Started <quickstart>

 .. toctree::
    :maxdepth: -1
…
5 changes: 3 additions & 2 deletions docs/programming_guide/controllers/model_controller.rst
@@ -228,8 +228,9 @@ For example we can use PyTorch's save and load functions for the model parameter…

         return model

-Note: for non-primitive data types such as ``torch.nn.Module`` (used for the initial PyTorch model), we must configure a corresponding FOBS decomposer for serialization and deserialization.
-Read more at :github_nvflare_link:`Flare Object Serializer (FOBS) <nvflare/fuel/utils/fobs/README.rst>`.
+Note: for non-primitive data types such as ``torch.nn.Module`` (used for the initial PyTorch model),
+we must configure a corresponding FOBS decomposer for serialization and deserialization.
+Read more at :ref:`serialization`.

 .. code-block:: python
…
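The code block referenced above is truncated in this view; per the surrounding text, it holds the doc's PyTorch save/load example. As a rough sketch of the pattern being described (illustrative only, not the exact snippet from the commit):

.. code-block:: python

    import torch
    import torch.nn as nn

    def save_model(model: nn.Module, filepath: str) -> None:
        # persist only the learned parameters, not the module definition
        torch.save(model.state_dict(), filepath)

    def load_model(model: nn.Module, filepath: str) -> nn.Module:
        # the module must be constructed first; load_state_dict restores weights
        model.load_state_dict(torch.load(filepath))
        return model

This pairs with the note above: saving and loading via ``state_dict`` keeps the exchanged data close to plain containers, whereas passing a full ``torch.nn.Module`` requires a registered FOBS decomposer.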
17 changes: 17 additions & 0 deletions docs/programming_guide/execution_api_type/client_api.rst
@@ -261,7 +261,24 @@ Client API configuration. You can further nail down the selection by choice of …
 in-process or not, type of models (GNN, NeMo LLM), workflow patterns (swarm learning or standard FedAvg with scatter and gather (SAG)), etc.


+Custom Data Class Serialization/Deserialization
+===============================================
+
+To pass data in the form of a custom class, you can leverage the serialization tool inside NVFlare.
+
+For example:
+
+.. code-block:: python
+
+    class CustomClass:
+        def __init__(self, x, y):
+            self.x = x
+            self.y = y
+
+If you are using classes derived from ``Enum`` or dataclass, they will be handled by the default decomposers.
+For other custom classes, you will need to write a dedicated custom decomposer and ensure it is registered
+using ``fobs.register`` on both the server side and the client side, as well as in ``train.py``.
+
+Please note that for the custom data class to work, it must be placed in a separate file from ``train.py``.
+
+For more details on serialization, please refer to :ref:`serialization`.
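As a sketch of what such a dedicated decomposer could look like: the class name, the ``custom_class`` module, and the ``manager`` argument below are illustrative assumptions based on the FOBS decomposer interface, not code from this commit; see :ref:`serialization` for the authoritative signatures.

.. code-block:: python

    from nvflare.fuel.utils import fobs
    from nvflare.fuel.utils.fobs.decomposer import Decomposer

    from custom_class import CustomClass  # hypothetical module, kept outside train.py

    class CustomClassDecomposer(Decomposer):
        def supported_type(self):
            # the type this decomposer knows how to (de)serialize
            return CustomClass

        def decompose(self, target, manager=None):
            # reduce the object to FOBS-serializable primitives
            return [target.x, target.y]

        def recompose(self, data, manager=None):
            # rebuild the object from those primitives
            return CustomClass(data[0], data[1])

    # call this on both the server side and the client side, and in train.py
    fobs.register(CustomClassDecomposer)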
2 changes: 1 addition & 1 deletion docs/quickstart.rst
@@ -93,7 +93,7 @@ Clone NVFlare repo to get examples, and switch to either the main branch or the …

     $ git clone https://github.com/NVIDIA/NVFlare.git
     $ cd NVFlare
-    $ git switch 2.4
+    $ git switch 2.5

 Note on branches:

…
11 changes: 7 additions & 4 deletions docs/release_notes/flare_250.rst
@@ -60,10 +60,13 @@ learnt by the central aggregation server.

 With our encryption plugins working with XGBoost, NVFlare now supports all secure federated schemes for XGBoost model training, with
 both CPU and GPU.

+Please check the `federated xgboost with nvflare user guide <https://nvflare.readthedocs.io/en/main/user_guide/federated_xgboost.html>`_
+and the :github_nvflare_link:`example <examples/advanced/xgboost_secure>`.
+
 Tensorflow support
 ==================
-With community contributions, we add FedOpt, FedProx and Scaffold algorithms using Tensorflow to create parity with Pytorch. You
-can them :github_nvflare_link:`here <nvflare/app_opt/tf>`.
+With community contributions, we add FedOpt, FedProx and Scaffold algorithms using Tensorflow.
+You can check the code :github_nvflare_link:`here <nvflare/app_opt/tf>` and the :github_nvflare_link:`example <examples/getting_started/tf>`.

 FOBS Auto Registration
 ======================
…
@@ -121,7 +124,7 @@ FedOpt, FedProx, Scaffold implementation for Tensorflow.

 FedBN: Federated Learning on Non-IID Features via Local Batch Normalization
 ---------------------------------------------------------------------------
-The `FedBN example <https://github.com/NVIDIA/NVFlare/tree/main/research/fed-bn>`_ showcases a federated learning algorithm designed
+The :github_nvflare_link:`FedBN example <research/fed-bn>` showcases a federated learning algorithm designed
 to address the feature shift problem when aggregating models across different data distributions.

 In this work, we propose an effective method that uses local batch normalization to alleviate the feature shift before averaging models.
…
@@ -131,7 +134,7 @@ are supported by a convergence analysis that shows in a simplified setting that …

 End-to-end Federated XGBoost examples
 -------------------------------------
-In `this example <https://github.com/NVIDIA/NVFlare/blob/5fc5ff31f35be63330dec38e1c4e80a6f84586ed/examples/advanced/finance-end-to-end/xgboost.ipynb>`__,
+In :github_nvflare_link:`this example <examples/advanced/finance-end-to-end/xgboost.ipynb>`,
 we try to show the end-to-end process of feature engineering, pre-processing and training in federated settings. You
 can use FLARE to perform federated ETL and then training.

…
4 changes: 2 additions & 2 deletions docs/resources/Dockerfile
@@ -1,7 +1,7 @@
-ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:24.03-py3
+ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:24.07-py3
 FROM ${PYTORCH_IMAGE}

-ARG NVF_VERSION=2.4
+ARG NVF_VERSION=2.5
 ENV NVF_BRANCH=${NVF_VERSION}

 RUN python3 -m pip install -U pip
…
11 changes: 5 additions & 6 deletions docs/user_guide/federated_xgboost/secure_xgboost_user_guide.rst
@@ -21,7 +21,7 @@ It supports federated training in the following 4 modes:

 When running with NVFlare, all the GRPC connections in XGBoost are local and the messages are forwarded to other clients through NVFlare's CellNet communication.
 The local GRPC ports are selected automatically by NVFlare.

-The encryption is handled in XGBoost by encryption plugins, which are external components that can be installed at runtime. The plugins are bundled with NVFlare.
+The encryption is handled in XGBoost by encryption plugins, which are external components that can be installed at runtime.

 Prerequisites
 =============
…
@@ -130,15 +130,14 @@ For vertical (column-split) training, the datasets on all clients contain differ…

 XGBoost Plugin Configuration
 ============================
 XGBoost requires an encryption plugin to handle secure training.
-Two plugins are initially shipped with NVFlare,

 - **cuda_paillier**: The default plugin. This plugin uses GPU for cryptographic operations.
 - **nvflare**: This plugin forwards data locally to the NVFlare process for encryption.

 .. note::

-   All clients must use the same plugin. When different plugins are used,
-   the XGBoost’s behavior is undetermined. It may cause the client to crash.
+   All clients must use the same plugin. When different plugins are used in different clients,
+   the behavior of federated XGBoost is undetermined, which can cause the job to crash.

 The **cuda_paillier** plugin requires NVIDIA GPUs that support compute capability 7.0 or higher. Also, CUDA
 12.2 or 12.4 must be installed. Please refer to https://developer.nvidia.com/cuda-gpus for more information.
…
@@ -226,10 +225,10 @@ The server_context.tenseal file is not needed.

 Building Encryption Plugins
 ===========================

-In case the included plugin files don't work for your environment, the plugins can be built from the source code.
+The plugins need to be built from the source code for your specific environment.

 To build the plugins, check out the NVFlare source code from https://github.com/NVIDIA/NVFlare and follow the
-instructions in this document: https://github.com/NVIDIA/NVFlare/blob/main/integration/xgboost/encryption_plugins/README.md.
+instructions in :github_nvflare_link:`this document <integration/xgboost/encryption_plugins/README.md>`.

 Job Configuration
 =================
…
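A practical aside on the **cuda_paillier** requirement above: compute capability can be checked at runtime. This is an illustrative snippet assuming PyTorch is installed, not part of the user guide itself.

.. code-block:: python

    import torch

    # cuda_paillier requires a GPU with compute capability 7.0 or higher
    if torch.cuda.is_available():
        major, minor = torch.cuda.get_device_capability(0)
        print(f"GPU 0 compute capability: {major}.{minor}")
        if (major, minor) >= (7, 0):
            print("cuda_paillier plugin can be used on this GPU")
        else:
            print("GPU too old for cuda_paillier; consider the nvflare plugin")
    else:
        print("no CUDA GPU detected; cuda_paillier cannot be used")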
(changed file: name not shown in this view)
@@ -29,7 +29,7 @@ The ``pyproject.toml`` file exists in the job's ``custom`` folder. It is an impo…
 client app definition and configuration information. Such information is used by the Flower system to find the
 server app and the client app, and to pass app-specific configuration to the apps.

-Here is an example of ``pyproject.toml``, taken from https://github.com/NVIDIA/NVFlare/blob/main/examples/hello-world/hello-flower/jobs/hello-flwr-pt/app/custom/pyproject.toml.
+Here is an example of ``pyproject.toml``, taken from :github_nvflare_link:`this example <examples/hello-world/hello-flower/jobs/hello-flwr-pt/app/custom/pyproject.toml>`.

 .. code-block:: toml
…
2 changes: 1 addition & 1 deletion docs/user_guide/security/data_privacy_protection.rst
@@ -14,4 +14,4 @@ general-purpose data :ref:`filtering mechanism <filters>` for processing task data…

 This mechanism has been used for the purpose of data privacy protection on the client side. For example, differential
 privacy filters can be applied to model weights before sending to the server for aggregation.

-NVFLARE has implemented some commonly used privacy protection filters: https://github.com/NVIDIA/NVFlare/tree/main/nvflare/app_common/filters
+NVFLARE has implemented :github_nvflare_link:`some commonly used privacy protection filters <nvflare/app_common/filters>`.
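To make the filter idea concrete, here is a minimal conceptual sketch of a differential-privacy-style perturbation of model weights. It intentionally does not use NVFlare's actual ``Filter`` API, and the fixed noise scale is a placeholder rather than a calibrated DP mechanism.

.. code-block:: python

    import numpy as np

    def gaussian_privacy_filter(weights: dict, sigma: float = 0.05) -> dict:
        # add Gaussian noise to each weight array before it leaves the client;
        # a real DP mechanism would clip updates and calibrate sigma to (epsilon, delta)
        return {
            name: np.asarray(arr, dtype=np.float64)
                  + np.random.normal(0.0, sigma, size=np.shape(arr))
            for name, arr in weights.items()
        }

    # usage sketch: perturb local weights before sending them for aggregation
    local_weights = {"layer1": np.ones((4, 4)), "bias1": np.zeros(4)}
    shared_weights = gaussian_privacy_filter(local_weights)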