From 7043c2abbeb6c6e4a41e34c209aca4ba9fe9d07a Mon Sep 17 00:00:00 2001 From: Yuhong Wen Date: Thu, 10 Oct 2024 12:25:35 -0400 Subject: [PATCH 1/3] Remove the need to create additional ports when running a job. (#3017) --- nvflare/private/fed/app/simulator/simulator_runner.py | 2 -- nvflare/private/fed/client/client_engine.py | 4 ---- nvflare/private/fed/client/client_executor.py | 6 ------ nvflare/private/fed/server/server_engine.py | 7 ------- nvflare/private/fed/simulator/simulator_client_engine.py | 2 -- 5 files changed, 21 deletions(-) diff --git a/nvflare/private/fed/app/simulator/simulator_runner.py b/nvflare/private/fed/app/simulator/simulator_runner.py index b7b63f9349..1d19fac603 100644 --- a/nvflare/private/fed/app/simulator/simulator_runner.py +++ b/nvflare/private/fed/app/simulator/simulator_runner.py @@ -450,8 +450,6 @@ def simulator_run_main(self): try: self.create_clients() self.server.engine.run_processes[SimulatorConstants.JOB_NAME] = { - RunProcessKey.LISTEN_PORT: None, - RunProcessKey.CONNECTION: None, RunProcessKey.CHILD_PROCESS: None, RunProcessKey.JOB_ID: SimulatorConstants.JOB_NAME, RunProcessKey.PARTICIPANTS: self.server.engine.client_manager.clients, diff --git a/nvflare/private/fed/client/client_engine.py b/nvflare/private/fed/client/client_engine.py index 6f7c8d6690..6c4acb92e9 100644 --- a/nvflare/private/fed/client/client_engine.py +++ b/nvflare/private/fed/client/client_engine.py @@ -24,7 +24,6 @@ from nvflare.apis.fl_constant import FLContextKey, MachineStatus, SystemComponents, WorkspaceConstants from nvflare.apis.fl_context import FLContext, FLContextManager from nvflare.apis.workspace import Workspace -from nvflare.fuel.utils.network_utils import get_open_ports from nvflare.private.defs import ERROR_MSG_PREFIX, ClientStatusKey, EngineConstant from nvflare.private.event import fire_event from nvflare.private.fed.server.job_meta_validator import JobMetaValidator @@ -161,15 +160,12 @@ def start_app( self.logger.info("Starting client 
app. rank: {}".format(self.rank)) - open_port = get_open_ports(1)[0] - server_config = list(self.client.servers.values())[0] self.client_executor.start_app( self.client, job_id, self.args, app_custom_folder, - open_port, allocated_resource, token, resource_manager, diff --git a/nvflare/private/fed/client/client_executor.py b/nvflare/private/fed/client/client_executor.py index 8830cdbf9e..b93aaf637d 100644 --- a/nvflare/private/fed/client/client_executor.py +++ b/nvflare/private/fed/client/client_executor.py @@ -42,7 +42,6 @@ def start_app( job_id, args, app_custom_folder, - listen_port, allocated_resource, token, resource_manager, @@ -56,7 +55,6 @@ def start_app( job_id: the job_id args: admin command arguments for starting the FL client training app_custom_folder: FL application custom folder - listen_port: port to listen the command. allocated_resource: allocated resources token: token from resource manager resource_manager: resource manager @@ -149,7 +147,6 @@ def start_app( job_id, args, app_custom_folder, - listen_port, allocated_resource, token, resource_manager: ResourceManagerSpec, @@ -163,7 +160,6 @@ def start_app( job_id: the job_id args: admin command arguments for starting the worker process app_custom_folder: FL application custom folder - listen_port: port to listen the command. 
allocated_resource: allocated resources token: token from resource manager resource_manager: resource manager @@ -208,8 +204,6 @@ def start_app( with self.lock: self.run_processes[job_id] = { - RunProcessKey.LISTEN_PORT: listen_port, - RunProcessKey.CONNECTION: None, RunProcessKey.CHILD_PROCESS: process, RunProcessKey.STATUS: ClientStatus.STARTING, } diff --git a/nvflare/private/fed/server/server_engine.py b/nvflare/private/fed/server/server_engine.py index 65393b7e94..d59128cd81 100644 --- a/nvflare/private/fed/server/server_engine.py +++ b/nvflare/private/fed/server/server_engine.py @@ -50,7 +50,6 @@ from nvflare.fuel.f3.cellnet.defs import MessageHeaderKey from nvflare.fuel.f3.cellnet.defs import ReturnCode as CellMsgReturnCode from nvflare.fuel.utils.argument_utils import parse_vars -from nvflare.fuel.utils.network_utils import get_open_ports from nvflare.fuel.utils.zip_utils import zip_directory_to_bytes from nvflare.private.admin_defs import Message, MsgHeader from nvflare.private.defs import CellChannel, CellMessageHeaderKeys, RequestHeader, TrainingTopic, new_cell_message @@ -179,13 +178,11 @@ def start_app_on_server(self, run_number: str, job: Job = None, job_clients=None if not isinstance(job, Job): return "Must provide a job object to start the server app." 
- open_ports = get_open_ports(2) self._start_runner_process( self.args, app_root, run_number, app_custom_folder, - open_ports, job.job_id, job_clients, snapshot, @@ -233,7 +230,6 @@ def _start_runner_process( app_root, run_number, app_custom_folder, - open_ports, job_id, job_clients, snapshot, @@ -244,7 +240,6 @@ def _start_runner_process( if app_custom_folder != "": add_custom_dir_to_path(app_custom_folder, new_env) - listen_port = open_ports[1] if snapshot: restore_snapshot = True else: @@ -289,8 +284,6 @@ def _start_runner_process( with self.lock: self.run_processes[run_number] = { - RunProcessKey.LISTEN_PORT: listen_port, - RunProcessKey.CONNECTION: None, RunProcessKey.CHILD_PROCESS: process, RunProcessKey.JOB_ID: job_id, RunProcessKey.PARTICIPANTS: job_clients, diff --git a/nvflare/private/fed/simulator/simulator_client_engine.py b/nvflare/private/fed/simulator/simulator_client_engine.py index 8a7fa16377..fd0a3969c1 100644 --- a/nvflare/private/fed/simulator/simulator_client_engine.py +++ b/nvflare/private/fed/simulator/simulator_client_engine.py @@ -25,8 +25,6 @@ def __init__(self, client, args, rank=0): fl_ctx.set_prop(FLContextKey.SIMULATE_MODE, True, private=True, sticky=True) self.client_executor.run_processes[SimulatorConstants.JOB_NAME] = { - RunProcessKey.LISTEN_PORT: None, - RunProcessKey.CONNECTION: None, RunProcessKey.CHILD_PROCESS: None, RunProcessKey.STATUS: ClientStatus.STARTED, } From 36959795b9854cc795965d143e8008eebf334b83 Mon Sep 17 00:00:00 2001 From: Alessandro Giusa <148333702+agiusa@users.noreply.github.com> Date: Thu, 10 Oct 2024 19:07:23 +0200 Subject: [PATCH 2/3] Fixed broken doc ref to 'helm_chart' (#3022) --- docs/real_world_fl/overview.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/real_world_fl/overview.rst b/docs/real_world_fl/overview.rst index 23d494d0b1..637489b58b 100644 --- a/docs/real_world_fl/overview.rst +++ b/docs/real_world_fl/overview.rst @@ -68,7 +68,7 @@ For advanced users, you can 
customize your provision with additional behavior th - **Zip**: To create password protected zip archives for the startup kits, see :ref:`distribution_builder` - **Docker-compose**: Provision to launch NVIDIA FLARE system via docker containers. You can customize the provisioning process and ask the provisioner to generate a docker-compose file. This can be found in :ref:`docker_compose`. - **Docker**: Provision to launch NVIDIA FLARE system via docker containers. If you just want to use docker files, see :ref:`containerized_deployment`. - - **Helm**: To change the provisioning tool to generate an NVIDIA FLARE Helm chart for Kubernetes deployment, see :ref:` helm_chart`. + - **Helm**: To change the provisioning tool to generate an NVIDIA FLARE Helm chart for Kubernetes deployment, see :ref:`helm_chart`. - **CUSTOM**: you can build custom builders specific to your needs like in :ref:`distribution_builder`. Package distribution From 57d843ab77756fb524172500fc1fff0a5999c242 Mon Sep 17 00:00:00 2001 From: Sean Yang Date: Thu, 10 Oct 2024 11:01:04 -0700 Subject: [PATCH 3/3] add none default values (#3025) --- nvflare/app_opt/pt/job_config/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvflare/app_opt/pt/job_config/model.py b/nvflare/app_opt/pt/job_config/model.py index 5d6782089e..d050148cf8 100644 --- a/nvflare/app_opt/pt/job_config/model.py +++ b/nvflare/app_opt/pt/job_config/model.py @@ -24,7 +24,7 @@ class PTModel: - def __init__(self, model, persistor: Optional[ModelPersistor], locator: Optional[ModelLocator]): + def __init__(self, model, persistor: Optional[ModelPersistor] = None, locator: Optional[ModelLocator] = None): """PyTorch model wrapper. If model is an nn.Module, add a PTFileModelPersistor with the model and a TFModelPersistor.