diff --git a/integration/monai/examples/mednist/monai_mednist_train.py b/integration/monai/examples/mednist/code/monai_mednist_train.py similarity index 100% rename from integration/monai/examples/mednist/monai_mednist_train.py rename to integration/monai/examples/mednist/code/monai_mednist_train.py diff --git a/integration/monai/examples/mednist/monai_101_fl.ipynb b/integration/monai/examples/mednist/monai_101_fl.ipynb index b2bf46fa7f..f247e121b7 100644 --- a/integration/monai/examples/mednist/monai_101_fl.ipynb +++ b/integration/monai/examples/mednist/monai_101_fl.ipynb @@ -56,9 +56,76 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"\", line 1, in \n", + "ModuleNotFoundError: No module named 'monai'\n", + "Collecting nvflare\n", + " Using cached nvflare-2.4.0-py3-none-any.whl (2.1 MB)\n", + "Requirement already satisfied: numpy in /home/hroth/.venv_monai/lib/python3.10/site-packages (from nvflare) (1.26.4)\n", + "Collecting grpcio==1.51.1\n", + " Using cached grpcio-1.51.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)\n", + "Requirement already satisfied: six>=1.15.0 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from nvflare) (1.16.0)\n", + "Collecting Flask-JWT-Extended==4.4.3\n", + " Using cached Flask_JWT_Extended-4.4.3-py2.py3-none-any.whl (22 kB)\n", + "Collecting gunicorn>=20.1.0\n", + " Downloading gunicorn-22.0.0-py3-none-any.whl (84 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.4/84.4 KB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting websockets>=10.4\n", + " Using cached 
websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n", + "Collecting cryptography>=36.0.0\n", + " Using cached cryptography-42.0.5-cp39-abi3-manylinux_2_28_x86_64.whl (4.6 MB)\n", + "Collecting Flask-SQLAlchemy==2.5.1\n", + " Using cached Flask_SQLAlchemy-2.5.1-py2.py3-none-any.whl (17 kB)\n", + "Collecting msgpack>=1.0.3\n", + " Downloading msgpack-1.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (385 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m385.1/385.1 KB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting docker>=6.0\n", + " Using cached docker-7.0.0-py3-none-any.whl (147 kB)\n", + "Collecting pyhocon\n", + " Using cached pyhocon-0.3.60-py3-none-any.whl\n", + "Collecting Flask==2.2.5\n", + " Using cached Flask-2.2.5-py3-none-any.whl (101 kB)\n", + "Collecting Werkzeug==2.2.2\n", + " Using cached Werkzeug-2.2.2-py3-none-any.whl (232 kB)\n", + "Collecting protobuf==3.20.3\n", + " Using cached protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", + "Requirement already satisfied: psutil>=5.9.1 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from nvflare) (5.9.8)\n", + "Requirement already satisfied: requests>=2.28.0 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from nvflare) (2.31.0)\n", + "Requirement already satisfied: PyYAML>=6.0 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from nvflare) (6.0.1)\n", + "Collecting SQLAlchemy==1.4.31\n", + " Using cached SQLAlchemy-1.4.31-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)\n", + "Collecting click>=8.0\n", + " Using cached click-8.1.7-py3-none-any.whl (97 kB)\n", + "Collecting itsdangerous>=2.0\n", + " Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)\n", + "Requirement already satisfied: 
Jinja2>=3.0 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from Flask==2.2.5->nvflare) (3.1.3)\n", + "Collecting PyJWT<3.0,>=2.0\n", + " Using cached PyJWT-2.8.0-py3-none-any.whl (22 kB)\n", + "Collecting greenlet!=0.4.17\n", + " Using cached greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (616 kB)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from Werkzeug==2.2.2->nvflare) (2.1.5)\n", + "Requirement already satisfied: cffi>=1.12 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from cryptography>=36.0.0->nvflare) (1.16.0)\n", + "Requirement already satisfied: packaging>=14.0 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from docker>=6.0->nvflare) (24.0)\n", + "Requirement already satisfied: urllib3>=1.26.0 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from docker>=6.0->nvflare) (2.2.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from requests>=2.28.0->nvflare) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from requests>=2.28.0->nvflare) (2024.2.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/hroth/.venv_monai/lib/python3.10/site-packages (from requests>=2.28.0->nvflare) (3.3.2)\n", + "Collecting pyparsing<4,>=2\n", + " Downloading pyparsing-3.1.2-py3-none-any.whl (103 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.2/103.2 KB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pycparser in /home/hroth/.venv_monai/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->nvflare) (2.22)\n", + "Installing collected packages: Werkzeug, websockets, pyparsing, PyJWT, protobuf, msgpack, itsdangerous, gunicorn, grpcio, greenlet, click, 
SQLAlchemy, pyhocon, Flask, docker, cryptography, Flask-SQLAlchemy, Flask-JWT-Extended, nvflare\n", + "Successfully installed Flask-2.2.5 Flask-JWT-Extended-4.4.3 Flask-SQLAlchemy-2.5.1 PyJWT-2.8.0 SQLAlchemy-1.4.31 Werkzeug-2.2.2 click-8.1.7 cryptography-42.0.5 docker-7.0.0 greenlet-3.0.3 grpcio-1.51.1 gunicorn-22.0.0 itsdangerous-2.2.0 msgpack-1.0.8 nvflare-2.4.0 protobuf-3.20.3 pyhocon-0.3.60 pyparsing-3.1.2 websockets-12.0\n" + ] + } + ], "source": [ "!python -c \"import monai\" || pip install -q \"monai-weekly[ignite, tqdm]\"\n", "!pip install nvflare" @@ -74,123 +141,360 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "The following job templates are available: \n", + "\n", + "------------------------------------------------------------------------------------------------------------------------\n", + " name Description Controller Type Execution API Type \n", + "------------------------------------------------------------------------------------------------------------------------\n", + " cyclic_cc_pt client-controlled cyclic workflow with PyTorch ClientAPI tra client NA \n", + " cyclic_pt server-controlled cyclic workflow with PyTorch ClientAPI tra server NA \n", + " psi_csv private-set intersection for csv data server NA \n", + " sag_cross_np scatter & gather and cross-site validation using numpy server NA \n", + " sag_cse_pt scatter & gather workflow and cross-site evaluation with PyT server NA \n", + " sag_gnn scatter & gather workflow for gnn learning server NA \n", + " sag_nemo Scatter and Gather Workflow for NeMo server NA \n", + " sag_np scatter & gather workflow using numpy server NA \n", + " sag_np_cell_pipe scatter & gather workflow using numpy server NA \n", + " sag_np_metrics scatter & gather workflow using numpy server NA \n", + " sag_pt scatter & gather workflow using pytorch server NA \n", + " 
sag_pt_deploy_map SAG workflow with pytorch, deploy_map, site-specific configs server NA \n", + " sag_pt_executor scatter & gather workflow and cross-site evaluation with PyT server NA \n", + " sag_pt_mlflow scatter & gather workflow using pytorch with MLflow tracking server NA \n", + " sag_pt_model_learner scatter & gather workflow and cross-site evaluation with PyT server NA \n", + " sag_tf scatter & gather workflow using TensorFlow server NA \n", + " sklearn_kmeans scikit-learn KMeans model server NA \n", + " sklearn_linear scikit-learn linear model server NA \n", + " sklearn_svm scikit-learn SVM model server NA \n", + " stats_df FedStats: tabular data with pandas server NA \n", + " stats_image FedStats: image intensity histogram server NA \n", + " swarm_cse_pt Swarm Learning with Cross-Site Evaluation with PyTorch client NA \n", + " swarm_cse_pt_model_l Swarm Learning with Cross-Site Evaluation with PyTorch Model client NA \n", + " vertical_xgb vertical federated xgboost server NA \n", + " xgboost_tree xgboost horizontal tree-based collaboration model server NA \n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], "source": [ - "!nvflare config -jt ../../../../job_templates/" + "!nvflare config -jt ../../../../job_templates/\n", + "!nvflare job list_templates" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "## Prepare datasets using MONAI Apps\n", - "\n", - "We use `MedNISTDataset` in MONAI Apps to download a dataset to the specified directory and perform the pre-processing steps in the `monai.transforms` compose.\n", - "\n", - "The MedNIST dataset was gathered from several sets from [TCIA](https://wiki.cancerimagingarchive.net/display/Public/Data+Usage+Policies+and+Restrictions),\n", - "[the RSNA Bone Age Challenge](http://rsnachallenges.cloudapp.net/competitions/4),\n", - "and [the NIH Chest X-ray 
dataset](https://cloud.google.com/healthcare/docs/resources/public-datasets/nih-chest).\n", - "\n", - "The dataset is kindly made available by [Dr. Bradley J. Erickson M.D., Ph.D.](https://www.mayo.edu/research/labs/radiology-informatics/overview) (Department of Radiology, Mayo Clinic)\n", - "under the Creative Commons [CC BY-SA 4.0 license](https://creativecommons.org/licenses/by-sa/4.0/).\n", - "\n", - "If you use the MedNIST dataset, please acknowledge the source. " + "We will use the in-process Client API. To do so, we choose the [sag_pt_in_proc job template](../../../../job_templates/sag_pt_in_proc) and run the following command to create the job:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "The following are the variables you can change in the template\n", + "\n", + "---------------------------------------------------------------------------------------------------------------------------------------\n", + " \n", + " job folder: ./jobs/client_api \n", + " \n", + "---------------------------------------------------------------------------------------------------------------------------------------\n", + " file_name var_name value component \n", + "---------------------------------------------------------------------------------------------------------------------------------------\n", + " meta.conf app ['@ALL'] \n", + " meta.conf mandatory_clients [] \n", + " meta.conf min_clients 2 \n", + "\n", + " config_fed_client.conf app_config \n", + " config_fed_client.conf app_script monai_mednist_train.py \n", + " config_fed_client.conf events_to_convert ['analytix_log_stats'] \n", + " config_fed_client.conf fed_event_prefix fed. 
\n", + " config_fed_client.conf log_pull_interval 0.1 \n", + " config_fed_client.conf params_transfer_type DIFF \n", + " config_fed_client.conf result_pull_interval 0.5 \n", + " config_fed_client.conf task_script_args {app_config} \n", + " config_fed_client.conf task_script_path {app_script} \n", + " config_fed_client.conf train_with_evaluation True \n", + "\n", + " config_fed_server.conf \"mlflow.note.content\" ## Federated Experiment tracking wi \n", + " config_fed_server.conf allow_empty_global_weights False ScatterAndGather \n", + " config_fed_server.conf artifact_location artifacts MLflowReceiver \n", + " config_fed_server.conf best_global_model_file_name best_FL_global_model.pt PTFileModelPersistor \n", + " config_fed_server.conf events ['fed.analytix_log_stats'] MLflowReceiver \n", + " config_fed_server.conf expected_data_kind WEIGHT_DIFF InTimeAccumulateWeightedAggregator \n", + " config_fed_server.conf experiment_name nvflare-sag-pt-experiment \n", + " config_fed_server.conf global_model_file_name FL_global_model.pt PTFileModelPersistor \n", + " config_fed_server.conf ignore_result_error False ScatterAndGather \n", + " config_fed_server.conf key_metric accuracy IntimeModelSelector \n", + " config_fed_server.conf min_clients 2 ScatterAndGather \n", + " config_fed_server.conf model_class_path net.Net \n", + " config_fed_server.conf negate_key_metric False IntimeModelSelector \n", + " config_fed_server.conf num_rounds 5 ScatterAndGather \n", + " config_fed_server.conf persist_every_n_rounds 1 ScatterAndGather \n", + " config_fed_server.conf run_name nvflare-sag-pt-with-mlflow \n", + " config_fed_server.conf snapshot_every_n_rounds 1 ScatterAndGather \n", + " config_fed_server.conf start_round 0 ScatterAndGather \n", + " config_fed_server.conf task_check_period 0.5 ScatterAndGather \n", + " config_fed_server.conf tracking_uri MLflowReceiver \n", + " config_fed_server.conf train_timeout 0 ScatterAndGather \n", + " config_fed_server.conf validation_metric_name 
initial_metrics IntimeModelSelector \n", + " config_fed_server.conf wait_time_after_min_received 0 ScatterAndGather \n", + " config_fed_server.conf weigh_by_local_iter False IntimeModelSelector \n", + "\n", + "---------------------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], "source": [ - "dataset = MedNISTDataset(root_dir=root_dir, transform=transform, section=\"training\", download=True)" + "!nvflare job create -force -j ./jobs/client_api -w sag_pt_in_proc -sd ./code/. \\\n", + " -f config_fed_client.conf app_script=monai_mednist_train.py\n", + " -f config_fed_server.conf net_" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "## Define a network and a supervised trainer\n", - "\n", - "To train a model that can perform the classification task, we will use the DenseNet-121 which is known for its performance on the ImageNet dataset.\n", - "\n", - "For a typical supervised training workflow, MONAI provides `SupervisedTrainer` to define the hyper-parameters." 
+ "Then we can run it using the NVFlare Simulator:" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If available, we use GPU to speed things up.\n", - "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", - "\n", - "max_epochs = 5\n", - "model = densenet121(spatial_dims=2, in_channels=1, out_channels=6).to(DEVICE)\n", - "\n", - "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", - "trainer = SupervisedTrainer(\n", - " device=torch.device(DEVICE),\n", - " max_epochs=max_epochs,\n", - " train_data_loader=DataLoader(dataset, batch_size=512, shuffle=True, num_workers=4),\n", - " network=model,\n", - " optimizer=torch.optim.Adam(model.parameters(), lr=1e-5),\n", - " loss_function=torch.nn.CrossEntropyLoss(),\n", - " inferer=SimpleInferer(),\n", - " train_handlers=StatsHandler(),\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", + "execution_count": 5, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-04-24 14:52:20,529 - SimulatorRunner - INFO - Create the Simulator Server.\n", + "2024-04-24 14:52:20,531 - CoreCell - INFO - server: creating listener on tcp://0:32795\n", + "2024-04-24 14:52:20,545 - CoreCell - INFO - server: created backbone external listener for tcp://0:32795\n", + "2024-04-24 14:52:20,546 - ConnectorManager - INFO - 2446133: Try start_listener Listener resources: {'secure': False, 'host': 'localhost'}\n", + "2024-04-24 14:52:20,546 - nvflare.fuel.f3.sfm.conn_manager - INFO - Connector [CH00002 PASSIVE tcp://0:11500] is starting\n", + "2024-04-24 14:52:21,047 - CoreCell - INFO - server: created backbone internal listener for tcp://localhost:11500\n", + "2024-04-24 14:52:21,048 - nvflare.fuel.f3.sfm.conn_manager - INFO - Connector [CH00001 PASSIVE tcp://0:32795] is starting\n", + "2024-04-24 14:52:21,123 - nvflare.fuel.hci.server.hci - INFO - Starting Admin Server localhost on Port 40957\n", + 
"2024-04-24 14:52:21,123 - SimulatorRunner - INFO - Deploy the Apps.\n", + "2024-04-24 14:52:21,134 - SimulatorRunner - INFO - Create the simulate clients.\n", + "2024-04-24 14:52:21,137 - ClientManager - INFO - Client: New client site-1@192.168.1.203 joined. Sent token: 9c2877a9-bcb4-437d-a13e-2c64b1b164ef. Total clients: 1\n", + "2024-04-24 14:52:21,138 - FederatedClient - INFO - Successfully registered client:site-1 for project simulator_server. Token:9c2877a9-bcb4-437d-a13e-2c64b1b164ef SSID:\n", + "2024-04-24 14:52:21,139 - ClientManager - INFO - Client: New client site-2@192.168.1.203 joined. Sent token: d0d86230-d053-42fc-a356-1b86a43ce160. Total clients: 2\n", + "2024-04-24 14:52:21,139 - FederatedClient - INFO - Successfully registered client:site-2 for project simulator_server. Token:d0d86230-d053-42fc-a356-1b86a43ce160 SSID:\n", + "2024-04-24 14:52:21,139 - SimulatorRunner - INFO - Set the client status ready.\n", + "2024-04-24 14:52:21,139 - SimulatorRunner - INFO - Deploy and start the Server App.\n", + "2024-04-24 14:52:21,141 - Cell - INFO - Register blob CB for channel='server_command', topic='*'\n", + "2024-04-24 14:52:21,141 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\n", + "2024-04-24 14:52:21,141 - ServerCommandAgent - INFO - ServerCommandAgent cell register_request_cb: server.simulate_job\n", + "2024-04-24 14:52:22,289 - JsonScanner - ERROR - Traceback (most recent call last):\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 74, in build_component\n", + " t = self.build_component(v)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 157, in build_component\n", + " t = super().build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 86, in build_component\n", + " return 
instantiate_class(class_path, class_args)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/class_utils.py\", line 51, in instantiate_class\n", + " c = get_class(class_path)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/class_utils.py\", line 30, in get_class\n", + " module_ = importlib.import_module(module_name)\n", + " File \"/usr/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n", + " return _bootstrap._gcd_import(name[level:], package, level)\n", + " File \"\", line 1050, in _gcd_import\n", + " File \"\", line 1027, in _find_and_load\n", + " File \"\", line 1004, in _find_and_load_unlocked\n", + "ModuleNotFoundError: No module named 'net'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 99, in _do_scan\n", + " node.processor.process_element(node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 150, in process_element\n", + " self.process_config_element(self.config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 101, in process_config_element\n", + " FedJsonConfigurator.process_config_element(self, config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed_json_config.py\", line 91, in process_config_element\n", + " c = self.authorize_and_build_component(element, config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 103, in authorize_and_build_component\n", + " return self.build_component(config_dict)\n", + " File 
\"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 157, in build_component\n", + " t = super().build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 77, in build_component\n", + " raise ValueError(f\"failed to instantiate class: {secure_format_exception(e)} \")\n", + "ValueError: failed to instantiate class: ModuleNotFoundError: No module named 'net' \n", + "\n", + "Error processing config ['/media/hroth/NVIDIA/home_old/hroth/Code2/nvflare/monai_flywheel/integration/monai/examples/mednist/client_api_workspace/simulate_job/app_server/config/config_fed_server.json']: ConfigError: Error processing '/media/hroth/NVIDIA/home_old/hroth/Code2/nvflare/monai_flywheel/integration/monai/examples/mednist/client_api_workspace/simulate_job/app_server/config/config_fed_server.conf' in element 'id = \"persistor\"\n", + "path = \"nvflare.app_opt.pt.file_model_persistor.PTFileModelPersistor\"\n", + "args {\n", + " model {\n", + " path = \"net.Net\"\n", + " }\n", + "}': path: 'components.#1', exception: 'ValueError: failed to instantiate class: ModuleNotFoundError: No module named 'net' '\n", + "2024-04-24 14:52:22,310 - SimulatorRunner - ERROR - FL server execution exception: ConfigError: Error processing '/media/hroth/NVIDIA/home_old/hroth/Code2/nvflare/monai_flywheel/integration/monai/examples/mednist/client_api_workspace/simulate_job/app_server/config/config_fed_server.conf' in element 'id = \"persistor\"\n", + "path = \"nvflare.app_opt.pt.file_model_persistor.PTFileModelPersistor\"\n", + "args {\n", + " model {\n", + " path = \"net.Net\"\n", + " }\n", + "}': path: 'components.#1', exception: 'ValueError: failed to instantiate class: ModuleNotFoundError: No module named 'net' '\n", + "Traceback (most recent call last):\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", 
line 74, in build_component\n", + " t = self.build_component(v)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 157, in build_component\n", + " t = super().build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 86, in build_component\n", + " return instantiate_class(class_path, class_args)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/class_utils.py\", line 51, in instantiate_class\n", + " c = get_class(class_path)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/class_utils.py\", line 30, in get_class\n", + " module_ = importlib.import_module(module_name)\n", + " File \"/usr/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n", + " return _bootstrap._gcd_import(name[level:], package, level)\n", + " File \"\", line 1050, in _gcd_import\n", + " File \"\", line 1027, in _find_and_load\n", + " File \"\", line 1004, in _find_and_load_unlocked\n", + "ModuleNotFoundError: No module named 'net'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 99, in _do_scan\n", + " node.processor.process_element(node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 150, in process_element\n", + " self.process_config_element(self.config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 101, in process_config_element\n", + " FedJsonConfigurator.process_config_element(self, config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed_json_config.py\", 
line 91, in process_config_element\n", + " c = self.authorize_and_build_component(element, config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 103, in authorize_and_build_component\n", + " return self.build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 157, in build_component\n", + " t = super().build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 77, in build_component\n", + " raise ValueError(f\"failed to instantiate class: {secure_format_exception(e)} \")\n", + "ValueError: failed to instantiate class: ModuleNotFoundError: No module named 'net' \n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_app_runner.py\", line 72, in start_server_app\n", + " conf.configure()\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 147, in configure\n", + " raise e\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 144, in configure\n", + " self._do_configure()\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 137, in _do_configure\n", + " self.json_scanner.scan(self)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 159, in scan\n", + " self._do_scan(node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 129, in _do_scan\n", + " self._do_scan(_child_node(node, k, 0, v))\n", + " File 
\"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 132, in _do_scan\n", + " self._do_scan(_child_node(node, node.key, i + 1, element[i]))\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 121, in _do_scan\n", + " raise ConfigError(self.get_process_err_msg(e, elmt_str, location, node))\n", + "nvflare.fuel.common.excepts.ConfigError: Error processing '/media/hroth/NVIDIA/home_old/hroth/Code2/nvflare/monai_flywheel/integration/monai/examples/mednist/client_api_workspace/simulate_job/app_server/config/config_fed_server.conf' in element 'id = \"persistor\"\n", + "path = \"nvflare.app_opt.pt.file_model_persistor.PTFileModelPersistor\"\n", + "args {\n", + " model {\n", + " path = \"net.Net\"\n", + " }\n", + "}': path: 'components.#1', exception: 'ValueError: failed to instantiate class: ModuleNotFoundError: No module named 'net' '\n", + "2024-04-24 14:52:22,310 - SimulatorServer - INFO - Server app stopped.\n", + "\n", + "\n", + "Exception in thread Thread-7 (start_server_app):\n", + "Traceback (most recent call last):\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 74, in build_component\n", + " t = self.build_component(v)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 157, in build_component\n", + " t = super().build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 86, in build_component\n", + " return instantiate_class(class_path, class_args)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/class_utils.py\", line 51, in instantiate_class\n", + " c = get_class(class_path)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/class_utils.py\", line 30, in 
get_class\n", + " module_ = importlib.import_module(module_name)\n", + " File \"/usr/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n", + " return _bootstrap._gcd_import(name[level:], package, level)\n", + " File \"\", line 1050, in _gcd_import\n", + " File \"\", line 1027, in _find_and_load\n", + " File \"\", line 1004, in _find_and_load_unlocked\n", + "ModuleNotFoundError: No module named 'net'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 99, in _do_scan\n", + " node.processor.process_element(node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 150, in process_element\n", + " self.process_config_element(self.config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 101, in process_config_element\n", + " FedJsonConfigurator.process_config_element(self, config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed_json_config.py\", line 91, in process_config_element\n", + " c = self.authorize_and_build_component(element, config_ctx, node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 103, in authorize_and_build_component\n", + " return self.build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_json_config.py\", line 157, in build_component\n", + " t = super().build_component(config_dict)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/component_builder.py\", line 77, in build_component\n", + " raise ValueError(f\"failed to instantiate class: {secure_format_exception(e)} \")\n", + 
"ValueError: failed to instantiate class: ModuleNotFoundError: No module named 'net' \n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/usr/lib/python3.10/threading.py\", line 1016, in _bootstrap_inner\n", + " self.run()\n", + " File \"/usr/lib/python3.10/threading.py\", line 953, in run\n", + " self._target(*self._args, **self._kwargs)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/app/simulator/simulator_runner.py\", line 471, in start_server_app\n", + " server_app_runner.start_server_app(\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_app_runner.py\", line 85, in start_server_app\n", + " raise e\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/fed/server/server_app_runner.py\", line 72, in start_server_app\n", + " conf.configure()\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 147, in configure\n", + " raise e\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 144, in configure\n", + " self._do_configure()\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/private/json_configer.py\", line 137, in _do_configure\n", + " self.json_scanner.scan(self)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 159, in scan\n", + " self._do_scan(node)\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 129, in _do_scan\n", + " self._do_scan(_child_node(node, k, 0, v))\n", + " File \"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 132, in _do_scan\n", + " self._do_scan(_child_node(node, node.key, i + 1, element[i]))\n", + " File 
\"/home/hroth/.venv_monai/lib/python3.10/site-packages/nvflare/fuel/utils/json_scanner.py\", line 121, in _do_scan\n", + " raise ConfigError(self.get_process_err_msg(e, elmt_str, location, node))\n", + "nvflare.fuel.common.excepts.ConfigError: Error processing '/media/hroth/NVIDIA/home_old/hroth/Code2/nvflare/monai_flywheel/integration/monai/examples/mednist/client_api_workspace/simulate_job/app_server/config/config_fed_server.conf' in element 'id = \"persistor\"\n", + "path = \"nvflare.app_opt.pt.file_model_persistor.PTFileModelPersistor\"\n", + "args {\n", + " model {\n", + " path = \"net.Net\"\n", + " }\n", + "}': path: 'components.#1', exception: 'ValueError: failed to instantiate class: ModuleNotFoundError: No module named 'net' '\n", + "2024-04-24 14:52:23,147 - SimulatorRunner - ERROR - Simulator run error: RuntimeError: Could not start the Server App.\n", + "2024-04-24 14:52:27,132 - MPM - INFO - MPM: Good Bye!\n" + ] + } + ], "source": [ - "## Run the training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trainer.run()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Check the prediction on the test dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_dir = Path(root_dir, \"MedNIST\")\n", - "class_names = sorted(f\"{x.name}\" for x in dataset_dir.iterdir() if x.is_dir())\n", - "testdata = MedNISTDataset(root_dir=root_dir, transform=transform, section=\"test\", download=False, runtime_cache=True)\n", - "\n", - "max_items_to_print = 10\n", - "with eval_mode(model):\n", - " for item in DataLoader(testdata, batch_size=1, num_workers=0):\n", - " prob = np.array(model(item[\"image\"].to(DEVICE)).detach().to(\"cpu\"))[0]\n", - " pred = class_names[prob.argmax()]\n", - " gt = item[\"class_name\"][0]\n", - " print(f\"Class prediction is {pred}. 
Ground-truth: {gt}\")\n", - " max_items_to_print -= 1\n", - " if max_items_to_print == 0:\n", - " break" + "!nvflare simulator -n 2 -t 2 ./jobs/client_api -w client_api_workspace" ] }, { @@ -217,7 +521,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.12" } }, "nbformat": 4,