From f7161f47c44c201ac6b3da81c789bd60e86bff10 Mon Sep 17 00:00:00 2001 From: Holger Roth Date: Fri, 23 Feb 2024 14:30:50 -0500 Subject: [PATCH] formatting --- .../app/custom/downstream_flip.py | 27 +++---- .../app/custom/downstream_flip.py | 27 +++---- .../app/custom/downstream_flip.py | 27 +++---- .../downstream/sabdab/prepare_sabdab_data.py | 17 ++-- .../downstream/sabdab/run_sim_sabdab.py | 6 +- .../app1/custom/downstream_flip.py | 15 ++-- .../app2/custom/downstream_flip.py | 15 ++-- .../app3/custom/downstream_flip.py | 15 ++-- .../app1/custom/downstream_flip.py | 15 ++-- .../app2/custom/downstream_flip.py | 15 ++-- .../app3/custom/downstream_flip.py | 15 ++-- .../bionemo/downstream/scl/run_sim_scl.py | 6 +- .../app1/custom/downstream_flip.py | 15 ++-- .../app2/custom/downstream_flip.py | 15 ++-- .../app3/custom/downstream_flip.py | 15 ++-- .../app4/custom/downstream_flip.py | 15 ++-- .../app1/custom/downstream_flip.py | 15 ++-- .../app2/custom/downstream_flip.py | 15 ++-- .../app3/custom/downstream_flip.py | 15 ++-- .../app4/custom/downstream_flip.py | 15 ++-- .../app1/custom/downstream_flip.py | 15 ++-- .../app2/custom/downstream_flip.py | 15 ++-- .../app3/custom/downstream_flip.py | 15 ++-- .../app4/custom/downstream_flip.py | 15 ++-- .../downstream/tap/prepare_tap_data.py | 36 +++++---- .../bionemo/downstream/tap/run_sim_tap.py | 6 +- .../app/custom/bionemo_constants.py | 4 +- .../app/custom/bionemo_inference.py | 5 +- .../app/custom/bionemo_inference_processor.py | 16 ++-- .../embeddings/app/custom/bionemo_inferer.py | 81 +++++++++---------- .../fedavg/app/custom/bionemo_mlp_learner.py | 69 +++++++++------- .../app/custom/bionemo_mlp_model_persistor.py | 39 +++++---- .../bionemo/task_fitting/split_data.py | 44 +++++++--- 33 files changed, 322 insertions(+), 358 deletions(-) diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip.py index 14c1c20913..3323a41316 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -30,12 +27,12 @@ micro_batch_size = 32 val_check_intervals = { - "site-1": int(416/micro_batch_size), - "site-2": int(238/micro_batch_size), - "site-3": int(282/micro_batch_size), - "site-4": int(472/micro_batch_size), - "site-5": int(361/micro_batch_size), - "site-6": int(157/micro_batch_size) + "site-1": int(416 / micro_batch_size), + "site-2": int(238 / micro_batch_size), + "site-3": int(282 / micro_batch_size), + "site-4": int(472 / micro_batch_size), + "site-5": int(361 / micro_batch_size), + "site-6": int(157 / micro_batch_size), } @@ -53,7 +50,7 @@ def main(cfg) -> None: print(f"Running client {site_name} with train data: {cfg.model.data.dataset.train}") logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -111,5 +108,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip.py index a953f58a24..94115b8291 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -52,12 +49,12 @@ # alpha 1.0 val_check_intervals = { - "site-1": int(80/micro_batch_size), - "site-2": int(365/micro_batch_size), - "site-3": int(216/micro_batch_size), - "site-4": int(578/micro_batch_size), - "site-5": int(568/micro_batch_size), - "site-6": int(119/micro_batch_size) + "site-1": int(80 / micro_batch_size), + "site-2": int(365 / micro_batch_size), + "site-3": int(216 / micro_batch_size), + "site-4": int(578 / micro_batch_size), + "site-5": int(568 / micro_batch_size), + "site-6": int(119 / micro_batch_size), } @@ -75,7 +72,7 @@ def main(cfg) -> None: print(f"Running client {site_name} with train data: {cfg.model.data.dataset.train}") logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -133,5 +130,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip.py index be1aeb1ed9..0c71161bd5 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,12 +29,12 @@ # alpha 100.0 val_check_intervals = { - "site-1": int(351/micro_batch_size), - "site-2": int(297/micro_batch_size), - "site-3": int(312/micro_batch_size), - "site-4": int(366/micro_batch_size), - "site-5": int(336/micro_batch_size), - "site-6": int(265/micro_batch_size) + "site-1": int(351 / micro_batch_size), + "site-2": int(297 / micro_batch_size), + "site-3": int(312 / micro_batch_size), + "site-4": int(366 / micro_batch_size), + "site-5": int(336 / micro_batch_size), + "site-6": int(265 / micro_batch_size), } # alpha 10.0 @@ -75,7 +72,7 @@ def main(cfg) -> None: print(f"Running client {site_name} with train data: {cfg.model.data.dataset.train}") logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -133,5 +130,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/sabdab/prepare_sabdab_data.py b/examples/advanced/bionemo/downstream/sabdab/prepare_sabdab_data.py index 08e0605ba1..5ee248c464 100644 --- a/examples/advanced/bionemo/downstream/sabdab/prepare_sabdab_data.py +++ b/examples/advanced/bionemo/downstream/sabdab/prepare_sabdab_data.py @@ -13,10 +13,11 @@ # limitations under the License. 
import os -import pandas as pd + import numpy as np -from tdc.utils import retrieve_label_name_list +import pandas as pd from tdc.single_pred import Develop + np.random.seed(1234) out_name = "sabdab_chen" @@ -73,7 +74,7 @@ def break_chains(df): def main(): seed = 0 - data = Develop(name='SAbDab_Chen', path="/tmp/data") + data = Develop(name="SAbDab_Chen", path="/tmp/data") split = data.get_split() train_df = pd.concat([split["train"], split["valid"]]) @@ -86,11 +87,11 @@ def main(): proportions = np.random.dirichlet(np.repeat(alpha, n_clients)) else: print("Uniform sampling") - proportions = n_clients * [1/n_clients] + proportions = n_clients * [1 / n_clients] for client_id in range(n_clients): client_name = f"site-{client_id+1}" - client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed+client_id) + client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed + client_id) if do_break_chains: client_train_df = break_chains(client_train_df) @@ -128,8 +129,8 @@ def main(): print(f"Saved {len(train_df)} training and {len(test_df)} testing proteins.") for _set, _df in zip(["TRAIN", "TEST"], [train_df, test_df]): - n_pos = np.sum(_df['Y'] == 0) - n_neg = np.sum(_df['Y'] == 1) + n_pos = np.sum(_df["Y"] == 0) + n_neg = np.sum(_df["Y"] == 1) n = len(_df) print(f" {_set} Pos/Neg ratio: neg={n_neg}, pos={n_pos}: {n_pos/n_neg:0.3f}") print(f" {_set} Trivial accuracy: {n_pos/n:0.3f}") @@ -144,7 +145,7 @@ def main(): b = np.asarray(client_train_dfs[j]["Antibody_ID"]) assert len(np.unique(a)) == len(a) assert len(np.unique(b)) == len(b) - d[i][j] = len(np.intersect1d(a, b))/len(b) + d[i][j] = len(np.intersect1d(a, b)) / len(b) print(d) overlap = np.mean(d[~np.isnan(d)]) diff --git a/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py b/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py index 660e0e1894..99849dcbf2 100644 --- a/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py +++ b/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py @@ -13,6 +13,7 @@ # limitations under the License. from nvflare import SimulatorRunner + n_clients = 6 # Choose from one of the available jobs @@ -21,10 +22,7 @@ # job_name = "fedavg_sabdab_esm1nv" simulator = SimulatorRunner( - job_folder=f"jobs/{job_name}", - workspace=f"/tmp/nvflare/results/{job_name}", - n_clients=n_clients, - threads=n_clients + job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=n_clients ) run_status = simulator.run() print("Simulator finished with run_status", run_status) diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip.py index 154b3c004d..9d10114d31 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip.py index 154b3c004d..9d10114d31 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip.py index 154b3c004d..9d10114d31 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip.py index 154b3c004d..9d10114d31 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip.py index 154b3c004d..9d10114d31 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip.py index 154b3c004d..9d10114d31 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/scl/run_sim_scl.py b/examples/advanced/bionemo/downstream/scl/run_sim_scl.py index 07cd1ad2f6..b421c565be 100644 --- a/examples/advanced/bionemo/downstream/scl/run_sim_scl.py +++ b/examples/advanced/bionemo/downstream/scl/run_sim_scl.py @@ -13,6 +13,7 @@ # limitations under the License. 
from nvflare import SimulatorRunner + n_clients = 3 # Choose from one of the available jobs @@ -20,10 +21,7 @@ # job_name = "fedavg_scl_finetune_esm2nv" simulator = SimulatorRunner( - job_folder=f"jobs/{job_name}", - workspace=f"/tmp/nvflare/results/{job_name}", - n_clients=n_clients, - threads=n_clients + job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=n_clients ) run_status = simulator.run() print("Simulator finished with run_status", run_status) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip.py index 83986cb88b..3faf3ed82f 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip.py index 83986cb88b..3faf3ed82f 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip.py index 83986cb88b..3faf3ed82f 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip.py index 83986cb88b..3faf3ed82f 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip.py b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip.py index 5e56d7f42c..0d44f14cac 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip.py +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip.py @@ -13,16 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo.core.config import hydra_runner -from nemo.utils import logging -from omegaconf.omegaconf import OmegaConf - from bionemo.data import FLIPPreprocess from bionemo.data.metrics import accuracy, mse, per_token_accuracy from bionemo.model.protein.downstream import FineTuneProteinModel -from bionemo.model.utils import ( - setup_trainer, -) +from bionemo.model.utils import setup_trainer +from nemo.core.config import hydra_runner +from nemo.utils import logging +from omegaconf.omegaconf import OmegaConf # (0): import nvflare lightning api import nvflare.client.lightning as flare @@ -32,7 +29,7 @@ # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5 def main(cfg) -> None: logging.info("\n\n************* Finetune config ****************") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") if cfg.do_training: logging.info("************** Starting Training ***********") @@ -90,5 +87,5 @@ def main(cfg) -> None: preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/advanced/bionemo/downstream/tap/prepare_tap_data.py b/examples/advanced/bionemo/downstream/tap/prepare_tap_data.py index 23f825310a..34189f08f0 100644 --- a/examples/advanced/bionemo/downstream/tap/prepare_tap_data.py +++ b/examples/advanced/bionemo/downstream/tap/prepare_tap_data.py @@ -13,10 +13,12 @@ # limitations under the License. import os -import pandas as pd + import numpy as np -from tdc.utils import retrieve_label_name_list +import pandas as pd from tdc.single_pred import Develop +from tdc.utils import retrieve_label_name_list + np.random.seed(1234) split_dir = "/tmp/data/tap" @@ -71,12 +73,12 @@ def break_chains(df): def main(): seed = 0 - label_list = retrieve_label_name_list('TAP') + label_list = retrieve_label_name_list("TAP") train_df = None test_df = None for label_name in label_list: - data = Develop(name='TAP', label_name=label_name) + data = Develop(name="TAP", label_name=label_name) split = data.get_split() train_split = pd.concat([split["train"], split["valid"]]) @@ -104,12 +106,14 @@ def main(): # normalize total_df[label_name] = (total_df[label_name] - _mean) / _std - train_df[label_name] = (train_df[label_name] - _mean)/_std - test_df[label_name] = (test_df[label_name] - _mean)/_std - print(f" ... normalize {label_name} from mean+-std {_mean:.3f}+-{_std:.3f} " - f"to train: {np.mean(train_df[label_name]):.3f}+-{np.std(train_df[label_name]):.3f}" - f"to test: {np.mean(test_df[label_name]):.3f}+-{np.std(test_df[label_name]):.3f}" - f"to total: {np.mean(total_df[label_name]):.3f}+-{np.std(total_df[label_name]):.3f}") + train_df[label_name] = (train_df[label_name] - _mean) / _std + test_df[label_name] = (test_df[label_name] - _mean) / _std + print( + f" ... 
normalize {label_name} from mean+-std {_mean:.3f}+-{_std:.3f} " + f"to train: {np.mean(train_df[label_name]):.3f}+-{np.std(train_df[label_name]):.3f}" + f"to test: {np.mean(test_df[label_name]):.3f}+-{np.std(test_df[label_name]):.3f}" + f"to total: {np.mean(total_df[label_name]):.3f}+-{np.std(total_df[label_name]):.3f}" + ) # split client train client_train_dfs = [] @@ -118,11 +122,11 @@ def main(): proportions = np.random.dirichlet(np.repeat(alpha, n_clients)) else: print("Uniform sampling") - proportions = n_clients * [1/n_clients] + proportions = n_clients * [1 / n_clients] for client_id in range(n_clients): client_name = f"site-{client_id+1}" - client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed+client_id) + client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed + client_id) if do_break_chains: client_train_df = break_chains(client_train_df) @@ -147,15 +151,15 @@ def main(): _split_dir = os.path.join(split_dir, "train") if not os.path.isdir(_split_dir): os.makedirs(_split_dir) - train_df.to_csv(os.path.join(_split_dir, f"tap_full_train.csv"), index=False) + train_df.to_csv(os.path.join(_split_dir, "tap_full_train.csv"), index=False) _split_dir = os.path.join(split_dir, "val") if not os.path.isdir(_split_dir): os.makedirs(_split_dir) - test_df.to_csv(os.path.join(_split_dir, f"tap_valid.csv"), index=False) + test_df.to_csv(os.path.join(_split_dir, "tap_valid.csv"), index=False) _split_dir = os.path.join(split_dir, "test") if not os.path.isdir(_split_dir): os.makedirs(_split_dir) - test_df.to_csv(os.path.join(_split_dir, f"tap_test.csv"), index=False) + test_df.to_csv(os.path.join(_split_dir, "tap_test.csv"), index=False) print(f"Saved {len(train_df)} training and {len(test_df)} testing proteins.") @@ -169,7 +173,7 @@ def main(): b = np.asarray(client_train_dfs[j]["Antibody_ID"]) assert len(np.unique(a)) == len(a) assert len(np.unique(b)) == len(b) - d[i][j] = len(np.intersect1d(a, b))/len(b) + d[i][j] = len(np.intersect1d(a, b)) / len(b) print(d) overlap = np.mean(d[~np.isnan(d)]) diff --git a/examples/advanced/bionemo/downstream/tap/run_sim_tap.py b/examples/advanced/bionemo/downstream/tap/run_sim_tap.py index 450d262dcf..54888d7385 100644 --- a/examples/advanced/bionemo/downstream/tap/run_sim_tap.py +++ b/examples/advanced/bionemo/downstream/tap/run_sim_tap.py @@ -13,6 +13,7 @@ # limitations under the License. from nvflare import SimulatorRunner + n_clients = 4 # Choose from one of the available jobs @@ -21,10 +22,7 @@ # job_name = "fedavg_tap_esm1nv" simulator = SimulatorRunner( - job_folder=f"jobs/{job_name}", - workspace=f"/tmp/nvflare/results/{job_name}", - n_clients=n_clients, - threads=n_clients + job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=n_clients ) run_status = simulator.run() print("Simulator finished with run_status", run_status) diff --git a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_constants.py b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_constants.py index 6a0a01d4e2..2e7cd5caea 100644 --- a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_constants.py +++ b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_constants.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ class BioNeMoConstants(object): TASK_INFERENCE = "bionemo_inference" NUMBER_SEQUENCES = "bionemo_number_sequences" DATA_INFO = "bionemo_data_info" CONFIG = "bionemo_config" + class BioNeMoDataKind(object): - CONFIG = "bionemo_config" \ No newline at end of file + CONFIG = "bionemo_config" diff --git a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference.py b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference.py index 156911e6d4..52815a349f 100644 --- a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference.py +++ b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference.py @@ -14,10 +14,11 @@ from typing import List, Union +from bionemo_constants import BioNeMoConstants +from bionemo_inference_processor import BioNeMoInferenceProcessor + from nvflare.app_common.workflows.broadcast_and_process import BroadcastAndProcess -from bionemo_inference_processor import BioNeMoInferenceProcessor -from bionemo_constants import BioNeMoConstants class BioNeMoInference(BroadcastAndProcess): def __init__( diff --git a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference_processor.py b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference_processor.py index 78f23bd01c..79e182259d 100644 --- a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference_processor.py +++ b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inference_processor.py @@ -15,24 +15,20 @@ import os import pprint +from bionemo_constants import BioNeMoConstants, BioNeMoDataKind from omegaconf import OmegaConf from nvflare.apis.client import Client -from nvflare.apis.dxo import DXO +from nvflare.apis.dxo import DXO, from_shareable from nvflare.apis.fl_constant import FLContextKey, ReturnCode from nvflare.apis.fl_context import FLContext from nvflare.apis.shareable import Shareable from nvflare.app_common.abstract.response_processor import ResponseProcessor -from bionemo_constants import BioNeMoConstants, BioNeMoDataKind -from nvflare.apis.dxo import DXO, DataKind, MetaKey, from_shareable - class BioNeMoInferenceProcessor(ResponseProcessor): def __init__( - self, - base_config_path: str = "config/base_infer_config.yaml", - infer_config_path: str = "config/infer.yaml" + self, base_config_path: str = "config/base_infer_config.yaml", infer_config_path: str = "config/infer.yaml" ): """Run BioNeMo model inference. 
@@ -62,13 +58,11 @@ def create_task_data(self, task_name: str, fl_ctx: FLContext) -> Shareable: self.infer_config_path = os.path.join(app_root, self.infer_config_path) base_config = OmegaConf.load(self.base_config_path) infer_config = OmegaConf.load(self.infer_config_path) - config = OmegaConf.merge(base_config, infer_config) + config = OmegaConf.merge(base_config, infer_config) self.log_info(fl_ctx, f"Load model configuration from {self.base_config_path} and {self.infer_config_path}") # TODO send nemo checkpoint - configs = { - BioNeMoConstants.CONFIG: OmegaConf.to_container(config) - } + configs = {BioNeMoConstants.CONFIG: OmegaConf.to_container(config)} # convert omega conf to primitive dict dxo = DXO(data=configs, data_kind=BioNeMoDataKind.CONFIG) diff --git a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inferer.py b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inferer.py index 6beca841ec..287c00fc5a 100644 --- a/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inferer.py +++ b/examples/advanced/bionemo/task_fitting/jobs/embeddings/app/custom/bionemo_inferer.py @@ -14,48 +14,34 @@ import os -import time +import pickle +import uuid -import numpy as np +import torch +from bionemo.data.mapped_dataset import FilteredMappedDataset +from bionemo.data.memmap_csv_fields_dataset import CSVFieldsMemmapDataset +from bionemo.data.memmap_fasta_fields_dataset import FASTAFieldsMemmapDataset +from bionemo.data.utils import expand_dataset_paths +from bionemo_constants import BioNeMoConstants +from nemo.utils.distributed import gather_objects +from nemo.utils.model_utils import import_class_by_path +from omegaconf.omegaconf import OmegaConf -from nvflare.apis.dxo import DXO, DataKind, MetaKey, from_shareable +from nvflare.apis.dxo import DXO, from_shareable from nvflare.apis.executor import Executor from nvflare.apis.fl_constant import FLContextKey, ReturnCode from nvflare.apis.fl_context import FLContext from nvflare.apis.shareable import Shareable, make_reply from nvflare.apis.signal import Signal -from nvflare.app_common.abstract.model import ModelLearnable -from nvflare.app_common.app_constant import AppConstants from nvflare.security.logging import secure_format_exception -from nvflare.apis.dxo import from_shareable -from nvflare.apis.fl_constant import ReturnCode -from nvflare.apis.fl_context import FLContext -from nvflare.apis.shareable import Shareable, make_reply -from nvflare.apis.signal import Signal -from nvflare.app_common.app_constant import AppConstants -from nvflare.app_common.executors.learner_executor import LearnerExecutor -import pickle -import torch -from omegaconf.omegaconf import OmegaConf -import os -import uuid - -from nemo.core.config import hydra_runner - -from bionemo.data.memmap_fasta_fields_dataset import FASTAFieldsMemmapDataset -from bionemo.data.memmap_csv_fields_dataset import CSVFieldsMemmapDataset -from bionemo.data.mapped_dataset import FilteredMappedDataset -from bionemo.data.utils import expand_dataset_paths -from nemo.utils.distributed import gather_objects -from nemo.utils.model_utils import import_class_by_path -from bionemo_constants import BioNeMoConstants class BioNeMoInferer(Executor): """Runs inference over all models. Supports extracting embeddings, and hiddens. - NOTE: If out of memory (OOM) error occurs, try splitting the data to multiple smaller files. + NOTE: If out of memory (OOM) error occurs, try splitting the data to multiple smaller files. 
""" + def __init__( self, inference_task_name=BioNeMoConstants.TASK_INFERENCE, @@ -68,7 +54,7 @@ def __init__( self._inference_task_name = inference_task_name self.cfg = None self.app_root = None - + def _set_config(self, config): if not isinstance(config, dict): raise ValueError(f"Expected config to be of type dict but received type {type(config)}") @@ -79,51 +65,56 @@ def _set_config(self, config): else: raise ValueError(f"Received config did not contain BioNeMo config! Received keys: {list(config.keys())}") - def _inference(self, shareable: Shareable, fl_ctx: FLContext, abort_signal: Signal): # First we extract DXO from the shareable. incoming_dxo = from_shareable(shareable) self._set_config(incoming_dxo.data) # Update loading paths with current app_root - self.cfg.model.downstream_task.restore_from_path = os.path.join(self.app_root, self.cfg.model.downstream_task.restore_from_path) + self.cfg.model.downstream_task.restore_from_path = os.path.join( + self.app_root, self.cfg.model.downstream_task.restore_from_path + ) self.cfg.model.tokenizer.vocab_path = os.path.join(self.app_root, self.cfg.model.tokenizer.vocab_path) self.cfg.model.tokenizer.model_path = os.path.join(self.app_root, self.cfg.model.tokenizer.model_path) self.log_info(fl_ctx, "\n\n************** Experiment configuration ***********") - self.log_info(fl_ctx, f'\n{OmegaConf.to_yaml(self.cfg)}') + self.log_info(fl_ctx, f"\n{OmegaConf.to_yaml(self.cfg)}") infer_class = import_class_by_path(self.cfg.infer_target) infer_model = infer_class(self.cfg) trainer = infer_model.trainer self.log_info(fl_ctx, "\n\n************** Restored model configuration ***********") - self.log_info(fl_ctx, f'\n{OmegaConf.to_yaml(infer_model.model.cfg)}') + self.log_info(fl_ctx, f"\n{OmegaConf.to_yaml(infer_model.model.cfg)}") # Update dataset_path to reflect client name client_name = fl_ctx.get_identity_name() self.cfg.model.data.dataset_path = os.path.join(self.cfg.model.data.dataset_path, f"data_{client_name}") # try to infer data_impl from the dataset_path file extension - if self.cfg.model.data.dataset_path.endswith('.fasta'): - self.cfg.model.data.data_impl = 'fasta_fields_mmap' + if self.cfg.model.data.dataset_path.endswith(".fasta"): + self.cfg.model.data.data_impl = "fasta_fields_mmap" else: # Data are assumed to be CSV format if no extension provided - self.log_info(fl_ctx, 'File extension not supplied for data, inferring csv.') - self.cfg.model.data.data_impl = 'csv_fields_mmap' + self.log_info(fl_ctx, "File extension not supplied for data, inferring csv.") + self.cfg.model.data.data_impl = "csv_fields_mmap" - self.log_info(fl_ctx, f'Inferred data_impl: {self.cfg.model.data.data_impl}') + self.log_info(fl_ctx, f"Inferred data_impl: {self.cfg.model.data.data_impl}") if self.cfg.model.data.data_impl == "csv_fields_mmap": dataset_paths = expand_dataset_paths(self.cfg.model.data.dataset_path, ext=".csv") self.log_info(fl_ctx, f"Loading data from {dataset_paths}") - ds = CSVFieldsMemmapDataset(dataset_paths, **self.cfg.model.data.data_impl_kwargs.get("csv_fields_mmap", {})) + ds = CSVFieldsMemmapDataset( + dataset_paths, **self.cfg.model.data.data_impl_kwargs.get("csv_fields_mmap", {}) + ) elif self.cfg.model.data.data_impl == "fasta_fields_mmap": dataset_paths = expand_dataset_paths(self.cfg.model.data.dataset_path, ext=".fasta") self.log_info(fl_ctx, f"Loading data from {dataset_paths}") - ds = FASTAFieldsMemmapDataset(dataset_paths, **self.cfg.model.data.data_impl_kwargs.get("fasta_fields_mmap", {})) + ds = FASTAFieldsMemmapDataset( + 
+                dataset_paths, **self.cfg.model.data.data_impl_kwargs.get("fasta_fields_mmap", {})
+            )
         else:
-            raise ValueError(f'Unknown data_impl: {self.cfg.model.data.data_impl}')
+            raise ValueError(f"Unknown data_impl: {self.cfg.model.data.data_impl}")
 
         # remove too long sequences
         filtered_ds = FilteredMappedDataset(
@@ -166,12 +157,12 @@ def cast_to_numpy(x):
         if "hiddens" in self.cfg.model.downstream_task.outputs:
             if ("hiddens" in predictions[0]) and ("mask" in predictions[0]):
                 for p in predictions:
-                    p["hiddens"] = p['hiddens'][p['mask']]
-                    del p['mask']
+                    p["hiddens"] = p["hiddens"][p["mask"]]
+                    del p["mask"]
             else:
                 for p in predictions:
-                    del p['mask']
-                    del p['hiddens']
+                    del p["mask"]
+                    del p["hiddens"]
 
         # collect all results when using DDP
         self.log_info(fl_ctx, "Collecting results from all GPUs...")
diff --git a/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_learner.py b/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_learner.py
index fdfd4580f3..1e42b1c559 100644
--- a/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_learner.py
+++ b/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_learner.py
@@ -13,24 +13,24 @@
 # limitations under the License.
 
 import copy
+import math
 import os
-from typing import Union
 import pickle
-import math
 from distutils.util import strtobool
+from typing import Union
 
 import numpy as np
 import pandas as pd
+import sklearn
+from sklearn.metrics import accuracy_score
+from sklearn.neural_network import MLPClassifier
+from torch.utils.tensorboard import SummaryWriter
+
 from nvflare.apis.fl_constant import FLMetaKey, ReturnCode
 from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
 from nvflare.app_common.abstract.model_learner import ModelLearner
 from nvflare.app_common.app_constant import AppConstants, ModelName, ValidateType
-from torch.utils.tensorboard import SummaryWriter
 from nvflare.app_common.utils.fl_model_utils import FLModelUtils
-from nvflare.app_opt.pt.fedproxloss import PTFedProxLoss
-from sklearn.neural_network import MLPClassifier
-from sklearn.metrics import accuracy_score, confusion_matrix
-import sklearn
 
 
 class BioNeMoMLPLearner(ModelLearner):  # does not support CIFAR10ScaffoldLearner
@@ -43,7 +43,7 @@ def __init__(
         analytic_sender_id: str = "analytic_sender",
         batch_size: int = 128,
         num_workers: int = 0,
-        warm_start: bool = True
+        warm_start: bool = True,
     ):
-        """Simple CIFAR-10 Trainer.
+        """Simple MLP trainer for BioNeMo protein embeddings.
 
@@ -124,30 +124,43 @@ def initialize(self):
         labels = pd.read_csv(labels_filename).astype(str)
 
         # Prepare the data for training
-        for embedding in protein_embeddings: 
+        for embedding in protein_embeddings:
             # get label entry from pandas dataframe
-            label = labels.loc[labels["id"]==str(embedding["id"])]
-            if label['SET'].item() == 'train':
+            label = labels.loc[labels["id"] == str(embedding["id"])]
+            if label["SET"].item() == "train":
                 self.X_train.append(embedding["embeddings"])
-                self.y_train.append(label['TARGET'].item())
-            elif label['SET'].item() == 'test':
+                self.y_train.append(label["TARGET"].item())
+            elif label["SET"].item() == "test":
                 self.X_test.append(embedding["embeddings"])
-                self.y_test.append(label['TARGET'].item())
+                self.y_test.append(label["TARGET"].item())
 
         assert len(self.X_train) > 0
         assert len(self.X_test) > 0
         self.info(f"There are {len(self.X_train)} training samples and {len(self.X_test)} testing samples.")
 
-        self.epoch_len = math.ceil(len(self.X_train)/self.batch_size)
+        self.epoch_len = math.ceil(len(self.X_train) / self.batch_size)
 
-        self.model = MLPClassifier(solver='adam',
-                                   hidden_layer_sizes=(512, 256, 128),
-                                   batch_size=self.batch_size,
-                                   learning_rate_init=self.lr,
-                                   verbose=True)
+        self.model = MLPClassifier(
+            solver="adam",
+            hidden_layer_sizes=(512, 256, 128),
+            batch_size=self.batch_size,
+            learning_rate_init=self.lr,
+            verbose=True,
+        )
 
         # run a fit with random data to initialize the model
-        class_labels = ["Cell_membrane", "Cytoplasm", "Endoplasmic_reticulum", "Extracellular", "Golgi_apparatus", "Lysosome", "Mitochondrion", "Nucleus", "Peroxisome", "Plastid"]
+        class_labels = [
+            "Cell_membrane",
+            "Cytoplasm",
+            "Endoplasmic_reticulum",
+            "Extracellular",
+            "Golgi_apparatus",
+            "Lysosome",
+            "Mitochondrion",
+            "Nucleus",
+            "Peroxisome",
+            "Plastid",
+        ]
         _X, _y = [], []
         for label in class_labels:
             _X.append(np.random.rand(768))
@@ -276,9 +289,7 @@ def validate(self, model: FLModel) -> Union[str, FLModel]:
             self.warning("Simulating local validation only!")
 
         # get validation meta info
-        validate_type = FLModelUtils.get_meta_prop(
-            model, FLMetaKey.VALIDATE_TYPE, ValidateType.MODEL_VALIDATE
-        )
+        validate_type = FLModelUtils.get_meta_prop(model, FLMetaKey.VALIDATE_TYPE, ValidateType.MODEL_VALIDATE)
         model_owner = self.get_shareable_header(AppConstants.MODEL_OWNER)
 
         # train score
@@ -298,13 +309,15 @@ def validate(self, model: FLModel) -> Union[str, FLModel]:
             self.save_model(is_best=True)
 
         # write to tensorboard
-        if validate_type == ValidateType.BEFORE_TRAIN_VALIDATE:  # TODO: also compute classification report during cross-site
+        if (
+            validate_type == ValidateType.BEFORE_TRAIN_VALIDATE
+        ):  # TODO: also compute classification report during cross-site
             self.writer.add_scalar("accuracy", accuracy, self.epoch_of_start_time)
             self.writer.add_scalar("train_accuracy", train_accuracy, self.epoch_of_start_time)
 
-            classifcation_report = sklearn.metrics.classification_report(self.y_test, predicted_testing_labels,
-                                                                          labels=list(set(predicted_testing_labels)),
-                                                                          output_dict=True)
-            for category, metrics in classifcation_report.items():
+            classification_report = sklearn.metrics.classification_report(
+                self.y_test, predicted_testing_labels, labels=list(set(predicted_testing_labels)), output_dict=True
+            )
+            for category, metrics in classification_report.items():
                 if isinstance(metrics, dict):
                     for k, v in metrics.items():
diff --git a/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_model_persistor.py b/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_model_persistor.py
index b30cbc6932..c31cf74bc4 100644
--- a/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_model_persistor.py
+++ b/examples/advanced/bionemo/task_fitting/jobs/fedavg/app/custom/bionemo_mlp_model_persistor.py
@@ -12,30 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import os
-import re
-from collections import OrderedDict
-from typing import Dict
-import numpy as np
 import pickle
+from typing import Dict
 
+import numpy as np
 from sklearn.neural_network import MLPClassifier
+
 from nvflare.apis.event_type import EventType
 from nvflare.apis.fl_constant import FLContextKey
 from nvflare.apis.fl_context import FLContext
-from nvflare.app_common.abstract.model import ModelLearnable
-from nvflare.app_common.abstract.model_persistor import ModelPersistor
-from nvflare.app_common.app_constant import AppConstants, DefaultCheckpointFileName, EnvironmentKey
-from nvflare.app_common.app_event_type import AppEventType
-from nvflare.app_common.model_desc import ModelDescriptor
-from nvflare.app_opt.pt.model_persistence_format_manager import PTModelPersistenceFormatManager
 from nvflare.app_common.abstract.model import (
     ModelLearnable,
     ModelLearnableKey,
     make_model_learnable,
     validate_model_learnable,
 )
+from nvflare.app_common.app_constant import AppConstants, DefaultCheckpointFileName, EnvironmentKey
+from nvflare.app_common.app_event_type import AppEventType
+from nvflare.app_common.model_desc import ModelDescriptor
 from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor
@@ -61,7 +56,7 @@ def __init__(
         super().__init__(
             filter_id=filter_id,
         )
-        self.model = MLPClassifier(solver='adam', hidden_layer_sizes=(512, 256, 128), random_state=10, max_iter=1)
+        self.model = MLPClassifier(solver="adam", hidden_layer_sizes=(512, 256, 128), random_state=10, max_iter=1)
         self.log_dir = None
         self.ckpt_preload_path = None
         self.ckpt_dir_env_key = EnvironmentKey.CHECKPOINT_DIR
@@ -78,13 +73,27 @@ def __init__(
 
     def _initialize(self, fl_ctx: FLContext):
         # To initialize the model, fit on some random data
-        class_labels = ["Cell_membrane", "Cytoplasm", "Endoplasmic_reticulum", "Extracellular", "Golgi_apparatus", "Lysosome", "Mitochondrion", "Nucleus", "Peroxisome", "Plastid"]
+        class_labels = [
+            "Cell_membrane",
+            "Cytoplasm",
+            "Endoplasmic_reticulum",
+            "Extracellular",
+            "Golgi_apparatus",
+            "Lysosome",
+            "Mitochondrion",
+            "Nucleus",
+            "Peroxisome",
+            "Plastid",
+        ]
         _X, _y = [], []
         for label in class_labels:
             _X.append(np.random.rand(768))
             _y.append(label)
         self.model.fit(_X, _y)
-        self.log_info(fl_ctx, f"MLPClassifier coefficients {[np.shape(x) for x in self.model.coefs_]}, intercepts {[np.shape(x) for x in self.model.intercepts_]}")
+        self.log_info(
+            fl_ctx,
+            f"MLPClassifier coefficients {[np.shape(x) for x in self.model.coefs_]}, intercepts {[np.shape(x) for x in self.model.intercepts_]}",
+        )
 
         app_root = fl_ctx.get_prop(FLContextKey.APP_ROOT)
         log_dir = fl_ctx.get_prop(AppConstants.LOG_DIR)
@@ -122,7 +131,7 @@ def load_model(self, fl_ctx: FLContext) -> ModelLearnable:
 
         self.learned_weights = weights
 
-        return make_model_learnable(weights, meta_props=self.default_train_conf )
+        return make_model_learnable(weights, meta_props=self.default_train_conf)
 
     def handle_event(self, event: str, fl_ctx: FLContext):
         if event == EventType.START_RUN:
diff --git a/examples/advanced/bionemo/task_fitting/split_data.py b/examples/advanced/bionemo/task_fitting/split_data.py
index 2ce080f89b..687d673e77 100644
--- a/examples/advanced/bionemo/task_fitting/split_data.py
+++ b/examples/advanced/bionemo/task_fitting/split_data.py
@@ -15,10 +15,12 @@
 import json
 import os
 from pprint import pprint
+
 import numpy as np
 import pandas as pd
 from scipy.stats import dirichlet
 
+
 def list_to_dataframe(data_list):
     data_dict = {}
     for p in data_list:
@@ -56,9 +58,7 @@ def partition_data(train_labels, label_names, num_sites, alpha, seed):
             np.random.shuffle(idx_k)
             proportions = dirichlet.rvs(np.repeat(alpha, num_sites), random_state=seed)
             # Balance
-            proportions = np.array(
-                [p * (len(idx_j) < N / num_sites) for p, idx_j in zip(proportions, idx_batch)]
-            )
+            proportions = np.array([p * (len(idx_j) < N / num_sites) for p, idx_j in zip(proportions, idx_batch)])
             proportions = proportions / proportions.sum()
             proportions = (np.cumsum(proportions) * len(idx_k)).astype(int)[:-1]
             idx_batch = [idx_j + idx.tolist() for idx_j, idx in zip(idx_batch, np.split(idx_k, proportions))]
@@ -74,6 +74,7 @@ def partition_data(train_labels, label_names, num_sites, alpha, seed):
 
     return site_idx, class_sum
 
+
 def split(proteins, num_sites, split_dir=".", alpha=1.0, seed=0, concat=False):
     train_proteins = []
     train_labels = []
@@ -86,7 +87,9 @@ def split(proteins, num_sites, split_dir=".", alpha=1.0, seed=0, concat=False):
         test_proteins.append(entry)
     assert len(train_labels) > 0
     label_names = set(train_labels)
-    print(f"Partition protein dataset with {len(label_names)} classes into {num_sites} sites with Dirichlet sampling under alpha {alpha}")
+    print(
+        f"Partition protein dataset with {len(label_names)} classes into {num_sites} sites with Dirichlet sampling under alpha {alpha}"
+    )
     site_idx, class_sum = partition_data(train_labels, label_names, num_sites, alpha, seed)
     pprint(class_sum)
 
@@ -113,24 +116,39 @@ def split(proteins, num_sites, split_dir=".", alpha=1.0, seed=0, concat=False):
 
     if concat:
         split_df = pd.concat([df_split_train_proteins, df_test_proteins])
-        split_df.to_csv(os.path.join(split_dir, f"data_{client_name}.csv"), index=False, columns=["id", "sequence", "TARGET", "SET"])
+        split_df.to_csv(
+            os.path.join(split_dir, f"data_{client_name}.csv"),
+            index=False,
+            columns=["id", "sequence", "TARGET", "SET"],
+        )
     else:
         _split_dir = os.path.join(split_dir, "train")
         if not os.path.isdir(_split_dir):
             os.makedirs(_split_dir)
-        df_split_train_proteins.to_csv(os.path.join(_split_dir, f"data_train_{client_name}.csv"), index=False,
-                                       columns=["id", "sequence", "TARGET", "SET"])
+        df_split_train_proteins.to_csv(
+            os.path.join(_split_dir, f"data_train_{client_name}.csv"),
+            index=False,
+            columns=["id", "sequence", "TARGET", "SET"],
+        )
         _split_dir = os.path.join(split_dir, "val")
         if not os.path.isdir(_split_dir):
             os.makedirs(_split_dir)
-        df_test_proteins.to_csv(os.path.join(_split_dir, f"data_val_{client_name}.csv"), index=False,
-                                columns=["id", "sequence", "TARGET", "SET"])
+        df_test_proteins.to_csv(
+            os.path.join(_split_dir, f"data_val_{client_name}.csv"),
+            index=False,
+            columns=["id", "sequence", "TARGET", "SET"],
+        )
         # validation & test are the same here!
         _split_dir = os.path.join(split_dir, "test")
         if not os.path.isdir(_split_dir):
             os.makedirs(_split_dir)
-        df_test_proteins.to_csv(os.path.join(_split_dir, f"data_test_{client_name}.csv"), index=False,
-                                columns=["id", "sequence", "TARGET", "SET"])
+        df_test_proteins.to_csv(
+            os.path.join(_split_dir, f"data_test_{client_name}.csv"),
+            index=False,
+            columns=["id", "sequence", "TARGET", "SET"],
+        )
 
-    print(f"Saved {len(df_split_train_proteins)} training and {len(test_proteins)} testing proteins for {client_name}, "
-          f"({len(set(df_split_train_proteins['TARGET']))}/{len(set(df_test_proteins['TARGET']))}) unique train/test classes.")
+    print(
+        f"Saved {len(df_split_train_proteins)} training and {len(test_proteins)} testing proteins for {client_name}, "
+        f"({len(set(df_split_train_proteins['TARGET']))}/{len(set(df_test_proteins['TARGET']))}) unique train/test classes."
+    )
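
A note on the partitioning logic reformatted above: for each class, `partition_data` draws a vector of per-site proportions from a Dirichlet distribution, rebalances it, and turns the cumulative sums into split points over that class's shuffled sample indices. Below is a minimal standalone sketch of the same idea; the helper name `dirichlet_partition` and the toy labels are illustrative, the balancing step of the original is omitted, and it mirrors the script's use of a fixed `random_state=seed` inside the per-class loop (which means every class is split with the same proportions):

    import numpy as np
    from scipy.stats import dirichlet

    def dirichlet_partition(labels, num_sites, alpha=1.0, seed=0):
        # Returns {site: [sample indices]} with Dirichlet-skewed class proportions.
        labels = np.asarray(labels)
        rng = np.random.default_rng(seed)
        site_idx = {site: [] for site in range(num_sites)}
        for cls in np.unique(labels):
            idx_k = np.flatnonzero(labels == cls)
            rng.shuffle(idx_k)
            # dirichlet.rvs returns shape (1, num_sites); the row sums to 1
            proportions = dirichlet.rvs(np.repeat(alpha, num_sites), random_state=seed)[0]
            cuts = (np.cumsum(proportions) * len(idx_k)).astype(int)[:-1]
            for site, chunk in enumerate(np.split(idx_k, cuts)):
                site_idx[site].extend(chunk.tolist())
        return site_idx

    # Example: 12 samples, 2 classes, 3 sites
    print(dirichlet_partition(["a"] * 6 + ["b"] * 6, num_sites=3))

Smaller `alpha` values concentrate each class on fewer sites (more heterogeneous, non-IID splits), while `alpha=1.0` samples the proportions uniformly over the simplex.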
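
The dummy `fit` on one random 768-dimensional vector per class, which appears in both `bionemo_mlp_learner.py` and `bionemo_mlp_model_persistor.py`, is needed because scikit-learn allocates `coefs_` and `intercepts_` only during the first call to `fit`; once they exist, they can be read out for server-side aggregation and written back before the next local round. A rough sketch of that round trip, with illustrative helper names that are not part of the example code:

    import numpy as np
    from sklearn.neural_network import MLPClassifier

    CLASS_LABELS = ["Cell_membrane", "Cytoplasm", "Nucleus"]  # subset of the example's 10 classes

    def init_model(dim=768, seed=10):
        # max_iter=1 is enough: this fit only has to allocate coefs_/intercepts_
        model = MLPClassifier(solver="adam", hidden_layer_sizes=(512, 256, 128), random_state=seed, max_iter=1)
        model.fit(np.random.rand(len(CLASS_LABELS), dim), CLASS_LABELS)
        return model

    def get_weights(model):
        return [w.copy() for w in model.coefs_ + model.intercepts_]

    def set_weights(model, weights):
        n = len(model.coefs_)
        model.coefs_, model.intercepts_ = list(weights[:n]), list(weights[n:])

    model = init_model()
    set_weights(model, get_weights(model))  # e.g. install server-averaged weights before local training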
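
Similarly, the `hiddens`/`mask` block reformatted in `bionemo_inferer.py` keeps per-token hidden states only at valid (unpadded) positions before results are gathered across ranks. With illustrative shapes:

    import numpy as np

    # one prediction dict per sequence; a 12-token sequence with 3 padded positions
    p = {"hiddens": np.random.rand(12, 768), "mask": np.array([True] * 9 + [False] * 3)}
    p["hiddens"] = p["hiddens"][p["mask"]]  # -> shape (9, 768); padded positions dropped
    del p["mask"]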