formatting
holgerroth committed Feb 23, 2024
1 parent 3b69a91 commit f7161f4
Showing 33 changed files with 322 additions and 358 deletions.
@@ -13,29 +13,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.core.config import hydra_runner
-from nemo.utils import logging
-from omegaconf.omegaconf import OmegaConf
-
 from bionemo.data import FLIPPreprocess
 from bionemo.data.metrics import accuracy, mse, per_token_accuracy
 from bionemo.model.protein.downstream import FineTuneProteinModel
-from bionemo.model.utils import (
-    setup_trainer,
-)
+from bionemo.model.utils import setup_trainer
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from omegaconf.omegaconf import OmegaConf

 # (0): import nvflare lightning api
 import nvflare.client.lightning as flare
 from nvflare.client.api import init

 micro_batch_size = 32
 val_check_intervals = {
-    "site-1": int(416/micro_batch_size),
-    "site-2": int(238/micro_batch_size),
-    "site-3": int(282/micro_batch_size),
-    "site-4": int(472/micro_batch_size),
-    "site-5": int(361/micro_batch_size),
-    "site-6": int(157/micro_batch_size)
+    "site-1": int(416 / micro_batch_size),
+    "site-2": int(238 / micro_batch_size),
+    "site-3": int(282 / micro_batch_size),
+    "site-4": int(472 / micro_batch_size),
+    "site-5": int(361 / micro_batch_size),
+    "site-6": int(157 / micro_batch_size),
 }


@@ -53,7 +50,7 @@ def main(cfg) -> None:
     print(f"Running client {site_name} with train data: {cfg.model.data.dataset.train}")

     logging.info("\n\n************* Finetune config ****************")
-    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+    logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

     if cfg.do_training:
         logging.info("************** Starting Training ***********")
@@ -111,5 +108,5 @@ def main(cfg) -> None:
         preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -13,16 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.core.config import hydra_runner
-from nemo.utils import logging
-from omegaconf.omegaconf import OmegaConf
-
 from bionemo.data import FLIPPreprocess
 from bionemo.data.metrics import accuracy, mse, per_token_accuracy
 from bionemo.model.protein.downstream import FineTuneProteinModel
-from bionemo.model.utils import (
-    setup_trainer,
-)
+from bionemo.model.utils import setup_trainer
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from omegaconf.omegaconf import OmegaConf

 # (0): import nvflare lightning api
 import nvflare.client.lightning as flare
@@ -52,12 +49,12 @@

 # alpha 1.0
 val_check_intervals = {
-    "site-1": int(80/micro_batch_size),
-    "site-2": int(365/micro_batch_size),
-    "site-3": int(216/micro_batch_size),
-    "site-4": int(578/micro_batch_size),
-    "site-5": int(568/micro_batch_size),
-    "site-6": int(119/micro_batch_size)
+    "site-1": int(80 / micro_batch_size),
+    "site-2": int(365 / micro_batch_size),
+    "site-3": int(216 / micro_batch_size),
+    "site-4": int(578 / micro_batch_size),
+    "site-5": int(568 / micro_batch_size),
+    "site-6": int(119 / micro_batch_size),
 }


@@ -75,7 +72,7 @@ def main(cfg) -> None:
     print(f"Running client {site_name} with train data: {cfg.model.data.dataset.train}")

     logging.info("\n\n************* Finetune config ****************")
-    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+    logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

     if cfg.do_training:
         logging.info("************** Starting Training ***********")
@@ -133,5 +130,5 @@ def main(cfg) -> None:
         preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -13,16 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.core.config import hydra_runner
-from nemo.utils import logging
-from omegaconf.omegaconf import OmegaConf
-
 from bionemo.data import FLIPPreprocess
 from bionemo.data.metrics import accuracy, mse, per_token_accuracy
 from bionemo.model.protein.downstream import FineTuneProteinModel
-from bionemo.model.utils import (
-    setup_trainer,
-)
+from bionemo.model.utils import setup_trainer
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from omegaconf.omegaconf import OmegaConf

 # (0): import nvflare lightning api
 import nvflare.client.lightning as flare
@@ -32,12 +29,12 @@

 # alpha 100.0
 val_check_intervals = {
-    "site-1": int(351/micro_batch_size),
-    "site-2": int(297/micro_batch_size),
-    "site-3": int(312/micro_batch_size),
-    "site-4": int(366/micro_batch_size),
-    "site-5": int(336/micro_batch_size),
-    "site-6": int(265/micro_batch_size)
+    "site-1": int(351 / micro_batch_size),
+    "site-2": int(297 / micro_batch_size),
+    "site-3": int(312 / micro_batch_size),
+    "site-4": int(366 / micro_batch_size),
+    "site-5": int(336 / micro_batch_size),
+    "site-6": int(265 / micro_batch_size),
 }

 # alpha 10.0
@@ -75,7 +72,7 @@ def main(cfg) -> None:
     print(f"Running client {site_name} with train data: {cfg.model.data.dataset.train}")

     logging.info("\n\n************* Finetune config ****************")
-    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+    logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

     if cfg.do_training:
         logging.info("************** Starting Training ***********")
@@ -133,5 +130,5 @@ def main(cfg) -> None:
         preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -13,10 +13,11 @@
 # limitations under the License.

 import os
-import pandas as pd
+
 import numpy as np
-from tdc.utils import retrieve_label_name_list
+import pandas as pd
 from tdc.single_pred import Develop
+
 np.random.seed(1234)

 out_name = "sabdab_chen"
@@ -73,7 +74,7 @@ def break_chains(df):
 def main():
     seed = 0

-    data = Develop(name='SAbDab_Chen', path="/tmp/data")
+    data = Develop(name="SAbDab_Chen", path="/tmp/data")
     split = data.get_split()

     train_df = pd.concat([split["train"], split["valid"]])
@@ -86,11 +87,11 @@ def main():
         proportions = np.random.dirichlet(np.repeat(alpha, n_clients))
     else:
         print("Uniform sampling")
-        proportions = n_clients * [1/n_clients]
+        proportions = n_clients * [1 / n_clients]

     for client_id in range(n_clients):
         client_name = f"site-{client_id+1}"
-        client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed+client_id)
+        client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed + client_id)

         if do_break_chains:
             client_train_df = break_chains(client_train_df)
@@ -128,8 +129,8 @@ def main():
     print(f"Saved {len(train_df)} training and {len(test_df)} testing proteins.")

     for _set, _df in zip(["TRAIN", "TEST"], [train_df, test_df]):
-        n_pos = np.sum(_df['Y'] == 0)
-        n_neg = np.sum(_df['Y'] == 1)
+        n_pos = np.sum(_df["Y"] == 0)
+        n_neg = np.sum(_df["Y"] == 1)
         n = len(_df)
         print(f" {_set} Pos/Neg ratio: neg={n_neg}, pos={n_pos}: {n_pos/n_neg:0.3f}")
         print(f" {_set} Trivial accuracy: {n_pos/n:0.3f}")
@@ -144,7 +145,7 @@ def main():
             b = np.asarray(client_train_dfs[j]["Antibody_ID"])
             assert len(np.unique(a)) == len(a)
             assert len(np.unique(b)) == len(b)
-            d[i][j] = len(np.intersect1d(a, b))/len(b)
+            d[i][j] = len(np.intersect1d(a, b)) / len(b)

     print(d)
     overlap = np.mean(d[~np.isnan(d)])
6 changes: 2 additions & 4 deletions examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 from nvflare import SimulatorRunner
+
 n_clients = 6

 # Choose from one of the available jobs
@@ -21,10 +22,7 @@
 # job_name = "fedavg_sabdab_esm1nv"

 simulator = SimulatorRunner(
-    job_folder=f"jobs/{job_name}",
-    workspace=f"/tmp/nvflare/results/{job_name}",
-    n_clients=n_clients,
-    threads=n_clients
+    job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=n_clients
 )
 run_status = simulator.run()
 print("Simulator finished with run_status", run_status)
@@ -13,16 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.core.config import hydra_runner
-from nemo.utils import logging
-from omegaconf.omegaconf import OmegaConf
-
 from bionemo.data import FLIPPreprocess
 from bionemo.data.metrics import accuracy, mse, per_token_accuracy
 from bionemo.model.protein.downstream import FineTuneProteinModel
-from bionemo.model.utils import (
-    setup_trainer,
-)
+from bionemo.model.utils import setup_trainer
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from omegaconf.omegaconf import OmegaConf

 # (0): import nvflare lightning api
 import nvflare.client.lightning as flare
@@ -32,7 +29,7 @@
 # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5
 def main(cfg) -> None:
     logging.info("\n\n************* Finetune config ****************")
-    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+    logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

     if cfg.do_training:
         logging.info("************** Starting Training ***********")
@@ -90,5 +87,5 @@ def main(cfg) -> None:
         preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -13,16 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.core.config import hydra_runner
-from nemo.utils import logging
-from omegaconf.omegaconf import OmegaConf
-
 from bionemo.data import FLIPPreprocess
 from bionemo.data.metrics import accuracy, mse, per_token_accuracy
 from bionemo.model.protein.downstream import FineTuneProteinModel
-from bionemo.model.utils import (
-    setup_trainer,
-)
+from bionemo.model.utils import setup_trainer
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from omegaconf.omegaconf import OmegaConf

 # (0): import nvflare lightning api
 import nvflare.client.lightning as flare
@@ -32,7 +29,7 @@
 # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5
 def main(cfg) -> None:
     logging.info("\n\n************* Finetune config ****************")
-    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+    logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

     if cfg.do_training:
         logging.info("************** Starting Training ***********")
@@ -90,5 +87,5 @@ def main(cfg) -> None:
         preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -13,16 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.core.config import hydra_runner
-from nemo.utils import logging
-from omegaconf.omegaconf import OmegaConf
-
 from bionemo.data import FLIPPreprocess
 from bionemo.data.metrics import accuracy, mse, per_token_accuracy
 from bionemo.model.protein.downstream import FineTuneProteinModel
-from bionemo.model.utils import (
-    setup_trainer,
-)
+from bionemo.model.utils import setup_trainer
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from omegaconf.omegaconf import OmegaConf

 # (0): import nvflare lightning api
 import nvflare.client.lightning as flare
@@ -32,7 +29,7 @@
 # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5
 def main(cfg) -> None:
     logging.info("\n\n************* Finetune config ****************")
-    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+    logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

     if cfg.do_training:
         logging.info("************** Starting Training ***********")
@@ -90,5 +87,5 @@ def main(cfg) -> None:
         preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -13,16 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.core.config import hydra_runner
-from nemo.utils import logging
-from omegaconf.omegaconf import OmegaConf
-
 from bionemo.data import FLIPPreprocess
 from bionemo.data.metrics import accuracy, mse, per_token_accuracy
 from bionemo.model.protein.downstream import FineTuneProteinModel
-from bionemo.model.utils import (
-    setup_trainer,
-)
+from bionemo.model.utils import setup_trainer
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from omegaconf.omegaconf import OmegaConf

 # (0): import nvflare lightning api
 import nvflare.client.lightning as flare
@@ -32,7 +29,7 @@
 # @hydra_runner(config_path="../prott5nv/conf", config_name="downstream_flip_sec_str") # ProtT5
 def main(cfg) -> None:
     logging.info("\n\n************* Finetune config ****************")
-    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+    logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

     if cfg.do_training:
         logging.info("************** Starting Training ***********")
@@ -90,5 +87,5 @@ def main(cfg) -> None:
         preprocessor.prepare_all_datasets(output_dir=cfg.model.data.preprocessed_data_path)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()