From d85a351023f77310460c06c0051499e25839890e Mon Sep 17 00:00:00 2001 From: Joosep Pata Date: Fri, 27 Dec 2024 14:33:10 +0200 Subject: [PATCH] Compute and track detailed evaluation metrics on each epoch (#385) * save jet metrics on each epoch * format * fix ray --- mlpf/model/PFDataset.py | 7 +- mlpf/model/distributed_ray.py | 3 +- mlpf/model/inference.py | 12 +- mlpf/model/mlpf.py | 2 +- mlpf/model/training.py | 230 ++++++++++++--------- mlpf/model/utils.py | 6 +- mlpf/pipeline.py | 11 +- mlpf/plotting/plot_utils.py | 27 +++ parameters/pytorch/pyg-clic.yaml | 6 +- parameters/pytorch/pyg-cms-ttbar-nopu.yaml | 9 +- parameters/pytorch/pyg-cms.yaml | 6 +- 11 files changed, 195 insertions(+), 124 deletions(-) diff --git a/mlpf/model/PFDataset.py b/mlpf/model/PFDataset.py index d1934cee9..c0ce21527 100644 --- a/mlpf/model/PFDataset.py +++ b/mlpf/model/PFDataset.py @@ -82,13 +82,16 @@ def __getitem__(self, item): ret["X"][:, 1][msk_ho] = np.sqrt(e**2 - (np.tanh(eta) * e) ** 2) # transform pt -> log(pt / elem pt), same for energy - target_pt = np.log(ret["ytarget"][:, 2] / ret["X"][:, 1]) + # where target does not exist, set to 0 + with np.errstate(divide="ignore"): + target_pt = np.log(ret["ytarget"][:, 2] / ret["X"][:, 1]) target_pt[np.isnan(target_pt)] = 0 target_pt[np.isinf(target_pt)] = 0 ret["ytarget_pt_orig"] = ret["ytarget"][:, 2].copy() ret["ytarget"][:, 2] = target_pt - target_e = np.log(ret["ytarget"][:, 6] / ret["X"][:, 5]) + with np.errstate(divide="ignore"): + target_e = np.log(ret["ytarget"][:, 6] / ret["X"][:, 5]) target_e[ret["ytarget"][:, 0] == 0] = 0 target_e[np.isnan(target_e)] = 0 target_e[np.isinf(target_e)] = 0 diff --git a/mlpf/model/distributed_ray.py b/mlpf/model/distributed_ray.py index 56e86ba49..d6202e75c 100644 --- a/mlpf/model/distributed_ray.py +++ b/mlpf/model/distributed_ray.py @@ -252,7 +252,7 @@ def train_ray_trial(config, args, outdir=None): _logger.info(table) if (rank == 0) or (rank == "cpu"): - save_HPs(args, model, model_kwargs, outdir) # save model_kwargs and hyperparameters + save_HPs(config, model, model_kwargs, outdir) # save model_kwargs and hyperparameters _logger.info("Creating experiment dir {}".format(outdir)) _logger.info(f"Model directory {outdir}", color="bold") @@ -312,6 +312,7 @@ def train_ray_trial(config, args, outdir=None): config["num_epochs"], config["patience"], outdir, + config, trainable=config["model"]["trainable"], start_epoch=start_epoch, lr_schedule=lr_schedule, diff --git a/mlpf/model/inference.py b/mlpf/model/inference.py index 0d5d06d23..039769f7a 100644 --- a/mlpf/model/inference.py +++ b/mlpf/model/inference.py @@ -155,13 +155,15 @@ def run_predictions(world_size, rank, model, loader, sample, outpath, jetdef, je ti = time.time() for i, batch in iterator: predict_one_batch(conv_type, model, i, batch, rank, jetdef, jet_ptcut, jet_match_dr, outpath, dir_name, sample) + tf = time.time() + time_total_min = (tf - ti) / 60.0 - _logger.info(f"Time taken to make predictions on device {rank} is: {((time.time() - ti) / 60):.2f} min") + _logger.info(f"Time taken to make predictions on device {rank} is: {time_total_min:.2f} min") def make_plots(outpath, sample, dataset, dir_name="", ntest_files=-1): - """Uses the predictions stored as .parquet files (see above) to make plots.""" - + """Uses the predictions stored as .parquet files from run_predictions to make plots.""" + ret_dict = {} mplhep.style.use(mplhep.styles.CMS) class_names = get_class_names(sample) os.system(f"mkdir -p {outpath}/plots{dir_name}/{sample}") @@ -181,7 +183,7 @@ def make_plots(outpath, sample, dataset, dir_name="", ntest_files=-1): dataset=dataset, sample=sample, ) - plot_jet_ratio( + ret_dict["jet_ratio"] = plot_jet_ratio( yvals, cp_dir=plots_path, bins=np.linspace(0, 5, 500), @@ -230,3 +232,5 @@ def make_plots(outpath, sample, dataset, dir_name="", ntest_files=-1): plot_particles(yvals, cp_dir=plots_path, dataset=dataset, sample=sample) plot_particle_ratio(yvals, class_names, cp_dir=plots_path, dataset=dataset, sample=sample) plot_particle_response(X, yvals, class_names, cp_dir=plots_path, dataset=dataset, sample=sample) + + return ret_dict diff --git a/mlpf/model/mlpf.py b/mlpf/model/mlpf.py index 8722f9457..c02d039dd 100644 --- a/mlpf/model/mlpf.py +++ b/mlpf/model/mlpf.py @@ -98,7 +98,7 @@ def __init__( self.norm1 = torch.nn.LayerNorm(embedding_dim) self.seq = torch.nn.Sequential(nn.Linear(embedding_dim, width), self.act(), nn.Linear(width, embedding_dim), self.act()) self.dropout = torch.nn.Dropout(dropout_ff) - _logger.info("using attention_type={}".format(attention_type)) + _logger.info("layer {} using attention_type={}".format(self.name, attention_type)) # params for torch sdp_kernel if self.enable_ctx_manager: self.attn_params = { diff --git a/mlpf/model/training.py b/mlpf/model/training.py index 08de1b591..4c7c4f3ba 100644 --- a/mlpf/model/training.py +++ b/mlpf/model/training.py @@ -44,6 +44,8 @@ def configure_model_trainable(model: MLPF, trainable: Union[str, List[str]], is_training: bool): + """Set only the given layers as trainable in the model""" + if isinstance(model, torch.nn.parallel.DistributedDataParallel): raise Exception("configure trainability before distributing the model") if is_training: @@ -76,7 +78,7 @@ def train_step(batch, model, optimizer, lr_schedule, loss_fn): loss_fn: Loss function to use Returns: - dict: Dictionary containing all computed losses + dict: Dictionary containing all computed losses with gradient detached """ ypred_raw = model(batch.X, batch.mask) ypred = unpack_predictions(ypred_raw) @@ -320,6 +322,7 @@ def train_all_epochs( num_epochs, patience, outdir, + config, trainable="all", dtype=torch.float32, start_epoch=1, @@ -356,6 +359,15 @@ def train_all_epochs( save_attention: Whether to save attention weights checkpoint_dir: Directory to save checkpoints """ + + # run per-worker setup here so all processes / threads get configured. + # Ignore divide by 0 errors + np.seterr(divide="ignore", invalid="ignore") + # disable GUI + import matplotlib + + matplotlib.use("agg") + # Setup tensorboard writers if (rank == 0) or (rank == "cpu"): tensorboard_writer_train = SummaryWriter(f"{outdir}/runs/train") @@ -418,6 +430,21 @@ def train_all_epochs( # Handle checkpointing and early stopping on rank 0 if (rank == 0) or (rank == "cpu"): + + # evaluate the model at this epoch on test datasets, make plots, track metrics + testdir_name = f"_epoch_{epoch}" + for sample in config["test_dataset"]: + run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtype) + plot_metrics = make_plots(outdir, sample, config["dataset"], testdir_name, config["ntest"]) + + # track the following jet metrics in tensorboard + for k in ["med", "iqr", "match_frac"]: + tensorboard_writer_valid.add_scalar( + "epoch/{}/jet_ratio/jet_ratio_target_to_pred_pt/{}".format(sample, k), + plot_metrics["jet_ratio"]["jet_ratio_target_to_pred_pt"][k], + epoch, + ) + # Log learning rate tensorboard_writer_train.add_scalar("epoch/learning_rate", lr_schedule.get_last_lr()[0], epoch) @@ -432,7 +459,7 @@ def train_all_epochs( else: stale_epochs += 1 - # Periodic checkpointing + # Periodic epoch checkpointing if checkpoint_freq and (epoch % checkpoint_freq == 0): checkpoint_path = f"{checkpoint_dir}/checkpoint-{epoch:02d}-{losses_valid['Total']:.6f}.pth" save_checkpoint(checkpoint_path, model, optimizer, extra_state) @@ -508,8 +535,7 @@ def train_all_epochs( # Synchronize processes if world_size > 1: dist.barrier() - - # Training completed + # End loop over epochs, training completed _logger.info(f"Training completed. Total time on device {rank}: {(time.time() - t0_initial)/60:.3f}min") # Clean up @@ -518,7 +544,83 @@ def train_all_epochs( tensorboard_writer_valid.close() -def run(rank, world_size, config, args, outdir, logfile): +def run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtype): + batch_size = config["gpu_batch_multiplier"] + version = config["test_dataset"][sample]["version"] + + split_configs = config["test_dataset"][sample]["splits"] + _logger.info("split_configs={}".format(split_configs)) + + dataset = [] + + ntest = None + if not (config["ntest"] is None): + ntest = config["ntest"] // len(split_configs) + + for split_config in split_configs: + ds = PFDataset(config["data_dir"], f"{sample}/{split_config}:{version}", "test", num_samples=ntest).ds + dataset.append(ds) + ds = torch.utils.data.ConcatDataset(dataset) + + if (rank == 0) or (rank == "cpu"): + _logger.info(f"test_dataset: {sample}, {len(ds)}", color="blue") + + if world_size > 1: + sampler = torch.utils.data.distributed.DistributedSampler(ds) + else: + sampler = torch.utils.data.RandomSampler(ds) + + test_loader = torch.utils.data.DataLoader( + ds, + batch_size=batch_size, + collate_fn=Collater(["X", "ytarget", "ytarget_pt_orig", "ytarget_e_orig", "ycand", "genjets", "targetjets"], ["genmet"]), + sampler=sampler, + num_workers=config["num_workers"], + prefetch_factor=config["prefetch_factor"], + # pin_memory=use_cuda, + # pin_memory_device="cuda:{}".format(rank) if use_cuda else "", + ) + + if not osp.isdir(f"{outdir}/preds{testdir_name}/{sample}"): + if (rank == 0) or (rank == "cpu"): + os.system(f"mkdir -p {outdir}/preds{testdir_name}/{sample}") + + _logger.info(f"Running predictions on {sample}") + torch.cuda.empty_cache() + + # FIXME: import this from a central place + if config["dataset"] == "clic": + import fastjet + + jetdef = fastjet.JetDefinition(fastjet.ee_genkt_algorithm, 0.4, -1.0) + jet_ptcut = 5 + elif config["dataset"] == "cms": + import fastjet + + jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4) + jet_ptcut = 3 + else: + raise Exception("not implemented") + + device_type = "cuda" if isinstance(rank, int) else "cpu" + with torch.autocast(device_type=device_type, dtype=dtype, enabled=device_type == "cuda"): + run_predictions( + world_size, + rank, + model, + test_loader, + sample, + outdir, + jetdef, + jet_ptcut=jet_ptcut, + jet_match_dr=0.1, + dir_name=testdir_name, + ) + if world_size > 1: + dist.barrier() # block until all workers finished executing run_predictions() + + +def run(rank, world_size, config, outdir, logfile): if (rank == 0) or (rank == "cpu"): # keep writing the logs _configLogger("mlpf", filename=logfile) @@ -566,7 +668,7 @@ def run(rank, world_size, config, args, outdir, logfile): if len(missing_keys) > 0: _logger.warning(f"The following parameters are missing in the checkpoint file {missing_keys}", color="red") - if args.relaxed_load: + if config["relaxed_load"]: _logger.warning("Optimizer checkpoint will not be loaded", color="bold") strict = False else: @@ -612,13 +714,13 @@ def run(rank, world_size, config, args, outdir, logfile): _logger.info(f"Total parameters: {trainable_params + nontrainable_params}") _logger.info(table.to_string(index=False)) - if args.train: + if config["train"]: if (rank == 0) or (rank == "cpu"): - save_HPs(args, model, model_kwargs, outdir) # save model_kwargs and hyperparameters + save_HPs(config, model, model_kwargs, outdir) # save model_kwargs and hyperparameters _logger.info("Creating experiment dir {}".format(outdir)) _logger.info(f"Model directory {outdir}", color="bold") - if args.comet: + if config["comet"]: comet_experiment = create_comet_experiment(config["comet_name"], comet_offline=config["comet_offline"], outdir=outdir) comet_experiment.set_name(f"rank_{rank}_{Path(outdir).name}") comet_experiment.log_parameter("run_id", Path(outdir).name) @@ -662,6 +764,7 @@ def run(rank, world_size, config, args, outdir, logfile): config["num_epochs"], config["patience"], outdir, + config, trainable=config["model"]["trainable"], dtype=dtype, start_epoch=start_epoch, @@ -683,88 +786,15 @@ def run(rank, world_size, config, args, outdir, logfile): else: testdir_name = "_best_weights" - if args.test: - for sample in args.test_datasets: - batch_size = config["gpu_batch_multiplier"] - version = config["test_dataset"][sample]["version"] - - split_configs = config["test_dataset"][sample]["splits"] - print("split_configs", split_configs) - - dataset = [] - - ntest = None - if not (config["ntest"] is None): - ntest = config["ntest"] // len(split_configs) - - for split_config in split_configs: - ds = PFDataset(config["data_dir"], f"{sample}/{split_config}:{version}", "test", num_samples=ntest).ds - dataset.append(ds) - ds = torch.utils.data.ConcatDataset(dataset) - - if (rank == 0) or (rank == "cpu"): - _logger.info(f"test_dataset: {sample}, {len(ds)}", color="blue") - - if world_size > 1: - sampler = torch.utils.data.distributed.DistributedSampler(ds) - else: - sampler = torch.utils.data.RandomSampler(ds) - - test_loader = torch.utils.data.DataLoader( - ds, - batch_size=batch_size, - collate_fn=Collater(["X", "ytarget", "ytarget_pt_orig", "ytarget_e_orig", "ycand", "genjets", "targetjets"], ["genmet"]), - sampler=sampler, - num_workers=config["num_workers"], - prefetch_factor=config["prefetch_factor"], - # pin_memory=use_cuda, - # pin_memory_device="cuda:{}".format(rank) if use_cuda else "", - ) - - if not osp.isdir(f"{outdir}/preds{testdir_name}/{sample}"): - if (rank == 0) or (rank == "cpu"): - os.system(f"mkdir -p {outdir}/preds{testdir_name}/{sample}") - - _logger.info(f"Running predictions on {sample}") - torch.cuda.empty_cache() - - # FIXME: import this from a central place - if config["dataset"] == "clic": - import fastjet - - jetdef = fastjet.JetDefinition(fastjet.ee_genkt_algorithm, 0.4, -1.0) - jet_ptcut = 5 - elif config["dataset"] == "cms": - import fastjet - - jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4) - jet_ptcut = 3 - else: - raise Exception("not implemented") - - device_type = "cuda" if isinstance(rank, int) else "cpu" - with torch.autocast(device_type=device_type, dtype=dtype, enabled=device_type == "cuda"): - run_predictions( - world_size, - rank, - model, - test_loader, - sample, - outdir, - jetdef, - jet_ptcut=jet_ptcut, - jet_match_dr=0.1, - dir_name=testdir_name, - ) - if world_size > 1: - dist.barrier() # block until all workers finished executing run_predictions() - - if (rank == 0) or (rank == "cpu"): # make plots only on a single machine - if args.make_plots: + if config["test"]: + for sample in config["test_dataset"]: + run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtype) + # make plots only on a single machine + if (rank == 0) or (rank == "cpu"): + if config["make_plots"]: ntest_files = -1 - # ntest_files = 1000 - for sample in args.test_datasets: + for sample in config["test_dataset"]: _logger.info(f"Plotting distributions for {sample}") make_plots(outdir, sample, config["dataset"], testdir_name, ntest_files) @@ -772,11 +802,12 @@ def run(rank, world_size, config, args, outdir, logfile): dist.destroy_process_group() -def override_config(config, args): - """override config with values from argparse Namespace""" +def override_config(config: dict, args): + """override config dictionary with values from argparse Namespace""" for arg in vars(args): arg_value = getattr(args, arg) - if arg_value is not None: + if (arg_value is not None) and (arg in config): + _logger.info("overriding config item {}={} with {} from cmdline".format(arg, config[arg], arg_value)) config[arg] = arg_value if not (args.attention_type is None): @@ -786,14 +817,15 @@ def override_config(config, args): for model in ["gnn_lsh", "attention", "attention", "mamba"]: config["model"][model]["num_convs"] = args.num_convs - if len(args.test_datasets) == 0: - args.test_datasets = config["test_dataset"] + if len(args.test_datasets) != 0: + config["test_dataset"] = args.test_datasets return config -def device_agnostic_run(config, args, world_size, outdir): - if args.train: +# Run either on CPU, single GPU or multi-GPU using pytorch +def device_agnostic_run(config, world_size, outdir): + if config["train"]: logfile = f"{outdir}/train.log" else: logfile = f"{outdir}/test.log" @@ -812,16 +844,16 @@ def device_agnostic_run(config, args, world_size, outdir): mp.spawn( run, - args=(world_size, config, args, outdir, logfile), + args=(world_size, config, outdir, logfile), nprocs=world_size, join=True, ) elif world_size == 1: rank = 0 _logger.info(f"Will use single-gpu: {torch.cuda.get_device_name(rank)}", color="purple") - run(rank, world_size, config, args, outdir, logfile) + run(rank, world_size, config, outdir, logfile) else: rank = "cpu" _logger.info("Will use cpu", color="purple") - run(rank, world_size, config, args, outdir, logfile) + run(rank, world_size, config, outdir, logfile) diff --git a/mlpf/model/utils.py b/mlpf/model/utils.py index 3e7f3d8de..cc0567458 100644 --- a/mlpf/model/utils.py +++ b/mlpf/model/utils.py @@ -215,7 +215,7 @@ def unpack_predictions(preds): return ret -def save_HPs(args, mlpf, model_kwargs, outdir): +def save_HPs(config, mlpf, model_kwargs, outdir): """Simple function to store the model parameters and training hyperparameters.""" with open(f"{outdir}/model_kwargs.pkl", "wb") as f: # dump model architecture @@ -224,7 +224,9 @@ def save_HPs(args, mlpf, model_kwargs, outdir): num_mlpf_parameters = sum(p.numel() for p in mlpf.parameters() if p.requires_grad) with open(f"{outdir}/hyperparameters.json", "w") as fp: # dump hyperparameters - json.dump({**{"Num of mlpf parameters": num_mlpf_parameters}, **vars(args)}, fp) + outdict = {"num_mlpf_params": num_mlpf_parameters} + outdict.update(config) + json.dump(outdict, fp) def get_model_state_dict(model): diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py index 08a02b714..0f3d2117d 100644 --- a/mlpf/pipeline.py +++ b/mlpf/pipeline.py @@ -1,6 +1,5 @@ """ -Developing a PyTorch Geometric supervised training of MLPF using DistributedDataParallel. - +PyTorch supervised training of MLPF using DistributedDataParallel or Ray Train. Authors: Farouk Mokhtar, Joosep Pata, Eric Wulff """ @@ -8,8 +7,6 @@ import logging import os from pathlib import Path -import matplotlib -import numpy as np # comet needs to be imported before torch from comet_ml import OfflineExperiment, Experiment # noqa: F401, isort:skip @@ -110,10 +107,6 @@ def get_outdir(resume_training, load): def main(): - # Ignore divide by 0 errors - np.seterr(divide="ignore", invalid="ignore") - matplotlib.use("agg") - # https://github.com/pytorch/pytorch/issues/11201#issuecomment-895047235 import torch @@ -179,7 +172,7 @@ def main(): if args.ray_train: run_ray_training(config, args, outdir) else: - device_agnostic_run(config, args, world_size, outdir) + device_agnostic_run(config, world_size, outdir) if __name__ == "__main__": diff --git a/mlpf/plotting/plot_utils.py b/mlpf/plotting/plot_utils.py index 188eede05..8a79cf5e3 100644 --- a/mlpf/plotting/plot_utils.py +++ b/mlpf/plotting/plot_utils.py @@ -617,7 +617,13 @@ def plot_jet_ratio( if bins is None: bins = np.linspace(0, 5, 500) + ret_dict = {} p = med_iqr(yvals["jet_ratio_gen_to_target_pt"]) + ret_dict["jet_ratio_gen_to_target_pt"] = { + "med": p[0], + "iqr": p[1], + "match_frac": awkward.count(yvals["jet_ratio_gen_to_target_pt"]) / awkward.count(yvals["jets_gen_pt"]), + } plt.hist( yvals["jet_ratio_gen_to_target_pt"], bins=bins, @@ -627,6 +633,11 @@ def plot_jet_ratio( ) p = med_iqr(yvals["jet_ratio_gen_to_cand_pt"]) + ret_dict["jet_ratio_gen_to_cand_pt"] = { + "med": p[0], + "iqr": p[1], + "match_frac": awkward.count(yvals["jet_ratio_gen_to_cand_pt"]) / awkward.count(yvals["jets_gen_pt"]), + } plt.hist( yvals["jet_ratio_gen_to_cand_pt"], bins=bins, @@ -636,6 +647,11 @@ def plot_jet_ratio( ) p = med_iqr(yvals["jet_ratio_gen_to_pred_pt"]) + ret_dict["jet_ratio_gen_to_pred_pt"] = { + "med": p[0], + "iqr": p[1], + "match_frac": awkward.count(yvals["jet_ratio_gen_to_pred_pt"]) / awkward.count(yvals["jets_gen_pt"]), + } plt.hist( yvals["jet_ratio_gen_to_pred_pt"], bins=bins, @@ -671,6 +687,11 @@ def plot_jet_ratio( ax = plt.axes() p = med_iqr(yvals["jet_ratio_target_to_cand_pt"]) + ret_dict["jet_ratio_target_to_cand_pt"] = { + "med": p[0], + "iqr": p[1], + "match_frac": awkward.count(yvals["jet_ratio_target_to_cand_pt"]) / awkward.count(yvals["jets_target_pt"]), + } plt.plot([], []) plt.hist( yvals["jet_ratio_target_to_cand_pt"], @@ -680,6 +701,11 @@ def plot_jet_ratio( label="PF $({:.2f}\pm{:.2f})$".format(p[0], p[1]), ) p = med_iqr(yvals["jet_ratio_target_to_pred_pt"]) + ret_dict["jet_ratio_target_to_pred_pt"] = { + "med": p[0], + "iqr": p[1], + "match_frac": awkward.count(yvals["jet_ratio_target_to_pred_pt"]) / awkward.count(yvals["jets_target_pt"]), + } plt.hist( yvals["jet_ratio_target_to_pred_pt"], bins=bins, @@ -698,6 +724,7 @@ def plot_jet_ratio( cp_dir=cp_dir, comet_experiment=comet_experiment, ) + return ret_dict def plot_met(met_ratio, epoch=None, cp_dir=None, comet_experiment=None, title=None, sample=None, dataset=None): diff --git a/parameters/pytorch/pyg-clic.yaml b/parameters/pytorch/pyg-clic.yaml index 0ebc7ab62..a8416ecf3 100644 --- a/parameters/pytorch/pyg-clic.yaml +++ b/parameters/pytorch/pyg-clic.yaml @@ -1,5 +1,7 @@ -backend: pytorch - +train: yes +test: yes +make_plots: yes +comet: yes save_attention: yes dataset: clic sort_data: no diff --git a/parameters/pytorch/pyg-cms-ttbar-nopu.yaml b/parameters/pytorch/pyg-cms-ttbar-nopu.yaml index 030ffad66..029281d67 100644 --- a/parameters/pytorch/pyg-cms-ttbar-nopu.yaml +++ b/parameters/pytorch/pyg-cms-ttbar-nopu.yaml @@ -1,5 +1,7 @@ -backend: pytorch - +train: yes +test: yes +make_plots: yes +comet: no save_attention: no dataset: cms sort_data: yes @@ -126,3 +128,6 @@ test_dataset: cms_pf_ttbar_nopu: version: 2.5.0 splits: [1] + cms_pf_qcd_nopu: + version: 2.5.0 + splits: [1] diff --git a/parameters/pytorch/pyg-cms.yaml b/parameters/pytorch/pyg-cms.yaml index 30b3931a3..9b5e2cd9e 100644 --- a/parameters/pytorch/pyg-cms.yaml +++ b/parameters/pytorch/pyg-cms.yaml @@ -1,5 +1,7 @@ -backend: pytorch - +train: yes +test: yes +make_plots: yes +comet: yes save_attention: no dataset: cms sort_data: yes