From 30bee605264c37931cabfc092551fbd704710b18 Mon Sep 17 00:00:00 2001 From: mpielies Date: Wed, 7 Aug 2024 17:30:04 +0200 Subject: [PATCH] :zap: :fire: Correct duplicated scaling (scale only once) Correct kld_warmup in training_loop.py: - kld_w as a fraction of beta, no dependence on num_latent --- src/move/tasks/encode_data.py | 6 ------ src/move/training/training_loop.py | 6 ++---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/move/tasks/encode_data.py b/src/move/tasks/encode_data.py index f0a83633..2d5076d0 100644 --- a/src/move/tasks/encode_data.py +++ b/src/move/tasks/encode_data.py @@ -59,12 +59,6 @@ def encode_data(config: DataConfig): ) fig.savefig(fig_path) - values, mask_1d = preprocessing.scale(values) - names = names[mask_1d] - logger.debug(f"Columns with zero variance: {np.sum(~mask_1d)}") - io.dump_names(interim_data_path / f"{dataset_name}.txt", names) - np.save(interim_data_path / f"{dataset_name}.npy", values) - # Plotting the value distribution for all continuous datasets: fig = plot_value_distributions(values) fig_path = str(output_path / f"Value_distribution_{dataset_name}.png") diff --git a/src/move/training/training_loop.py b/src/move/training/training_loop.py index 2405d676..64d6af11 100644 --- a/src/move/training/training_loop.py +++ b/src/move/training/training_loop.py @@ -72,13 +72,11 @@ def training_loop( counter = 0 kld_weight = 0.0 - kld_rate = 20 / len(kld_warmup_steps) - kld_multiplier = 1 + kld_rate + for epoch in range(1, num_epochs + 1): if epoch in kld_warmup_steps: - kld_weight = 0.05 * kld_multiplier - kld_multiplier += kld_rate + kld_weight += 1 / len(kld_warmup_steps) if epoch in batch_dilation_steps: train_dataloader = dilate_batch(train_dataloader)