diff --git a/src/move/tasks/encode_data.py b/src/move/tasks/encode_data.py index 3ce3cb5c..d90f76f1 100644 --- a/src/move/tasks/encode_data.py +++ b/src/move/tasks/encode_data.py @@ -55,18 +55,21 @@ def encode_data(config: DataConfig): # before preprocessing: fig = plot_value_distributions(values) fig_path = str( - output_path / "Value_distribution_{}_unprocessed.png".format(dataset_name) + output_path / f"Value_distribution_{dataset_name}_unprocessed.png" ) fig.savefig(fig_path) - # Plotting the value distribution for all continuous datasets: - fig = plot_value_distributions(values) - fig_path = str(output_path / f"Value_distribution_{dataset_name}.png") - fig.savefig(fig_path) - if scale: + logger.debug( + f"Scaling dataset: {dataset_name}, log2 transform: {input_config.log2}" + ) values, mask_1d = preprocessing.scale(values, input_config.log2) names = names[mask_1d] logger.debug(f"Columns with zero variance: {np.sum(~mask_1d)}") + # Plotting the value distribution for all continuous datasets: + fig = plot_value_distributions(values) + fig_path = str(output_path / f"Value_distribution_{dataset_name}.png") + fig.savefig(fig_path) + io.dump_names(interim_data_path / f"{dataset_name}.txt", names) np.save(interim_data_path / f"{dataset_name}.npy", values) diff --git a/src/move/training/training_loop.py b/src/move/training/training_loop.py index 2405d676..e81667b0 100644 --- a/src/move/training/training_loop.py +++ b/src/move/training/training_loop.py @@ -72,13 +72,10 @@ def training_loop( counter = 0 kld_weight = 0.0 - kld_rate = 20 / len(kld_warmup_steps) - kld_multiplier = 1 + kld_rate for epoch in range(1, num_epochs + 1): if epoch in kld_warmup_steps: - kld_weight = 0.05 * kld_multiplier - kld_multiplier += kld_rate + kld_weight += 1 / len(kld_warmup_steps) if epoch in batch_dilation_steps: train_dataloader = dilate_batch(train_dataloader) diff --git a/tutorial/config/data/random_continuous.yaml b/tutorial/config/data/random_continuous.yaml index 
373eacec..5071e1e4 100755 --- a/tutorial/config/data/random_continuous.yaml +++ b/tutorial/config/data/random_continuous.yaml @@ -22,4 +22,4 @@ continuous_inputs: # a list of continuous datasets - name: random.continuous.metagenomics # filename in raw_data_path log2: true # log2 transform data scale: true # scale data - + \ No newline at end of file diff --git a/tutorial/config/data/random_small.yaml b/tutorial/config/data/random_small.yaml index ac99e264..9dd59c4d 100644 --- a/tutorial/config/data/random_small.yaml +++ b/tutorial/config/data/random_small.yaml @@ -18,8 +18,8 @@ categorical_inputs: # a list of categorical datasets continuous_inputs: # a list of continuous datasets - name: random.small.proteomics # filename in raw_data_path - scale: true # scale data - log2: true # log2 transform data + log2: true # apply log2 before scaling + scale: true # scale data (z-score normalize) - name: random.small.metagenomics # filename in raw_data_path scale: true # scale data log2: true # log2 transform data