Skip to content

Commit

Permalink
Merge branch 'developer' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
enryH authored Aug 12, 2024
2 parents c5002cd + 8d0587a commit fe8c48b
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 13 deletions.
15 changes: 9 additions & 6 deletions src/move/tasks/encode_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,21 @@ def encode_data(config: DataConfig):
# before preprocessing:
fig = plot_value_distributions(values)
fig_path = str(
output_path / "Value_distribution_{}_unprocessed.png".format(dataset_name)
output_path / f"Value_distribution_{dataset_name}_unprocessed.png"
)
fig.savefig(fig_path)

# Plotting the value distribution for all continuous datasets:
fig = plot_value_distributions(values)
fig_path = str(output_path / f"Value_distribution_{dataset_name}.png")
fig.savefig(fig_path)

if scale:
logger.debug(
f"Scaling dataset: {dataset_name}, log2 transform: {input_config.log2}"
)
values, mask_1d = preprocessing.scale(values, input_config.log2)
names = names[mask_1d]
logger.debug(f"Columns with zero variance: {np.sum(~mask_1d)}")
# Plotting the value distribution for all continuous datasets:
fig = plot_value_distributions(values)
fig_path = str(output_path / f"Value_distribution_{dataset_name}.png")
fig.savefig(fig_path)

io.dump_names(interim_data_path / f"{dataset_name}.txt", names)
np.save(interim_data_path / f"{dataset_name}.npy", values)
5 changes: 1 addition & 4 deletions src/move/training/training_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,10 @@ def training_loop(
counter = 0

kld_weight = 0.0
kld_rate = 20 / len(kld_warmup_steps)
kld_multiplier = 1 + kld_rate

for epoch in range(1, num_epochs + 1):
if epoch in kld_warmup_steps:
kld_weight = 0.05 * kld_multiplier
kld_multiplier += kld_rate
kld_weight += 1 / len(kld_warmup_steps)

if epoch in batch_dilation_steps:
train_dataloader = dilate_batch(train_dataloader)
Expand Down
2 changes: 1 addition & 1 deletion tutorial/config/data/random_continuous.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ continuous_inputs: # a list of continuous datasets
- name: random.continuous.metagenomics # filename in raw_data_path
log2: true # log2 transform data
scale: true # scale data

4 changes: 2 additions & 2 deletions tutorial/config/data/random_small.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ categorical_inputs: # a list of categorical datasets

continuous_inputs: # a list of continuous datasets
- name: random.small.proteomics # filename in raw_data_path
scale: true # scale data
log2: true # log2 transform data
log2: true # apply log2 transform before scaling
scale: true # scale data (z-score normalize)
- name: random.small.metagenomics # filename in raw_data_path
scale: true # scale data
log2: true # log2 transform data

0 comments on commit fe8c48b

Please sign in to comment.