diff --git a/src/move/tasks/identify_associations.py b/src/move/tasks/identify_associations.py index f997a104..ce346d30 100644 --- a/src/move/tasks/identify_associations.py +++ b/src/move/tasks/identify_associations.py @@ -484,9 +484,6 @@ def _bayes_approach_parallel( f"bayes_abs max is {bayes_max}. Bayes_abs min is {bayes_min}. " f"Bayes_abs shape is {bayes_abs_shape}" ) - # file_path = output_path / "bayes_abs_multi.tsv" - # logger.debug(f"Saving bayes_abs to {file_path}") - # np.savetxt(file_path, bayes_abs, delimiter='\t') bayes_p = np.exp(bayes_abs) / ( 1 + np.exp(bayes_abs) @@ -662,74 +659,32 @@ def _bayes_approach( mask = diff.mask # Extract the mask from the masked array # Replace masked values with a placeholder (e.g., np.nan) data[mask] = np.nan - # Define the file path to save the TSV file - # output_path = Path(config.data.results_path) / "identify_associations" - # file_path = output_path / "diff_normal.tsv" - # Save the data to the TSV fil - # logger.debug(f"Saving diff to {file_path}") - # np.savetxt(file_path, diff, delimiter="\t") - - # data = prob.data # Extract the data from the masked array - # mask = prob.mask # Extract the mask from the masked array - # Replace masked values with a placeholder (e.g., np.nan) - # data[mask] = np.nan - # Define the file path to save the TSV file - # output_path = Path(config.data.results_path) / "identify_associations" - # file_path = output_path / "prob_original_script.tsv" - # # Save the data to the TSV fil - # logger.debug(f"Saving prob to {file_path}") - # np.savetxt(file_path, prob, delimiter="\t") - # logger.debug(f"prob is {prob}") - - # file_path = output_path / "bayes_k_original_all.tsv" - # logger.debug(f"Saving bayes_k (not worker, all) to {file_path}") - # np.savetxt(file_path, bayes_k, delimiter="\t") + bayes_mask[bayes_mask != 0] = 1 bayes_mask = np.array(bayes_mask, dtype=bool) # Calculate Bayes probabilities bayes_abs = np.abs(bayes_k) - # file_path = output_path / "bayes_abs_original.tsv" - # logger.debug(f"Saving bayes_abs to {file_path}") - # np.savetxt(file_path, bayes_abs, delimiter="\t") bayes_p = np.exp(bayes_abs) / (1 + np.exp(bayes_abs)) # 2D: N x C - # file_path = output_path / "bayes_p_original.tsv" - # logger.debug(f"Saving bayes_p to {file_path}") - # np.savetxt(file_path, bayes_p, delimiter="\t") # ! Marc said this is needed to not identify the features with themselves bayes_abs[bayes_mask] = np.min( bayes_abs ) # Bring feature_i feature_i associations to minimum sort_ids = np.argsort(bayes_abs, axis=None)[::-1] # 1D: N x C - # file_path = output_path / "sort_ids_original_script.tsv" - # logger.debug(f"Saving sort_ids to {file_path}") - # np.savetxt(file_path, sort_ids, delimiter="\t") logger.debug(f"sort_ids are {sort_ids}") prob = np.take(bayes_p, sort_ids) # 1D: N x C - # file_path = output_path / "prob_original_final.tsv" - # logger.debug(f"Saving prob to {file_path}") - # np.savetxt(file_path, prob, delimiter="\t") logger.debug(f"Bayes proba range: [{prob[-1]:.3f} {prob[0]:.3f}]") # Sort Bayes bayes_k = np.take(bayes_k, sort_ids) # 1D: N x C - # file_path = output_path / "sorted_bayes_k_original_script.tsv" - # logger.debug(f"Saving sorted_bayes_k to {file_path}") - # np.savetxt(file_path, bayes_k, delimiter="\t") # Calculate FDR fdr = np.cumsum(1 - prob) / np.arange(1, prob.size + 1) # 1D - # file_path = output_path / "fdr_original_script.tsv" - # logger.debug(f"Saving fdr to {file_path}") - # np.savetxt(file_path, fdr, delimiter="\t") idx = np.argmin(np.abs(fdr - task_config.sig_threshold)) logger.debug(f"Index is {idx}") - # file_path = output_path / "idx_original_script.tsv" - # logger.debug(f"Saving idx to {file_path}") - # np.savetxt(file_path, idx, delimiter='\t') logger.debug(f"FDR range: [{fdr[0]:.3f} {fdr[-1]:.3f}]") return sort_ids[:idx], prob[:idx], fdr[:idx], bayes_k[:idx]