From 84c31f0c895b283e4edaf70d3c8f49e690b29d48 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Wed, 20 Dec 2023 11:31:38 -0500 Subject: [PATCH 01/11] classing vanilla discent algorithm --- NAMESPACE | 2 +- R/RcppExports.R | 4 +- R/main_pso.R | 235 ++++++++++++++++++ R/{main.R => main_vanilla.R} | 61 ++--- ...f.Rd => deme_inbreeding_spcoef_vanilla.Rd} | 8 +- src/RcppExports.cpp | 12 +- src/main_vanilla.cpp | 131 ++++++++++ src/{main.h => main_vanilla.h} | 2 +- src/{main.cpp => particle.cpp} | 109 ++------ src/particle.h | 50 ++++ tests/testthat/test-modelruns.R | 4 +- 11 files changed, 472 insertions(+), 146 deletions(-) create mode 100644 R/main_pso.R rename R/{main.R => main_vanilla.R} (89%) rename man/{deme_inbreeding_spcoef.Rd => deme_inbreeding_spcoef_vanilla.Rd} (95%) create mode 100644 src/main_vanilla.cpp rename src/{main.h => main_vanilla.h} (65%) rename src/{main.cpp => particle.cpp} (56%) create mode 100644 src/particle.h diff --git a/NAMESPACE b/NAMESPACE index 6a477a1..5af1ea8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,7 @@ S3method(print,DISCresult) S3method(summary,DISCresult) S3method(tidyout,DISCresult) export("%>%") -export(deme_inbreeding_spcoef) +export(deme_inbreeding_spcoef_vanilla) export(is.DISCresult) export(tidyout) importFrom(Rcpp,sourceCpp) diff --git a/R/RcppExports.R b/R/RcppExports.R index feb8a41..da6c7d5 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,7 +1,7 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -deme_inbreeding_coef_cpp <- function(args, args_functions, args_progress) { - .Call(`_discent_deme_inbreeding_coef_cpp`, args, args_functions, args_progress) +vanilla_deme_inbreeding_coef_cpp <- function(args) { + .Call(`_discent_vanilla_deme_inbreeding_coef_cpp`, args) } diff --git a/R/main_pso.R b/R/main_pso.R new file mode 100644 index 0000000..d80d0e1 --- /dev/null +++ b/R/main_pso.R @@ -0,0 +1,235 @@ +#' @title Identify Deme Inbreeding Spatial Coefficients in Continuous Space with +#' Particle Swarm Meta-Optimization +#' @inheritParams deme_inbreeding_spcoef_vanilla +#' @param +#' @inherit description deme_inbreeding_spcoef_vanilla +#' @details The gen.geo.dist dataframe must be named with the following columns: +#' "smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to: +#' Sample 1 Name; Sample 2 Name; Sample 1 Location; Sample 2 Location; +#' Pairwise Genetic Distance; Pairwise Geographpic Distance. Note, the order of the +#' columns do not matter but the names of the columns must match. +#' @details The start_params vector names must match the cluster names (i.e. clusters must be +#' have a name that we can match on for the starting relatedness paramerts). In addition, +#' you must provide a start parameter for "m". +#' @details We have implemented coding decisions to not allow the "f" inbreeding coefficients to be negative by using a +#' logit transformation internally in the code. +#' @details Gradient descent is performed using the Adam (adaptive moment estimation) optimization approach. Default values +#' for moment decay rates, epsilon, and learning rates are taken from \cite{DP Kingma, 2014}. +#' @details The Particle Swarm Optimization is a meta-optimization (meta-heuristic) approach that attempts to find optimal +#' start parameters for the user to avoid a grid-search approach as would be best practices for fine-tuning the gradient descent +#' approach. +#' @export + +deme_inbreeding_spcoef_pso <- function(discdat, + start_params = c(), + f_learningrate = 1e-3, + m_learningrate = 1e-6, + m_lowerbound = 0, + m_upperbound = Inf, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + thin = 1, + normalize_geodist = TRUE, + report_progress = TRUE, + return_verbose = FALSE){ + + #.............................................................. + # Assertions & Catches + #.............................................................. + if (!all(colnames(discdat) %in% c("smpl1", "smpl2", "deme1", "deme2", "gendist", "geodist"))) { + stop("The discdat dataframe must contain columns with names: smpl1, smpl2, deme1, deme2, gendist, geodist") + } + # make sure correct order + for (i in 1:6) { + if (colnames(discdat)[i] != c("smpl1", "smpl2", "deme1", "deme2", "gendist", "geodist")[i]) { + stop("The discdat dataframe must contain columns with names in the exact order of: smpl1, smpl2, deme1, deme2, gendist, geodist") + } + } + + locats <- names(start_params)[!grepl("^m$", names(start_params))] + if (!all(unique(c(discdat$deme1, discdat$deme2)) %in% locats)) { + stop("You have cluster names in your discdat dataframe that are not included in your start parameters") + } + if (!any(grepl("^m$", names(start_params)))) { + stop("A m start parameter must be provided (i.e. there must be a vector element name m in your start parameter vector)") + } + assert_dataframe(discdat) + assert_vector(start_params) + assert_length(start_params, n = (length(unique(c(discdat$deme1, discdat$deme2))) + 1), + message = "Start params length not correct. You must specificy a start parameter + for each deme and the migration parameter, m") + sapply(start_params[!grepl("^m$", names(start_params))], assert_bounded, left = 0, right = 1, inclusive_left = TRUE, inclusive_right = TRUE) + assert_single_numeric(f_learningrate) + assert_single_numeric(m_learningrate) + assert_single_numeric(b1) + assert_single_numeric(b2) + assert_single_numeric(e) + assert_single_numeric(m_lowerbound) + assert_single_numeric(m_upperbound) + assert_gr(m_upperbound, m_lowerbound) + assert_single_int(steps) + assert_single_int(thin) + assert_greq(thin, 1, message = "Must be at least 1") + assert_single_logical(report_progress) + assert_single_logical(normalize_geodist) + + # no missing + if(sum(is.na(discdat)) != 0) { + stop("discdat dataframe cannot have missing values") + } + + #...................... + # check for self comparisons + #...................... + sapply(discdat$geodist, assert_neq, y = 0, + message = "No within-deme sample comparisons allowed. Geodistance should not be 0") + mapply(assert_neq, discdat$deme1, discdat$deme2, + message = "No within-deme sample comparisons allowed. Locat names should not be the same") + + #............................................................ + # R manipulations before C++ + #........................................................... + # use efficient R functions to group pairs and wrangle data for faster C++ manipulation + # get deme names and lift over sorted names for i and j + demes <- sort(unique(c(discdat$deme1, discdat$deme2))) + keyi <- data.frame(deme1 = demes, i = 1:length(demes)) + keyj <- data.frame(deme2 = demes, j = 1:length(demes)) + + # transform data w/ logit + discdat <- discdat %>% + dplyr::mutate(gendist = ifelse(gendist > 0.999, 0.999, + ifelse(gendist < 0.001, 0.001, + gendist))) %>% # reasonable bounds on logit + dplyr::mutate(gendist = logit(gendist)) + # transform start parameters w/ logit + start_params[names(start_params) != "m"] <- logit(start_params[names(start_params) != "m"]) + + + # get genetic data by pairs through efficient nest + gendist <- discdat %>% + expand_pairwise(.) %>% # get all pairwise for full matrix + dplyr::select(c("deme1", "deme2", "gendist")) %>% + dplyr::group_by_at(c("deme1", "deme2")) %>% + tidyr::nest(.) %>% + dplyr::left_join(., keyi, by = "deme1") %>% + dplyr::left_join(., keyj, by = "deme2") %>% + dplyr::arrange_at(c("i", "j")) + + + # put gendist into an array + # NB we are filling an array with dimension of size: + # locations x locations x max K-pairs + # Will fill in w/ -1 to indicate missing/skip where demes do not + # have max pairs. + # This approach wastes memory but allows for a structured array + # versus a list with varying sizes (and eventually a more efficient for-loop) + n_Kpairmax <- max(purrr::map_dbl(gendist$data, nrow)) + gendist_arr <- array(data = -1, dim = c(length(locats), length(locats), n_Kpairmax)) + for (i in 1:nrow(gendist)) { + gendist_arr[gendist$i[i], gendist$j[i], 1:nrow(gendist$data[[i]])] <- unname(unlist(gendist$data[[i]])) + } + + # normalize geodistances per user; NB have already removed self comparisons, so no 0s + if (normalize_geodist) { + mingeodist <- min(discdat$geodist) + maxgeodist <- max(discdat$geodist) + discdat <- discdat %>% + dplyr::mutate(geodist = (geodist - mingeodist)/(maxgeodist - mingeodist)) + } + # catch accidental bad M start if user is standardizing distances + if (normalize_geodist & (start_params[names(start_params) == "m"] > 500) ) { + warning("You have selected to normalize geographic distances, but your + migration rate start parameter is large. Please consider placing it on a + similar scale to your normalized geographic distances for stability.") + } + + + # put geo information into distance matrix + geodist <- discdat %>% + expand_pairwise(.) %>% # get all pairwise for full matrix + dplyr::select(c("deme1", "deme2", "geodist")) %>% + dplyr::group_by_at(c("deme1", "deme2")) %>% + tidyr::nest(.) %>% + dplyr::left_join(., keyi, by = "deme1") %>% + dplyr::left_join(., keyj, by = "deme2") %>% + dplyr::arrange_at(c("i", "j")) + + # simplify geodistance data storage + geodist$data <- purrr::map_dbl(geodist$data, function(x){ + if (length(unique(unlist(x))) != 1) { + stop("deme1 and deme2 have different geodistances among P-sample combinations. Distances should all be same among samples") + } + return( unique(unlist(x)) ) # all same by unique + } + ) + + # upper tri + geodist_mat <- matrix(data = -1, nrow = length(locats), ncol = length(locats)) + for (i in 1:nrow(geodist)) { + geodist_mat[geodist$i[i], geodist$j[i]] <- geodist$data[i] + } + diag(geodist_mat) <- 0 + + #.............................................................. + # run efficient C++ function + #.............................................................. + + args <- list(gendist = as.vector(gendist_arr), + geodist = as.vector(geodist_mat), + fvec = unname( start_params[!grepl("^m$", names(start_params))] ), + n_Kpairmax = n_Kpairmax, + m = unname(start_params["m"]), + f_learningrate = f_learningrate, + m_learningrate = m_learningrate, + m_lowerbound = m_lowerbound, + m_upperbound = m_upperbound, + b1 = b1, + b2 = b2, + e = e, + steps = steps, + report_progress = report_progress + ) + + # run + output_raw <- vanilla_deme_inbreeding_coef_cpp(args) + + + # set up thinning + thin_its <- seq(1, steps, by = thin) + thin_its <- unique(c(thin_its, steps)) # always include last iteration + + # process output + colnames(keyi) <- c("Deme", "key") + if (return_verbose) { + output <- list( + deme_key = keyi, + m_run = output_raw$m_run[thin_its], + fi_run = expit(do.call("rbind", output_raw$fi_run))[thin_its, ], + m_gradtraj = output_raw$m_gradtraj[thin_its], + fi_gradtraj = do.call("rbind", output_raw$fi_gradtraj)[thin_its, ], + m_1moment = output_raw$m_firstmoment[thin_its], + m_2moment = output_raw$m_secondmoment[thin_its], + fi_1moment = do.call("rbind", output_raw$fi_firstmoment)[thin_its, ], + fi_2moment = do.call("rbind", output_raw$fi_secondmoment)[thin_its, ], + cost = output_raw$cost[thin_its], + Final_Fis = expit(output_raw$Final_Fis), + Final_m = output_raw$Final_m + ) + + } else { + output <- list( + deme_key = keyi, + cost = output_raw$cost[thin_its], + m_run = output_raw$m_run[thin_its], + fi_run = expit(do.call("rbind", output_raw$fi_run))[thin_its, ], + Final_Fis = expit(output_raw$Final_Fis), + Final_m = output_raw$Final_m) + } + + # add S3 class structure + attr(output, "class") <- "psoDISCresult" + return(output) +} + diff --git a/R/main.R b/R/main_vanilla.R similarity index 89% rename from R/main.R rename to R/main_vanilla.R index 6ff4a2f..b9e48ee 100644 --- a/R/main.R +++ b/R/main_vanilla.R @@ -1,4 +1,4 @@ -#' @title Identify Deme Inbreeding Spatial Coefficients in Continuous Space +#' @title Identify Deme Inbreeding Spatial Coefficients in Continuous Space (Vanilla) #' @param discdat dataframe; The genetic-geographic data by deme (K) #' @param start_params named double vector; vector of start parameters. #' @param f_learningrate double; alpha parameter for how much each "step" is weighted in the gradient descent for inbreeding coefficients @@ -14,6 +14,12 @@ #' @param report_progress boolean; whether or not a progress bar should be shown as you iterate through steps #' @param return_verbose boolean; whether the inbreeding coefficients and migration rate should be returned for every iteration or #' only for the final iteration. User will typically not want to store every iteration, which can be memory intensive +#' @description The purpose of this statistic is to identify an inbreeding coefficient, or degree of +#' relatedness, for a given location in space. We assume that locations in spaces can be +#' represented as "demes," such that multiple individuals live in the same deme +#' (i.e. samples are sourced from the same location). The expected pairwise relationship +#' between two individuals, or samples, is dependent on the each sample's deme's inbreeding +#' coefficient and the geographic distance between the demes. The program assumes a symmetric distance matrix. #' @details The gen.geo.dist dataframe must be named with the following columns: #' "smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to: #' Sample 1 Name; Sample 2 Name; Sample 1 Location; Sample 2 Location; @@ -22,32 +28,27 @@ #' @details The start_params vector names must match the cluster names (i.e. clusters must be #' have a name that we can match on for the starting relatedness paramerts). In addition, #' you must provide a start parameter for "m". -#' @description The purpose of this statistic is to identify an inbreeding coefficient, or degree of -#' relatedness, for a given location in space. We assume that locations in spaces can be -#' represented as "demes," such that multiple individuals live in the same deme -#' (i.e. samples are sourced from the same location). The expected pairwise relationship -#' between two individuals, or samples, is dependent on the each sample's deme's inbreeding -#' coefficient and the geographic distance between the demes. The program assumes a symmetric distance matrix. #' @details Note: We have implemented coding decisions to not allow the "f" inbreeding coefficients to be negative by using a #' logit transformation internally in the code. #' @details Gradient descent is performed using the Adam (adaptive moment estimation) optimization approach. Default values -#' for moment decay rates, epsilon, and learning rates are taken from DP Kingma, 2014. +#' for moment decay rates, epsilon, and learning rates are taken from \cite{DP Kingma, 2014}. +#' @details The "vanilla" method does not attempt to optimize start parameters. #' @export -deme_inbreeding_spcoef <- function(discdat, - start_params = c(), - f_learningrate = 1e-3, - m_learningrate = 1e-6, - m_lowerbound = 0, - m_upperbound = Inf, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e3, - thin = 1, - normalize_geodist = TRUE, - report_progress = TRUE, - return_verbose = FALSE){ +deme_inbreeding_spcoef_vanilla <- function(discdat, + start_params = c(), + f_learningrate = 1e-3, + m_learningrate = 1e-6, + m_lowerbound = 0, + m_upperbound = Inf, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + thin = 1, + normalize_geodist = TRUE, + report_progress = TRUE, + return_verbose = FALSE){ #.............................................................. # Assertions & Catches @@ -102,13 +103,6 @@ deme_inbreeding_spcoef <- function(discdat, mapply(assert_neq, discdat$deme1, discdat$deme2, message = "No within-deme sample comparisons allowed. Locat names should not be the same") - #.............................................................. - # setup and create progress bars - #.............................................................. - pb <- utils::txtProgressBar(min = 0, max = steps-1, initial = NA, style = 3) - args_progress <- list(pb = pb) - - #............................................................ # R manipulations before C++ #........................................................... @@ -213,11 +207,8 @@ deme_inbreeding_spcoef <- function(discdat, report_progress = report_progress ) - # create progress bars - pb <- txtProgressBar(min = 0, max = steps, initial = NA, style = 3) - args_progress <- list(pb = pb) - args_functions <- list(update_progress = update_progress) - output_raw <- deme_inbreeding_coef_cpp(args, args_functions, args_progress) + # run + output_raw <- vanilla_deme_inbreeding_coef_cpp(args) # set up thinning @@ -253,7 +244,7 @@ deme_inbreeding_spcoef <- function(discdat, } # add S3 class structure - attr(output, "class") <- "DISCresult" + attr(output, "class") <- "vanillaDISCresult" return(output) } diff --git a/man/deme_inbreeding_spcoef.Rd b/man/deme_inbreeding_spcoef_vanilla.Rd similarity index 95% rename from man/deme_inbreeding_spcoef.Rd rename to man/deme_inbreeding_spcoef_vanilla.Rd index 45003e0..bbee678 100644 --- a/man/deme_inbreeding_spcoef.Rd +++ b/man/deme_inbreeding_spcoef_vanilla.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/main.R -\name{deme_inbreeding_spcoef} -\alias{deme_inbreeding_spcoef} +% Please edit documentation in R/main_vanilla.R +\name{deme_inbreeding_spcoef_vanilla} +\alias{deme_inbreeding_spcoef_vanilla} \title{Identify Deme Inbreeding Spatial Coefficients in Continuous Space} \usage{ -deme_inbreeding_spcoef( +deme_inbreeding_spcoef_vanilla( discdat, start_params = c(), f_learningrate = 0.001, diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 6b8a4d8..084d728 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -10,22 +10,20 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif -// deme_inbreeding_coef_cpp -Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, Rcpp::List args_progress); -RcppExport SEXP _discent_deme_inbreeding_coef_cpp(SEXP argsSEXP, SEXP args_functionsSEXP, SEXP args_progressSEXP) { +// vanilla_deme_inbreeding_coef_cpp +Rcpp::List vanilla_deme_inbreeding_coef_cpp(Rcpp::List args); +RcppExport SEXP _discent_vanilla_deme_inbreeding_coef_cpp(SEXP argsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::List >::type args(argsSEXP); - Rcpp::traits::input_parameter< Rcpp::List >::type args_functions(args_functionsSEXP); - Rcpp::traits::input_parameter< Rcpp::List >::type args_progress(args_progressSEXP); - rcpp_result_gen = Rcpp::wrap(deme_inbreeding_coef_cpp(args, args_functions, args_progress)); + rcpp_result_gen = Rcpp::wrap(vanilla_deme_inbreeding_coef_cpp(args)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_discent_deme_inbreeding_coef_cpp", (DL_FUNC) &_discent_deme_inbreeding_coef_cpp, 3}, + {"_discent_vanilla_deme_inbreeding_coef_cpp", (DL_FUNC) &_discent_vanilla_deme_inbreeding_coef_cpp, 1}, {NULL, NULL, 0} }; diff --git a/src/main_vanilla.cpp b/src/main_vanilla.cpp new file mode 100644 index 0000000..048e676 --- /dev/null +++ b/src/main_vanilla.cpp @@ -0,0 +1,131 @@ +#include "main_vanilla.h" +#include "misc_v15.h" +#include "particle.h" +using namespace std; + +//------------------------------------------------ +// Perform gradient descent to calculate deme Fi's +// using vanilla DISCent (ADAM grad descent) +//------------------------------------------------ +// [[Rcpp::export]] +Rcpp::List vanilla_deme_inbreeding_coef_cpp(Rcpp::List args) { + + //------------------------------- + // unpack inputs + //------------------------------- + // extract proposed proposed Inb. Coeff. (Fis) for each K (deme) + vector fvec = rcpp_to_vector_double(args["fvec"]); + // extract proposed global M of migration + double m = rcpp_to_double(args["m"]); + // get dims + int n_Demes = fvec.size(); + int n_Kpairmax = rcpp_to_int(args["n_Kpairmax"]); + + // observed pairwise sample genetic distances + vector gendist = rcpp_to_vector_double(args["gendist"]); + // recast to array from vector (perserving structure) + vector>> gendist_arr(n_Demes, vector>(n_Demes, vector(n_Kpairmax))); + int geniter = 0; + for (int i = 0; i < n_Kpairmax; i++) { + for (int j = 0; j < n_Demes; j++) { + for (int k = 0; k < n_Demes; k++) { + gendist_arr[k][j][i] = gendist[geniter]; + geniter++; + } + } + } + + // observed geo data + vector geodist = rcpp_to_vector_double(args["geodist"]); // pairwise sample geo distances + vector> geodist_mat(n_Demes, vector(n_Demes)); + // recast + int geoiter = 0; + for (int i = 0; i < n_Demes; i++) { + for (int j = 0; j < n_Demes; j++) { + geodist_mat[j][i] = geodist[geoiter]; + geoiter++; + } + } + + // items for grad descent + int steps = rcpp_to_int(args["steps"]); + double f_learningrate = rcpp_to_double(args["f_learningrate"]); + double m_learningrate = rcpp_to_double(args["m_learningrate"]); + double m_lowerbound = rcpp_to_double(args["m_lowerbound"]); + double m_upperbound = rcpp_to_double(args["m_upperbound"]); + double b1 = rcpp_to_double(args["b1"]); + double b2 = rcpp_to_double(args["b2"]); + double e = rcpp_to_double(args["e"]); + bool report_progress = rcpp_to_bool(args["report_progress"]); + + //------------------------------- + // initialize and fill in params + //------------------------------- + Particle discParticle; + discParticle.steps = steps; + discParticle.n_Demes = n_Demes; + discParticle.n_Kpairmax = n_Kpairmax; + discParticle.f_learningrate = f_learningrate; + discParticle.m_learningrate = m_learningrate; + discParticle.m_lowerbound = m_lowerbound; + discParticle.m_upperbound = m_upperbound; + discParticle.b1 = b1; + discParticle.b2 = b2; + discParticle.e = e; + discParticle.m = m; + discParticle.fvec = fvec; + discParticle.gendist_arr = gendist_arr; + discParticle.geodist_mat = geodist_mat; + //------------------------------- + // storage and ADAM items + //------------------------------- + vector cost(steps); + vector m_run(steps); + vector> fi_run(steps, vector(n_Demes)); + vector m_gradtraj(steps); // m gradient storage + vector> fi_gradtraj(steps, vector(n_Demes)); // fi storage gradient + // moments for Adam + // https://arxiv.org/pdf/1412.6980 + vector m1t_m(steps); // first moment + vector v2t_m(steps); // second moment (v) + double m1t_m_hat; // first moment bias corrected + double v2t_m_hat; // second moment (v) bias corrected + vector> m1t_fi(steps, vector(n_Demes)); // first moment + vector> v2t_fi(steps, vector(n_Demes)); // second moment (v) + vector m1t_fi_hat(n_Demes); // first moment bias corrected + vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected + // fill in for class + discParticle.cost = cost; + discParticle.m_run = m_run; + discParticle.fi_run = fi_run; + discParticle.m_gradtraj = m_gradtraj; + discParticle.fi_gradtraj = fi_gradtraj; + discParticle.m1t_m = m1t_m; + discParticle.v2t_m = v2t_m; + discParticle.m1t_m_hat = m1t_m_hat; + discParticle.v2t_m_hat = v2t_m_hat; + discParticle.m1t_fi = m1t_fi; + discParticle.v2t_fi = v2t_fi; + discParticle.m1t_fi_hat = m1t_fi_hat; + discParticle.v2t_fi_hat = v2t_fi_hat; + //------------------------------- + // run GD + //------------------------------- + discParticle.performGD(report_progress); + + //------------------------------- + // Out: return as Rcpp object + //------------------------------- + return Rcpp::List::create(Rcpp::Named("m_run") = discParticle.m_run, + Rcpp::Named("fi_run") = discParticle.fi_run, + Rcpp::Named("m_gradtraj") = discParticle.m_gradtraj, + Rcpp::Named("fi_gradtraj") = discParticle.fi_gradtraj, + Rcpp::Named("m_firstmoment") = discParticle.m1t_m, + Rcpp::Named("m_secondmoment") = discParticle.v2t_m, + Rcpp::Named("fi_firstmoment") = discParticle.m1t_fi, + Rcpp::Named("fi_secondmoment") = discParticle.v2t_fi, + Rcpp::Named("cost") = discParticle.cost, + Rcpp::Named("Final_Fis") = discParticle.fvec, + Rcpp::Named("Final_m") = discParticle.m); + +} diff --git a/src/main.h b/src/main_vanilla.h similarity index 65% rename from src/main.h rename to src/main_vanilla.h index 5e36c4d..4543efd 100644 --- a/src/main.h +++ b/src/main_vanilla.h @@ -3,4 +3,4 @@ //------------------------------------------------ // estimate deme inbreeding spatial coefficient -Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args); +Rcpp::List vanilla_deme_inbreeding_coef_cpp(Rcpp::List args); diff --git a/src/main.cpp b/src/particle.cpp similarity index 56% rename from src/main.cpp rename to src/particle.cpp index b4bd509..98fa1ce 100644 --- a/src/main.cpp +++ b/src/particle.cpp @@ -1,78 +1,12 @@ -#include "main.h" + +#include "particle.h" #include "misc_v15.h" using namespace std; -//------------------------------------------------ -// Perform gradient descent to calculate deme Fi's -// [[Rcpp::export]] -Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, Rcpp::List args_progress) { - // overflow cost parameter - const double OVERFLO_DOUBLE = DBL_MAX/1000.0; - // extract proposed proposed Inb. Coeff. (Fis) for each K (deme) - vector fvec = rcpp_to_vector_double(args["fvec"]); - // extract proposed global M of migration - double m = rcpp_to_double(args["m"]); - // get dims - int n_Demes = fvec.size(); - int n_Kpairmax = rcpp_to_int(args["n_Kpairmax"]); - - // observed pairwise sample genetic distances - vector gendist = rcpp_to_vector_double(args["gendist"]); - // recast to array from vector (perserving structure) - vector>> gendist_arr(n_Demes, vector>(n_Demes, vector(n_Kpairmax))); - int geniter = 0; - for (int i = 0; i < n_Kpairmax; i++) { - for (int j = 0; j < n_Demes; j++) { - for (int k = 0; k < n_Demes; k++) { - gendist_arr[k][j][i] = gendist[geniter]; - geniter++; - } - } - } - - // observed geo data - vector geodist = rcpp_to_vector_double(args["geodist"]); // pairwise sample geo distances - vector> geodist_mat(n_Demes, vector(n_Demes)); - // recast - int geoiter = 0; - for (int i = 0; i < n_Demes; i++) { - for (int j = 0; j < n_Demes; j++) { - geodist_mat[j][i] = geodist[geoiter]; - geoiter++; - } - } - - // items for grad descent - int steps = rcpp_to_int(args["steps"]); - double f_learningrate = rcpp_to_double(args["f_learningrate"]); - double m_learningrate = rcpp_to_double(args["m_learningrate"]); - double m_lowerbound = rcpp_to_double(args["m_lowerbound"]); - double m_upperbound = rcpp_to_double(args["m_upperbound"]); - double b1 = rcpp_to_double(args["b1"]); - double b2 = rcpp_to_double(args["b2"]); - double e = rcpp_to_double(args["e"]); - bool report_progress = rcpp_to_bool(args["report_progress"]); - Rcpp::Function update_progress = args_functions["update_progress"]; - - // items of interest to keep track of - vector cost(steps); - vector m_run(steps); - vector> fi_run(steps, vector(n_Demes)); - vector m_gradtraj(steps); // m gradient storage - vector> fi_gradtraj(steps, vector(n_Demes)); // fi storage gradient - - // moments for Adam - // https://arxiv.org/pdf/1412.6980 - vector m1t_m(steps); // first moment - vector v2t_m(steps); // second moment (v) - double m1t_m_hat; // first moment bias corrected - double v2t_m_hat; // second moment (v) bias corrected - - vector> m1t_fi(steps, vector(n_Demes)); // first moment - vector> v2t_fi(steps, vector(n_Demes)); // second moment (v) - vector m1t_fi_hat(n_Demes); // first moment bias corrected - vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected +//------------------------------------------------ +// run ADAM gradient descent host +void Particle::performGD(bool report_progress) { //------------------------------- // initialize storage vectors @@ -106,8 +40,6 @@ Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, cost[0] = OVERFLO_DOUBLE; } - - //------------------------------- // start grad descent by looping through steps //------------------------------- @@ -115,10 +47,11 @@ Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, // report progress if (report_progress) { - update_progress(args_progress, "pb", step, steps); + if (((step+1) % 100)==0) { + print(" step",step+1); + } } - //------------------------------- // F gradient // N.B. needs to be complete row (not just triangle) in order for all @@ -141,7 +74,6 @@ Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, } } - //------------------------------- // M gradient // N.B. all terms included here, easier sum -- longer partial derivative @@ -163,8 +95,6 @@ Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, } } - - //------------------------------- // Update F and M //------------------------------- @@ -193,7 +123,7 @@ Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, // calculate and apply the update for M m = m - m_learningrate * (m1t_m_hat / (sqrt(v2t_m_hat) + e)); // vanilla GD - // m = m - m_learningrate * mgrad; + // m = m - m_learningrate * mgrad; // check bounds on m // will reflect with normal based on magnitude + standard normal sd it is off to proper interval; NB also want to bound m so that it can only explore distance isolation (repulsion versus attraction) if (m < m_lowerbound) { @@ -223,21 +153,12 @@ Rcpp::List deme_inbreeding_coef_cpp(Rcpp::List args, Rcpp::List args_functions, cost[step] = OVERFLO_DOUBLE; } } // end steps +} - //------------------------------- - // Out - //------------------------------- - // return as Rcpp object - return Rcpp::List::create(Rcpp::Named("m_run") = m_run, - Rcpp::Named("fi_run") = fi_run, - Rcpp::Named("m_gradtraj") = m_gradtraj, - Rcpp::Named("fi_gradtraj") = fi_gradtraj, - Rcpp::Named("m_firstmoment") = m1t_m, - Rcpp::Named("m_secondmoment") = v2t_m, - Rcpp::Named("fi_firstmoment") = m1t_fi, - Rcpp::Named("fi_secondmoment") = v2t_fi, - Rcpp::Named("cost") = cost, - Rcpp::Named("Final_Fis") = fvec, - Rcpp::Named("Final_m") = m); + +//------------------------------------------------ +// print +void Particle::print_particle() { + print("discParticle"); } diff --git a/src/particle.h b/src/particle.h new file mode 100644 index 0000000..071ab5b --- /dev/null +++ b/src/particle.h @@ -0,0 +1,50 @@ + +#pragma once +#include + +//------------------------------------------------ +// class defining a human host +class Particle { + +public: + + // PUBLIC OBJECTS + // params + int steps; + double f_learningrate; + double m_learningrate; + double m_lowerbound; + double m_upperbound; + double b1; + double b2; + double e; + double OVERFLO_DOUBLE; + // data + int n_Demes; + int n_Kpairmax; + double m; + std::vector fvec; + std::vector>> gendist_arr; + std::vector> geodist_mat; + // storage + std::vector cost; + std::vector m_run; + std::vector> fi_run; + std::vector m_gradtraj; + std::vector> fi_gradtraj; + std::vector m1t_m; + std::vector v2t_m; + double m1t_m_hat; + double v2t_m_hat; + std::vector> m1t_fi; + std::vector> v2t_fi; + std::vector m1t_fi_hat; + std::vector v2t_fi_hat; + + // PUBLIC FUNCTIONS + // constructors + Particle() {}; + // member functions + void performGD(bool report_progress); + void print_particle(); +}; diff --git a/tests/testthat/test-modelruns.R b/tests/testthat/test-modelruns.R index c7f3451..5b7d303 100644 --- a/tests/testthat/test-modelruns.R +++ b/tests/testthat/test-modelruns.R @@ -10,7 +10,7 @@ test_that("model runs", { # run model inputdisc <- dat %>% dplyr::filter(deme1 != deme2) - mod <- deme_inbreeding_spcoef(discdat = inputdisc, + mod <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, start_params = our_start_params, f_learningrate = 1e-5, m_learningrate = 1e-1, @@ -19,7 +19,7 @@ test_that("model runs", { e = 1e-8, steps = 1e3, normalize_geodist = T, - report_progress = T, + report_progress = F, return_verbose = F) testthat::expect_length(mod, 6) }) From 905666970a7db07fb9eef4e7c368b1e4d2bfed09 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Thu, 21 Dec 2023 09:12:27 -0500 Subject: [PATCH 02/11] vanilla method test and docs --- man/deme_inbreeding_spcoef_vanilla.Rd | 6 ++++-- src/main_vanilla.cpp | 4 +++- .../testthat/{test-modelruns.R => test-vanillamodelruns.R} | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) rename tests/testthat/{test-modelruns.R => test-vanillamodelruns.R} (96%) diff --git a/man/deme_inbreeding_spcoef_vanilla.Rd b/man/deme_inbreeding_spcoef_vanilla.Rd index bbee678..f8f33d0 100644 --- a/man/deme_inbreeding_spcoef_vanilla.Rd +++ b/man/deme_inbreeding_spcoef_vanilla.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/main_vanilla.R \name{deme_inbreeding_spcoef_vanilla} \alias{deme_inbreeding_spcoef_vanilla} -\title{Identify Deme Inbreeding Spatial Coefficients in Continuous Space} +\title{Identify Deme Inbreeding Spatial Coefficients in Continuous Space (Vanilla)} \usage{ deme_inbreeding_spcoef_vanilla( discdat, @@ -74,5 +74,7 @@ Note: We have implemented coding decisions to not allow the "f" inbreeding coeff logit transformation internally in the code. Gradient descent is performed using the Adam (adaptive moment estimation) optimization approach. Default values -for moment decay rates, epsilon, and learning rates are taken from DP Kingma, 2014. +for moment decay rates, epsilon, and learning rates are taken from \cite{DP Kingma, 2014}. + +The "vanilla" method does not attempt to optimize start parameters. } diff --git a/src/main_vanilla.cpp b/src/main_vanilla.cpp index 048e676..8e66599 100644 --- a/src/main_vanilla.cpp +++ b/src/main_vanilla.cpp @@ -9,7 +9,8 @@ using namespace std; //------------------------------------------------ // [[Rcpp::export]] Rcpp::List vanilla_deme_inbreeding_coef_cpp(Rcpp::List args) { - + // overflow cost parameter + const double OVERFLO_DOUBLE = DBL_MAX/1000.0; //------------------------------- // unpack inputs //------------------------------- @@ -62,6 +63,7 @@ Rcpp::List vanilla_deme_inbreeding_coef_cpp(Rcpp::List args) { // initialize and fill in params //------------------------------- Particle discParticle; + discParticle.OVERFLO_DOUBLE = OVERFLO_DOUBLE; discParticle.steps = steps; discParticle.n_Demes = n_Demes; discParticle.n_Kpairmax = n_Kpairmax; diff --git a/tests/testthat/test-modelruns.R b/tests/testthat/test-vanillamodelruns.R similarity index 96% rename from tests/testthat/test-modelruns.R rename to tests/testthat/test-vanillamodelruns.R index 5b7d303..15cf274 100644 --- a/tests/testthat/test-modelruns.R +++ b/tests/testthat/test-vanillamodelruns.R @@ -1,4 +1,4 @@ -test_that("model runs", { +test_that("Vanilla model runs", { # sim data data("IBD_simulation_data", package = "discent") From de9f5e5ba1335e53dc7bb73c583d7096de2277c6 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Thu, 21 Dec 2023 09:12:34 -0500 Subject: [PATCH 03/11] pso with initial cond met --- NAMESPACE | 1 + R/RcppExports.R | 4 + R/main_pso.R | 89 +++++++---- man/deme_inbreeding_spcoef_pso.Rd | 97 ++++++++++++ src/RcppExports.cpp | 12 ++ src/main_pso.cpp | 242 +++++++++++++++++++++++++++++ src/main_pso.h | 6 + src/particle.h | 2 + tests/testthat/test-psomodelruns.R | 33 ++++ 9 files changed, 452 insertions(+), 34 deletions(-) create mode 100644 man/deme_inbreeding_spcoef_pso.Rd create mode 100644 src/main_pso.cpp create mode 100644 src/main_pso.h create mode 100644 tests/testthat/test-psomodelruns.R diff --git a/NAMESPACE b/NAMESPACE index 5af1ea8..05b8552 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(print,DISCresult) S3method(summary,DISCresult) S3method(tidyout,DISCresult) export("%>%") +export(deme_inbreeding_spcoef_pso) export(deme_inbreeding_spcoef_vanilla) export(is.DISCresult) export(tidyout) diff --git a/R/RcppExports.R b/R/RcppExports.R index da6c7d5..021be4f 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,6 +1,10 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +pso_deme_inbreeding_coef_cpp <- function(args) { + .Call(`_discent_pso_deme_inbreeding_coef_cpp`, args) +} + vanilla_deme_inbreeding_coef_cpp <- function(args) { .Call(`_discent_vanilla_deme_inbreeding_coef_cpp`, args) } diff --git a/R/main_pso.R b/R/main_pso.R index d80d0e1..9019141 100644 --- a/R/main_pso.R +++ b/R/main_pso.R @@ -1,16 +1,24 @@ #' @title Identify Deme Inbreeding Spatial Coefficients in Continuous Space with #' Particle Swarm Meta-Optimization #' @inheritParams deme_inbreeding_spcoef_vanilla -#' @param +#' @param fi_lowerbound +#' @param fi_upperbound +#' @param flearn_lowerbound +#' @param flearn_upperbound +#' @param mlearn_lowerbound +#' @param mlearn_upperbound +#' @param c1 +#' @param c2 +#' @param w +#' @param swarmsize +#' @param swarmsteps +#' @param babysteps #' @inherit description deme_inbreeding_spcoef_vanilla #' @details The gen.geo.dist dataframe must be named with the following columns: #' "smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to: #' Sample 1 Name; Sample 2 Name; Sample 1 Location; Sample 2 Location; #' Pairwise Genetic Distance; Pairwise Geographpic Distance. Note, the order of the #' columns do not matter but the names of the columns must match. -#' @details The start_params vector names must match the cluster names (i.e. clusters must be -#' have a name that we can match on for the starting relatedness paramerts). In addition, -#' you must provide a start parameter for "m". #' @details We have implemented coding decisions to not allow the "f" inbreeding coefficients to be negative by using a #' logit transformation internally in the code. #' @details Gradient descent is performed using the Adam (adaptive moment estimation) optimization approach. Default values @@ -21,15 +29,24 @@ #' @export deme_inbreeding_spcoef_pso <- function(discdat, - start_params = c(), - f_learningrate = 1e-3, - m_learningrate = 1e-6, - m_lowerbound = 0, + m_lowerbound = 1e-10, m_upperbound = Inf, + fi_lowerbound= 1e-3, + fi_upperbound = 0.3, + flearn_lowerbound = 1e-10, + flearn_upperbound = 1e-2, + mlearn_lowerbound = 1e-15, + mlearn_upperbound = 1e-8, + c1 = 0.1, + c2 = 0.1, + w = 0.25, b1 = 0.9, b2 = 0.999, e = 1e-8, steps = 1e3, + babysteps = 1e2, + swarmsteps = 50, + swarmsize = 25, thin = 1, normalize_geodist = TRUE, report_progress = TRUE, @@ -48,27 +65,25 @@ deme_inbreeding_spcoef_pso <- function(discdat, } } - locats <- names(start_params)[!grepl("^m$", names(start_params))] - if (!all(unique(c(discdat$deme1, discdat$deme2)) %in% locats)) { - stop("You have cluster names in your discdat dataframe that are not included in your start parameters") - } - if (!any(grepl("^m$", names(start_params)))) { - stop("A m start parameter must be provided (i.e. there must be a vector element name m in your start parameter vector)") - } assert_dataframe(discdat) - assert_vector(start_params) - assert_length(start_params, n = (length(unique(c(discdat$deme1, discdat$deme2))) + 1), - message = "Start params length not correct. You must specificy a start parameter - for each deme and the migration parameter, m") - sapply(start_params[!grepl("^m$", names(start_params))], assert_bounded, left = 0, right = 1, inclusive_left = TRUE, inclusive_right = TRUE) - assert_single_numeric(f_learningrate) - assert_single_numeric(m_learningrate) assert_single_numeric(b1) assert_single_numeric(b2) assert_single_numeric(e) + assert_single_numeric(c1) + assert_single_numeric(c2) + assert_single_numeric(w) assert_single_numeric(m_lowerbound) assert_single_numeric(m_upperbound) assert_gr(m_upperbound, m_lowerbound) + assert_single_numeric(fi_lowerbound) + assert_single_numeric(fi_upperbound) + assert_gr(fi_upperbound, fi_lowerbound) + assert_single_numeric(flearn_lowerbound) + assert_single_numeric(flearn_upperbound) + assert_gr(flearn_upperbound, flearn_lowerbound) + assert_single_numeric(mlearn_lowerbound) + assert_single_numeric(mlearn_upperbound) + assert_gr(mlearn_upperbound, mlearn_lowerbound) assert_single_int(steps) assert_single_int(thin) assert_greq(thin, 1, message = "Must be at least 1") @@ -103,9 +118,6 @@ deme_inbreeding_spcoef_pso <- function(discdat, ifelse(gendist < 0.001, 0.001, gendist))) %>% # reasonable bounds on logit dplyr::mutate(gendist = logit(gendist)) - # transform start parameters w/ logit - start_params[names(start_params) != "m"] <- logit(start_params[names(start_params) != "m"]) - # get genetic data by pairs through efficient nest gendist <- discdat %>% @@ -126,7 +138,7 @@ deme_inbreeding_spcoef_pso <- function(discdat, # This approach wastes memory but allows for a structured array # versus a list with varying sizes (and eventually a more efficient for-loop) n_Kpairmax <- max(purrr::map_dbl(gendist$data, nrow)) - gendist_arr <- array(data = -1, dim = c(length(locats), length(locats), n_Kpairmax)) + gendist_arr <- array(data = -1, dim = c(length(demes), length(demes), n_Kpairmax)) for (i in 1:nrow(gendist)) { gendist_arr[gendist$i[i], gendist$j[i], 1:nrow(gendist$data[[i]])] <- unname(unlist(gendist$data[[i]])) } @@ -139,9 +151,9 @@ deme_inbreeding_spcoef_pso <- function(discdat, dplyr::mutate(geodist = (geodist - mingeodist)/(maxgeodist - mingeodist)) } # catch accidental bad M start if user is standardizing distances - if (normalize_geodist & (start_params[names(start_params) == "m"] > 500) ) { + if (normalize_geodist & (m_upperbound > 500) ) { warning("You have selected to normalize geographic distances, but your - migration rate start parameter is large. Please consider placing it on a + migration rate upper bound parameter is large. Please consider placing it on a similar scale to your normalized geographic distances for stability.") } @@ -166,7 +178,7 @@ deme_inbreeding_spcoef_pso <- function(discdat, ) # upper tri - geodist_mat <- matrix(data = -1, nrow = length(locats), ncol = length(locats)) + geodist_mat <- matrix(data = -1, nrow = length(demes), ncol = length(demes)) for (i in 1:nrow(geodist)) { geodist_mat[geodist$i[i], geodist$j[i]] <- geodist$data[i] } @@ -178,22 +190,31 @@ deme_inbreeding_spcoef_pso <- function(discdat, args <- list(gendist = as.vector(gendist_arr), geodist = as.vector(geodist_mat), - fvec = unname( start_params[!grepl("^m$", names(start_params))] ), + n_Demes = length(demes), n_Kpairmax = n_Kpairmax, - m = unname(start_params["m"]), - f_learningrate = f_learningrate, - m_learningrate = m_learningrate, m_lowerbound = m_lowerbound, m_upperbound = m_upperbound, + fi_lowerbound = logit( fi_lowerbound ), + fi_upperbound = logit( fi_upperbound ), + flearn_lowerbound = flearn_lowerbound, + flearn_upperbound = flearn_upperbound, + mlearn_lowerbound = mlearn_lowerbound, + mlearn_upperbound = mlearn_upperbound, b1 = b1, b2 = b2, e = e, + c1 = c1, + c2 = c2, + w = w, + swarmsteps = swarmsteps, + swarmsize = swarmsize, + babysteps = babysteps, steps = steps, report_progress = report_progress ) # run - output_raw <- vanilla_deme_inbreeding_coef_cpp(args) + output_raw <- pso_deme_inbreeding_coef_cpp(args) # set up thinning diff --git a/man/deme_inbreeding_spcoef_pso.Rd b/man/deme_inbreeding_spcoef_pso.Rd new file mode 100644 index 0000000..920f18b --- /dev/null +++ b/man/deme_inbreeding_spcoef_pso.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/main_pso.R +\name{deme_inbreeding_spcoef_pso} +\alias{deme_inbreeding_spcoef_pso} +\title{Identify Deme Inbreeding Spatial Coefficients in Continuous Space with +Particle Swarm Meta-Optimization} +\usage{ +deme_inbreeding_spcoef_pso( + discdat, + m_lowerbound = 0, + m_upperbound = Inf, + fi_lowerbound = 0, + fi_upperbound = 0.3, + flearn_lowerbound = 1e-10, + flearn_upperbound = 0.01, + mlearn_lowerbound = 1e-15, + mlearn_upperbound = 1e-08, + c1 = 0.1, + c2 = 0.1, + w = 0.25, + b1 = 0.9, + b2 = 0.999, + e = 1e-08, + steps = 1000, + thin = 1, + normalize_geodist = TRUE, + report_progress = TRUE, + return_verbose = FALSE +) +} +\arguments{ +\item{discdat}{dataframe; The genetic-geographic data by deme (K)} + +\item{m_lowerbound}{double; lower limit value for the global "m" parameter; will use a reflected normal within the gradient descent algorithm to adjust any aberrant values} + +\item{m_upperbound}{double; upper limit value for the global "m" parameter; will use a reflected normal within the gradient descent algorithm to adjust any aberrant values} + +\item{fi_lowerbound}{} + +\item{fi_upperbound}{} + +\item{flearn_lowerbound}{} + +\item{flearn_upperbound}{} + +\item{mlearn_lowerbound}{} + +\item{mlearn_upperbound}{} + +\item{c1}{} + +\item{c2}{} + +\item{w}{} + +\item{b1}{double; Exponential decay rates for the first moment estimate} + +\item{b2}{double; Exponential decay rates for the second moment estimate} + +\item{e}{double; Epsilon (error) for stability in the Adam optimization algorithm} + +\item{steps}{integer; the number of "steps" as we move down the gradient} + +\item{thin}{integer; the number of "steps" to keep as part of the output (i.e. if the user specifies 10, every 10th iteration will be kept)} + +\item{normalize_geodist}{boolean; whether geographic distances between demes should be normalized (i.e. rescaled to \code{[0-1]}). Helps increase model stability at the expense of complicating the interpretation of the migration rate parameter.} + +\item{report_progress}{boolean; whether or not a progress bar should be shown as you iterate through steps} + +\item{return_verbose}{boolean; whether the inbreeding coefficients and migration rate should be returned for every iteration or +only for the final iteration. User will typically not want to store every iteration, which can be memory intensive} +} +\description{ +Identify Deme Inbreeding Spatial Coefficients in Continuous Space with +Particle Swarm Meta-Optimization +} +\details{ +The gen.geo.dist dataframe must be named with the following columns: +"smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to: +Sample 1 Name; Sample 2 Name; Sample 1 Location; Sample 2 Location; +Pairwise Genetic Distance; Pairwise Geographpic Distance. Note, the order of the +columns do not matter but the names of the columns must match. + +The start_params vector names must match the cluster names (i.e. clusters must be +have a name that we can match on for the starting relatedness paramerts). In addition, +you must provide a start parameter for "m". + +We have implemented coding decisions to not allow the "f" inbreeding coefficients to be negative by using a +logit transformation internally in the code. + +Gradient descent is performed using the Adam (adaptive moment estimation) optimization approach. Default values +for moment decay rates, epsilon, and learning rates are taken from \cite{DP Kingma, 2014}. + +The Particle Swarm Optimization is a meta-optimization (meta-heuristic) approach that attempts to find optimal +start parameters for the user to avoid a grid-search approach as would be best practices for fine-tuning the gradient descent +approach. +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 084d728..26f6dc6 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -10,6 +10,17 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif +// pso_deme_inbreeding_coef_cpp +Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args); +RcppExport SEXP _discent_pso_deme_inbreeding_coef_cpp(SEXP argsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type args(argsSEXP); + rcpp_result_gen = Rcpp::wrap(pso_deme_inbreeding_coef_cpp(args)); + return rcpp_result_gen; +END_RCPP +} // vanilla_deme_inbreeding_coef_cpp Rcpp::List vanilla_deme_inbreeding_coef_cpp(Rcpp::List args); RcppExport SEXP _discent_vanilla_deme_inbreeding_coef_cpp(SEXP argsSEXP) { @@ -23,6 +34,7 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { + {"_discent_pso_deme_inbreeding_coef_cpp", (DL_FUNC) &_discent_pso_deme_inbreeding_coef_cpp, 1}, {"_discent_vanilla_deme_inbreeding_coef_cpp", (DL_FUNC) &_discent_vanilla_deme_inbreeding_coef_cpp, 1}, {NULL, NULL, 0} }; diff --git a/src/main_pso.cpp b/src/main_pso.cpp new file mode 100644 index 0000000..9262eab --- /dev/null +++ b/src/main_pso.cpp @@ -0,0 +1,242 @@ +#include "main_pso.h" +#include "misc_v15.h" +#include "probability_v17.h" +#include "particle.h" +using namespace std; + +//---------------------------------------------------------------------------------------- +// Perform gradient descent to calculate deme Fi's +// using DISCent (ADAM grad descent) with a Particle Swarm +// Meta-Optimization Step +//---------------------------------------------------------------------------------------- +// [[Rcpp::export]] +Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { + // overflow cost parameter + const double OVERFLO_DOUBLE = DBL_MAX/1000.0; + //--------------------------------------------------- + // SECTION 1: unpack inputs and set up storage + //--------------------------------------------------- + // get dims + int n_Demes = rcpp_to_int(args["n_Demes"]); + int n_Kpairmax = rcpp_to_int(args["n_Kpairmax"]); + + // observed pairwise sample genetic distances + vector gendist = rcpp_to_vector_double(args["gendist"]); + // recast to array from vector (perserving structure) + vector>> gendist_arr(n_Demes, vector>(n_Demes, vector(n_Kpairmax))); + int geniter = 0; + for (int i = 0; i < n_Kpairmax; i++) { + for (int j = 0; j < n_Demes; j++) { + for (int k = 0; k < n_Demes; k++) { + gendist_arr[k][j][i] = gendist[geniter]; + geniter++; + } + } + } + + // observed geo data + vector geodist = rcpp_to_vector_double(args["geodist"]); // pairwise sample geo distances + vector> geodist_mat(n_Demes, vector(n_Demes)); + // recast + int geoiter = 0; + for (int i = 0; i < n_Demes; i++) { + for (int j = 0; j < n_Demes; j++) { + geodist_mat[j][i] = geodist[geoiter]; + geoiter++; + } + } + + // items for grad descent + int steps = rcpp_to_int(args["steps"]); + int babysteps = rcpp_to_int(args["babysteps"]); + double m_lowerbound = rcpp_to_double(args["m_lowerbound"]); + double m_upperbound = rcpp_to_double(args["m_upperbound"]); + double b1 = rcpp_to_double(args["b1"]); + double b2 = rcpp_to_double(args["b2"]); + double e = rcpp_to_double(args["e"]); + bool report_progress = rcpp_to_bool(args["report_progress"]); + + // items for particle swarm + int swarmsize = rcpp_to_int(args["swarmsize"]); + int swarmsteps = rcpp_to_int(args["swarmsteps"]); + int c1 = rcpp_to_double(args["c1"]); + int c2 = rcpp_to_double(args["c2"]); + int w = rcpp_to_double(args["w"]); + // bound for init + double fi_lowerbound = rcpp_to_double(args["fi_lowerbound"]); + double fi_upperbound = rcpp_to_double(args["fi_upperbound"]); + double flearn_lowerbound = rcpp_to_double(args["flearn_lowerbound"]); + double flearn_upperbound = rcpp_to_double(args["flearn_upperbound"]); + double mlearn_lowerbound = rcpp_to_double(args["mlearn_lowerbound"]); + double mlearn_upperbound = rcpp_to_double(args["mlearn_upperbound"]); + // catch infs + m_lowerbound = (m_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : m_lowerbound; + fi_lowerbound = (fi_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : fi_lowerbound; + flearn_lowerbound = (flearn_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : flearn_lowerbound; + mlearn_lowerbound = (mlearn_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : mlearn_lowerbound; + m_upperbound = (m_upperbound g_swarm_pos(4); // global best of swarm based on our 4 start param for search + vector> fi_search(swarmsteps, vector(swarmsize)); + vector> m_search(swarmsteps, vector(swarmsize)); + vector> flearn_search(swarmsteps, vector(swarmsize)); + vector> mlearn_search(swarmsteps, vector(swarmsize)); + // nested vectors, first over time, then particles + vector> swarm(swarmsteps, vector(swarmsize)); + //--------------------------------------------------- + // SECTION 2: Run PSO + //--------------------------------------------------- + //----------------------------- + // init PSO + //----------------------------- + for (int i = 0; i < swarmsize; i++) { + // rand start params + fi_search[0][i] = runif1(fi_lowerbound, fi_upperbound); + m_search[0][i] = runif1(m_lowerbound, m_upperbound); + flearn_search[0][i] = runif1(flearn_lowerbound, flearn_upperbound); + mlearn_search[0][i] = runif1(mlearn_lowerbound, mlearn_upperbound); + // fill in particles + vector fvec(n_Demes); + fill(fvec.begin(), fvec.end(), fi_search[0][i]); + swarm[0][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE; + swarm[0][i].steps = babysteps; + swarm[0][i].n_Demes = n_Demes; + swarm[0][i].n_Kpairmax = n_Kpairmax; + swarm[0][i].f_learningrate = flearn_search[0][i]; + swarm[0][i].m_learningrate = mlearn_search[0][i]; + swarm[0][i].m_lowerbound = m_lowerbound; + swarm[0][i].m_upperbound = m_upperbound; + swarm[0][i].b1 = b1; + swarm[0][i].b2 = b2; + swarm[0][i].e = e; + swarm[0][i].m = mlearn_search[0][i]; + swarm[0][i].fvec = fvec; + swarm[0][i].gendist_arr = gendist_arr; + swarm[0][i].geodist_mat = geodist_mat; + + // storage and ADAM items + vector cost(babysteps); + vector m_run(babysteps); + vector> fi_run(babysteps, vector(n_Demes)); + vector m_gradtraj(babysteps); // m gradient storage + vector> fi_gradtraj(babysteps, vector(n_Demes)); // fi storage gradient + vector m1t_m(babysteps); // first moment + vector v2t_m(babysteps); // second moment (v) + double m1t_m_hat; // first moment bias corrected + double v2t_m_hat; // second moment (v) bias corrected + vector> m1t_fi(babysteps, vector(n_Demes)); // first moment + vector> v2t_fi(babysteps, vector(n_Demes)); // second moment (v) + vector m1t_fi_hat(n_Demes); // first moment bias corrected + vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected + + // fill in for class + swarm[0][i].cost = cost; + swarm[0][i].m_run = m_run; + swarm[0][i].fi_run = fi_run; + swarm[0][i].m_gradtraj = m_gradtraj; + swarm[0][i].fi_gradtraj = fi_gradtraj; + swarm[0][i].m1t_m = m1t_m; + swarm[0][i].v2t_m = v2t_m; + swarm[0][i].m1t_m_hat = m1t_m_hat; + swarm[0][i].v2t_m_hat = v2t_m_hat; + swarm[0][i].m1t_fi = m1t_fi; + swarm[0][i].v2t_fi = v2t_fi; + swarm[0][i].m1t_fi_hat = m1t_fi_hat; + swarm[0][i].v2t_fi_hat = v2t_fi_hat; + // run GD + swarm[0][i].performGD(false); + // store personal best + vector init_p_best(4); + swarm[0][i].particle_p_best = init_p_best; + swarm[0][i].particle_p_best[0] = fi_search[0][i]; + swarm[0][i].particle_p_best[1] = m_search[0][i]; + swarm[0][i].particle_p_best[2] = flearn_search[0][i]; + swarm[0][i].particle_p_best[3] = mlearn_search[0][i]; + } + cout << "made it"; + return(0); + } + + // //----------------------------- + // // Remaining Steps of PSO + // //----------------------------- + // for (int t = 1; t < swarmsteps; t++) { + // + // } + // + // + // + // //------------------------------- + // // SECTION 3: Run Long Chain of Grad Descent based on global best start parameters + // //------------------------------- + // // initialize and fill in params + // vector fvecfinal(n_Demes); + // fill(fvecfinal.begin(), fvecfinal.end(), g_swarm_pos[3]); + // Particle discParticle; + // discParticle.steps = steps; + // discParticle.n_Demes = n_Demes; + // discParticle.n_Kpairmax = n_Kpairmax; + // discParticle.f_learningrate = g_swarm_pos[0]; + // discParticle.m_learningrate = g_swarm_pos[1]; + // discParticle.m_lowerbound = m_lowerbound; + // discParticle.m_upperbound = m_upperbound; + // discParticle.b1 = b1; + // discParticle.b2 = b2; + // discParticle.e = e; + // discParticle.m = g_swarm_pos[2]; + // discParticle.fvec = fvecfinal; + // discParticle.gendist_arr = gendist_arr; + // discParticle.geodist_mat = geodist_mat; + // // storage and ADAM items + // vector cost(steps); + // vector m_run(steps); + // vector> fi_run(steps, vector(n_Demes)); + // vector m_gradtraj(steps); // m gradient storage + // vector> fi_gradtraj(steps, vector(n_Demes)); // fi storage gradient + // // moments for Adam + // // https://arxiv.org/pdf/1412.6980 + // vector m1t_m(steps); // first moment + // vector v2t_m(steps); // second moment (v) + // double m1t_m_hat; // first moment bias corrected + // double v2t_m_hat; // second moment (v) bias corrected + // vector> m1t_fi(steps, vector(n_Demes)); // first moment + // vector> v2t_fi(steps, vector(n_Demes)); // second moment (v) + // vector m1t_fi_hat(n_Demes); // first moment bias corrected + // vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected + // // fill in for class + // discParticle.cost = cost; + // discParticle.m_run = m_run; + // discParticle.fi_run = fi_run; + // discParticle.m_gradtraj = m_gradtraj; + // discParticle.fi_gradtraj = fi_gradtraj; + // discParticle.m1t_m = m1t_m; + // discParticle.v2t_m = v2t_m; + // discParticle.m1t_m_hat = m1t_m_hat; + // discParticle.v2t_m_hat = v2t_m_hat; + // discParticle.m1t_fi = m1t_fi; + // discParticle.v2t_fi = v2t_fi; + // discParticle.m1t_fi_hat = m1t_fi_hat; + // discParticle.v2t_fi_hat = v2t_fi_hat; + // // run GD + // discParticle.performGD(report_progress); + // + // //------------------------------- + // // Out: return as Rcpp object + // //------------------------------- + // return Rcpp::List::create(Rcpp::Named("m_run") = discParticle.m_run, + // Rcpp::Named("fi_run") = discParticle.fi_run, + // Rcpp::Named("m_gradtraj") = discParticle.m_gradtraj, + // Rcpp::Named("fi_gradtraj") = discParticle.fi_gradtraj, + // Rcpp::Named("m_firstmoment") = discParticle.m1t_m, + // Rcpp::Named("m_secondmoment") = discParticle.v2t_m, + // Rcpp::Named("fi_firstmoment") = discParticle.m1t_fi, + // Rcpp::Named("fi_secondmoment") = discParticle.v2t_fi, + // Rcpp::Named("cost") = discParticle.cost, + // Rcpp::Named("Final_Fis") = discParticle.fvec, + // Rcpp::Named("Final_m") = discParticle.m); + // + //} diff --git a/src/main_pso.h b/src/main_pso.h new file mode 100644 index 0000000..db55c88 --- /dev/null +++ b/src/main_pso.h @@ -0,0 +1,6 @@ + +#include + +//------------------------------------------------ +// estimate deme inbreeding spatial coefficient +Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args); diff --git a/src/particle.h b/src/particle.h index 071ab5b..7d48af7 100644 --- a/src/particle.h +++ b/src/particle.h @@ -40,6 +40,8 @@ class Particle { std::vector> v2t_fi; std::vector m1t_fi_hat; std::vector v2t_fi_hat; + // for pso model + std::vector particle_p_best; // PUBLIC FUNCTIONS // constructors diff --git a/tests/testthat/test-psomodelruns.R b/tests/testthat/test-psomodelruns.R new file mode 100644 index 0000000..e047133 --- /dev/null +++ b/tests/testthat/test-psomodelruns.R @@ -0,0 +1,33 @@ +test_that("PSO modelruns", { + + # sim data + data("IBD_simulation_data", package = "discent") + dat <- IBD_simulation_data + # run model + inputdisc <- dat %>% + dplyr::filter(deme1 != deme2) + mod <- deme_inbreeding_spcoef_pso(discdat = inputdisc, + m_lowerbound = 1e-10, + m_upperbound = Inf, + fi_lowerbound= 1e-3, + fi_upperbound = 0.3, + flearn_lowerbound = 1e-10, + flearn_upperbound = 1e-2, + mlearn_lowerbound = 1e-15, + mlearn_upperbound = 1e-8, + c1 = 0.1, + c2 = 0.1, + w = 0.25, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + babysteps = 1e1, + swarmsize = 5, + swarmsteps = 10, + normalize_geodist = F, + report_progress = F, + return_verbose = F) + testthat::expect_length(mod, 6) +}) + From 0601f576f8bdd0a6870ada3337545993cb827835 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sun, 24 Dec 2023 11:40:33 -0500 Subject: [PATCH 04/11] working swarm --- R/main_pso.R | 9 +- src/main_pso.cpp | 310 +++++++++++++++++------------ src/particle.cpp | 6 +- src/particle.h | 4 +- tests/testthat/test-psomodelruns.R | 2 +- 5 files changed, 197 insertions(+), 134 deletions(-) diff --git a/R/main_pso.R b/R/main_pso.R index 9019141..0500433 100644 --- a/R/main_pso.R +++ b/R/main_pso.R @@ -12,7 +12,7 @@ #' @param w #' @param swarmsize #' @param swarmsteps -#' @param babysteps +#' @param searchsteps #' @inherit description deme_inbreeding_spcoef_vanilla #' @details The gen.geo.dist dataframe must be named with the following columns: #' "smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to: @@ -44,7 +44,7 @@ deme_inbreeding_spcoef_pso <- function(discdat, b2 = 0.999, e = 1e-8, steps = 1e3, - babysteps = 1e2, + searchsteps = 1e2, swarmsteps = 50, swarmsize = 25, thin = 1, @@ -208,9 +208,10 @@ deme_inbreeding_spcoef_pso <- function(discdat, w = w, swarmsteps = swarmsteps, swarmsize = swarmsize, - babysteps = babysteps, + searchsteps = searchsteps, steps = steps, - report_progress = report_progress + report_progress = report_progress, + return_verbose = return_verbose ) # run diff --git a/src/main_pso.cpp b/src/main_pso.cpp index 9262eab..fbce41c 100644 --- a/src/main_pso.cpp +++ b/src/main_pso.cpp @@ -48,14 +48,14 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { // items for grad descent int steps = rcpp_to_int(args["steps"]); - int babysteps = rcpp_to_int(args["babysteps"]); + int searchsteps = rcpp_to_int(args["searchsteps"]); double m_lowerbound = rcpp_to_double(args["m_lowerbound"]); double m_upperbound = rcpp_to_double(args["m_upperbound"]); double b1 = rcpp_to_double(args["b1"]); double b2 = rcpp_to_double(args["b2"]); double e = rcpp_to_double(args["e"]); bool report_progress = rcpp_to_bool(args["report_progress"]); - + bool return_verbose = rcpp_to_bool(args["return_verbose"]); // items for particle swarm int swarmsize = rcpp_to_int(args["swarmsize"]); int swarmsteps = rcpp_to_int(args["swarmsteps"]); @@ -70,21 +70,19 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { double mlearn_lowerbound = rcpp_to_double(args["mlearn_lowerbound"]); double mlearn_upperbound = rcpp_to_double(args["mlearn_upperbound"]); // catch infs - m_lowerbound = (m_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : m_lowerbound; fi_lowerbound = (fi_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : fi_lowerbound; + m_lowerbound = (m_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : m_lowerbound; flearn_lowerbound = (flearn_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : flearn_lowerbound; mlearn_lowerbound = (mlearn_lowerbound < -OVERFLO_DOUBLE) ? -OVERFLO_DOUBLE : mlearn_lowerbound; - m_upperbound = (m_upperbound g_swarm_pos(4); // global best of swarm based on our 4 start param for search - vector> fi_search(swarmsteps, vector(swarmsize)); - vector> m_search(swarmsteps, vector(swarmsize)); - vector> flearn_search(swarmsteps, vector(swarmsize)); - vector> mlearn_search(swarmsteps, vector(swarmsize)); + // NB order for vector pos will be fi, m, flearn, mlearn + vector g_best_swarm_pos(5); // global best of swarm based on our 4 start param & cost for search + fill(g_best_swarm_pos.begin(), g_best_swarm_pos.end(), OVERFLO_DOUBLE); // minimalization problem // nested vectors, first over time, then particles vector> swarm(swarmsteps, vector(swarmsize)); //--------------------------------------------------- @@ -94,140 +92,202 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { // init PSO //----------------------------- for (int i = 0; i < swarmsize; i++) { - // rand start params - fi_search[0][i] = runif1(fi_lowerbound, fi_upperbound); - m_search[0][i] = runif1(m_lowerbound, m_upperbound); - flearn_search[0][i] = runif1(flearn_lowerbound, flearn_upperbound); - mlearn_search[0][i] = runif1(mlearn_lowerbound, mlearn_upperbound); // fill in particles vector fvec(n_Demes); - fill(fvec.begin(), fvec.end(), fi_search[0][i]); + double ffill = runif1(fi_lowerbound, fi_upperbound); // rand fi start param + fill(fvec.begin(), fvec.end(), ffill); swarm[0][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE; - swarm[0][i].steps = babysteps; + swarm[0][i].steps = searchsteps; swarm[0][i].n_Demes = n_Demes; swarm[0][i].n_Kpairmax = n_Kpairmax; - swarm[0][i].f_learningrate = flearn_search[0][i]; - swarm[0][i].m_learningrate = mlearn_search[0][i]; + swarm[0][i].m = runif1(m_lowerbound, m_upperbound); + swarm[0][i].f_learningrate = runif1(flearn_lowerbound, flearn_upperbound); + swarm[0][i].m_learningrate = runif1(mlearn_lowerbound, mlearn_upperbound); swarm[0][i].m_lowerbound = m_lowerbound; swarm[0][i].m_upperbound = m_upperbound; swarm[0][i].b1 = b1; swarm[0][i].b2 = b2; swarm[0][i].e = e; - swarm[0][i].m = mlearn_search[0][i]; swarm[0][i].fvec = fvec; swarm[0][i].gendist_arr = gendist_arr; swarm[0][i].geodist_mat = geodist_mat; // storage and ADAM items - vector cost(babysteps); - vector m_run(babysteps); - vector> fi_run(babysteps, vector(n_Demes)); - vector m_gradtraj(babysteps); // m gradient storage - vector> fi_gradtraj(babysteps, vector(n_Demes)); // fi storage gradient - vector m1t_m(babysteps); // first moment - vector v2t_m(babysteps); // second moment (v) - double m1t_m_hat; // first moment bias corrected - double v2t_m_hat; // second moment (v) bias corrected - vector> m1t_fi(babysteps, vector(n_Demes)); // first moment - vector> v2t_fi(babysteps, vector(n_Demes)); // second moment (v) - vector m1t_fi_hat(n_Demes); // first moment bias corrected - vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected - - // fill in for class - swarm[0][i].cost = cost; - swarm[0][i].m_run = m_run; - swarm[0][i].fi_run = fi_run; - swarm[0][i].m_gradtraj = m_gradtraj; - swarm[0][i].fi_gradtraj = fi_gradtraj; - swarm[0][i].m1t_m = m1t_m; - swarm[0][i].v2t_m = v2t_m; - swarm[0][i].m1t_m_hat = m1t_m_hat; - swarm[0][i].v2t_m_hat = v2t_m_hat; - swarm[0][i].m1t_fi = m1t_fi; - swarm[0][i].v2t_fi = v2t_fi; - swarm[0][i].m1t_fi_hat = m1t_fi_hat; - swarm[0][i].v2t_fi_hat = v2t_fi_hat; + swarm[0][i].cost = vector(searchsteps); + swarm[0][i].m_run = vector(searchsteps);; + swarm[0][i].fi_run = vector>(searchsteps, vector(n_Demes));; + swarm[0][i].m_gradtraj = vector(searchsteps); // m gradient storage; + swarm[0][i].fi_gradtraj = vector>(searchsteps, vector(n_Demes)); // fi storage gradient; + swarm[0][i].m1t_m = vector(searchsteps); // first m moment; + swarm[0][i].v2t_m = vector(searchsteps); // second m moment (v); + swarm[0][i].m1t_m_hat = double(); // first moment bias corrected + swarm[0][i].v2t_m_hat = double(); // second moment (v) bias corrected + swarm[0][i].m1t_fi = vector>(searchsteps, vector(n_Demes)); // first fi moment; + swarm[0][i].v2t_fi = vector>(searchsteps, vector(n_Demes)); // second fi moment (v); + swarm[0][i].m1t_fi_hat = vector(n_Demes); // first moment bias corrected; + swarm[0][i].v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; // run GD swarm[0][i].performGD(false); - // store personal best - vector init_p_best(4); - swarm[0][i].particle_p_best = init_p_best; - swarm[0][i].particle_p_best[0] = fi_search[0][i]; - swarm[0][i].particle_p_best[1] = m_search[0][i]; - swarm[0][i].particle_p_best[2] = flearn_search[0][i]; - swarm[0][i].particle_p_best[3] = mlearn_search[0][i]; + // store current position, personal best, and velocity + vector init_p0(4); + vector init_velocity0(4); + fill(init_velocity0.begin(), init_velocity0.end(),0); // decision to start with an initial velocity of zero, slow-start/conservative + swarm[0][i].particle_pbest = swarm[0][i].particle_pcurr = init_p0; + g_best_swarm_pos[0] = swarm[0][i].particle_pbest[0] = swarm[0][i].particle_pcurr[0] = ffill; + g_best_swarm_pos[1] = swarm[0][i].particle_pbest[1] = swarm[0][i].particle_pcurr[1] = swarm[0][i].m; + g_best_swarm_pos[2] = swarm[0][i].particle_pbest[2] = swarm[0][i].particle_pcurr[2] = swarm[0][i].f_learningrate; + g_best_swarm_pos[3] = swarm[0][i].particle_pbest[3] = swarm[0][i].particle_pcurr[3] = swarm[0][i].m_learningrate; + swarm[0][i].particle_velocity = init_velocity0; + // update global best + g_best_swarm_pos[4] = swarm[0][i].cost[searchsteps-1]; } - cout << "made it"; - return(0); - } - - // //----------------------------- - // // Remaining Steps of PSO - // //----------------------------- - // for (int t = 1; t < swarmsteps; t++) { - // - // } - // - // - // - // //------------------------------- - // // SECTION 3: Run Long Chain of Grad Descent based on global best start parameters - // //------------------------------- - // // initialize and fill in params - // vector fvecfinal(n_Demes); - // fill(fvecfinal.begin(), fvecfinal.end(), g_swarm_pos[3]); - // Particle discParticle; - // discParticle.steps = steps; - // discParticle.n_Demes = n_Demes; - // discParticle.n_Kpairmax = n_Kpairmax; - // discParticle.f_learningrate = g_swarm_pos[0]; - // discParticle.m_learningrate = g_swarm_pos[1]; - // discParticle.m_lowerbound = m_lowerbound; - // discParticle.m_upperbound = m_upperbound; - // discParticle.b1 = b1; - // discParticle.b2 = b2; - // discParticle.e = e; - // discParticle.m = g_swarm_pos[2]; - // discParticle.fvec = fvecfinal; - // discParticle.gendist_arr = gendist_arr; - // discParticle.geodist_mat = geodist_mat; - // // storage and ADAM items - // vector cost(steps); - // vector m_run(steps); - // vector> fi_run(steps, vector(n_Demes)); - // vector m_gradtraj(steps); // m gradient storage - // vector> fi_gradtraj(steps, vector(n_Demes)); // fi storage gradient - // // moments for Adam - // // https://arxiv.org/pdf/1412.6980 - // vector m1t_m(steps); // first moment - // vector v2t_m(steps); // second moment (v) - // double m1t_m_hat; // first moment bias corrected - // double v2t_m_hat; // second moment (v) bias corrected - // vector> m1t_fi(steps, vector(n_Demes)); // first moment - // vector> v2t_fi(steps, vector(n_Demes)); // second moment (v) - // vector m1t_fi_hat(n_Demes); // first moment bias corrected - // vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected - // // fill in for class - // discParticle.cost = cost; - // discParticle.m_run = m_run; - // discParticle.fi_run = fi_run; - // discParticle.m_gradtraj = m_gradtraj; - // discParticle.fi_gradtraj = fi_gradtraj; - // discParticle.m1t_m = m1t_m; - // discParticle.v2t_m = v2t_m; - // discParticle.m1t_m_hat = m1t_m_hat; - // discParticle.v2t_m_hat = v2t_m_hat; - // discParticle.m1t_fi = m1t_fi; - // discParticle.v2t_fi = v2t_fi; - // discParticle.m1t_fi_hat = m1t_fi_hat; - // discParticle.v2t_fi_hat = v2t_fi_hat; - // // run GD - // discParticle.performGD(report_progress); - // + + + + //----------------------------- + // Remaining Steps of PSO + //----------------------------- + for (int t = 1; t < swarmsteps; t++) { + for (int i = 1; i < swarmsize; i++) { + // initialize swarm pieces for below + vector init_p(4); + vector init_velocity(4); + swarm[t][i].particle_pbest = swarm[t][i].particle_pcurr = init_p; + swarm[t][i].particle_velocity = init_velocity; + // calculate new velocity and position for each disc param + for (int d = 0; d < 4; d++) { + // breaking velocity calculation into inertia, cognitive, and social componentns + double inert = w * swarm[t-1][i].particle_velocity[d]; + double cog = c1 * runif_0_1() * (swarm[t-1][i].particle_pbest[d] - swarm[t-1][i].particle_pcurr[d]); + double soc = c2 * runif_0_1() * (g_best_swarm_pos[d] - swarm[t-1][i].particle_pcurr[d]); + // update velocity + swarm[t][i].particle_velocity[d] = inert + cog + soc; + // update current position + swarm[t][i].particle_pcurr[d] = swarm[t-1][i].particle_pcurr[d] + swarm[t][i].particle_velocity[d]; + } // end update of params + + //------------------------- + // now run particle for new positions + //------------------------- + vector fvec(n_Demes); + fill(fvec.begin(), fvec.end(), swarm[t][i].particle_pcurr[0]); + swarm[t][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE; + swarm[t][i].steps = searchsteps; + swarm[t][i].n_Demes = n_Demes; + swarm[t][i].n_Kpairmax = n_Kpairmax; + swarm[t][i].m = swarm[t][i].particle_pcurr[1]; + swarm[t][i].f_learningrate = swarm[t][i].particle_pcurr[2]; + swarm[t][i].m_learningrate = swarm[t][i].particle_pcurr[3]; + swarm[t][i].m_lowerbound = m_lowerbound; + swarm[t][i].m_upperbound = m_upperbound; + swarm[t][i].b1 = b1; + swarm[t][i].b2 = b2; + swarm[t][i].e = e; + swarm[t][i].fvec = fvec; + swarm[t][i].gendist_arr = gendist_arr; + swarm[t][i].geodist_mat = geodist_mat; + + // storage and ADAM items + swarm[t][i].cost = vector(searchsteps); + swarm[t][i].m_run = vector(searchsteps);; + swarm[t][i].fi_run = vector>(searchsteps, vector(n_Demes));; + swarm[t][i].m_gradtraj = vector(searchsteps); // m gradient storage; + swarm[t][i].fi_gradtraj = vector>(searchsteps, vector(n_Demes)); // fi storage gradient; + swarm[t][i].m1t_m = vector(searchsteps); // first m moment; + swarm[t][i].v2t_m = vector(searchsteps); // second m moment (v); + swarm[t][i].m1t_m_hat = double(); + swarm[t][i].v2t_m_hat = double(); + swarm[t][i].m1t_fi = vector>(searchsteps, vector(n_Demes)); // first fi moment; + swarm[t][i].v2t_fi = vector>(searchsteps, vector(n_Demes)); // second fi moment (v); + swarm[t][i].m1t_fi_hat = vector(n_Demes); // first moment bias corrected; + swarm[t][i].v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; + // run GD + swarm[t][i].performGD(false); + + // update particle best and global best + if (swarm[t][i].cost[searchsteps-1] < swarm[t-1][i].cost[searchsteps-1]) { + swarm[t][i].particle_pbest[0] = swarm[t][i].particle_pcurr[0]; + swarm[t][i].particle_pbest[1] = swarm[t][i].particle_pcurr[1]; + swarm[t][i].particle_pbest[2] = swarm[t][i].particle_pcurr[2]; + swarm[t][i].particle_pbest[3] = swarm[t][i].particle_pcurr[3]; + + // particle best must be better than curr, otherwise wouldn't make threshold for global (save if/then eval freq) + if (swarm[t][i].cost[searchsteps-1] < g_best_swarm_pos[4]) { + g_best_swarm_pos[0] = swarm[t][i].particle_pcurr[0]; + g_best_swarm_pos[1] = swarm[t][i].particle_pcurr[1]; + g_best_swarm_pos[2] = swarm[t][i].particle_pcurr[2]; + g_best_swarm_pos[3] = swarm[t][i].particle_pcurr[3]; + } + } + // end updates + + } + } + + + //------------------------------- + // SECTION 3: Run Long Chain of Grad Descent based on global best start parameters + //------------------------------- + // initialize and fill in params + vector fvecfinal(n_Demes); + fill(fvecfinal.begin(), fvecfinal.end(), g_best_swarm_pos[0]); + Particle discParticle; + discParticle.steps = steps; + discParticle.n_Demes = n_Demes; + discParticle.n_Kpairmax = n_Kpairmax; + discParticle.m = g_best_swarm_pos[1]; + discParticle.f_learningrate = g_best_swarm_pos[2]; + discParticle.m_learningrate = g_best_swarm_pos[3]; + discParticle.m_lowerbound = m_lowerbound; + discParticle.m_upperbound = m_upperbound; + discParticle.b1 = b1; + discParticle.b2 = b2; + discParticle.e = e; + discParticle.fvec = fvecfinal; + discParticle.gendist_arr = gendist_arr; + discParticle.geodist_mat = geodist_mat; + // storage and ADAM items + vector cost(steps); + vector m_run(steps); + vector> fi_run(steps, vector(n_Demes)); + vector m_gradtraj(steps); // m gradient storage + vector> fi_gradtraj(steps, vector(n_Demes)); // fi storage gradient + // moments for Adam + // https://arxiv.org/pdf/1412.6980 + vector m1t_m(steps); // first moment + vector v2t_m(steps); // second moment (v) + double m1t_m_hat; // first moment bias corrected + double v2t_m_hat; // second moment (v) bias corrected + vector> m1t_fi(steps, vector(n_Demes)); // first moment + vector> v2t_fi(steps, vector(n_Demes)); // second moment (v) + vector m1t_fi_hat(n_Demes); // first moment bias corrected + vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected + // fill in for class + discParticle.cost = cost; + discParticle.m_run = m_run; + discParticle.fi_run = fi_run; + discParticle.m_gradtraj = m_gradtraj; + discParticle.fi_gradtraj = fi_gradtraj; + discParticle.m1t_m = m1t_m; + discParticle.v2t_m = v2t_m; + discParticle.m1t_m_hat = m1t_m_hat; + discParticle.v2t_m_hat = v2t_m_hat; + discParticle.m1t_fi = m1t_fi; + discParticle.v2t_fi = v2t_fi; + discParticle.m1t_fi_hat = m1t_fi_hat; + discParticle.v2t_fi_hat = v2t_fi_hat; + // run GD + discParticle.performGD(report_progress); + + cout << "made it"; + return(0); + } + // //------------------------------- // // Out: return as Rcpp object // //------------------------------- - // return Rcpp::List::create(Rcpp::Named("m_run") = discParticle.m_run, + // return Rcpp::List::create(Rcpp::Named("swarm") = swarm, + // Rcpp::Named("m_run") = discParticle.m_run, // Rcpp::Named("fi_run") = discParticle.fi_run, // Rcpp::Named("m_gradtraj") = discParticle.m_gradtraj, // Rcpp::Named("fi_gradtraj") = discParticle.fi_gradtraj, diff --git a/src/particle.cpp b/src/particle.cpp index 98fa1ce..f7fa980 100644 --- a/src/particle.cpp +++ b/src/particle.cpp @@ -36,7 +36,7 @@ void Particle::performGD(bool report_progress) { } } // Catch and Cap Extreme Costs - if (cost[0] > OVERFLO_DOUBLE) { + if (cost[0] > OVERFLO_DOUBLE | isnan(cost[0])) { cost[0] = OVERFLO_DOUBLE; } @@ -48,7 +48,7 @@ void Particle::performGD(bool report_progress) { // report progress if (report_progress) { if (((step+1) % 100)==0) { - print(" step",step+1); + print(" DISC step",step+1); } } @@ -149,7 +149,7 @@ void Particle::performGD(bool report_progress) { } } // Catch and Cap Extreme Costs - if (cost[step] > OVERFLO_DOUBLE) { + if (cost[step] > OVERFLO_DOUBLE | isnan(cost[step])) { cost[step] = OVERFLO_DOUBLE; } } // end steps diff --git a/src/particle.h b/src/particle.h index 7d48af7..e2ae4ca 100644 --- a/src/particle.h +++ b/src/particle.h @@ -41,7 +41,9 @@ class Particle { std::vector m1t_fi_hat; std::vector v2t_fi_hat; // for pso model - std::vector particle_p_best; + std::vector particle_pcurr; + std::vector particle_pbest; + std::vector particle_velocity; // PUBLIC FUNCTIONS // constructors diff --git a/tests/testthat/test-psomodelruns.R b/tests/testthat/test-psomodelruns.R index e047133..d812a10 100644 --- a/tests/testthat/test-psomodelruns.R +++ b/tests/testthat/test-psomodelruns.R @@ -22,7 +22,7 @@ test_that("PSO modelruns", { b2 = 0.999, e = 1e-8, steps = 1e3, - babysteps = 1e1, + searchsteps = 1e1, swarmsize = 5, swarmsteps = 10, normalize_geodist = F, From 436048c68c336021bc36dd1c6296ca442d322962 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sun, 24 Dec 2023 12:49:27 -0500 Subject: [PATCH 05/11] memory efficient particle --- R/main_pso.R | 3 +- src/main_pso.cpp | 132 +++++++++++++++-------------- src/main_vanilla.cpp | 48 ++++------- src/particle.cpp | 2 +- src/particle.h | 5 +- tests/testthat/test-psomodelruns.R | 4 +- 6 files changed, 92 insertions(+), 102 deletions(-) diff --git a/R/main_pso.R b/R/main_pso.R index 0500433..736d0cb 100644 --- a/R/main_pso.R +++ b/R/main_pso.R @@ -237,7 +237,8 @@ deme_inbreeding_spcoef_pso <- function(discdat, fi_2moment = do.call("rbind", output_raw$fi_secondmoment)[thin_its, ], cost = output_raw$cost[thin_its], Final_Fis = expit(output_raw$Final_Fis), - Final_m = output_raw$Final_m + Final_m = output_raw$Final_m, + swarm = output_raw$swarm ) } else { diff --git a/src/main_pso.cpp b/src/main_pso.cpp index fbce41c..7157945 100644 --- a/src/main_pso.cpp +++ b/src/main_pso.cpp @@ -109,13 +109,11 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { swarm[0][i].b2 = b2; swarm[0][i].e = e; swarm[0][i].fvec = fvec; - swarm[0][i].gendist_arr = gendist_arr; - swarm[0][i].geodist_mat = geodist_mat; // storage and ADAM items swarm[0][i].cost = vector(searchsteps); - swarm[0][i].m_run = vector(searchsteps);; - swarm[0][i].fi_run = vector>(searchsteps, vector(n_Demes));; + swarm[0][i].m_run = vector(searchsteps); + swarm[0][i].fi_run = vector>(searchsteps, vector(n_Demes)); swarm[0][i].m_gradtraj = vector(searchsteps); // m gradient storage; swarm[0][i].fi_gradtraj = vector>(searchsteps, vector(n_Demes)); // fi storage gradient; swarm[0][i].m1t_m = vector(searchsteps); // first m moment; @@ -127,7 +125,7 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { swarm[0][i].m1t_fi_hat = vector(n_Demes); // first moment bias corrected; swarm[0][i].v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; // run GD - swarm[0][i].performGD(false); + swarm[0][i].performGD(false, gendist_arr, geodist_mat); // store current position, personal best, and velocity vector init_p0(4); vector init_velocity0(4); @@ -184,25 +182,23 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { swarm[t][i].b2 = b2; swarm[t][i].e = e; swarm[t][i].fvec = fvec; - swarm[t][i].gendist_arr = gendist_arr; - swarm[t][i].geodist_mat = geodist_mat; // storage and ADAM items swarm[t][i].cost = vector(searchsteps); - swarm[t][i].m_run = vector(searchsteps);; - swarm[t][i].fi_run = vector>(searchsteps, vector(n_Demes));; + swarm[t][i].m_run = vector(searchsteps); + swarm[t][i].fi_run = vector>(searchsteps, vector(n_Demes)); swarm[t][i].m_gradtraj = vector(searchsteps); // m gradient storage; swarm[t][i].fi_gradtraj = vector>(searchsteps, vector(n_Demes)); // fi storage gradient; swarm[t][i].m1t_m = vector(searchsteps); // first m moment; swarm[t][i].v2t_m = vector(searchsteps); // second m moment (v); swarm[t][i].m1t_m_hat = double(); - swarm[t][i].v2t_m_hat = double(); + swarm[t][i].v2t_m_hat = double(); swarm[t][i].m1t_fi = vector>(searchsteps, vector(n_Demes)); // first fi moment; swarm[t][i].v2t_fi = vector>(searchsteps, vector(n_Demes)); // second fi moment (v); swarm[t][i].m1t_fi_hat = vector(n_Demes); // first moment bias corrected; swarm[t][i].v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; // run GD - swarm[t][i].performGD(false); + swarm[t][i].performGD(false, gendist_arr, geodist_mat); // update particle best and global best if (swarm[t][i].cost[searchsteps-1] < swarm[t-1][i].cost[searchsteps-1]) { @@ -228,10 +224,10 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { //------------------------------- // SECTION 3: Run Long Chain of Grad Descent based on global best start parameters //------------------------------- - // initialize and fill in params vector fvecfinal(n_Demes); fill(fvecfinal.begin(), fvecfinal.end(), g_best_swarm_pos[0]); Particle discParticle; + discParticle.OVERFLO_DOUBLE = OVERFLO_DOUBLE; discParticle.steps = steps; discParticle.n_Demes = n_Demes; discParticle.n_Kpairmax = n_Kpairmax; @@ -244,59 +240,69 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { discParticle.b2 = b2; discParticle.e = e; discParticle.fvec = fvecfinal; - discParticle.gendist_arr = gendist_arr; - discParticle.geodist_mat = geodist_mat; + + //------------------------------- // storage and ADAM items - vector cost(steps); - vector m_run(steps); - vector> fi_run(steps, vector(n_Demes)); - vector m_gradtraj(steps); // m gradient storage - vector> fi_gradtraj(steps, vector(n_Demes)); // fi storage gradient - // moments for Adam - // https://arxiv.org/pdf/1412.6980 - vector m1t_m(steps); // first moment - vector v2t_m(steps); // second moment (v) - double m1t_m_hat; // first moment bias corrected - double v2t_m_hat; // second moment (v) bias corrected - vector> m1t_fi(steps, vector(n_Demes)); // first moment - vector> v2t_fi(steps, vector(n_Demes)); // second moment (v) - vector m1t_fi_hat(n_Demes); // first moment bias corrected - vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected - // fill in for class - discParticle.cost = cost; - discParticle.m_run = m_run; - discParticle.fi_run = fi_run; - discParticle.m_gradtraj = m_gradtraj; - discParticle.fi_gradtraj = fi_gradtraj; - discParticle.m1t_m = m1t_m; - discParticle.v2t_m = v2t_m; - discParticle.m1t_m_hat = m1t_m_hat; - discParticle.v2t_m_hat = v2t_m_hat; - discParticle.m1t_fi = m1t_fi; - discParticle.v2t_fi = v2t_fi; - discParticle.m1t_fi_hat = m1t_fi_hat; - discParticle.v2t_fi_hat = v2t_fi_hat; + //------------------------------- + discParticle.cost = vector(steps); + discParticle.m_run = vector(steps); + discParticle.fi_run = vector>(steps, vector(n_Demes)); + discParticle.m_gradtraj = vector(steps); // m gradient storage; + discParticle.fi_gradtraj = vector>(steps, vector(n_Demes)); // fi storage gradient; + discParticle.m1t_m = vector(steps); // first m moment; + discParticle.v2t_m = vector(steps); // second m moment (v); + discParticle.m1t_m_hat = double(); + discParticle.v2t_m_hat = double(); + discParticle.m1t_fi = vector>(steps, vector(n_Demes)); // first fi moment; + discParticle.v2t_fi = vector>(steps, vector(n_Demes)); // second fi moment (v); + discParticle.m1t_fi_hat = vector(n_Demes); // first moment bias corrected; + discParticle.v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; + + + // run GD - discParticle.performGD(report_progress); + discParticle.performGD(report_progress, gendist_arr, geodist_mat); + + //------------------------------- + // Out: return as Rcpp object + //------------------------------- + if (return_verbose) { + vector>> swarmfill(swarmsteps, vector>(swarmsize, vector(5))); + for (int t = 1; t < swarmsteps; t++) { + for (int i = 1; i < swarmsize; i++) { + for (int d = 0; d < 4; d++) { + swarmfill[t][i][d] = swarm[t][i].particle_pcurr[d]; + } + swarmfill[t][i][4] = swarm[t][i].cost[searchsteps-1]; + } + } + // return with swarm details + return Rcpp::List::create(Rcpp::Named("swarm") = swarmfill, + Rcpp::Named("m_run") = discParticle.m_run, + Rcpp::Named("fi_run") = discParticle.fi_run, + Rcpp::Named("m_gradtraj") = discParticle.m_gradtraj, + Rcpp::Named("fi_gradtraj") = discParticle.fi_gradtraj, + Rcpp::Named("m_firstmoment") = discParticle.m1t_m, + Rcpp::Named("m_secondmoment") = discParticle.v2t_m, + Rcpp::Named("fi_firstmoment") = discParticle.m1t_fi, + Rcpp::Named("fi_secondmoment") = discParticle.v2t_fi, + Rcpp::Named("cost") = discParticle.cost, + Rcpp::Named("Final_Fis") = discParticle.fvec, + Rcpp::Named("Final_m") = discParticle.m); + } else { - cout << "made it"; - return(0); + return Rcpp::List::create(Rcpp::Named("m_run") = discParticle.m_run, + Rcpp::Named("fi_run") = discParticle.fi_run, + Rcpp::Named("m_gradtraj") = discParticle.m_gradtraj, + Rcpp::Named("fi_gradtraj") = discParticle.fi_gradtraj, + Rcpp::Named("m_firstmoment") = discParticle.m1t_m, + Rcpp::Named("m_secondmoment") = discParticle.v2t_m, + Rcpp::Named("fi_firstmoment") = discParticle.m1t_fi, + Rcpp::Named("fi_secondmoment") = discParticle.v2t_fi, + Rcpp::Named("cost") = discParticle.cost, + Rcpp::Named("Final_Fis") = discParticle.fvec, + Rcpp::Named("Final_m") = discParticle.m); } - // //------------------------------- - // // Out: return as Rcpp object - // //------------------------------- - // return Rcpp::List::create(Rcpp::Named("swarm") = swarm, - // Rcpp::Named("m_run") = discParticle.m_run, - // Rcpp::Named("fi_run") = discParticle.fi_run, - // Rcpp::Named("m_gradtraj") = discParticle.m_gradtraj, - // Rcpp::Named("fi_gradtraj") = discParticle.fi_gradtraj, - // Rcpp::Named("m_firstmoment") = discParticle.m1t_m, - // Rcpp::Named("m_secondmoment") = discParticle.v2t_m, - // Rcpp::Named("fi_firstmoment") = discParticle.m1t_fi, - // Rcpp::Named("fi_secondmoment") = discParticle.v2t_fi, - // Rcpp::Named("cost") = discParticle.cost, - // Rcpp::Named("Final_Fis") = discParticle.fvec, - // Rcpp::Named("Final_m") = discParticle.m); - // - //} + + } diff --git a/src/main_vanilla.cpp b/src/main_vanilla.cpp index 8e66599..e1a0854 100644 --- a/src/main_vanilla.cpp +++ b/src/main_vanilla.cpp @@ -76,44 +76,28 @@ Rcpp::List vanilla_deme_inbreeding_coef_cpp(Rcpp::List args) { discParticle.e = e; discParticle.m = m; discParticle.fvec = fvec; - discParticle.gendist_arr = gendist_arr; - discParticle.geodist_mat = geodist_mat; + //------------------------------- // storage and ADAM items //------------------------------- - vector cost(steps); - vector m_run(steps); - vector> fi_run(steps, vector(n_Demes)); - vector m_gradtraj(steps); // m gradient storage - vector> fi_gradtraj(steps, vector(n_Demes)); // fi storage gradient - // moments for Adam - // https://arxiv.org/pdf/1412.6980 - vector m1t_m(steps); // first moment - vector v2t_m(steps); // second moment (v) - double m1t_m_hat; // first moment bias corrected - double v2t_m_hat; // second moment (v) bias corrected - vector> m1t_fi(steps, vector(n_Demes)); // first moment - vector> v2t_fi(steps, vector(n_Demes)); // second moment (v) - vector m1t_fi_hat(n_Demes); // first moment bias corrected - vector v2t_fi_hat(n_Demes); // second moment (v) bias corrected - // fill in for class - discParticle.cost = cost; - discParticle.m_run = m_run; - discParticle.fi_run = fi_run; - discParticle.m_gradtraj = m_gradtraj; - discParticle.fi_gradtraj = fi_gradtraj; - discParticle.m1t_m = m1t_m; - discParticle.v2t_m = v2t_m; - discParticle.m1t_m_hat = m1t_m_hat; - discParticle.v2t_m_hat = v2t_m_hat; - discParticle.m1t_fi = m1t_fi; - discParticle.v2t_fi = v2t_fi; - discParticle.m1t_fi_hat = m1t_fi_hat; - discParticle.v2t_fi_hat = v2t_fi_hat; + discParticle.cost = vector(steps); + discParticle.m_run = vector(steps); + discParticle.fi_run = vector>(steps, vector(n_Demes)); + discParticle.m_gradtraj = vector(steps); // m gradient storage; + discParticle.fi_gradtraj = vector>(steps, vector(n_Demes)); // fi storage gradient; + discParticle.m1t_m = vector(steps); // first m moment; + discParticle.v2t_m = vector(steps); // second m moment (v); + discParticle.m1t_m_hat = double(); + discParticle.v2t_m_hat = double(); + discParticle.m1t_fi = vector>(steps, vector(n_Demes)); // first fi moment; + discParticle.v2t_fi = vector>(steps, vector(n_Demes)); // second fi moment (v); + discParticle.m1t_fi_hat = vector(n_Demes); // first moment bias corrected; + discParticle.v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; + //------------------------------- // run GD //------------------------------- - discParticle.performGD(report_progress); + discParticle.performGD(report_progress, gendist_arr, geodist_mat); //------------------------------- // Out: return as Rcpp object diff --git a/src/particle.cpp b/src/particle.cpp index f7fa980..77b7b0f 100644 --- a/src/particle.cpp +++ b/src/particle.cpp @@ -6,7 +6,7 @@ using namespace std; //------------------------------------------------ // run ADAM gradient descent host -void Particle::performGD(bool report_progress) { +void Particle::performGD(bool report_progress, vector>> &gendist_arr, vector> &geodist_mat) { //------------------------------- // initialize storage vectors diff --git a/src/particle.h b/src/particle.h index e2ae4ca..36e26f6 100644 --- a/src/particle.h +++ b/src/particle.h @@ -24,8 +24,7 @@ class Particle { int n_Kpairmax; double m; std::vector fvec; - std::vector>> gendist_arr; - std::vector> geodist_mat; + // storage std::vector cost; std::vector m_run; @@ -49,6 +48,6 @@ class Particle { // constructors Particle() {}; // member functions - void performGD(bool report_progress); + void performGD(bool report_progress, std::vector>> &gendist_arr, std::vector> &geodist_mat); void print_particle(); }; diff --git a/tests/testthat/test-psomodelruns.R b/tests/testthat/test-psomodelruns.R index d812a10..e4b4854 100644 --- a/tests/testthat/test-psomodelruns.R +++ b/tests/testthat/test-psomodelruns.R @@ -13,8 +13,8 @@ test_that("PSO modelruns", { fi_upperbound = 0.3, flearn_lowerbound = 1e-10, flearn_upperbound = 1e-2, - mlearn_lowerbound = 1e-15, - mlearn_upperbound = 1e-8, + mlearn_lowerbound = 1e-5, + mlearn_upperbound = 1e-1, c1 = 0.1, c2 = 0.1, w = 0.25, From 099130222649b4a4cc3744917e48014a6f5bc972 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sat, 11 May 2024 13:50:32 -0400 Subject: [PATCH 06/11] add vanilla to names --- tests/testthat/test-AdamByHand.R | 44 ++++++++++---------- tests/testthat/test-GradByHand.R | 46 ++++++++++----------- tests/testthat/test-Mbound.R | 52 ++++++++++++------------ tests/testthat/test-consistentgradient.R | 8 ++-- tests/testthat/test-negativeFs.R | 22 +++++----- tests/testthat/test-vanillamodelruns.R | 20 ++++----- 6 files changed, 96 insertions(+), 96 deletions(-) diff --git a/tests/testthat/test-AdamByHand.R b/tests/testthat/test-AdamByHand.R index 249aa91..a1232c8 100644 --- a/tests/testthat/test-AdamByHand.R +++ b/tests/testthat/test-AdamByHand.R @@ -24,17 +24,17 @@ test_that("Fi adam by hand", { # now run model inputdisc <- dat %>% dplyr::filter(deme1 != deme2) - ret <- deme_inbreeding_spcoef(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e3, - normalize_geodist = F, - report_progress = T, - return_verbose = T) + ret <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, + start_params = our_start_params, + f_learningrate = 1e-3, + m_learningrate = 1e-5, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + normalize_geodist = F, + report_progress = T, + return_verbose = T) # back out for Fi adam b1 <- 0.9 b2 <- 0.999 @@ -79,17 +79,17 @@ test_that("M adam by hand", { # now run model inputdisc <- dat %>% dplyr::filter(deme1 != deme2) - ret <- deme_inbreeding_spcoef(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e3, - normalize_geodist = F, - report_progress = T, - return_verbose = T) + ret <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, + start_params = our_start_params, + f_learningrate = 1e-3, + m_learningrate = 1e-5, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + normalize_geodist = F, + report_progress = T, + return_verbose = T) # back out for m adam b1 <- 0.9 b2 <- 0.999 diff --git a/tests/testthat/test-GradByHand.R b/tests/testthat/test-GradByHand.R index 019618c..7b75fc9 100644 --- a/tests/testthat/test-GradByHand.R +++ b/tests/testthat/test-GradByHand.R @@ -34,17 +34,17 @@ test_that("Fi gradient by hand", { # now run model inputdisc <- dat %>% dplyr::filter(deme1 != deme2) - ret <- deme_inbreeding_spcoef(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e3, - normalize_geodist = F, - report_progress = T, - return_verbose = T) + ret <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, + start_params = our_start_params, + f_learningrate = 1e-3, + m_learningrate = 1e-5, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + normalize_geodist = F, + report_progress = T, + return_verbose = T) # back out gradient for F1 # NBset to 0 at first iter, so the additional adam term cancels out discF1 <- ret$fi_gradtraj[2,1] @@ -59,7 +59,7 @@ test_that("M gradient by hand", { # sim data data("IBD_simulation_data", package = "discent") dat <- IBD_simulation_data - # start params + # start params our_start_params <- rep(0.2, 3) names(our_start_params) <- 1:3 our_start_params <- c(our_start_params, "m" = 1e3) @@ -92,17 +92,17 @@ test_that("M gradient by hand", { # now run model inputdisc <- dat %>% dplyr::filter(deme1 != deme2) - ret <- deme_inbreeding_spcoef(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e3, - normalize_geodist = F, - report_progress = T, - return_verbose = T) + ret <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, + start_params = our_start_params, + f_learningrate = 1e-3, + m_learningrate = 1e-5, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + normalize_geodist = F, + report_progress = T, + return_verbose = T) # back out gradient for M # NB set to 0 at first iter, so the additional adam term cancels out discM <- ret$m_gradtraj[2] diff --git a/tests/testthat/test-Mbound.R b/tests/testthat/test-Mbound.R index 26031a3..0713122 100644 --- a/tests/testthat/test-Mbound.R +++ b/tests/testthat/test-Mbound.R @@ -9,38 +9,38 @@ test_that("M is properly bounded", { # run model w/ reasonable bounds inputdisc <- dat %>% dplyr::filter(deme1 != deme2) - mod1 <- deme_inbreeding_spcoef(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - m_lowerbound = 999, - m_upperbound = 1001, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e2, - report_progress = TRUE, - normalize_geodist = FALSE, - return_verbose = TRUE) + mod1 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, + start_params = our_start_params, + f_learningrate = 1e-3, + m_learningrate = 1e-5, + m_lowerbound = 999, + m_upperbound = 1001, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e2, + report_progress = TRUE, + normalize_geodist = FALSE, + return_verbose = TRUE) testthat::expect_gte(min(mod1$m_run), 999) testthat::expect_lte(max(mod1$m_run), 1001) #...................... # now show that M won't move if bounded completely #...................... - mod2 <- deme_inbreeding_spcoef(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - m_lowerbound = 99, - m_upperbound = 1e5, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e2, - normalize_geodist = FALSE, - report_progress = TRUE, - return_verbose = TRUE) + mod2 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, + start_params = our_start_params, + f_learningrate = 1e-3, + m_learningrate = 1e-5, + m_lowerbound = 99, + m_upperbound = 1e5, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e2, + normalize_geodist = FALSE, + report_progress = TRUE, + return_verbose = TRUE) testthat::expect_gte(min(mod2$m_run[2:length(mod2$m_run)]), 99) # offset for init param testthat::expect_lte(max(mod2$m_run), 1e5) }) diff --git a/tests/testthat/test-consistentgradient.R b/tests/testthat/test-consistentgradient.R index e938ed5..3dcdecf 100644 --- a/tests/testthat/test-consistentgradient.R +++ b/tests/testthat/test-consistentgradient.R @@ -10,7 +10,7 @@ test_that("model has deterministic results from same start", { # given same start parameters and data and iters # model should always follow the same gradient/trajectory - mod1 <- deme_inbreeding_spcoef(discdat = inputdisc, + mod1 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, start_params = our_start_params, f_learningrate = 1e-5, m_learningrate = 1e-1, @@ -22,7 +22,7 @@ test_that("model has deterministic results from same start", { report_progress = F, return_verbose = F) - mod2 <- deme_inbreeding_spcoef(discdat = inputdisc, + mod2 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, start_params = our_start_params, f_learningrate = 1e-5, m_learningrate = 1e-1, @@ -34,7 +34,7 @@ test_that("model has deterministic results from same start", { report_progress = F, return_verbose = F) - mod3 <- deme_inbreeding_spcoef(discdat = inputdisc, + mod3 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, start_params = our_start_params, f_learningrate = 1e-5, m_learningrate = 1e-1, @@ -46,7 +46,7 @@ test_that("model has deterministic results from same start", { report_progress = F, return_verbose = F) - mod4 <- deme_inbreeding_spcoef(discdat = inputdisc, + mod4 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, start_params = our_start_params, f_learningrate = 1e-5, m_learningrate = 1e-1, diff --git a/tests/testthat/test-negativeFs.R b/tests/testthat/test-negativeFs.R index cbd9d20..6f8b420 100644 --- a/tests/testthat/test-negativeFs.R +++ b/tests/testthat/test-negativeFs.R @@ -13,17 +13,17 @@ test_that("No More negative Fs with logit", { our_start_params <- rep(0.2, 3) names(our_start_params) <- 1:3 our_start_params <- c(our_start_params, "m" = 1e3) - ret <- deme_inbreeding_spcoef(discdat = input, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e4, - normalize_geodist = F, - report_progress = T, - return_verbose = T) + ret <- deme_inbreeding_spcoef_vanilla(discdat = input, + start_params = our_start_params, + f_learningrate = 1e-3, + m_learningrate = 1e-5, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e4, + normalize_geodist = F, + report_progress = T, + return_verbose = T) # Fis all positive testthat::expect_gte(min(ret$Final_Fis), 0) diff --git a/tests/testthat/test-vanillamodelruns.R b/tests/testthat/test-vanillamodelruns.R index 15cf274..6340af7 100644 --- a/tests/testthat/test-vanillamodelruns.R +++ b/tests/testthat/test-vanillamodelruns.R @@ -11,16 +11,16 @@ test_that("Vanilla model runs", { inputdisc <- dat %>% dplyr::filter(deme1 != deme2) mod <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-5, - m_learningrate = 1e-1, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e3, - normalize_geodist = T, - report_progress = F, - return_verbose = F) + start_params = our_start_params, + f_learningrate = 1e-5, + m_learningrate = 1e-1, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + normalize_geodist = T, + report_progress = F, + return_verbose = F) testthat::expect_length(mod, 6) }) From 937a7f2b245ba5b155633fa8b1853f91e667d311 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sat, 11 May 2024 16:45:27 -0400 Subject: [PATCH 07/11] tidy up raw data --- data-raw/IBD_simulation_data.R | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/data-raw/IBD_simulation_data.R b/data-raw/IBD_simulation_data.R index 4b779b0..af2cca7 100644 --- a/data-raw/IBD_simulation_data.R +++ b/data-raw/IBD_simulation_data.R @@ -49,30 +49,6 @@ rnorm_interval <- function(mean, sd, a=0, b=1) { #'@export sim_IBDIBD <- function(demesize, distmat, rate, Ft) { - # #...................... - # # assertions - # #...................... - # assert_numeric(demesize) - # assert_numeric(distmat) - # assert_numeric(rate) - # assert_numeric(Ft) - # assert_single_bounded(Ft, left = 0, right = 1) - # if (inherits(distmat, "dist")) { - # assert_eq(length(demesize), nrow(as.matrix(distmat)), - # message = "Distance matrix must contain a row and column - # for each deme") - # } else { - # assert_eq(length(demesize), nrow(distmat), - # message = "Distance matrix must contain a row and column - # for each deme") - # assert_eq(nrow(distmat), ncol(distmat), - # message = "If not a distance matrix (class: 'dist'), then - # distance matrix must be square") - # assertthat::are_equal(distmat[ lower.tri(distmat) ], distmat[ upper.tri(distmat) ], - # message = "If not a distance matrix (class: 'dist'), then - # must be a symmetric matrix") - # } - #...................... # simulation From 3892cf768c0d33c5f8e354b36518163386586dc5 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sat, 11 May 2024 16:45:36 -0400 Subject: [PATCH 08/11] update docs --- man/deme_inbreeding_spcoef_pso.Rd | 19 ++++++++++++------- man/deme_inbreeding_spcoef_vanilla.Rd | 2 +- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/man/deme_inbreeding_spcoef_pso.Rd b/man/deme_inbreeding_spcoef_pso.Rd index 920f18b..c386be9 100644 --- a/man/deme_inbreeding_spcoef_pso.Rd +++ b/man/deme_inbreeding_spcoef_pso.Rd @@ -7,9 +7,9 @@ Particle Swarm Meta-Optimization} \usage{ deme_inbreeding_spcoef_pso( discdat, - m_lowerbound = 0, + m_lowerbound = 1e-10, m_upperbound = Inf, - fi_lowerbound = 0, + fi_lowerbound = 0.001, fi_upperbound = 0.3, flearn_lowerbound = 1e-10, flearn_upperbound = 0.01, @@ -22,6 +22,9 @@ deme_inbreeding_spcoef_pso( b2 = 0.999, e = 1e-08, steps = 1000, + searchsteps = 100, + swarmsteps = 50, + swarmsize = 25, thin = 1, normalize_geodist = TRUE, report_progress = TRUE, @@ -61,9 +64,15 @@ deme_inbreeding_spcoef_pso( \item{steps}{integer; the number of "steps" as we move down the gradient} +\item{searchsteps}{} + +\item{swarmsteps}{} + +\item{swarmsize}{} + \item{thin}{integer; the number of "steps" to keep as part of the output (i.e. if the user specifies 10, every 10th iteration will be kept)} -\item{normalize_geodist}{boolean; whether geographic distances between demes should be normalized (i.e. rescaled to \code{[0-1]}). Helps increase model stability at the expense of complicating the interpretation of the migration rate parameter.} +\item{normalize_geodist}{boolean; whether geographic distances between demes should be normalized (i.e. Min-Max Feature Scaling: \eqn{X' = \frac{X - X_{min}}{X_{max} - X_{min}} }, which places the geodistances on the scale to \eqn{[0-1]}). Helps increase model stability at the expense of complicating the interpretation of the migration rate parameter.} \item{report_progress}{boolean; whether or not a progress bar should be shown as you iterate through steps} @@ -81,10 +90,6 @@ Sample 1 Name; Sample 2 Name; Sample 1 Location; Sample 2 Location; Pairwise Genetic Distance; Pairwise Geographpic Distance. Note, the order of the columns do not matter but the names of the columns must match. -The start_params vector names must match the cluster names (i.e. clusters must be -have a name that we can match on for the starting relatedness paramerts). In addition, -you must provide a start parameter for "m". - We have implemented coding decisions to not allow the "f" inbreeding coefficients to be negative by using a logit transformation internally in the code. diff --git a/man/deme_inbreeding_spcoef_vanilla.Rd b/man/deme_inbreeding_spcoef_vanilla.Rd index f8f33d0..1eb0f74 100644 --- a/man/deme_inbreeding_spcoef_vanilla.Rd +++ b/man/deme_inbreeding_spcoef_vanilla.Rd @@ -44,7 +44,7 @@ deme_inbreeding_spcoef_vanilla( \item{thin}{integer; the number of "steps" to keep as part of the output (i.e. if the user specifies 10, every 10th iteration will be kept)} -\item{normalize_geodist}{boolean; whether geographic distances between demes should be normalized (i.e. rescaled to \code{[0-1]}). Helps increase model stability at the expense of complicating the interpretation of the migration rate parameter.} +\item{normalize_geodist}{boolean; whether geographic distances between demes should be normalized (i.e. Min-Max Feature Scaling: \eqn{X' = \frac{X - X_{min}}{X_{max} - X_{min}} }, which places the geodistances on the scale to \eqn{[0-1]}). Helps increase model stability at the expense of complicating the interpretation of the migration rate parameter.} \item{report_progress}{boolean; whether or not a progress bar should be shown as you iterate through steps} From 4fc6f1ca273165843dcd40755e03298f34107ef6 Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sat, 11 May 2024 16:45:46 -0400 Subject: [PATCH 09/11] tidy up tests --- tests/testthat/test-Mbound.R | 22 +------ tests/testthat/test-consistentgradient.R | 80 ++++++++++++------------ 2 files changed, 42 insertions(+), 60 deletions(-) diff --git a/tests/testthat/test-Mbound.R b/tests/testthat/test-Mbound.R index 0713122..a140b80 100644 --- a/tests/testthat/test-Mbound.R +++ b/tests/testthat/test-Mbound.R @@ -6,13 +6,13 @@ test_that("M is properly bounded", { our_start_params <- rep(0.2, 3) names(our_start_params) <- 1:3 our_start_params <- c(our_start_params, "m" = 1e3) - # run model w/ reasonable bounds + # run model w/ EXTREMELY TIGHT bounds inputdisc <- dat %>% dplyr::filter(deme1 != deme2) mod1 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, start_params = our_start_params, f_learningrate = 1e-3, - m_learningrate = 1e-5, + m_learningrate = 1e-1, m_lowerbound = 999, m_upperbound = 1001, b1 = 0.9, @@ -25,22 +25,4 @@ test_that("M is properly bounded", { testthat::expect_gte(min(mod1$m_run), 999) testthat::expect_lte(max(mod1$m_run), 1001) - #...................... - # now show that M won't move if bounded completely - #...................... - mod2 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-3, - m_learningrate = 1e-5, - m_lowerbound = 99, - m_upperbound = 1e5, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e2, - normalize_geodist = FALSE, - report_progress = TRUE, - return_verbose = TRUE) - testthat::expect_gte(min(mod2$m_run[2:length(mod2$m_run)]), 99) # offset for init param - testthat::expect_lte(max(mod2$m_run), 1e5) }) diff --git a/tests/testthat/test-consistentgradient.R b/tests/testthat/test-consistentgradient.R index 3dcdecf..5b0f1d7 100644 --- a/tests/testthat/test-consistentgradient.R +++ b/tests/testthat/test-consistentgradient.R @@ -11,52 +11,52 @@ test_that("model has deterministic results from same start", { # given same start parameters and data and iters # model should always follow the same gradient/trajectory mod1 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-5, - m_learningrate = 1e-1, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e4, - normalize_geodist = F, - report_progress = F, - return_verbose = F) + start_params = our_start_params, + f_learningrate = 1e-5, + m_learningrate = 1e-1, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e4, + normalize_geodist = F, + report_progress = F, + return_verbose = F) mod2 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-5, - m_learningrate = 1e-1, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e4, - normalize_geodist = F, - report_progress = F, - return_verbose = F) + start_params = our_start_params, + f_learningrate = 1e-5, + m_learningrate = 1e-1, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e4, + normalize_geodist = F, + report_progress = F, + return_verbose = F) mod3 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-5, - m_learningrate = 1e-1, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 1e4, - normalize_geodist = F, - report_progress = F, - return_verbose = F) + start_params = our_start_params, + f_learningrate = 1e-5, + m_learningrate = 1e-1, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e4, + normalize_geodist = F, + report_progress = F, + return_verbose = F) mod4 <- deme_inbreeding_spcoef_vanilla(discdat = inputdisc, - start_params = our_start_params, - f_learningrate = 1e-5, - m_learningrate = 1e-1, - b1 = 0.9, - b2 = 0.999, - e = 1e-8, - steps = 5e4, # different number of steps!! - normalize_geodist = F, - report_progress = F, - return_verbose = F) + start_params = our_start_params, + f_learningrate = 1e-5, + m_learningrate = 1e-1, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 5e4, # different number of steps!! + normalize_geodist = F, + report_progress = F, + return_verbose = F) testthat::expect_equal(mod1,mod2) testthat::expect_equal(mod1,mod3) From a96e2dae5a46f187810eacb7dcf3287e9448761b Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sat, 11 May 2024 16:46:11 -0400 Subject: [PATCH 10/11] add PSO extension and test/docs --- R/main_pso.R | 23 +++++++++------- src/main_pso.cpp | 27 ++++++++++--------- tests/testthat/test-psobounds.R | 45 +++++++++++++++++++++++++++++++ vignettes/psoextension.Rmd | 28 +++++++++++++++++++ vignettes/runmod.Rmd | 22 --------------- vignettes/runvanillamod.Rmd | 48 +++++++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+), 45 deletions(-) create mode 100644 tests/testthat/test-psobounds.R create mode 100644 vignettes/psoextension.Rmd delete mode 100644 vignettes/runmod.Rmd create mode 100644 vignettes/runvanillamod.Rmd diff --git a/R/main_pso.R b/R/main_pso.R index 736d0cb..b2a5434 100644 --- a/R/main_pso.R +++ b/R/main_pso.R @@ -13,6 +13,8 @@ #' @param swarmsize #' @param swarmsteps #' @param searchsteps +#' @param report_sd_progress boolean; search chain +#' @param report_fd_progress boolean; final chain #' @inherit description deme_inbreeding_spcoef_vanilla #' @details The gen.geo.dist dataframe must be named with the following columns: #' "smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to: @@ -49,7 +51,8 @@ deme_inbreeding_spcoef_pso <- function(discdat, swarmsize = 25, thin = 1, normalize_geodist = TRUE, - report_progress = TRUE, + report_sd_progress = TRUE, + report_fd_progress = TRUE, return_verbose = FALSE){ #.............................................................. @@ -87,7 +90,8 @@ deme_inbreeding_spcoef_pso <- function(discdat, assert_single_int(steps) assert_single_int(thin) assert_greq(thin, 1, message = "Must be at least 1") - assert_single_logical(report_progress) + ssert_single_logical(report_sd_progress) + assert_single_logical(report_fd_progress) assert_single_logical(normalize_geodist) # no missing @@ -95,6 +99,11 @@ deme_inbreeding_spcoef_pso <- function(discdat, stop("discdat dataframe cannot have missing values") } + # catch accidental bad F and M bound + if ( any(round(c(fi_lowerbound, m_lowerbound), digits = 1e200) == 0) ) { + warning("The Fi or M lower-bound is zero (or essentially zero), which will result in unstable behavior in the Gradient-Descent algorithm. Consider increasing the lower-bound limit") + } + #...................... # check for self comparisons #...................... @@ -150,13 +159,6 @@ deme_inbreeding_spcoef_pso <- function(discdat, discdat <- discdat %>% dplyr::mutate(geodist = (geodist - mingeodist)/(maxgeodist - mingeodist)) } - # catch accidental bad M start if user is standardizing distances - if (normalize_geodist & (m_upperbound > 500) ) { - warning("You have selected to normalize geographic distances, but your - migration rate upper bound parameter is large. Please consider placing it on a - similar scale to your normalized geographic distances for stability.") - } - # put geo information into distance matrix geodist <- discdat %>% @@ -210,7 +212,8 @@ deme_inbreeding_spcoef_pso <- function(discdat, swarmsize = swarmsize, searchsteps = searchsteps, steps = steps, - report_progress = report_progress, + report_sd_progress = report_sd_progress, + report_fd_progress = report_fd_progress, return_verbose = return_verbose ) diff --git a/src/main_pso.cpp b/src/main_pso.cpp index 7157945..38f9460 100644 --- a/src/main_pso.cpp +++ b/src/main_pso.cpp @@ -54,7 +54,8 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { double b1 = rcpp_to_double(args["b1"]); double b2 = rcpp_to_double(args["b2"]); double e = rcpp_to_double(args["e"]); - bool report_progress = rcpp_to_bool(args["report_progress"]); + bool report_sd_progress = rcpp_to_bool(args["report_sd_progress"]); + bool report_fd_progress = rcpp_to_bool(args["report_fd_progress"]); bool return_verbose = rcpp_to_bool(args["return_verbose"]); // items for particle swarm int swarmsize = rcpp_to_int(args["swarmsize"]); @@ -96,19 +97,19 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { vector fvec(n_Demes); double ffill = runif1(fi_lowerbound, fi_upperbound); // rand fi start param fill(fvec.begin(), fvec.end(), ffill); + swarm[0][i].fvec = fvec; + swarm[0][i].m = runif1(m_lowerbound, m_upperbound); + swarm[0][i].f_learningrate = runif1(flearn_lowerbound, flearn_upperbound); + swarm[0][i].m_learningrate = runif1(mlearn_lowerbound, mlearn_upperbound); swarm[0][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE; swarm[0][i].steps = searchsteps; swarm[0][i].n_Demes = n_Demes; swarm[0][i].n_Kpairmax = n_Kpairmax; - swarm[0][i].m = runif1(m_lowerbound, m_upperbound); - swarm[0][i].f_learningrate = runif1(flearn_lowerbound, flearn_upperbound); - swarm[0][i].m_learningrate = runif1(mlearn_lowerbound, mlearn_upperbound); swarm[0][i].m_lowerbound = m_lowerbound; swarm[0][i].m_upperbound = m_upperbound; swarm[0][i].b1 = b1; swarm[0][i].b2 = b2; swarm[0][i].e = e; - swarm[0][i].fvec = fvec; // storage and ADAM items swarm[0][i].cost = vector(searchsteps); @@ -169,19 +170,19 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { //------------------------- vector fvec(n_Demes); fill(fvec.begin(), fvec.end(), swarm[t][i].particle_pcurr[0]); + swarm[t][i].fvec = fvec; + swarm[t][i].m = swarm[t][i].particle_pcurr[1]; + swarm[t][i].f_learningrate = swarm[t][i].particle_pcurr[2]; + swarm[t][i].m_learningrate = swarm[t][i].particle_pcurr[3]; swarm[t][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE; swarm[t][i].steps = searchsteps; swarm[t][i].n_Demes = n_Demes; swarm[t][i].n_Kpairmax = n_Kpairmax; - swarm[t][i].m = swarm[t][i].particle_pcurr[1]; - swarm[t][i].f_learningrate = swarm[t][i].particle_pcurr[2]; - swarm[t][i].m_learningrate = swarm[t][i].particle_pcurr[3]; swarm[t][i].m_lowerbound = m_lowerbound; swarm[t][i].m_upperbound = m_upperbound; swarm[t][i].b1 = b1; swarm[t][i].b2 = b2; swarm[t][i].e = e; - swarm[t][i].fvec = fvec; // storage and ADAM items swarm[t][i].cost = vector(searchsteps); @@ -198,7 +199,7 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { swarm[t][i].m1t_fi_hat = vector(n_Demes); // first moment bias corrected; swarm[t][i].v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; // run GD - swarm[t][i].performGD(false, gendist_arr, geodist_mat); + swarm[t][i].performGD(report_sd_progress, gendist_arr, geodist_mat); // update particle best and global best if (swarm[t][i].cost[searchsteps-1] < swarm[t-1][i].cost[searchsteps-1]) { @@ -261,15 +262,15 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { // run GD - discParticle.performGD(report_progress, gendist_arr, geodist_mat); + discParticle.performGD(report_fd_progress, gendist_arr, geodist_mat); //------------------------------- // Out: return as Rcpp object //------------------------------- if (return_verbose) { vector>> swarmfill(swarmsteps, vector>(swarmsize, vector(5))); - for (int t = 1; t < swarmsteps; t++) { - for (int i = 1; i < swarmsize; i++) { + for (int t = 0; t < swarmsteps; t++) { + for (int i = 0; i < swarmsize; i++) { for (int d = 0; d < 4; d++) { swarmfill[t][i][d] = swarm[t][i].particle_pcurr[d]; } diff --git a/tests/testthat/test-psobounds.R b/tests/testthat/test-psobounds.R new file mode 100644 index 0000000..ad82cd2 --- /dev/null +++ b/tests/testthat/test-psobounds.R @@ -0,0 +1,45 @@ +test_that("PSO respects bounds", { + + # sim data + data("IBD_simulation_data", package = "discent") + dat <- IBD_simulation_data + # run model + inputdisc <- dat %>% + dplyr::filter(deme1 != deme2) + mod <- deme_inbreeding_spcoef_pso(discdat = inputdisc, + m_lowerbound = 100, + m_upperbound = 101, + fi_lowerbound= 0.1, + fi_upperbound = 0.11, + flearn_lowerbound = 1e-3, + flearn_upperbound = 1e-1, + mlearn_lowerbound = 1e-3, + mlearn_upperbound = 1e-1, + c1 = 0.1, + c2 = 0.1, + w = 0.25, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + searchsteps = 1e2, + swarmsize = 5, + swarmsteps = 10, + normalize_geodist = F, + report_progress = F, + return_verbose = F) + #...................... + # check that bounds are respect + #...................... + testthat::expect_gte(min(mod$m_run), 100) + testthat::expect_lte(max(mod$m_run), 101) + testthat::expect_gte(min(mod$fi_run[,1]), 0.1) + testthat::expect_lte(max(mod$fi_run[,1]), 0.11) + testthat::expect_gte(min(mod$fi_run[,2]), 0.1) + testthat::expect_lte(max(mod$fi_run[,2]), 0.11) + testthat::expect_gte(min(mod$fi_run[,3]), 0.1) + testthat::expect_lte(max(mod$fi_run[,3]), 0.11) + + +}) + diff --git a/vignettes/psoextension.Rmd b/vignettes/psoextension.Rmd new file mode 100644 index 0000000..b367028 --- /dev/null +++ b/vignettes/psoextension.Rmd @@ -0,0 +1,28 @@ +--- +title: "Running the `DISCent` Model with Particle Swarm Optimizer Metaheuristic Search" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{psoextension} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(discent) +``` + + +# Overview +- PSO is being used to find best start parameters +- Swarm made up of particles +- Swarm object + - swarm steps = time limit of search/swarm moves + - swarm size = number of particles + - for each particle, there is a grad descent step considered diff --git a/vignettes/runmod.Rmd b/vignettes/runmod.Rmd deleted file mode 100644 index ca73fa8..0000000 --- a/vignettes/runmod.Rmd +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: "Running the Model" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Running the Model} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -library(discent) -``` - -# Simulating Data for the Model -Here, we will use data that we previously simulated with the [`polySimIBD`](https://github.com/nickbrazeau/polySimIBD) package. diff --git a/vignettes/runvanillamod.Rmd b/vignettes/runvanillamod.Rmd new file mode 100644 index 0000000..e3acf3e --- /dev/null +++ b/vignettes/runvanillamod.Rmd @@ -0,0 +1,48 @@ +--- +title: "Running the Basic Model" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Running the Model} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(discent) +``` + +# *** + +```{r} +data("IBD_simulation_data") + +start_params <- rep(0.3, 3) +names(start_params) <- 1:3 +start_params <- c(start_params, "m"= 250) + +#run +ret <- discent::deme_inbreeding_spcoef_vanilla(discdat = IBD_simulation_data, + start_params = start_params, + f_learningrate = 0.001, + m_learningrate = 1e-6, + m_lowerbound = 0, + m_upperbound = Inf, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e4, + thin = 1, + normalize_geodist = TRUE, + report_progress = TRUE, + return_verbose = TRUE) + +``` From 973ae0dabea6ec112bb962fc2bf37ae5c3e0578d Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sat, 11 May 2024 16:46:17 -0400 Subject: [PATCH 11/11] more docs --- R/main_vanilla.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/main_vanilla.R b/R/main_vanilla.R index b9e48ee..7dd72a5 100644 --- a/R/main_vanilla.R +++ b/R/main_vanilla.R @@ -10,7 +10,7 @@ #' @param thin integer; the number of "steps" to keep as part of the output (i.e. if the user specifies 10, every 10th iteration will be kept) #' @param m_lowerbound double; lower limit value for the global "m" parameter; will use a reflected normal within the gradient descent algorithm to adjust any aberrant values #' @param m_upperbound double; upper limit value for the global "m" parameter; will use a reflected normal within the gradient descent algorithm to adjust any aberrant values -#' @param normalize_geodist boolean; whether geographic distances between demes should be normalized (i.e. rescaled to \code{[0-1]}). Helps increase model stability at the expense of complicating the interpretation of the migration rate parameter. +#' @param normalize_geodist boolean; whether geographic distances between demes should be normalized (i.e. Min-Max Feature Scaling: \eqn{X' = \frac{X - X_{min}}{X_{max} - X_{min}} }, which places the geodistances on the scale to \eqn{[0-1]}). Helps increase model stability at the expense of complicating the interpretation of the migration rate parameter. #' @param report_progress boolean; whether or not a progress bar should be shown as you iterate through steps #' @param return_verbose boolean; whether the inbreeding coefficients and migration rate should be returned for every iteration or #' only for the final iteration. User will typically not want to store every iteration, which can be memory intensive @@ -95,6 +95,11 @@ deme_inbreeding_spcoef_vanilla <- function(discdat, stop("discdat dataframe cannot have missing values") } + # catch accidental bad F and M start + if ( any(round(start_params, digits = 1e200) == 0) ) { + warning("At least one of your start parameters is zero (or essentially zero), which will result in unstable behavior in the Gradient-Descent algorithm. Consider increasing the start parameter.") + } + #...................... # check for self comparisons #......................