From a96e2dae5a46f187810eacb7dcf3287e9448761b Mon Sep 17 00:00:00 2001 From: Nicholas Brazeau Date: Sat, 11 May 2024 16:46:11 -0400 Subject: [PATCH] add PSO extension and test/docs --- R/main_pso.R | 23 +++++++++------- src/main_pso.cpp | 27 ++++++++++--------- tests/testthat/test-psobounds.R | 45 +++++++++++++++++++++++++++++++ vignettes/psoextension.Rmd | 28 +++++++++++++++++++ vignettes/runmod.Rmd | 22 --------------- vignettes/runvanillamod.Rmd | 48 +++++++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+), 45 deletions(-) create mode 100644 tests/testthat/test-psobounds.R create mode 100644 vignettes/psoextension.Rmd delete mode 100644 vignettes/runmod.Rmd create mode 100644 vignettes/runvanillamod.Rmd diff --git a/R/main_pso.R b/R/main_pso.R index 736d0cb..b2a5434 100644 --- a/R/main_pso.R +++ b/R/main_pso.R @@ -13,6 +13,8 @@ #' @param swarmsize #' @param swarmsteps #' @param searchsteps +#' @param report_sd_progress boolean; search chain +#' @param report_fd_progress boolean; final chain #' @inherit description deme_inbreeding_spcoef_vanilla #' @details The gen.geo.dist dataframe must be named with the following columns: #' "smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to: @@ -49,7 +51,8 @@ deme_inbreeding_spcoef_pso <- function(discdat, swarmsize = 25, thin = 1, normalize_geodist = TRUE, - report_progress = TRUE, + report_sd_progress = TRUE, + report_fd_progress = TRUE, return_verbose = FALSE){ #.............................................................. @@ -87,7 +90,8 @@ deme_inbreeding_spcoef_pso <- function(discdat, assert_single_int(steps) assert_single_int(thin) assert_greq(thin, 1, message = "Must be at least 1") - assert_single_logical(report_progress) + ssert_single_logical(report_sd_progress) + assert_single_logical(report_fd_progress) assert_single_logical(normalize_geodist) # no missing @@ -95,6 +99,11 @@ deme_inbreeding_spcoef_pso <- function(discdat, stop("discdat dataframe cannot have missing values") } + # catch accidental bad F and M bound + if ( any(round(c(fi_lowerbound, m_lowerbound), digits = 1e200) == 0) ) { + warning("The Fi or M lower-bound is zero (or essentially zero), which will result in unstable behavior in the Gradient-Descent algorithm. Consider increasing the lower-bound limit") + } + #...................... # check for self comparisons #...................... @@ -150,13 +159,6 @@ deme_inbreeding_spcoef_pso <- function(discdat, discdat <- discdat %>% dplyr::mutate(geodist = (geodist - mingeodist)/(maxgeodist - mingeodist)) } - # catch accidental bad M start if user is standardizing distances - if (normalize_geodist & (m_upperbound > 500) ) { - warning("You have selected to normalize geographic distances, but your - migration rate upper bound parameter is large. Please consider placing it on a - similar scale to your normalized geographic distances for stability.") - } - # put geo information into distance matrix geodist <- discdat %>% @@ -210,7 +212,8 @@ deme_inbreeding_spcoef_pso <- function(discdat, swarmsize = swarmsize, searchsteps = searchsteps, steps = steps, - report_progress = report_progress, + report_sd_progress = report_sd_progress, + report_fd_progress = report_fd_progress, return_verbose = return_verbose ) diff --git a/src/main_pso.cpp b/src/main_pso.cpp index 7157945..38f9460 100644 --- a/src/main_pso.cpp +++ b/src/main_pso.cpp @@ -54,7 +54,8 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { double b1 = rcpp_to_double(args["b1"]); double b2 = rcpp_to_double(args["b2"]); double e = rcpp_to_double(args["e"]); - bool report_progress = rcpp_to_bool(args["report_progress"]); + bool report_sd_progress = rcpp_to_bool(args["report_sd_progress"]); + bool report_fd_progress = rcpp_to_bool(args["report_fd_progress"]); bool return_verbose = rcpp_to_bool(args["return_verbose"]); // items for particle swarm int swarmsize = rcpp_to_int(args["swarmsize"]); @@ -96,19 +97,19 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { vector fvec(n_Demes); double ffill = runif1(fi_lowerbound, fi_upperbound); // rand fi start param fill(fvec.begin(), fvec.end(), ffill); + swarm[0][i].fvec = fvec; + swarm[0][i].m = runif1(m_lowerbound, m_upperbound); + swarm[0][i].f_learningrate = runif1(flearn_lowerbound, flearn_upperbound); + swarm[0][i].m_learningrate = runif1(mlearn_lowerbound, mlearn_upperbound); swarm[0][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE; swarm[0][i].steps = searchsteps; swarm[0][i].n_Demes = n_Demes; swarm[0][i].n_Kpairmax = n_Kpairmax; - swarm[0][i].m = runif1(m_lowerbound, m_upperbound); - swarm[0][i].f_learningrate = runif1(flearn_lowerbound, flearn_upperbound); - swarm[0][i].m_learningrate = runif1(mlearn_lowerbound, mlearn_upperbound); swarm[0][i].m_lowerbound = m_lowerbound; swarm[0][i].m_upperbound = m_upperbound; swarm[0][i].b1 = b1; swarm[0][i].b2 = b2; swarm[0][i].e = e; - swarm[0][i].fvec = fvec; // storage and ADAM items swarm[0][i].cost = vector(searchsteps); @@ -169,19 +170,19 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { //------------------------- vector fvec(n_Demes); fill(fvec.begin(), fvec.end(), swarm[t][i].particle_pcurr[0]); + swarm[t][i].fvec = fvec; + swarm[t][i].m = swarm[t][i].particle_pcurr[1]; + swarm[t][i].f_learningrate = swarm[t][i].particle_pcurr[2]; + swarm[t][i].m_learningrate = swarm[t][i].particle_pcurr[3]; swarm[t][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE; swarm[t][i].steps = searchsteps; swarm[t][i].n_Demes = n_Demes; swarm[t][i].n_Kpairmax = n_Kpairmax; - swarm[t][i].m = swarm[t][i].particle_pcurr[1]; - swarm[t][i].f_learningrate = swarm[t][i].particle_pcurr[2]; - swarm[t][i].m_learningrate = swarm[t][i].particle_pcurr[3]; swarm[t][i].m_lowerbound = m_lowerbound; swarm[t][i].m_upperbound = m_upperbound; swarm[t][i].b1 = b1; swarm[t][i].b2 = b2; swarm[t][i].e = e; - swarm[t][i].fvec = fvec; // storage and ADAM items swarm[t][i].cost = vector(searchsteps); @@ -198,7 +199,7 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { swarm[t][i].m1t_fi_hat = vector(n_Demes); // first moment bias corrected; swarm[t][i].v2t_fi_hat = vector(n_Demes); // second moment (v) bias corrected; // run GD - swarm[t][i].performGD(false, gendist_arr, geodist_mat); + swarm[t][i].performGD(report_sd_progress, gendist_arr, geodist_mat); // update particle best and global best if (swarm[t][i].cost[searchsteps-1] < swarm[t-1][i].cost[searchsteps-1]) { @@ -261,15 +262,15 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) { // run GD - discParticle.performGD(report_progress, gendist_arr, geodist_mat); + discParticle.performGD(report_fd_progress, gendist_arr, geodist_mat); //------------------------------- // Out: return as Rcpp object //------------------------------- if (return_verbose) { vector>> swarmfill(swarmsteps, vector>(swarmsize, vector(5))); - for (int t = 1; t < swarmsteps; t++) { - for (int i = 1; i < swarmsize; i++) { + for (int t = 0; t < swarmsteps; t++) { + for (int i = 0; i < swarmsize; i++) { for (int d = 0; d < 4; d++) { swarmfill[t][i][d] = swarm[t][i].particle_pcurr[d]; } diff --git a/tests/testthat/test-psobounds.R b/tests/testthat/test-psobounds.R new file mode 100644 index 0000000..ad82cd2 --- /dev/null +++ b/tests/testthat/test-psobounds.R @@ -0,0 +1,45 @@ +test_that("PSO respects bounds", { + + # sim data + data("IBD_simulation_data", package = "discent") + dat <- IBD_simulation_data + # run model + inputdisc <- dat %>% + dplyr::filter(deme1 != deme2) + mod <- deme_inbreeding_spcoef_pso(discdat = inputdisc, + m_lowerbound = 100, + m_upperbound = 101, + fi_lowerbound= 0.1, + fi_upperbound = 0.11, + flearn_lowerbound = 1e-3, + flearn_upperbound = 1e-1, + mlearn_lowerbound = 1e-3, + mlearn_upperbound = 1e-1, + c1 = 0.1, + c2 = 0.1, + w = 0.25, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e3, + searchsteps = 1e2, + swarmsize = 5, + swarmsteps = 10, + normalize_geodist = F, + report_progress = F, + return_verbose = F) + #...................... + # check that bounds are respect + #...................... + testthat::expect_gte(min(mod$m_run), 100) + testthat::expect_lte(max(mod$m_run), 101) + testthat::expect_gte(min(mod$fi_run[,1]), 0.1) + testthat::expect_lte(max(mod$fi_run[,1]), 0.11) + testthat::expect_gte(min(mod$fi_run[,2]), 0.1) + testthat::expect_lte(max(mod$fi_run[,2]), 0.11) + testthat::expect_gte(min(mod$fi_run[,3]), 0.1) + testthat::expect_lte(max(mod$fi_run[,3]), 0.11) + + +}) + diff --git a/vignettes/psoextension.Rmd b/vignettes/psoextension.Rmd new file mode 100644 index 0000000..b367028 --- /dev/null +++ b/vignettes/psoextension.Rmd @@ -0,0 +1,28 @@ +--- +title: "Running the `DISCent` Model with Particle Swarm Optimizer Metaheuristic Search" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{psoextension} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(discent) +``` + + +# Overview +- PSO is being used to find best start parameters +- Swarm made up of particles +- Swarm object + - swarm steps = time limit of search/swarm moves + - swarm size = number of particles + - for each particle, there is a grad descent step considered diff --git a/vignettes/runmod.Rmd b/vignettes/runmod.Rmd deleted file mode 100644 index ca73fa8..0000000 --- a/vignettes/runmod.Rmd +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: "Running the Model" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Running the Model} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -library(discent) -``` - -# Simulating Data for the Model -Here, we will use data that we previously simulated with the [`polySimIBD`](https://github.com/nickbrazeau/polySimIBD) package. diff --git a/vignettes/runvanillamod.Rmd b/vignettes/runvanillamod.Rmd new file mode 100644 index 0000000..e3acf3e --- /dev/null +++ b/vignettes/runvanillamod.Rmd @@ -0,0 +1,48 @@ +--- +title: "Running the Basic Model" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Running the Model} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(discent) +``` + +# *** + +```{r} +data("IBD_simulation_data") + +start_params <- rep(0.3, 3) +names(start_params) <- 1:3 +start_params <- c(start_params, "m"= 250) + +#run +ret <- discent::deme_inbreeding_spcoef_vanilla(discdat = IBD_simulation_data, + start_params = start_params, + f_learningrate = 0.001, + m_learningrate = 1e-6, + m_lowerbound = 0, + m_upperbound = Inf, + b1 = 0.9, + b2 = 0.999, + e = 1e-8, + steps = 1e4, + thin = 1, + normalize_geodist = TRUE, + report_progress = TRUE, + return_verbose = TRUE) + +```