From a96e2dae5a46f187810eacb7dcf3287e9448761b Mon Sep 17 00:00:00 2001
From: Nicholas Brazeau <nbrazeau@med.unc.edu>
Date: Sat, 11 May 2024 16:46:11 -0400
Subject: [PATCH] add PSO extension and test/docs

---
 R/main_pso.R                    | 23 +++++++++-------
 src/main_pso.cpp                | 27 ++++++++++---------
 tests/testthat/test-psobounds.R | 45 +++++++++++++++++++++++++++++++
 vignettes/psoextension.Rmd      | 28 +++++++++++++++++++
 vignettes/runmod.Rmd            | 22 ---------------
 vignettes/runvanillamod.Rmd     | 48 +++++++++++++++++++++++++++++++++
 6 files changed, 148 insertions(+), 45 deletions(-)
 create mode 100644 tests/testthat/test-psobounds.R
 create mode 100644 vignettes/psoextension.Rmd
 delete mode 100644 vignettes/runmod.Rmd
 create mode 100644 vignettes/runvanillamod.Rmd

diff --git a/R/main_pso.R b/R/main_pso.R
index 736d0cb..b2a5434 100644
--- a/R/main_pso.R
+++ b/R/main_pso.R
@@ -13,6 +13,8 @@
 #' @param  swarmsize
 #' @param  swarmsteps
 #' @param  searchsteps
+#' @param report_sd_progress boolean; search chain
+#' @param report_fd_progress boolean; final chain
 #' @inherit description deme_inbreeding_spcoef_vanilla
 #' @details The gen.geo.dist dataframe must be named with the following columns:
 #'          "smpl1"; "smpl2"; "deme1"; "deme2"; "gendist"; "geodist"; which corresponds to:
@@ -49,7 +51,8 @@ deme_inbreeding_spcoef_pso <- function(discdat,
                                        swarmsize = 25,
                                        thin = 1,
                                        normalize_geodist = TRUE,
-                                       report_progress = TRUE,
+                                       report_sd_progress = TRUE,
+                                       report_fd_progress = TRUE,
                                        return_verbose = FALSE){
 
   #..............................................................
@@ -87,7 +90,8 @@ deme_inbreeding_spcoef_pso <- function(discdat,
   assert_single_int(steps)
   assert_single_int(thin)
   assert_greq(thin, 1, message = "Must be at least 1")
-  assert_single_logical(report_progress)
+  ssert_single_logical(report_sd_progress)
+  assert_single_logical(report_fd_progress)
   assert_single_logical(normalize_geodist)
 
   # no missing
@@ -95,6 +99,11 @@ deme_inbreeding_spcoef_pso <- function(discdat,
     stop("discdat dataframe cannot have missing values")
   }
 
+  # catch accidental bad F and M bound
+  if ( any(round(c(fi_lowerbound, m_lowerbound), digits = 1e200) == 0) ) {
+    warning("The Fi or M lower-bound is zero (or essentially zero), which will result in unstable behavior in the Gradient-Descent algorithm. Consider increasing the lower-bound limit")
+  }
+
   #......................
   # check for self comparisons
   #......................
@@ -150,13 +159,6 @@ deme_inbreeding_spcoef_pso <- function(discdat,
     discdat <- discdat %>%
       dplyr::mutate(geodist = (geodist - mingeodist)/(maxgeodist - mingeodist))
   }
-  # catch accidental bad M start if user is standardizing distances
-  if (normalize_geodist & (m_upperbound > 500) ) {
-    warning("You have selected to normalize geographic distances, but your
-            migration rate upper bound parameter is large. Please consider placing it on a
-            similar scale to your normalized geographic distances for stability.")
-  }
-
 
   # put geo information into distance matrix
   geodist <- discdat %>%
@@ -210,7 +212,8 @@ deme_inbreeding_spcoef_pso <- function(discdat,
                swarmsize = swarmsize,
                searchsteps = searchsteps,
                steps = steps,
-               report_progress = report_progress,
+               report_sd_progress = report_sd_progress,
+               report_fd_progress = report_fd_progress,
                return_verbose = return_verbose
   )
 
diff --git a/src/main_pso.cpp b/src/main_pso.cpp
index 7157945..38f9460 100644
--- a/src/main_pso.cpp
+++ b/src/main_pso.cpp
@@ -54,7 +54,8 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) {
   double b1 = rcpp_to_double(args["b1"]);
   double b2 = rcpp_to_double(args["b2"]);
   double e = rcpp_to_double(args["e"]);
-  bool report_progress = rcpp_to_bool(args["report_progress"]);
+  bool report_sd_progress = rcpp_to_bool(args["report_sd_progress"]);
+  bool report_fd_progress = rcpp_to_bool(args["report_fd_progress"]);
   bool return_verbose = rcpp_to_bool(args["return_verbose"]);
   // items for particle swarm
   int swarmsize = rcpp_to_int(args["swarmsize"]);
@@ -96,19 +97,19 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) {
     vector<double> fvec(n_Demes);
     double ffill = runif1(fi_lowerbound, fi_upperbound); // rand fi start param
     fill(fvec.begin(), fvec.end(), ffill);
+    swarm[0][i].fvec = fvec;
+    swarm[0][i].m = runif1(m_lowerbound, m_upperbound);
+    swarm[0][i].f_learningrate = runif1(flearn_lowerbound, flearn_upperbound);
+    swarm[0][i].m_learningrate = runif1(mlearn_lowerbound, mlearn_upperbound);
     swarm[0][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE;
     swarm[0][i].steps = searchsteps;
     swarm[0][i].n_Demes = n_Demes;
     swarm[0][i].n_Kpairmax = n_Kpairmax;
-    swarm[0][i].m = runif1(m_lowerbound, m_upperbound);
-    swarm[0][i].f_learningrate = runif1(flearn_lowerbound, flearn_upperbound);
-    swarm[0][i].m_learningrate = runif1(mlearn_lowerbound, mlearn_upperbound);
     swarm[0][i].m_lowerbound = m_lowerbound;
     swarm[0][i].m_upperbound = m_upperbound;
     swarm[0][i].b1 = b1;
     swarm[0][i].b2 = b2;
     swarm[0][i].e = e;
-    swarm[0][i].fvec = fvec;
 
     // storage and ADAM items
     swarm[0][i].cost = vector<double>(searchsteps);
@@ -169,19 +170,19 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) {
       //-------------------------
       vector<double> fvec(n_Demes);
       fill(fvec.begin(), fvec.end(), swarm[t][i].particle_pcurr[0]);
+      swarm[t][i].fvec = fvec;
+      swarm[t][i].m = swarm[t][i].particle_pcurr[1];
+      swarm[t][i].f_learningrate = swarm[t][i].particle_pcurr[2];
+      swarm[t][i].m_learningrate = swarm[t][i].particle_pcurr[3];
       swarm[t][i].OVERFLO_DOUBLE = OVERFLO_DOUBLE;
       swarm[t][i].steps = searchsteps;
       swarm[t][i].n_Demes = n_Demes;
       swarm[t][i].n_Kpairmax = n_Kpairmax;
-      swarm[t][i].m = swarm[t][i].particle_pcurr[1];
-      swarm[t][i].f_learningrate = swarm[t][i].particle_pcurr[2];
-      swarm[t][i].m_learningrate = swarm[t][i].particle_pcurr[3];
       swarm[t][i].m_lowerbound = m_lowerbound;
       swarm[t][i].m_upperbound = m_upperbound;
       swarm[t][i].b1 = b1;
       swarm[t][i].b2 = b2;
       swarm[t][i].e = e;
-      swarm[t][i].fvec = fvec;
 
       // storage and ADAM items
       swarm[t][i].cost = vector<double>(searchsteps);
@@ -198,7 +199,7 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) {
       swarm[t][i].m1t_fi_hat = vector<double>(n_Demes); // first moment bias corrected;
       swarm[t][i].v2t_fi_hat = vector<double>(n_Demes); // second moment (v) bias corrected;
       // run GD
-      swarm[t][i].performGD(false, gendist_arr, geodist_mat);
+      swarm[t][i].performGD(report_sd_progress, gendist_arr, geodist_mat);
 
       // update particle best and global best
       if (swarm[t][i].cost[searchsteps-1] < swarm[t-1][i].cost[searchsteps-1]) {
@@ -261,15 +262,15 @@ Rcpp::List pso_deme_inbreeding_coef_cpp(Rcpp::List args) {
 
 
   // run GD
-  discParticle.performGD(report_progress, gendist_arr, geodist_mat);
+  discParticle.performGD(report_fd_progress, gendist_arr, geodist_mat);
 
   //-------------------------------
   // Out: return as Rcpp object
   //-------------------------------
   if (return_verbose) {
     vector<vector<vector<double>>> swarmfill(swarmsteps, vector<vector<double>>(swarmsize, vector<double>(5)));
-    for (int t = 1; t < swarmsteps; t++) {
-      for (int i = 1; i < swarmsize; i++) {
+    for (int t = 0; t < swarmsteps; t++) {
+      for (int i = 0; i < swarmsize; i++) {
         for (int d = 0; d < 4; d++) {
           swarmfill[t][i][d] = swarm[t][i].particle_pcurr[d];
         }
diff --git a/tests/testthat/test-psobounds.R b/tests/testthat/test-psobounds.R
new file mode 100644
index 0000000..ad82cd2
--- /dev/null
+++ b/tests/testthat/test-psobounds.R
@@ -0,0 +1,45 @@
+test_that("PSO respects bounds", {
+
+  # sim data
+  data("IBD_simulation_data", package = "discent")
+  dat <- IBD_simulation_data
+  # run model
+  inputdisc <- dat %>%
+    dplyr::filter(deme1 != deme2)
+  mod <- deme_inbreeding_spcoef_pso(discdat = inputdisc,
+                                    m_lowerbound = 100,
+                                    m_upperbound = 101,
+                                    fi_lowerbound= 0.1,
+                                    fi_upperbound = 0.11,
+                                    flearn_lowerbound = 1e-3,
+                                    flearn_upperbound = 1e-1,
+                                    mlearn_lowerbound = 1e-3,
+                                    mlearn_upperbound = 1e-1,
+                                    c1 = 0.1,
+                                    c2 = 0.1,
+                                    w = 0.25,
+                                    b1 = 0.9,
+                                    b2 = 0.999,
+                                    e = 1e-8,
+                                    steps = 1e3,
+                                    searchsteps = 1e2,
+                                    swarmsize = 5,
+                                    swarmsteps = 10,
+                                    normalize_geodist = F,
+                                    report_progress = F,
+                                    return_verbose = F)
+  #......................
+  # check that bounds are respect
+  #......................
+  testthat::expect_gte(min(mod$m_run), 100)
+  testthat::expect_lte(max(mod$m_run), 101)
+  testthat::expect_gte(min(mod$fi_run[,1]), 0.1)
+  testthat::expect_lte(max(mod$fi_run[,1]), 0.11)
+  testthat::expect_gte(min(mod$fi_run[,2]), 0.1)
+  testthat::expect_lte(max(mod$fi_run[,2]), 0.11)
+  testthat::expect_gte(min(mod$fi_run[,3]), 0.1)
+  testthat::expect_lte(max(mod$fi_run[,3]), 0.11)
+
+
+})
+
diff --git a/vignettes/psoextension.Rmd b/vignettes/psoextension.Rmd
new file mode 100644
index 0000000..b367028
--- /dev/null
+++ b/vignettes/psoextension.Rmd
@@ -0,0 +1,28 @@
+---
+title: "Running the `DISCent` Model with Particle Swarm Optimizer Metaheuristic Search"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{psoextension}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(discent)
+```
+
+
+# Overview
+- PSO is being used to find best start parameters 
+- Swarm made up of particles 
+- Swarm object 
+  - swarm steps = time limit of search/swarm moves
+  - swarm size = number of particles 
+      - for each particle, there is a grad descent step considered
diff --git a/vignettes/runmod.Rmd b/vignettes/runmod.Rmd
deleted file mode 100644
index ca73fa8..0000000
--- a/vignettes/runmod.Rmd
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: "Running the Model"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Running the Model}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-```{r, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
-)
-```
-
-```{r setup}
-library(discent) 
-```
-
-# Simulating Data for the Model
-Here, we will use data that we previously simulated with the  [`polySimIBD`](https://github.com/nickbrazeau/polySimIBD) package.
diff --git a/vignettes/runvanillamod.Rmd b/vignettes/runvanillamod.Rmd
new file mode 100644
index 0000000..e3acf3e
--- /dev/null
+++ b/vignettes/runvanillamod.Rmd
@@ -0,0 +1,48 @@
+---
+title: "Running the Basic Model"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Running the Model}
+  %\VignetteEncoding{UTF-8}
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options: 
+  chunk_output_type: console
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(discent) 
+```
+
+# ***
+
+```{r}
+data("IBD_simulation_data")
+
+start_params <- rep(0.3, 3)
+names(start_params) <- 1:3
+start_params <- c(start_params, "m"= 250)
+
+#run
+ret <- discent::deme_inbreeding_spcoef_vanilla(discdat = IBD_simulation_data,
+                                       start_params = start_params,
+                                       f_learningrate = 0.001,
+                                       m_learningrate = 1e-6,
+                                       m_lowerbound = 0,
+                                       m_upperbound = Inf,
+                                       b1 = 0.9,
+                                       b2 = 0.999,
+                                       e = 1e-8,
+                                       steps = 1e4,
+                                       thin = 1,
+                                       normalize_geodist = TRUE,
+                                       report_progress = TRUE,
+                                       return_verbose = TRUE)
+
+```