From a210f657a70005f580b55696359dd431b06a3fa3 Mon Sep 17 00:00:00 2001
From: pglpm <pgl@portamana.org>
Date: Sat, 1 Feb 2025 17:28:32 +0100
Subject: [PATCH] update Pr to include prior

---
 R/Pr.R        | 24 +++++++++------------
 R/tailPr.R    | 59 ++++++++++++++++++++++++---------------------------
 man/tailPr.Rd |  6 +++---
 3 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/R/Pr.R b/R/Pr.R
index 3c2ed97..d1b18c8 100644
--- a/R/Pr.R
+++ b/R/Pr.R
@@ -120,6 +120,16 @@ Pr <- function(
     ncomponents <- nrow(learnt$W)
     nmcsamples <- ncol(learnt$W)
 
+    if(is.numeric(nsamples)){
+        if(is.na(nsamples) || nsamples < 1) {
+            nsamples <- NULL
+        } else if(!is.finite(nsamples)) {
+            nsamples <- nmcsamples
+        }
+    } else if (is.character(nsamples) && nsamples == 'all'){
+        nsamples <- nmcsamples
+    }
+
     ## Consistency checks
     if (length(dim(Y)) != 2) {
         stop('Y must have two dimensions')
@@ -132,20 +142,6 @@ Pr <- function(
         stop('X must be NULL or have two dimensions')
     }
 
-    if(is.numeric(nsamples)){
-        if(is.na(nsamples) || nsamples < 1) {
-            nsamples <- NULL
-        } else if(!is.finite(nsamples)) {
-            nsamples <- nmcsamples
-        }
-    } else if (is.character(nsamples) && nsamples == 'all'){
-        nsamples <- nmcsamples
-    }
-
-    ## if(!is.null(nsamples)){
-    ##     sampleseq <- round(seq(1, nmcsamples, length.out = nsamples))
-    ## }
-
     ## Check if a prior for Y is given, in that case Y and X will be swapped
     if (isFALSE(priorY) || is.null(priorY)) {
         priorY <- NULL
diff --git a/R/tailPr.R b/R/tailPr.R
index 8e679a5..c10c489 100644
--- a/R/tailPr.R
+++ b/R/tailPr.R
@@ -6,8 +6,8 @@
 #'   the joint probability of. One variate per column, one set of values per row.
 #' @param X matrix or data.table or `NULL`: set of values of variates on which we want to condition the joint probability of `Y`. If `NULL` (default), no conditioning is made (except for conditioning on the learning dataset and prior assumptions). One variate per column, one set of values per row.
 #' @param learnt Either a string with the name of a directory or full path for a 'learnt.rds' object, produced by the \code{\link{learn}} function, or such an object itself.
-#' @param quantiles numeric vector, between 0 and 1, or `NULL`: desired quantiles of the variability of the probability for `Y`. Default `c(0.055, 0.25, 0.75, 0.945)`, that is, the 5.5%, 25%, 75%, 94.5% quantiles (these are typical quantile values in the Bayesian literature: they give 50% and 89% credibility intervals, which correspond to 1 shannons and 0.5 shannons of uncertainty). If `NULL`, no quantiles are calculated.
 #' @param nsamples integer or `NULL` or `"all"`: desired number of samples of the variability of the probability for `Y`. If `NULL`, no samples are reported. If `"all"` (or `Inf`), all samples obtained by the \code{\link{learn}} function are used. Default `100`.
+#' @param quantiles numeric vector, between 0 and 1, or `NULL`: desired quantiles of the variability of the probability for `Y`. Default `c(0.055, 0.25, 0.75, 0.945)`, that is, the 5.5%, 25%, 75%, 94.5% quantiles (these are typical quantile values in the Bayesian literature: they give 50% and 89% credibility intervals, which correspond to 1 shannons and 0.5 shannons of uncertainty). If `NULL`, no quantiles are calculated.
 #' @param parallel logical or integer: whether to use pre-existing parallel
 #'   workers, or how many to create and use. Default `TRUE`.
 #' @param eq logical: include `Y = y` in the cumulative probability? Default `TRUE`.
@@ -27,8 +27,8 @@ tailPr <- function(
     Y,
     X = NULL,
     learnt,
-    quantiles = c(0.055, 0.25, 0.75, 0.945),
     nsamples = 100L,
+    quantiles = c(0.055, 0.25, 0.75, 0.945),
     parallel = TRUE,
     eq = TRUE,
     lower.tail = TRUE,
@@ -122,6 +122,16 @@ tailPr <- function(
     ncomponents <- nrow(learnt$W)
     nmcsamples <- ncol(learnt$W)
 
+    if(is.numeric(nsamples)){
+        if(is.na(nsamples) || nsamples < 1) {
+            nsamples <- NULL
+        } else if(!is.finite(nsamples)) {
+            nsamples <- nmcsamples
+        }
+    } else if (is.character(nsamples) && nsamples == 'all'){
+        nsamples <- nmcsamples
+    }
+
     ## Consistency checks
     if (length(dim(Y)) != 2) {
         stop('Y must have two dimensions')
@@ -360,23 +370,9 @@ tailPr <- function(
     ##     na.rm = TRUE
     ## ))
 
-    if(is.numeric(nsamples)){
-        if(is.na(nsamples) || nsamples < 1) {
-            nsamples <- NULL
-        } else if(!is.finite(nsamples)) {
-            nsamples <- nmcsamples
-        }
-    } else if (is.character(nsamples) && nsamples == 'all'){
-        nsamples <- nmcsamples
-    }
-
-    if(!is.null(nsamples)){
-        sampleseq <- round(seq(1, nmcsamples, length.out = nsamples))
-    }
-
     keys <- c('values',
-        if(!is.null(quantiles)){'quantiles'},
-        if(!is.null(nsamples)) {'samples'}
+        if(!is.null(nsamples)) {'samples'},
+        if(!is.null(quantiles)){'quantiles'}
     )
     ##
     combfnr <- function(...){setNames(do.call(mapply,
@@ -412,19 +408,20 @@ tailPr <- function(
                     ))
                 }
 
-                FF <- colSums(exp(lprobX + lprobY)) / colSums(exp(lprobX))
-                FF <- FF[!is.na(FF)]
+                FF <- colSums(exp(lprobX + lprobY), na.rm = TRUE) /
+                    colSums(exp(lprobX), na.rm = TRUE)
 
                 list(
                     values = mean(FF, na.rm = TRUE),
                     ##
+                    samples = (if(!is.null(nsamples)) {
+                        FF <- FF[!is.na(FF)]
+                        FF[round(seq(1, length(FF), length.out = nsamples))]
+                    }),
+                    ##
                     quantiles = (if(!is.null(quantiles)) {
                         quantile(FF, probs = quantiles, type = 6,
                             na.rm = TRUE, names = FALSE)
-                    }),
-                    ##
-                    samples = (if(!is.null(nsamples)) {
-                        FF[sampleseq]
                     })
                     ##
                     ## error = sd(FF, na.rm = TRUE)/sqrt(nmcsamples)
@@ -436,6 +433,13 @@ tailPr <- function(
     dim(out$values) <- c(nY, nX)
     dimnames(out$values) <- list(Y = NULL, X = NULL)
 
+    if(!is.null(nsamples)){
+    ## transform to grid
+        out$samples <- out$samples
+        dim(out$samples) <- c(nY, nX, nsamples)
+        dimnames(out$samples) <- list(Y = NULL, X = NULL, sampleseq)
+    }
+
     if(!is.null(quantiles)){
         out$quantiles <- out$quantiles
         temp <- names(quantile(1, probs = quantiles, names = TRUE))
@@ -444,13 +448,6 @@ tailPr <- function(
         dimnames(out$quantiles) <- list(Y = NULL, X = NULL, temp)
     }
 
-    if(!is.null(nsamples)){
-    ## transform to grid
-        out$samples <- out$samples
-        dim(out$samples) <- c(nY, nX, nsamples)
-        dimnames(out$samples) <- list(Y = NULL, X = NULL, sampleseq)
-    }
-
     if(isTRUE(keepYX)){
     ## save Y and X values in the output; useful for plotting methods
         out$Y <- Y
diff --git a/man/tailPr.Rd b/man/tailPr.Rd
index f248d96..d801f11 100644
--- a/man/tailPr.Rd
+++ b/man/tailPr.Rd
@@ -8,8 +8,8 @@ tailPr(
   Y,
   X = NULL,
   learnt,
-  quantiles = c(0.055, 0.25, 0.75, 0.945),
   nsamples = 100L,
+  quantiles = c(0.055, 0.25, 0.75, 0.945),
   parallel = TRUE,
   eq = TRUE,
   lower.tail = TRUE,
@@ -26,10 +26,10 @@ the joint probability of. One variate per column, one set of values per row.}
 
 \item{learnt}{Either a string with the name of a directory or full path for a 'learnt.rds' object, produced by the \code{\link{learn}} function, or such an object itself.}
 
-\item{quantiles}{numeric vector, between 0 and 1, or \code{NULL}: desired quantiles of the variability of the probability for \code{Y}. Default \code{c(0.055, 0.25, 0.75, 0.945)}, that is, the 5.5\%, 25\%, 75\%, 94.5\% quantiles (these are typical quantile values in the Bayesian literature: they give 50\% and 89\% credibility intervals, which correspond to 1 shannons and 0.5 shannons of uncertainty). If \code{NULL}, no quantiles are calculated.}
-
 \item{nsamples}{integer or \code{NULL} or \code{"all"}: desired number of samples of the variability of the probability for \code{Y}. If \code{NULL}, no samples are reported. If \code{"all"} (or \code{Inf}), all samples obtained by the \code{\link{learn}} function are used. Default \code{100}.}
 
+\item{quantiles}{numeric vector, between 0 and 1, or \code{NULL}: desired quantiles of the variability of the probability for \code{Y}. Default \code{c(0.055, 0.25, 0.75, 0.945)}, that is, the 5.5\%, 25\%, 75\%, 94.5\% quantiles (these are typical quantile values in the Bayesian literature: they give 50\% and 89\% credibility intervals, which correspond to 1 shannons and 0.5 shannons of uncertainty). If \code{NULL}, no quantiles are calculated.}
+
 \item{parallel}{logical or integer: whether to use pre-existing parallel
 workers, or how many to create and use. Default \code{TRUE}.}