Remove normal approximation (#153)

* Remove normal approximation Given instability of the normal approximation for many values (especially given asymmetric likelihood), and because binomial implementation is quick, this is being removed to ensure accurate outputs. * Update documentation Output NA if total_outcomes<=total_deaths * Update README example Focus on early stage * Fix typo and add news * Update tests and linting * Address linting issues * Fix remaining linting * Run styler * Automatic readme update * Remove pkgdown: as_is: true Knitted vignettes appear to show equations OK once removed. --------- Co-authored-by: GitHub Action <[email protected]>
epiverse-trace · Jul 17, 2024 · 29ee12a · 29ee12a
1 parent db8b768
commit 29ee12a
Show file tree

Hide file tree

Showing 19 changed files with 60 additions and 95 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,12 @@
 # cfr (development version)
 
+# cfr 0.1.2
+
+Updated version to fix instability in normal approximation with displayed Ebola example. This release includes:
+
+1. Removal of normal approximation from the `.estimate_severity()` function, instead using the binomial likelihood unless the criteria for a Poisson approximation are met.
+2. Updated README example focusing on first 30 days of outbreak, to emphasise effects of not accounting for delays to outcome.
+
 # cfr 0.1.1
 
 Maintainer is changing to @adamkucharski (#143).

diff --git a/R/estimate_severity.R b/R/estimate_severity.R
@@ -30,16 +30,8 @@
 #' the estimate and confidence intervals cannot be calculated and the output
 #' `<data.frame>` contains only `NA`s.
 #'
-#' - When `total_cases == total_deaths` _and_ `total_outcomes <= total_deaths`,
-#' while `total_cases < poisson_threshold`, the confidence intervals cannot be
-#' calculated and are returned as `NA`. The severity is returned as the lowest
-#' possible value for the method used when cases are below the Poisson
-#' threshold, which is 0.001.
-#'
-#' - When `total_outcomes == total_deaths` while
-#' `total_cases < poisson_threshold` the confidence intervals cannot be
-#' calculated and are returned as `NA`s while the severity estimate is returned
-#' as `0.999`.
+#' - When `total_outcomes <= total_deaths`, estimate and confidence intervals
+#' cannot be reliably calculated and are returned as `NA`.
 .estimate_severity <- function(total_cases,
                                total_deaths,
                                total_outcomes,
@@ -68,16 +60,38 @@
   )
 
   # maximum likelihood estimation for corrected severity
+  # using increments of 0.01% severity
   pprange <- seq(from = 1e-4, to = 1.0, by = 1e-4)
 
+  # if fewer expected outcomes than observed deaths, set outcomes to NA
+  if (total_outcomes >= total_deaths) {
+    total_outcomes_checked <- total_outcomes
+  } else {
+    total_outcomes_checked <- NA
+    message(
+      "Total deaths = ", total_deaths,
+      " and expected outcomes = ", round(total_outcomes),
+      " so setting expected outcomes = NA. If we were to assume
+        total deaths = expected outcomes, it would produce an estimate of 1."
+    )
+  }
+
   # get likelihoods using selected function
-  lik <- func_likelihood(total_outcomes, total_deaths, pprange)
+  lik <- func_likelihood(total_outcomes_checked, total_deaths, pprange)
 
   # maximum likelihood estimate - if this is empty, return NA
+  # Otherwise return 95% confidence interval of likelihood
   severity_estimate <- pprange[which.max(lik)]
-
-  # 95% confidence interval of likelihood
-  severity_lims <- range(pprange[lik >= (max(lik) - 1.92)])
+  if (length(severity_estimate) == 0) {
+    severity_estimate <- NA
+    severity_lims <- c(NA, NA)
+  } else {
+    severity_lims <- range(
+      pprange[lik >=
+        (max(lik, na.rm = TRUE) - 1.92)],
+      na.rm = TRUE
+    )
+  }
 
   # return a vector for easy conversion to data
   severity_estimate <- c(severity_estimate, severity_lims)
@@ -111,9 +125,6 @@
 #' - Poisson approximation: when `total_cases >= poisson_threshold` but
 #' when `p_mid` < 0.05;
 #'
-#' - Normal approximation: when `total_cases >= poisson_threshold` and
-#' `p_mid >=` 0.05.
-#'
 #' @return A function with three arguments, `total_outcomes`, `total_deaths`,
 #' and `pp`, which is used to generate the profile likelihood.
 #' Also prints messages to the screen when a Poisson approximation
@@ -123,7 +134,7 @@
   # NOTE: internal function is not input checked
   # switch likelihood function based on total cases and p_mid
   # Binomial approx
-  if (total_cases < poisson_threshold) {
+  if (total_cases < poisson_threshold || (p_mid >= 0.05)) {
     func_likelihood <- function(total_outcomes, total_deaths, pp) {
       lchoose(round(total_outcomes), total_deaths) +
         (total_deaths * log(pp)) +
@@ -132,7 +143,7 @@
   }
 
   # Poisson approx
-  if ((total_cases >= poisson_threshold) && p_mid < 0.05) {
+  if ((total_cases >= poisson_threshold) && (p_mid < 0.05)) {
     func_likelihood <- function(total_outcomes, total_deaths, pp) {
       stats::dpois(
         total_deaths, pp * round(total_outcomes),
@@ -145,21 +156,5 @@
     )
   }
 
-  # Normal approx
-  if ((total_cases >= poisson_threshold) && p_mid >= 0.05) {
-    func_likelihood <- function(total_outcomes, total_deaths, pp) {
-      stats::dnorm(
-        total_deaths,
-        mean = pp * round(total_outcomes),
-        sd = pp * (1 - pp) * round(total_outcomes),
-        log = TRUE
-      )
-    }
-    message(
-      "Total cases = ", total_cases, " and p = ", signif(p_mid, 3),
-      ": using Normal approximation to binomial likelihood."
-    )
-  }
-
   func_likelihood
 }
diff --git a/README.Rmd b/README.Rmd
@@ -66,12 +66,15 @@ library(cfr)
 # Load the Ebola 1976 data provided with the package
 data(ebola1976)
 
+# Focus on the first 30 days of the outbreak
+ebola1976_first_30 <- ebola1976[1:30, ]
+
 # Calculate the static CFR without correcting for delays
-cfr_static(data = ebola1976)
+cfr_static(data = ebola1976_first_30)
 
 # Calculate the static CFR while correcting for delays
 cfr_static(
-  data = ebola1976,
+  data = ebola1976_first_30,
   delay_density = function(x) dgamma(x, shape = 2.40, scale = 3.33)
 )
 ```

diff --git a/README.md b/README.md
@@ -73,21 +73,21 @@ library(cfr)
 # Load the Ebola 1976 data provided with the package
 data(ebola1976)
 
+# Focus on the first 30 days of the outbreak
+ebola1976_first_30 <- ebola1976[1:30, ]
+
 # Calculate the static CFR without correcting for delays
-cfr_static(data = ebola1976)
+cfr_static(data = ebola1976_first_30)
 #>   severity_estimate severity_low severity_high
-#> 1          0.955102    0.9210866     0.9773771
-```
-
-``` r
+#> 1         0.4740741    0.3875497     0.5617606
 
 # Calculate the static CFR while correcting for delays
 cfr_static(
-  data = ebola1976,
+  data = ebola1976_first_30,
   delay_density = function(x) dgamma(x, shape = 2.40, scale = 3.33)
 )
 #>   severity_estimate severity_low severity_high
-#> 1            0.9742       0.8356        0.9877
+#> 1            0.9422       0.8701        0.9819
 ```
 
 ### Change in real-time estimates of overall severity during the 1976 Ebola outbreak
@@ -118,9 +118,6 @@ head(rolling_cfr_naive)
 #> 4 1976-08-28                 0            0         0.975
 #> 5 1976-08-29                 0            0         0.975
 #> 6 1976-08-30                 0            0         0.975
-```
-
-``` r
 
 # Calculate the rolling daily CFR while correcting for delays
 rolling_cfr_corrected <- cfr_rolling(

diff --git a/man/dot-estimate_severity.Rd b/man/dot-estimate_severity.Rd
diff --git a/man/dot-select_func_likelihood.Rd b/man/dot-select_func_likelihood.Rd
diff --git a/man/figures/README-fig-rolling-cfr-ebola-1.png b/man/figures/README-fig-rolling-cfr-ebola-1.png
diff --git a/tests/testthat/_snaps/estimate_ascertainment.md b/tests/testthat/_snaps/estimate_ascertainment.md
@@ -11,20 +11,16 @@
     Code
       estimate_ascertainment(data = ebola1976, delay_density = function(x) dgamma(x,
         shape = 2.4, scale = 3.33), severity_baseline = 0.7)
-    Message
-      Total cases = 245 and p = 0.959: using Normal approximation to binomial likelihood.
     Output
         ascertainment_estimate ascertainment_low ascertainment_high
-      1              0.7185383         0.7087172          0.8377214
+      1              0.7297748         0.7147963          0.7530931
 
 # Static ascertainment from vignette
 
     Code
       estimate_ascertainment(data = covid_uk, delay_density = function(x) dlnorm(x,
         meanlog = 2.577, sdlog = 0.44), severity_baseline = 0.014)
-    Message
-      Total cases = 283420 and p = 0.206: using Normal approximation to binomial likelihood.
     Output
         ascertainment_estimate ascertainment_low ascertainment_high
-      1             0.09810792        0.02316347          0.2167183
+      1             0.06779661        0.06734007         0.06829268
 
diff --git a/tests/testthat/_snaps/estimate_severity.md b/tests/testthat/_snaps/estimate_severity.md
@@ -4,7 +4,7 @@
       severity_estimate
     Output
       severity_estimate      severity_low     severity_high 
-                 0.9742            0.8356            0.9877 
+                 0.9592            0.9295            0.9793 
 
 ---
 

diff --git a/tests/testthat/_snaps/estimate_static.md b/tests/testthat/_snaps/estimate_static.md
@@ -12,5 +12,5 @@
       scfr_corrected
     Output
         severity_estimate severity_low severity_high
-      1            0.9742       0.8356        0.9877
+      1            0.9592       0.9295        0.9793
 
diff --git a/tests/testthat/test-estimate_ascertainment.R b/tests/testthat/test-estimate_ascertainment.R
@@ -95,7 +95,7 @@ test_that("Ascertainment > 1.0 throws a warning", {
     estimate_ascertainment(
       data = ebola1976,
       delay_density = function(x) dgamma(x, shape = 2.40, scale = 3.33),
-      severity_baseline = 0.9
+      severity_baseline = 0.99
     ),
     regexp = "Ascertainment ratios > 1.0 detected, setting these values to 1.0"
   )

diff --git a/tests/testthat/test-estimate_severity.R b/tests/testthat/test-estimate_severity.R
@@ -131,22 +131,9 @@ test_that("Special cases of `.estimate_severity()`", {
       poisson_threshold = 100
     ),
     c(
-      severity_estimate = 1e-4, # lowest possible severity under this method
-      severity_low = NA_real_,
-      severity_high = NA_real_
-    )
-  )
-
-  total_outcomes <- 99
-  expect_identical(
-    .estimate_severity(
-      total_cases, total_deaths, total_outcomes,
-      poisson_threshold = 100
-    ),
-    c(
-      severity_estimate = 1 - 1e-4, # highest possible severity
-      severity_low = NA_real_,
-      severity_high = NA_real_
+      severity_estimate = NA, # set NA because not valid calculation
+      severity_low = NA,
+      severity_high = NA
     )
   )
 

diff --git a/tests/testthat/testthat-problems.rds b/tests/testthat/testthat-problems.rds
diff --git a/vignettes/cfr.Rmd b/vignettes/cfr.Rmd
@@ -4,8 +4,6 @@ output:
   bookdown::html_vignette2:
     fig_caption: yes
     code_folding: show
-pkgdown:
-  as_is: true
 bibliography: resources/library.json
 link-citations: true
 vignette: >

diff --git a/vignettes/data_from_incidence2.Rmd b/vignettes/data_from_incidence2.Rmd
@@ -4,8 +4,6 @@ output:
   bookdown::html_vignette2:
     fig_caption: yes
     code_folding: show
-pkgdown:
-  as_is: true
 vignette: >
   %\VignetteIndexEntry{Handling data from {incidence2}}
   %\VignetteEngine{knitr::rmarkdown}

diff --git a/vignettes/delay_distributions.Rmd b/vignettes/delay_distributions.Rmd
@@ -4,8 +4,6 @@ output:
   bookdown::html_vignette2:
     fig_caption: yes
     code_folding: show
-pkgdown:
-  as_is: true
 bibliography: resources/library.json
 link-citations: true
 vignette: >

diff --git a/vignettes/estimate_ascertainment.Rmd b/vignettes/estimate_ascertainment.Rmd
@@ -4,8 +4,6 @@ output:
   bookdown::html_vignette2:
     fig_caption: yes
     code_folding: show
-pkgdown:
-  as_is: true
 bibliography: resources/library.json
 link-citations: true
 vignette: >

diff --git a/vignettes/estimate_static_severity.Rmd b/vignettes/estimate_static_severity.Rmd
@@ -4,8 +4,6 @@ output:
   bookdown::html_vignette2:
     fig_caption: yes
     code_folding: show
-pkgdown:
-  as_is: true
 bibliography: resources/library.json
 link-citations: true
 vignette: >
@@ -172,7 +170,7 @@ head(df_covid_uk)
 We retrieve the appropriate distribution for Covid-19 from @linton2020; this is a lognormal distribution with $\mu$ = 2.577 and $\sigma$ = 0.440.
 
 ::: {.alert .alert-warning}
-**Note that** @linton2020 fitted a discrete lognormal distribution and we use a continuous distribution, and that we are ignoring uncertainty in the distribution parameters and likely under-estimating uncertainty in the CFR.
+**Note that** @linton2020 fitted a discrete lognormal distribution and we use a continuous distribution, and that we are ignoring uncertainty in the distribution parameters and hence likely under-estimating uncertainty in the CFR.
 :::
 
 ### Estimating the naive and corrected CFR

diff --git a/vignettes/estimate_time_varying_severity.Rmd b/vignettes/estimate_time_varying_severity.Rmd
@@ -4,8 +4,6 @@ output:
   bookdown::html_vignette2:
     fig_caption: yes
     code_folding: show
-pkgdown:
-  as_is: true
 bibliography: resources/library.json
 link-citations: true
 vignette: >