Fix bug in merge_samples2

Resolves #52
mikemc · Sep 12, 2020 · b08474e · b08474e
1 parent 4b21680
commit b08474e
Show file tree

Hide file tree

Showing 4 changed files with 66 additions and 3 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: speedyseq
 Title: Faster implementations of phyloseq functions
-Version: 0.3.2.9002
+Version: 0.3.2.9003
 Authors@R: 
     person(given = "Michael",
            family = "McLaren",

diff --git a/R/merge_samples.R b/R/merge_samples.R
@@ -125,7 +125,7 @@ setMethod(
     ) %>%
       data.frame %>%
       vctrs::vec_set_names(new_sample_names) %>%
-      sample_data
+      sample_data_stable
     ## Put back in initial order
     if (!reorder) {
       initial_order <- group %>% unique %>% as.character
@@ -193,3 +193,29 @@ merge_groups <- function(x, group, f = unique_or_na) {
     purrr::map(f) %>%
     {vctrs::vec_c(!!!., .name_spec = rlang::zap())}
 }
+
+#' Create sample data without adjusting row/sample names
+#'
+#' `phyloseq::sample_data()` will change the sample names from the row names if
+#' they are `as.character(1:nrow(object))`. This function instead keeps the
+#' names as is.
+#' 
+#' @param object A "data.frame"-class object
+#'
+#' @keywords internal
+#'
+#' @examples 
+#' x <- data.frame(var1 = letters[1:3], var2 = 7:9)
+#' rownames(x)
+#' sample_data(x)
+#' sample_data_stable(x)
+sample_data_stable <- function(object) {
+  # Modified from phyloseq's sample_data data.frame method; see
+  # https://github.com/joey711/phyloseq/blob/master/R/sampleData-class.R
+  stopifnot(identical(class(object), "data.frame"))
+	# Make sure there are no phantom levels in categorical variables
+	object <- phyloseq:::reconcile_categories(object)
+	# instantiate first to check validity
+	SM <- new("sample_data", object)
+	SM
+}
diff --git a/man/sample_data_stable.Rd b/man/sample_data_stable.Rd
diff --git a/tests/testthat/test-merge_samples.R b/tests/testthat/test-merge_samples.R
@@ -1,7 +1,7 @@
 # run just this file:
 # devtools::test_file(here::here("tests", "testthat", "test-merge_samples.R"))
 
-# unique_or_na ----------------------------------------------------------------
+# helpers ---------------------------------------------------------------------
 
 test_that("unique_or_na() returns expected results", {
   ## Atomic vectors
@@ -48,6 +48,13 @@ test_that("unique_or_na() returns expected results", {
   expect_identical(unique_or_na(list(1,1)), 1)
 })
 
+test_that("`sample_data_stable()` maintains names", {
+  x <- data.frame(var1 = letters[1:3], var2 = 7:9)
+  expect_identical(sample_data_stable(x) %>% sample_names, as.character(1:3))
+  rownames(x) <- c("3", "two", "1")
+  expect_identical(sample_data_stable(x) %>% sample_names, c("3", "two", "1"))
+})
+
 # merge_samples2 --------------------------------------------------------------
 
 test_that("Test `merge_samples2()` on `enterotype` dataset", {
@@ -78,4 +85,11 @@ test_that("Test `merge_samples2()` on `enterotype` dataset", {
   expect_identical(ps0, ps1)
   # Remaining components should be the same as original
   expect_identical(tax_table(ps0), tax_table(ps))
+  # Should also work on columns that are numbers, or factors whose levels are
+  # numbers, even if the values are of the form 1:n. (Issue #52)
+  expect_warning(ps2 <- merge_samples2(ps, "Enterotype"))
+  sample_data(ps)$Enterotype <- sample_data(ps)$Enterotype %>% as.integer
+  expect_warning(ps3 <- merge_samples2(ps, "Enterotype"))
+  sample_data(ps3)$Enterotype <- sample_data(ps3)$Enterotype %>% as.factor
+  expect_identical(ps2, ps3)
 })