diff --git a/R/cell_count_plot.R b/R/cell_count_plot.R index 9ec82966..41434713 100644 --- a/R/cell_count_plot.R +++ b/R/cell_count_plot.R @@ -118,7 +118,7 @@ CellCountPlot.data.frame <- function( #' seur_obj1 <- seur_obj2 <- seur_obj #' seur_obj1$sample <- "1" #' seur_obj2$sample <- "2" -#' seur_obj_merged <- merge(seur_obj1, seur_obj2) +#' seur_obj_merged <- merge(seur_obj1, seur_obj2, add.cell.ids = c("A", "B")) #' CellCountPlot(seur_obj_merged, group.by = "labels", color.by = "sample") #' #' @export diff --git a/R/differential_colocalization_analysis.R b/R/differential_colocalization_analysis.R index f3886289..b688b420 100644 --- a/R/differential_colocalization_analysis.R +++ b/R/differential_colocalization_analysis.R @@ -178,7 +178,7 @@ RunDCA.data.frame <- function ( #' seur1 <- seur2 <- ReadMPX_Seurat(pxl_file, overwrite = TRUE) #' seur1$sample <- "Sample1" #' seur2$sample <- "Sample2" -#' seur_merged <- merge(seur1, seur2) +#' seur_merged <- merge(seur1, seur2, add.cell.ids = c("A", "B")) #' #' # Run DCA #' dca_markers <- RunDCA(seur_merged, contrast_column = "sample", diff --git a/R/differential_polarity_analysis.R b/R/differential_polarity_analysis.R index 4fd1b272..4b4ea29b 100644 --- a/R/differential_polarity_analysis.R +++ b/R/differential_polarity_analysis.R @@ -186,7 +186,7 @@ RunDPA.data.frame <- function ( #' seur1 <- seur2 <- ReadMPX_Seurat(pxl_file, overwrite = TRUE) #' seur1$sample <- "Sample1" #' seur2$sample <- "Sample2" -#' seur_merged <- merge(seur1, seur2) +#' seur_merged <- merge(seur1, seur2, add.cell.ids = c("A", "B")) #' #' # Run DPA #' dpa_markers <- RunDPA(seur_merged, contrast_column = "sample", diff --git a/R/load_cell_graphs.R b/R/load_cell_graphs.R index 2425ba20..69dbdeb7 100644 --- a/R/load_cell_graphs.R +++ b/R/load_cell_graphs.R @@ -45,33 +45,56 @@ LoadCellGraphs.FileSystemDataset <- function ( (chunk_length > 0) ) - # Ensure that all cell names are available in edgelist - stopifnot( - "All 'cells' must be present in the edgelist" = - all(cells %in% (object %>% pull(component, as_vector = TRUE))) - ) - # Validate load_as load_as <- match.arg(load_as, choices = c("bipartite", "Anode", "linegraph")) # Select load function - graph_load_fkn <- switch (load_as, + graph_load_fkn <- switch(load_as, "bipartite" = .load_as_bipartite, "Anode" = .load_as_anode, - "linegraph" = .load_as_linegraph - ) + "linegraph" = .load_as_linegraph) # Convert edgelist to list of Cell Graphs if (verbose && check_global_verbosity()) cli_alert(" Loading {length(cells)} edgelist(s) as {col_br_magenta(load_as)} graph(s)") # Split cells id into chunks - cells_chunks <- split(cells, ceiling(seq_along(cells) / chunk_length)) + sample_id_table <- do.call(rbind, strsplit(cells, "_")) + if (ncol(sample_id_table) == 1) { + sample_id_table <- cbind("S1", sample_id_table) + } + colnames(sample_id_table) <- c("sample", "component") + sample_id_table <- as_tibble(sample_id_table) %>% + group_by(sample) %>% + mutate(group = ceiling(seq_len(n()) / chunk_length)) %>% + group_by(sample, group) - # Load cell graphs - p <- progressr::progressor(along = cells_chunks) - cellgraphs <- lapply(cells_chunks, function (cell_ids) { - g_list <- graph_load_fkn(object, cell_ids = cell_ids, add_markers = add_marker_counts) + key_pairs <- sample_id_table %>% group_keys() + + sample_id_table_list <- sample_id_table %>% + group_split() %>% + as.list() + + # Set up progressor + p <- progressr::progressor(along = sample_id_table_list) + + # Process chunks + cellgraphs <- lapply(seq_along(sample_id_table_list), function (i) { + + cell_ids <- sample_id_table_list[[i]] + sample_id <- key_pairs[i, 1, drop = TRUE] + + # Load chunks for specific sample + object_filtered <- object %>% filter(sample == sample_id) + g_list <- try({graph_load_fkn(object_filtered, + cell_ids = cell_ids[, 2, drop = TRUE], + add_markers = add_marker_counts)}, silent = TRUE) + + if (inherits(g_list, what = "try-error") || any(sapply(g_list, is.null))) { + abort(glue("Failed to load edge list data. Most likely reason is that invalid cells were provided.")) + } + + # Add marker counts if (add_marker_counts) { g_list <- lapply(g_list, function(g) { return(CreateCellGraphObject(g$graph, counts = g$counts, verbose = FALSE)) @@ -81,10 +104,14 @@ LoadCellGraphs.FileSystemDataset <- function ( return(CreateCellGraphObject(g, verbose = FALSE)) }) } + + # Log progress p() return(g_list) }) %>% Reduce(c, .) + cellgraphs <- setNames(cellgraphs, nm = cells) + return(cellgraphs) } diff --git a/R/objects.R b/R/objects.R index 927f0306..005da480 100755 --- a/R/objects.R +++ b/R/objects.R @@ -118,12 +118,24 @@ CreateCellGraphObject <- function ( ) { # Check input parameters - stopifnot("'cellgraph' must be a non-empty 'tbl_graph' object" = inherits(cellgraph, what = "tbl_graph") & (length(cellgraph) > 0)) + stopifnot( + "'cellgraph' must be a non-empty 'tbl_graph' object" = + inherits(cellgraph, what = "tbl_graph") && + (length(cellgraph) > 0) + ) if (!is.null(counts)) { - stopifnot("'counts' must be a non-empty 'dgCMatrix' object" = inherits(counts, what = "dgCMatrix") & (length(counts) > 0)) + stopifnot( + "'counts' must be a non-empty 'dgCMatrix' object" = + inherits(counts, what = "dgCMatrix") && + (length(counts) > 0) + ) } if (!is.null(layout)) { - stopifnot("'layout' must be a non-empty 'tbl_df' object" = inherits(layout, what = "tbl_df") & (length(layout) > 0)) + stopifnot( + "'layout' must be a non-empty 'tbl_df' object" = + inherits(layout, what = "tbl_df") && + (length(layout) > 0) + ) } if (!"type" %in% names(attributes(cellgraph))) { @@ -135,8 +147,12 @@ CreateCellGraphObject <- function ( # Add checks for graph types if (attr(cellgraph, "type") == "bipartite") { - stopifnot("Node attribute 'name' is missing" = "name" %in% vertex_attr_names(cellgraph)) - stopifnot("Node attribute 'node_type' is missing" = "node_type" %in% vertex_attr_names(cellgraph)) + stopifnot( + "Node attribute 'name' is missing" = + "name" %in% vertex_attr_names(cellgraph), + "Node attribute 'node_type' is missing" = + "node_type" %in% vertex_attr_names(cellgraph) + ) } #TODO: Add check for A-node-projection and linegraph @@ -260,7 +276,10 @@ CreateCellGraphAssay <- function ( } # check for required fields - stopifnot("column 'component' is missing from cellgraphs" = "component" %in% names(fsd)) + stopifnot( + "column 'component' is missing from cellgraphs" = + "component" %in% names(fsd) + ) available_components <- fsd %>% pull(component, as_vector = TRUE) %>% unique() if (!all(colnames(counts) %in% available_components)) abort(glue("Some components are not available in the edge list")) @@ -608,13 +627,44 @@ RenameCells.CellGraphAssay <- function ( stopifnot( "'new.names' must be a character vector with the same length as the number of cells present in 'object'" = - inherits(new.names, what = "character") & (length(new.names) == ncol(object)) + inherits(new.names, what = "character") && + (length(new.names) == ncol(object)) ) - names(slot(object = object, name = "cellgraphs")) <- new.names # save original cell IDs orig.names <- colnames(object) + # Fetch unique sample IDs from orig.names and new.names + new_sample_id_table <- do.call(rbind, strsplit(new.names, "_")) + new_sample_id <- new_sample_id_table[, 1] %>% unique() + old_sample_id_table <- do.call(rbind, strsplit(orig.names, "_")) + if (ncol(old_sample_id_table) == 1) { + old_sample_id <- "S1" + } else { + old_sample_id <- old_sample_id_table[, 1] %>% unique() + } + + # Validate names + names_checked <- sapply(new.names, function(s) { + stringr::str_like(s, pattern = "^[a-zA-Z][a-zA-Z0-9]*\\_RCVCMP\\d{7}$") + }) + if (!any(names_checked)) { + abort(glue("Failed to merge CellGraphAssays.\n\n", + "Make sure to follow these steps:\n\n", + "1. CellGraphAssay column names should have the following format:\n", + " ^[a-zA-Z][a-zA-Z0-9]*_RCVCMP\\d{7}$\n", + " where the first part is a sample ID and the second part is the PXL ID, \n", + " separated by an underscore. For example, {col_green('Sample1_RCVCMP0000000')}.\n\n", + "2. When merging Seurat objects, make sure to set {col_green('add.cell.ids')}.\n\n", + "3. Cannot merge Seurat objects with CellGraphAssays twice due to naming conflicts. \n", + " Instead, merge all data sets once:\n", + " {col_green('se_merged <- merge(se1, list(se2, se3, se4, ...))')}\n", + " Attempting to merge the merged object again will fail:\n", + " {col_red('se_double_merged <- merge(se_merged, se_merged)')}")) + } + + names(slot(object = object, name = "cellgraphs")) <- new.names + names(x = new.names) <- NULL for (data.slot in object[]) { old.data <- LayerData(object = object, layer = data.slot) @@ -624,38 +674,59 @@ RenameCells.CellGraphAssay <- function ( colnames(x = slot(object = object, name = data.slot)) <- new.names } - # Handle arrow dataset - if (!is.na(slot(object, name = "arrow_dir"))) { - # Restore arrow connection if broken - object <- RestoreArrowConnection(object, verbose = FALSE) + # Get arrow dir + arrow_dir <- ArrowDir(object) + if (!is.null(arrow_dir)) { + arrow_dirs <- list.files(arrow_dir, full.names = TRUE) + + # Create new directory + session_tmpdir_random <- file.path(getOption("pixelatorR.arrow_outdir"), paste0(.generate_random_string(), "-", format(Sys.time(), "%Y-%m-%d-%H%M%S"))) - # Get schema for the component column (string or large string) - schema <- unify_schemas(schema(object@arrow_data)["component"], - schema(object@arrow_data)["component"] %>% setNames(nm = "component_new")) + if (length(arrow_dirs) == 1) { + # Handle renaming if 1 hive-style directory is present - # Create conversion table with defined schema - conv_table <- arrow_table(component = orig.names, component_new = new.names, schema = schema) + # Check sample ID + if (length(new_sample_id) > 1) { + abort(glue("Found multiple sample IDs in 'new.names' but only 1 edgelist in the arrow directory.")) + } - fsd <- slot(object, name = "arrow_data") + # Copy directory + if (dir.exists(arrow_dirs)) { + dir.create(session_tmpdir_random) + file.copy(from = arrow_dirs, to = session_tmpdir_random, recursive = TRUE) + # Rename hive-style directory + hive_style_dir_sample1 <- list.files(session_tmpdir_random, full.names = TRUE) + file.rename(from = hive_style_dir_sample1, file.path(session_tmpdir_random, paste0("sample=", new_sample_id))) + } else { + abort(glue("Directory '{arrow_dirs}' is missing Cannot rename cell IDs in edgelists.")) + } + } else { + # Handle renaming if more than 1 hive-style directories are present - # Left join new names and save edgelist - session_tmpdir_random <- file.path(getOption("pixelatorR.arrow_outdir"), paste0(.generate_random_string(), "-", format(Sys.time(), "%Y-%m-%d-%H%M%S"))) + if (!length(arrow_dirs) == length(new_sample_id)) { + abort(glue("Found {arrow_dirs} samples in arrow directory, but {length(new_sample_id)} samples in 'new.names'")) + } - # arrow doesn't support pasting to create new character columns, - # instead we do a left join with our conversion table to - # obtain the new cell/component names - fsd %>% - left_join(y = conv_table, by = "component") %>% # Add new cell names - select(-component) %>% # remove column with old names - rename(component = component_new) %>% # rename column - group_by(sample) %>% - write_dataset(path = session_tmpdir_random) # write data to a parquet file - - # Rename parquet file to ensure consistency with other functions - files <- list.files(session_tmpdir_random, pattern = "parquet", recursive = TRUE, full.names = TRUE) - for (f in files) { - if (basename(f) != "edgelist.parquet") { - file.rename(from = f, to = file.path(dirname(f), "edgelist.parquet")) + # Copy directories to new folder + if (all(dir.exists(arrow_dirs))) { + dir.create(session_tmpdir_random) + for (i in seq_along(arrow_dirs)) { + file.copy(from = arrow_dirs[i], to = session_tmpdir_random, recursive = TRUE) + } + hive_style_dir_samples <- list.files(session_tmpdir_random, full.names = TRUE) + hive_style_dir_sample_IDs <- basename(hive_style_dir_samples) %>% gsub(pattern = "sample=", replacement = "", x = .) + hive_style_dir_samples <- setNames(hive_style_dir_samples, nm = hive_style_dir_sample_IDs) + + # Reorder hive_style_dir_samples + hive_style_dir_samples <- hive_style_dir_samples[old_sample_id] + + # Rename hive-style directory + for (i in seq_along(arrow_dirs)) { + file.rename(from = hive_style_dir_samples[i], file.path(session_tmpdir_random, paste0("sample=", new_sample_id[i]))) + } + } else { + abort(glue("The following directories are missing:\n {paste0(arrow_dirs, collapse='\n')} ", + "\nCannot rename cell IDs in edgelists.")) } } @@ -754,8 +825,14 @@ as.CellGraphAssay.Assay <- function ( # Check cellgraphs if (!is.null(cellgraphs)) { - stopifnot("'cellgraphs' must be a non-empty list with the same number of elements as the number of columns in the Assay" = is.list(cellgraphs) & (length(cellgraphs) == ncol(x))) - stopifnot("'cellgraphs' names must match colnames of the Assay" = all(names(cellgraphs) == colnames(x))) + stopifnot( + "'cellgraphs' must be a non-empty list with the same number of elements as the number of columns in the Assay" = + is.list(cellgraphs) && + (length(cellgraphs) == ncol(x))) + stopifnot( + "'cellgraphs' names must match colnames of the Assay" = + all(names(cellgraphs) == colnames(x)) + ) for (i in seq_along(cellgraphs)) { if (!inherits(x = cellgraphs[[i]], what = c("CellGraph", "NULL"))) { abort(glue("Element {i} is not a CellGraph object or NULL")) @@ -772,13 +849,21 @@ as.CellGraphAssay.Assay <- function ( # Abort if cellgraphs is empty and neither arrow_dir or arrow_data is provided loaded_graphs <- sum(sapply(cellgraphs, is.null)) if (loaded_graphs == ncol(x)) { - stopifnot("One of 'arrow_dir' or 'arrow_data' must be provided if 'cellgraphs is empty'" = (!is.null(arrow_dir)) || (!is.null(arrow_data))) + stopifnot( + "One of 'arrow_dir' or 'arrow_data' must be provided if 'cellgraphs is empty'" = + (!is.null(arrow_dir)) || + (!is.null(arrow_data)) + ) } # Handle arrow_dir arrow_dir <- arrow_dir %||% NA_character_ if (!is.na(arrow_dir)) { - stopifnot("'arrow_dir' must be a non-empty character" = is.character(arrow_dir) && (length(arrow_dir) >= 1)) + stopifnot( + "'arrow_dir' must be a non-empty character" = + is.character(arrow_dir) && + (length(arrow_dir) >= 1) + ) for (path in arrow_dir) { if (!(dir.exists(path) || file.exists(path))) { abort(glue("Directory/file {path} doesn't exist")) @@ -793,7 +878,7 @@ as.CellGraphAssay.Assay <- function ( stopifnot(all(c("upia", "marker", "component", "sample") %in% names(arrow_data))) stopifnot("A valid 'arrow_dir' must be provided if 'arrow_data' is provided" = !is.na(arrow_dir)) stopifnot("column 'component' is missing from 'arrow_data" = "component" %in% names(arrow_data)) - stopifnot("One or several components are missing from 'arrow_data'" = all(colnames(x) %in% (arrow_data %>% pull(component, as_vector = TRUE)))) + #stopifnot("One or several components are missing from 'arrow_data'" = all(colnames(x) %in% (arrow_data %>% pull(component, as_vector = TRUE)))) } new.assay <- as(object = x, Class = "CellGraphAssay") @@ -1387,6 +1472,11 @@ subset.CellGraphAssay <- function ( ... ) { + stopifnot( + "All 'cells' must be present in x" = + all(cells %in% colnames(x)) + ) + # Get cellgraphs cellgraphs <- x@cellgraphs @@ -1400,6 +1490,15 @@ subset.CellGraphAssay <- function ( # Fetch arrow_dir arrow_dir <- slot(x, name = "arrow_dir") + # Get sample ids + sample_id_table <- do.call(rbind, strsplit(colnames(x), "_")) + rownames(sample_id_table) <- colnames(x) + if (ncol(sample_id_table) == 1) { + sample_id <- NULL + } else { + sample_id <- sample_id_table[, 1] %>% unique() + } + # Handle arrow data set if available if (!is.na(arrow_dir)) { x <- RestoreArrowConnection(x, verbose = FALSE) @@ -1412,12 +1511,26 @@ subset.CellGraphAssay <- function ( # Create a temporary directory with a unique name session_tmpdir_random <- file.path(getOption("pixelatorR.arrow_outdir"), paste0(.generate_random_string(), "-", format(Sys.time(), "%Y-%m-%d-%H%M%S"))) - - # Filter edgelist and export it - slot(x, name = "arrow_data") %>% - filter(component %in% cells) %>% - group_by(sample) %>% - write_dataset(session_tmpdir_random) + dir.create(session_tmpdir_random) + + # Handle samples + if (ncol(sample_id_table) > 1) { + components_keep_list <- split(sample_id_table[cells, 2], sample_id_table[cells, 1]) + for (s in sample_id) { + components_keep <- components_keep_list[[s]] + slot(x, name = "arrow_data") %>% + filter(sample == s) %>% + filter(component %in% components_keep) %>% + group_by(sample) %>% + write_dataset(path = session_tmpdir_random) + } + } else { + # Filter edgelist and export it + slot(x, name = "arrow_data") %>% + filter(component %in% cells) %>% + group_by(sample) %>% + write_dataset(session_tmpdir_random) + } # Rename parquet files for consistensy with other functions files <- list.files(session_tmpdir_random, pattern = "parquet", recursive = TRUE, full.names = TRUE) @@ -1466,6 +1579,10 @@ subset.CellGraphAssay <- function ( return(pxcellassay) } + +#' @param add.cell.ids A character vector with sample names +#' +#' @importFrom SeuratObject Key Key<- RenameCells #' @describeIn CellGraphAssay-methods Merge two or more \code{CellGraphAssay} objects together #' @concept assay #' @method merge CellGraphAssay @@ -1490,6 +1607,7 @@ merge.CellGraphAssay <- function ( x = NULL, y = NULL, merge.data = TRUE, + add.cell.ids = NULL, ... ) { @@ -1506,13 +1624,49 @@ merge.CellGraphAssay <- function ( objects <- c(x, y) + # Define add.cell.ids + # add.cell.ids <- add.cell.ids %||% paste0("Sample", seq_along(objects)) + if (!is.null(add.cell.ids)) { + stopifnot( + "Length of 'add.cell.ids' must match the number of objects to merge" = + length(add.cell.ids) == length(objects) + ) + } + # Check duplicate cell names cell.names <- unlist(lapply(objects, colnames)) - if (any(duplicated(x = cell.names))) { - cli_alert_warning("Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.") + unique_names <- table(cell.names) + names_are_duplicated <- any(unique_names > 1) + sample_id_old_table <- do.call(rbind, strsplit(cell.names, "_")) + if (names_are_duplicated && is.null(add.cell.ids)) { + if (ncol(sample_id_old_table) == 1) { + add.cell.ids <- paste0("Sample", seq_along(objects)) + } else if (ncol(sample_id_old_table) == 2) { + abort("Found non-unique IDs across samples. A 'add.cell.ids' must be specified.") + } + } + + # Fetch sample IDs from column names + if (ncol(sample_id_old_table) == 1) { + objects <- + lapply(seq_along(objects), function(i) { + if (is.null(add.cell.ids)) { + return(objects[[i]]) + } else { + new_names_modified <- paste0(add.cell.ids[i], "_", Cells(x = objects[[i]])) + return(RenameCells(object = objects[[i]], new.names = new_names_modified)) + } + }) + } else { objects <- lapply(seq_along(objects), function(i) { - return(RenameCells(object = objects[[i]], new.names = paste0(Cells(x = objects[[i]]), "_", i))) + if (is.null(add.cell.ids)) { + return(objects[[i]]) + } else { + cli_alert_warning("Found multiple samples in objects. 'add.cell.ids' will be added as a prefix to old IDs.") + new_names_modified <- paste0(add.cell.ids[i], Cells(x = objects[[i]])) + return(RenameCells(object = objects[[i]], new.names = new_names_modified)) + } }) } @@ -1551,53 +1705,16 @@ merge.CellGraphAssay <- function ( new_dir <- file.path(getOption("pixelatorR.arrow_outdir"), paste0(.generate_random_string(), "-", format(Sys.time(), "%Y-%m-%d-%H%M%S"))) dir.create(path = new_dir, showWarnings = FALSE) - # List hive-partitioned directories for all objects - dirs_unpacked <- do.call(bind_rows, lapply(seq_along(all_arrow_dirs), function(i) { - cur_dir <- all_arrow_dirs[i] - dirs_cur <- list.files(cur_dir) - cur_unpacked <- do.call(bind_rows, strsplit(dirs_cur, "=") %>% - lapply(function(x) {tibble(id = x[1], sample = x[2])})) - cur_unpacked <- cur_unpacked %>% - mutate(old_dir = list.files(cur_dir, full.names = TRUE), ID = i) - })) - - # Create new names - dirs_unpacked <- dirs_unpacked %>% - mutate(sample = paste0("S", 1:n())) %>% - mutate(new_dir = file.path(new_dir, paste0("sample=", sample))) - - # Move parquet files - for (i in 1:nrow(dirs_unpacked)) { - unlink(dirs_unpacked$new_dir[i], recursive = TRUE) - dir.create(path = dirs_unpacked$new_dir[i], showWarnings = TRUE) - file.copy(from = list.files(dirs_unpacked$old_dir[i], full.names = TRUE, recursive = TRUE), - to = dirs_unpacked$new_dir[i], recursive = TRUE) + # Move hive-style old sample diretories to new directory + for (i in seq_along(all_arrow_dirs)) { + hive_style_dirs <- list.files(all_arrow_dirs[i], full.names = TRUE) + for (ii in seq_along(hive_style_dirs)) { + file.copy(from = hive_style_dirs[ii], to = new_dir, recursive = TRUE) + } } - # Load parquet files + # Open arrow data arrow_data <- open_dataset(new_dir) - - # Change ids in parquet edgelist if necessary - # This check could become slow for larger datasets - # TODO: Look for alternative solution - if (!all(colnames(new_assay) %in% (arrow_data %>% pull(component, as_vector = TRUE)))) { - - # List all parquet files in new_dir - all_parquet_files <- list.files(new_dir, recursive = TRUE, full.names = TRUE) - - # Update cell IDs with the same rules as Seurat:::CheckDuplicateCellNames - for (i in seq_along(all_parquet_files)) { - parquet_file <- all_parquet_files[i] - edgelist <- open_dataset(parquet_file) %>% - mutate(id = i) %>% - mutate(component = str_c(component, id, sep = "_")) %>% - select(-id) - write_parquet(x = edgelist, sink = parquet_file) - rm(edgelist) - } - # Load parquet files from new directory - arrow_data <- open_dataset(new_dir) - } } else { # If any path in the input objects is NA, simply inactivate the # slots required to handle the arrow Dataset diff --git a/R/tau_plot.R b/R/tau_plot.R index eefffbdb..28b4615f 100644 --- a/R/tau_plot.R +++ b/R/tau_plot.R @@ -82,7 +82,7 @@ pxContent_vs_Tau.data.frame <- function ( #' seur_obj1 <- seur_obj2 <- seur_obj #' seur_obj1$sample <- "1" #' seur_obj2$sample <- "2" -#' seur_obj_merged <- merge(seur_obj1, seur_obj2) +#' seur_obj_merged <- merge(seur_obj1, seur_obj2, add.cell.ids = c("A", "B")) #' pxContent_vs_Tau(seur_obj_merged, group.by = "sample") #' #' @export diff --git a/man/CellCountPlot.Rd b/man/CellCountPlot.Rd index 045430b3..a0b41f4a 100644 --- a/man/CellCountPlot.Rd +++ b/man/CellCountPlot.Rd @@ -74,7 +74,7 @@ CellCountPlot(seur_obj, color.by = "labels") seur_obj1 <- seur_obj2 <- seur_obj seur_obj1$sample <- "1" seur_obj2$sample <- "2" -seur_obj_merged <- merge(seur_obj1, seur_obj2) +seur_obj_merged <- merge(seur_obj1, seur_obj2, add.cell.ids = c("A", "B")) CellCountPlot(seur_obj_merged, group.by = "labels", color.by = "sample") } diff --git a/man/CellGraphAssay-methods.Rd b/man/CellGraphAssay-methods.Rd index 14d0e15a..662ee436 100644 --- a/man/CellGraphAssay-methods.Rd +++ b/man/CellGraphAssay-methods.Rd @@ -8,7 +8,7 @@ \usage{ \method{subset}{CellGraphAssay}(x, features = NULL, cells = NULL, ...) -\method{merge}{CellGraphAssay}(x = NULL, y = NULL, merge.data = TRUE, ...) +\method{merge}{CellGraphAssay}(x = NULL, y = NULL, merge.data = TRUE, add.cell.ids = NULL, ...) } \arguments{ \item{x}{A \code{\link{CellGraphAssay}} object} @@ -23,6 +23,8 @@ \item{merge.data}{Merge the data slots instead of just merging the counts (which requires renormalization); this is recommended if the same normalization approach was applied to all objects} + +\item{add.cell.ids}{A character vector with sample names} } \value{ A \code{CellGraphAssay} object diff --git a/man/RunDCA.Rd b/man/RunDCA.Rd index 2ef7ea0a..805c9fdf 100644 --- a/man/RunDCA.Rd +++ b/man/RunDCA.Rd @@ -109,7 +109,7 @@ dca_markers seur1 <- seur2 <- ReadMPX_Seurat(pxl_file, overwrite = TRUE) seur1$sample <- "Sample1" seur2$sample <- "Sample2" -seur_merged <- merge(seur1, seur2) +seur_merged <- merge(seur1, seur2, add.cell.ids = c("A", "B")) # Run DCA dca_markers <- RunDCA(seur_merged, contrast_column = "sample", diff --git a/man/RunDPA.Rd b/man/RunDPA.Rd index 3c96c209..071689f5 100644 --- a/man/RunDPA.Rd +++ b/man/RunDPA.Rd @@ -108,7 +108,7 @@ dpa_markers seur1 <- seur2 <- ReadMPX_Seurat(pxl_file, overwrite = TRUE) seur1$sample <- "Sample1" seur2$sample <- "Sample2" -seur_merged <- merge(seur1, seur2) +seur_merged <- merge(seur1, seur2, add.cell.ids = c("A", "B")) # Run DPA dpa_markers <- RunDPA(seur_merged, contrast_column = "sample", diff --git a/man/pxContent_vs_Tau.Rd b/man/pxContent_vs_Tau.Rd index 62ad52b1..1d8e36ca 100644 --- a/man/pxContent_vs_Tau.Rd +++ b/man/pxContent_vs_Tau.Rd @@ -48,7 +48,7 @@ pxContent_vs_Tau(seur_obj) seur_obj1 <- seur_obj2 <- seur_obj seur_obj1$sample <- "1" seur_obj2$sample <- "2" -seur_obj_merged <- merge(seur_obj1, seur_obj2) +seur_obj_merged <- merge(seur_obj1, seur_obj2, add.cell.ids = c("A", "B")) pxContent_vs_Tau(seur_obj_merged, group.by = "sample") } diff --git a/tests/testthat/test-CellGraphAssay-methods.R b/tests/testthat/test-CellGraphAssay-methods.R index b4753f54..14fe7c4b 100644 --- a/tests/testthat/test-CellGraphAssay-methods.R +++ b/tests/testthat/test-CellGraphAssay-methods.R @@ -43,21 +43,25 @@ test_that("subset.CellGraphAssay works as expected", { # merge method cg_assay <- seur_obj[["mpxCells"]] test_that("merge.CellGraphAssay works as expected", { - cg_assay_merged <- merge(cg_assay, y = cg_assay) + expect_no_error(cg_assay_merged <- merge(cg_assay, y = cg_assay)) expect_equal(ncol(cg_assay_merged), 20) expect_equal(length(CellGraphs(cg_assay_merged)), 20) - cg_assay_merged <- merge(cg_assay, y = list(cg_assay, cg_assay)) + expect_no_error(cg_assay_merged <- merge(cg_assay, y = list(cg_assay, cg_assay))) expect_equal(ncol(cg_assay_merged), 30) expect_equal(length(CellGraphs(cg_assay_merged)), 30) - cg_assay_merged <- merge(cg_assay, y = list(cg_assay, cg_assay)) - expect_equal(colnames(cg_assay_merged), c(paste0(colnames(cg_assay), "_1"), - paste0(colnames(cg_assay), "_2"), - paste0(colnames(cg_assay), "_3"))) + expect_no_error(cg_assay_merged <- merge(cg_assay, y = list(cg_assay, cg_assay))) + expect_equal(colnames(cg_assay_merged), c(paste0("Sample1_", colnames(cg_assay)), + paste0("Sample2_", colnames(cg_assay)), + paste0("Sample3_", colnames(cg_assay)))) + expect_no_error({cg_assay_merged <- merge(cg_assay, y = list(cg_assay, cg_assay))}) + expect_no_error({cg_assay_double_merged <- merge(cg_assay_merged, cg_assay_merged, add.cell.ids = c("A", "B"))}) }) test_that("merge.CellGraphAssay fails when invalid input is provided", { expect_error({cg_assay_merged <- merge(cg_assay, y = "Invalid")}, "'y' must be a 'CellGraphAssay' object or a list of 'CellGraphAssay' objects") expect_error({cg_assay_merged <- merge(cg_assay, y = list(cg_assay, "Invalid"))}, "Element 2 in 'y' is not a 'CellGraphAssay'") + expect_no_error({cg_assay_merged <- merge(cg_assay, y = list(cg_assay, cg_assay))}) + expect_error({cg_assay_double_merged <- merge(cg_assay_merged, cg_assay_merged)}) }) # Show method @@ -190,3 +194,4 @@ test_that("ArrowDir.CellGraphAssay fails when invalid input is provided", { # Setter invalid inout expect_error(ArrowDir(cg_assay) <- 1, "'value' must be a non-empty character vector") }) + diff --git a/tests/testthat/test-LoadCellGraphs.R b/tests/testthat/test-LoadCellGraphs.R index eebf6832..f0b78a98 100644 --- a/tests/testthat/test-LoadCellGraphs.R +++ b/tests/testthat/test-LoadCellGraphs.R @@ -3,11 +3,17 @@ pxl_file <- system.file("extdata/PBMC_10_cells", "Sample01_test.pxl", package = "pixelatorR") seur_obj <- ReadMPX_Seurat(pxl_file, overwrite = TRUE, return_cellgraphassay = TRUE) +seur_obj_merged <- merge(seur_obj, seur_obj, add.cell.ids = c("Sample1", "Sample2")) cg_assay <- seur_obj[["mpxCells"]] +cg_assay_merged <- merge(cg_assay, cg_assay) test_that("LoadCellGraphs works for Seurat objects", { + # Single data set expect_no_error({seur_obj <- LoadCellGraphs(seur_obj, cells = colnames(seur_obj)[1])}) expect_s4_class(seur_obj, "Seurat") + + # Merged data set + seur_obj_merged <- LoadCellGraphs(seur_obj_merged, cells = colnames(seur_obj_merged)[1]) }) test_that("LoadCellGraphs works for CellGraphAssay objects", { @@ -31,3 +37,12 @@ test_that("LoadCellGraphs works for CellGraphAssay objects", { expect_equal(attr(cg_assay@cellgraphs$RCVCMP0000000@cellgraph, "type"), "linegraph") }) +test_that("LoadCellGraphs works for FileSystemDataset", { + expect_no_error(el <- ReadMPX_arrow_edgelist(pxl_file)) + + # Single data set + expect_no_error({g_list <- LoadCellGraphs(el, cells = colnames(seur_obj)[1:2])}) + expect_type(g_list, "list") + expect_s4_class(g_list[[1]], "CellGraph") + expect_equal(length(g_list), 2) +}) diff --git a/tests/testthat/test-RunDCA.R b/tests/testthat/test-RunDCA.R index ef6b9989..4914f6a5 100644 --- a/tests/testthat/test-RunDCA.R +++ b/tests/testthat/test-RunDCA.R @@ -14,7 +14,7 @@ colocalization_table_merged <- bind_rows(colocalization_table1, colocalization_ seur1 <- seur2 <- ReadMPX_Seurat(pxl_file, overwrite = TRUE) seur1$sample <- "Sample1" seur2$sample <- "Sample2" -seur_merged <- suppressWarnings(merge(seur1, seur2)) +seur_merged <- merge(seur1, seur2, add.cell.ids = c("Sample1", "Sample2")) seur_merged <- subset(seur_merged, features = c("ACTB", "HLA-ABC")) test_that("RunDCA works as expected on a data.frame and that ColocalizationHeatmap works on the output", { diff --git a/tests/testthat/test-RunDPA.R b/tests/testthat/test-RunDPA.R index b3e46f0c..d8adf3de 100644 --- a/tests/testthat/test-RunDPA.R +++ b/tests/testthat/test-RunDPA.R @@ -12,7 +12,7 @@ polarization_table_merged <- bind_rows(polarization_table1, polarization_table2 seur1 <- seur2 <- ReadMPX_Seurat(pxl_file, overwrite = TRUE) seur1$sample <- "Sample1" seur2$sample <- "Sample2" -seur_merged <- suppressWarnings(merge(seur1, seur2)) +seur_merged <- merge(seur1, seur2, add.cell.ids = c("Sample1", "Sample2")) test_that("RunDPA works as expected on a data.frame", { @@ -58,3 +58,4 @@ test_that("RunDPA fails with invalid input", { expect_error(dpa_markers <- RunDPA(polarization_table_merged, contrast_column = "sample", target = "Sample1", reference = "Invalid"), "'reference' must be present in 'contrast_column' column") }) +