Skip to content

Commit

Permalink
Port tidyChromosomes from BRGenomics package
Browse files Browse the repository at this point in the history
  • Loading branch information
HDash committed Oct 18, 2024
1 parent 356667e commit 6b4aadc
Show file tree
Hide file tree
Showing 10 changed files with 294 additions and 62 deletions.
11 changes: 7 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: EpiCompare
Title: Comparison, Benchmarking & QC of Epigenomic Datasets
Version: 1.7.5
Version: 1.9.5
Authors@R: c(
person(given = "Sera", family = "Choi",
email = "[email protected]",
Expand All @@ -24,7 +24,11 @@ Authors@R: c(
comment = c(ORCID = "0000-0002-6807-3180")),
person(given="Thomas", family="Roberts",
email = "[email protected]",
role = "cre")
role = "ctb"),
person(given="Hiranyamaya", family="Dash",
email = "[email protected]",
role = "cre",
comment = c(ORCID = "0009-0005-5514-505X"))
)
Description: EpiCompare is used to compare and analyse epigenetic datasets
for quality control and benchmarking purposes.
Expand All @@ -44,7 +48,6 @@ Depends:
R (>= 4.2.0)
Imports:
AnnotationHub,
BRGenomics,
ChIPseeker,
data.table,
genomation,
Expand Down Expand Up @@ -98,4 +101,4 @@ biocViews: Epigenetics, Genetics, QualityControl, ChIPSeq,
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: FALSE
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
6 changes: 5 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export(predict_precision_recall)
export(rebin_peaks)
export(report_command)
export(report_header)
export(tidy_chromosomes)
export(tidy_peakfile)
export(translate_genome)
export(tss_plot)
Expand All @@ -33,7 +34,6 @@ export(write_example_peaks)
import(GenomicRanges)
import(ggplot2)
importFrom(AnnotationHub,AnnotationHub)
importFrom(BRGenomics,tidyChromosomes)
importFrom(BiocGenerics,"%in%")
importFrom(BiocGenerics,`%in%`)
importFrom(ChIPseeker,annotatePeak)
Expand All @@ -42,13 +42,17 @@ importFrom(ChIPseeker,getPromoters)
importFrom(ChIPseeker,getTagMatrix)
importFrom(ChIPseeker,plotAvgProf)
importFrom(ChIPseeker,readPeakFile)
importFrom(GenomeInfoDb,"genome<-")
importFrom(GenomeInfoDb,Seqinfo)
importFrom(GenomeInfoDb,genome)
importFrom(GenomeInfoDb,keepSeqlevels)
importFrom(GenomeInfoDb,mapGenomeBuilds)
importFrom(GenomeInfoDb,seqlevels)
importFrom(GenomeInfoDb,seqlevelsInUse)
importFrom(GenomeInfoDb,seqlevelsStyle)
importFrom(GenomeInfoDb,seqnames)
importFrom(GenomeInfoDb,sortSeqlevels)
importFrom(GenomeInfoDb,standardChromosomes)
importFrom(GenomicRanges,GRanges)
importFrom(GenomicRanges,GRangesList)
importFrom(GenomicRanges,binnedAverage)
Expand Down
11 changes: 11 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## CHANGES IN VERSION 1.9.5

### New features

* Remove the soon-to-be-deprecated `BRGenomics` dependency.
- Port `tidyChromosomes` function to `EpiCompare`.

### Miscellaneous

* Update maintainer details.

## CHANGES IN VERSION 1.3.4

### New features
Expand Down
3 changes: 1 addition & 2 deletions R/peak_info.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#' @return A summary table of peak information
#'
#' @importMethodsFrom IRanges subsetByOverlaps
#' @importFrom BRGenomics tidyChromosomes
#' @export
#'
#' @examples
Expand Down Expand Up @@ -46,7 +45,7 @@ peak_info <- function(peaklist, blacklist){

### Obtain Non-standard Chromosome Percentage ###
tidy_percent <- mapply(peaklist, FUN=function(file){
peak_tidy <- BRGenomics::tidyChromosomes(file, keep.X = TRUE, keep.Y = TRUE)
peak_tidy <- tidy_chromosomes(file, keep.X = TRUE, keep.Y = TRUE)
removedN <- length(file) - length(peak_tidy)
percentage <- signif(removedN/length(file)*100, 3)
})
Expand Down
78 changes: 78 additions & 0 deletions R/tidy_chromosomes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Remove odd chromosomes from GRanges objects
#'
#' This convenience function removes non-standard, mitochondrial, and/or sex
#' chromosomes from any GRanges object.
#'
#' This function is adapted from \code{tidyChromosomes} in the
#' \code{BRGenomics} package licensed under the Artistic License 2.0.
#' Original author: Mike DeBerardine <https://github.com/mdeber>
#'
#' @param gr Any GRanges object, or any other object with associated
#'   \code{seqinfo} (or a \code{Seqinfo} object itself). The object should
#'   typically have a standard genome associated with it, e.g. \code{genome(gr)
#'   <- "hg38"}. \code{gr} can also be a list of such GRanges objects.
#' @param keep.X,keep.Y,keep.M,keep.nonstandard Logicals indicating which
#'   non-autosomes should be kept. By default, sex chromosomes are kept, but
#'   mitochondrial and non-standard chromosomes are removed. Sex and
#'   mitochondrial chromosomes are recognised under both UCSC-style names
#'   (\code{"chrX"}, \code{"chrY"}, \code{"chrM"}, \code{"chrMT"}) and their
#'   unprefixed equivalents (\code{"X"}, \code{"Y"}, \code{"M"}, \code{"MT"}).
#' @param genome An optional string that, if supplied, will be used to set the
#'   genome of \code{gr}.
#'
#' @return A GRanges object in which both ranges and \code{seqinfo} associated
#'   with trimmed chromosomes have been removed. If \code{gr} is a list or a
#'   \code{GRangesList}, a list with one tidied element per input element.
#'
#' @details Standard chromosomes are defined using the
#'   \code{\link[GenomeInfoDb:seqlevels-wrappers]{standardChromosomes}} function
#'   from the \code{GenomeInfoDb} package.
#'
#' @author Mike DeBerardine
#' @seealso \code{\link[GenomeInfoDb:seqlevels-wrappers]{
#'   GenomeInfoDb::standardChromosomes}}
#'
#' @export
#' @importFrom GenomeInfoDb standardChromosomes seqlevels keepSeqlevels
#'   sortSeqlevels genome genome<-
#' @importFrom methods is
#' @examples
#' # make a GRanges (constructors are namespace-qualified because the
#' # packages providing them are imported, not attached)
#' chrom <- c("chr2", "chr3", "chrX", "chrY", "chrM", "junk")
#' gr <- GenomicRanges::GRanges(
#'     seqnames = chrom,
#'     ranges = IRanges::IRanges(start = 2 * (1:6), end = 3 * (1:6)),
#'     strand = "+",
#'     seqinfo = GenomeInfoDb::Seqinfo(chrom)
#' )
#' GenomeInfoDb::genome(gr) <- "hg38"
#'
#' gr
#'
#' tidy_chromosomes(gr)
#'
#' tidy_chromosomes(gr, keep.M = TRUE)
#'
#' tidy_chromosomes(gr, keep.M = TRUE, keep.Y = FALSE)
#'
#' tidy_chromosomes(gr, keep.nonstandard = TRUE)
#'
#' @keywords internal
tidy_chromosomes <- function(gr, keep.X = TRUE, keep.Y = TRUE, keep.M = FALSE,
                             keep.nonstandard = FALSE, genome = NULL) {

    # Lists / GRangesList: tidy every element with the same settings.
    # Arguments are passed by name so the call cannot drift out of sync with
    # the signature.
    if (is.list(gr) || is(gr, "GRangesList")) {
        return(lapply(gr, tidy_chromosomes,
                      keep.X = keep.X, keep.Y = keep.Y, keep.M = keep.M,
                      keep.nonstandard = keep.nonstandard, genome = genome))
    }

    if (!is.null(genome)) {
        genome(gr) <- genome
    }

    # Start from every seqlevel when non-standard ones are wanted, otherwise
    # from the standard chromosomes only.
    chrom <- if (keep.nonstandard) {
        seqlevels(gr)
    } else {
        standardChromosomes(gr)
    }

    # Match both UCSC-style ("chrX") and unprefixed ("X") names so the
    # filters also apply to objects whose seqlevels lack the "chr" prefix.
    if (!keep.X) chrom <- chrom[!chrom %in% c("chrX", "X")]
    if (!keep.Y) chrom <- chrom[!chrom %in% c("chrY", "Y")]
    if (!keep.M) chrom <- chrom[!chrom %in% c("chrM", "chrMT", "M", "MT")]

    # The Seqinfo branch calls keepSeqlevels() without pruning.mode; for
    # every other object "tidy" pruning is requested so ranges on the
    # dropped seqlevels are removed too.
    if (is(gr, "Seqinfo")) {
        gr <- keepSeqlevels(gr, chrom)
    } else {
        gr <- keepSeqlevels(gr, chrom, pruning.mode = "tidy")
    }
    sortSeqlevels(gr)
}
7 changes: 2 additions & 5 deletions R/tidy_peakfile.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
#' @return list of GRanges object
#' @export
#'
#' @importFrom BRGenomics tidyChromosomes
#' @importMethodsFrom IRanges subsetByOverlaps

#'
#' @examples
#' ### Load Data ###
#' data("encode_H3K27ac") # example peakfile GRanges object
Expand All @@ -36,9 +35,7 @@ tidy_peakfile <- function(peaklist, blacklist){
### standardise peakfiles ###
peaklist_tidy <- mapply(peaklist, FUN = function(file){
# remove non-standard chromosomes
sample <- BRGenomics::tidyChromosomes(file,
keep.X = TRUE,
keep.Y = TRUE)
sample <- tidy_chromosomes(file, keep.X = TRUE, keep.Y = TRUE)
# remove blacklisted regions
IRanges::subsetByOverlaps(sample,
blacklist,
Expand Down
Loading

0 comments on commit 6b4aadc

Please sign in to comment.