diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index 1b971914ef..17b727d6de 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -10,6 +10,7 @@ from deeptools import bamHandler from deeptools import mapReduce from deeptoolsintervals import GTF +import pyBigWig debug = 0 old_settings = np.seterr(all='ignore') @@ -235,7 +236,13 @@ def run(self, allArgs=None): # workers for analysis. If too short, too much time is spend loading the files # if too long, some processors end up free. # the following values are empirical - bamFilesHandlers = [bamHandler.openBam(x) for x in self.bamFilesList] + bamFilesHandlers = [] + for x in self.bamFilesList: + try: + y = bamHandler.openBam(x) + except: + y = pyBigWig.open(x) + bamFilesHandlers.append(y) chromSizes, non_common = deeptools.utilities.getCommonChrNames(bamFilesHandlers, verbose=self.verbose) # skip chromosome in the list. This is usually for the @@ -263,14 +270,28 @@ def run(self, allArgs=None): min_num_of_samples = int(genomeSize / np.mean(chrLengths)) raise ValueError("numberOfSamples has to be bigger than {} ".format(min_num_of_samples)) - max_mapped = max([x.mapped for x in bamFilesHandlers]) - - reads_per_bp = float(max_mapped) / genomeSize - # chunkSize = int(100 / ( reads_per_bp * len(bamFilesList)) ) - - chunkSize = int(self.stepSize * 1e3 / (reads_per_bp * len(bamFilesHandlers))) + max_mapped = [] + for x in bamFilesHandlers: + try: + max_mapped.append(x.mapped) + except: + # bigWig, use a fixed value + max_mapped.append(0) + max_mapped = max(max_mapped) + + # If max_mapped is 0 (i.e., bigWig input), set chunkSize to a multiple of binLength and use every bin + if max_mapped == 0: + chunkSize = 10000 * self.binLength + self.stepSize = self.binLength + else: + reads_per_bp = float(max_mapped) / genomeSize + chunkSize = int(self.stepSize * 1e3 / (reads_per_bp * len(bamFilesHandlers))) [bam_h.close() for bam_h in bamFilesHandlers] + # Ensure that chunkSize is always at least self.stepSize + if chunkSize < self.stepSize: + chunkSize = self.stepSize + if self.verbose: print("step size is {}".format(self.stepSize)) @@ -388,7 +409,12 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None): start_time = time.time() - bam_handlers = [bamHandler.openBam(bam) for bam in self.bamFilesList] + bam_handlers = [] + for fname in self.bamFilesList: + try: + bam_handlers.append(bamHandler.openBam(fname)) + except: + bam_handlers.append(pyBigWig.open(fname)) blackList = None if self.blackListFileName is not None: @@ -537,11 +563,22 @@ def get_coverage_of_region(self, bamHandle, chrom, regions, start_time = time.time() # caching seems faster. TODO: profile the function c = 0 - if chrom in bamHandle.references: - reads = [r for r in bamHandle.fetch(chrom, regStart, regEnd) - if r.flag & 4 == 0] - else: - raise NameError("chromosome {} not found in bam file".format(chrom)) + try: + # BAM input + if chrom in bamHandle.references: + reads = [r for r in bamHandle.fetch(chrom, regStart, regEnd) + if r.flag & 4 == 0] + else: + raise NameError("chromosome {} not found in bam file".format(chrom)) + except: + # bigWig input, as used by plotFingerprint + if bamHandle.chroms(chrom): + _ = np.array(bamHandle.stats(chrom, regStart, regEnd, type="mean", nBins=nRegBins), dtype=np.float) + _[np.isnan(_)] = 0.0 + coverages += _ + continue + else: + raise NameError("chromosome {} not found in bigWig file with chroms {}".format(chrom, bamHandle.chroms())) prev_start_pos = None # to store the start positions # of previous processed read pair diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py index 1dbdb48143..abf40c9d01 100644 --- a/deeptools/plotFingerprint.py +++ b/deeptools/plotFingerprint.py @@ -318,7 +318,7 @@ def signalAndBinDist(x): # Compute the JSD from the PMFs M = (PMFinput + PMFchip) / 2.0 - JSD = 0.5 * (np.sum(PMFinput * np.log2(PMFinput / M))) + 0.5 * (np.sum(PMFchip * np.log2(PMFchip / M))) + JSD = 0.5 * (np.nansum(PMFinput * np.log2(PMFinput / M))) + 0.5 * (np.nansum(PMFchip * np.log2(PMFchip / M))) return np.sqrt(JSD) @@ -424,5 +424,6 @@ def main(args=None): args.outQualityMetrics.write("\n") args.outQualityMetrics.close() + if __name__ == "__main__": main() diff --git a/deeptools/utilities.py b/deeptools/utilities.py index d4b0865bd2..013b4ab56e 100644 --- a/deeptools/utilities.py +++ b/deeptools/utilities.py @@ -107,8 +107,12 @@ def get_chrom_and_size(bam_handler): :param bam_handler: :return: list of (chrom, size) tuples """ - return [(bam_handler.references[i], bam_handler.lengths[i]) - for i in range(0, len(bam_handler.references))] + try: + # BAM file + return [(bam_handler.references[i], bam_handler.lengths[i]) + for i in range(0, len(bam_handler.references))] + except: + return [(k, v) for k, v in bam_handler.chroms().items()] def print_chr_names_and_size(chr_set): sys.stderr.write("chromosome\tlength\n")