diff --git a/CHANGES.txt b/CHANGES.txt index 30e015d127..4980bddf65 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,14 @@ +2.2.0 + + * plotFingerprint now iterates through line styles as well as colors. This allows up to 35 samples per plot without repeating (not that that many would ever be recommended). This was issue #80. + * Fixed a number of Galaxy wrappers, which were rendered incorrectly due to including a section title of "Background". + * A number of image file handles were previously not explicitly closed, which caused occasional completion of a plot* program but without the files actually being there. This only happened on some NFS mount points. + * The Galaxy wrappers now support the `--outFileNameData` option on plotProfile and plotHeatmap. + * Added support for blacklist regions. These can be supplied as a BED file and the regions will largely be skipped in processing (they'll also be ignored during normalization). This is very useful to skip regions known to attract excess signal. This was issue #101. + * Modified plotPCA to include the actual eigenvalues rather than rescaled ones. Also, plotPCA can now output the underlying values (issue #231). + * Regions within each feature body can now be unscaled when using `computeMatrix`. Thus, if you're interested in unscaled signal around the TSS/TES then you can now use the `--unscaled5prime` and `--unscaled3prime` options. This was issue #108. + * bamCoverage now has a `--filterRNAstrand` option, that will produce coverage for only a single strand. Note that the strand referred to is the DNA strand and not sense/anti-sense. + * Issues with plotHeatmap x-axis labels were fixed (issue #301). 2.1.1 diff --git a/deeptools/SES_scaleFactor.py b/deeptools/SES_scaleFactor.py index af602a889d..12672bdaff 100644 --- a/deeptools/SES_scaleFactor.py +++ b/deeptools/SES_scaleFactor.py @@ -13,7 +13,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, normalizationLength, - avg_method='median', numberOfProcessors=1, + avg_method='median', blackListFileName=None, numberOfProcessors=1, verbose=False, chrsToSkip=[]): r""" Subdivides the genome into chunks to be analyzed in parallel @@ -41,6 +41,8 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, chrsToSkip : list name of the chromosomes to be excluded from the scale estimation. Usually the chrX is included. + blackListFileName : str + BED file containing blacklisted regions Returns ------- @@ -84,6 +86,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, binLength=binLength, numberOfSamples=numberOfSamples, extendReads=False, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose, chrsToSkip=chrsToSkip) diff --git a/deeptools/_version.py b/deeptools/_version.py index df71596ec7..611c4e0e41 100644 --- a/deeptools/_version.py +++ b/deeptools/_version.py @@ -2,4 +2,4 @@ # This file is originally generated from Git information by running 'setup.py # version'. Distribution tarballs contain a pre-generated copy of this file. -__version__ = '2.1.0' +__version__ = '2.2.0' diff --git a/deeptools/bamCompare.py b/deeptools/bamCompare.py index f411f706bd..def1c13e50 100644 --- a/deeptools/bamCompare.py +++ b/deeptools/bamCompare.py @@ -157,7 +157,12 @@ def get_scale_factors(args): bam2 = bamHandler.openBam(args.bamfile2) bam1_mapped = parserCommon.bam_total_reads(bam1, args.ignoreForNormalization) + blacklisted = parserCommon.bam_blacklisted_reads(bam1, args.ignoreForNormalization, args.blackListFileName) + bam1_mapped -= blacklisted + bam2_mapped = parserCommon.bam_total_reads(bam2, args.ignoreForNormalization) + blacklisted = parserCommon.bam_blacklisted_reads(bam2, args.ignoreForNormalization, args.blackListFileName) + bam2_mapped -= blacklisted if args.scaleFactors: scale_factors = map(float, args.scaleFactors.split(":")) @@ -167,6 +172,7 @@ def get_scale_factors(args): [bam1.filename, bam2.filename], args.sampleLength, args.numberOfSamples, 1, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, chrsToSkip=args.ignoreForNormalization) @@ -214,6 +220,7 @@ def get_scale_factors(args): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(bamfile, return_lengths=False, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if args.extendReads: @@ -295,6 +302,7 @@ def main(args=None): region=args.region, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, + blackListFileName=args.blackListFileName, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, center_read=args.centerReads, diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index ef4cc37649..372a5a1bac 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -77,6 +77,12 @@ def get_optional_args(): '*NOTE*: Requires paired-end data. A bin size of 1 is recommended.', action='store_true') + optional.add_argument('--filterRNAstrand', + help='Selects RNA-seq reads (single-end or paired-end) in ' + 'the given strand.', + choices=['forward', 'reverse'], + default=None) + return parser @@ -110,12 +116,15 @@ def get_scale_factor(args): scale_factor = args.scaleFactor bam_handle = bamHandler.openBam(args.bam) bam_mapped = parserCommon.bam_total_reads(bam_handle, args.ignoreForNormalization) + blacklisted = parserCommon.bam_blacklisted_reads(bam_handle, args.ignoreForNormalization, args.blackListFileName) + bam_mapped -= blacklisted if args.normalizeTo1x: # try to guess fragment length if the bam file contains paired end reads from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=False, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if args.extendReads: @@ -174,12 +183,14 @@ def main(args=None): debug = 0 func_args = {'scaleFactor': get_scale_factor(args)} + if args.MNase: # check that library is paired end # using getFragmentAndReadSize from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=False, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if frag_len_dict is None: @@ -189,6 +200,7 @@ def main(args=None): binLength=args.binSize, stepSize=args.binSize, region=args.region, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, @@ -200,11 +212,29 @@ def main(args=None): verbose=args.verbose, ) + elif args.filterRNAstrand: + wr = filterRnaStrand([args.bam], + binLength=args.binSize, + stepSize=args.binSize, + region=args.region, + numberOfProcessors=args.numberOfProcessors, + extendReads=args.extendReads, + minMappingQuality=args.minMappingQuality, + ignoreDuplicates=args.ignoreDuplicates, + center_read=args.centerReads, + zerosToNans=args.skipNonCoveredRegions, + samFlag_include=args.samFlagInclude, + samFlag_exclude=args.samFlagExclude, + verbose=args.verbose, + ) + + wr.filter_strand = args.filterRNAstrand else: wr = writeBedGraph.WriteBedGraph([args.bam], binLength=args.binSize, stepSize=args.binSize, region=args.region, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, @@ -217,6 +247,7 @@ def main(args=None): ) wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName, + blackListFileName=args.blackListFileName, format=args.outFileFormat, smoothLength=args.smoothLength) @@ -249,3 +280,58 @@ def get_fragment_from_read(self, read): fragment_end = fragment_start + 3 return [(fragment_start, fragment_end)] + + +class filterRnaStrand(writeBedGraph.WriteBedGraph): + """ + Class to redefine the get_fragment_from_read for the --filterRNAstrand case + + Only reads either forward or reverse are kept as follows: + + For paired-end + -------------- + reads forward: + + 1. alignments of the second in pair (128) if they map to the forward strand (~16) + 2. alignments of the first in pair (64) if they map to the reverse strand (~32) + + 1. include 128, exclude 16 + or + 2. include 64 exclude 32 + + reads reverse: + 1. alignments of the second in pair (128) if it maps to the reverse strand (16) 128 & 16 = 144 + 2. alignments of the first in pair (64) if their mates map to the reverse strand (32) 64 & 32 = 96 + + 1. include 144 + or + 2. include 96 + + For single-end + -------------- + forward: include 16 (map forward strand) + reverse: exclude 16 + + """ + + def get_fragment_from_read(self, read): + """ + Gets only reads for the given strand + """ + fragment_start = fragment_end = None + + # only paired forward reads are considered + if read.is_paired: + if self.filter_strand == 'forward': + if (read.flag & 128 == 128 and read.flag & 16 == 0) or (read.flag & 64 == 64 and read.flag & 32 == 0): + return read.get_blocks() + else: + if read.flag & 144 == 144 or read.flag & 96 == 96: + return read.get_blocks() + else: + if self.filter_strand == 'forward' and read.flag & 16 == 16: + return read.get_blocks() + elif self.filter_strand == 'reverse' and read.flag & 16 == 0: + return read.get_blocks() + + return [(fragment_start, fragment_end)] diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index 7d6bb2cb14..016fcf78a8 100644 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -54,6 +54,10 @@ def parse_arguments(): 'decreased. (default 1000000)', default=1000000, type=int) + parser.add_argument('--blackListFileName', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", + metavar="BED file", + required=False) parser.add_argument('--verbose', help='Set if processing data messages are wanted.', action='store_true', @@ -67,6 +71,7 @@ def parse_arguments(): def main(args=None): args = parse_arguments().parse_args(args) fragment_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=True, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, binSize=args.binSize, @@ -111,6 +116,7 @@ def main(args=None): plt.ylabel('Frequency') plt.title(args.plotTitle) plt.savefig(args.histogram, bbox_inches=0) + plt.close() if __name__ == "__main__": diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index ad501f81dd..c5c9abc7ba 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -98,6 +98,7 @@ def main(args=None): (args.bigwig2, 'bigwig')], args.outFileName, 0, FUNC, function_args, tileSize=args.binSize, region=args.region, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, format=args.outFileFormat, smoothLength=False, diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py index 8011cfe2ed..7b58a01d7f 100644 --- a/deeptools/computeGCBias.py +++ b/deeptools/computeGCBias.py @@ -22,7 +22,7 @@ def parse_arguments(args=None): - parentParser = parserCommon.getParentArgParse(binSize=False) + parentParser = parserCommon.getParentArgParse(binSize=False, blackList=True) requiredArgs = getRequiredArgs() parser = argparse.ArgumentParser( parents=[requiredArgs, parentParser], @@ -96,26 +96,6 @@ def getRequiredArgs(): help='Number of sampling points to be considered.', type=int) - optional.add_argument( - '--filterOut', '-fo', - help='BED file containing genomic regions to be excluded ' - 'from the estimation of the correction. Such regions ' - 'usually contain repetitive regions and peaks that, if ' - 'included, would bias the correction. ' - 'It is recommended to filter out known repetitive regions ' - 'if multimappers (reads that map to more than one genomic ' - 'position) were excluded. In the case of ChIP-seq data, ' - 'it is recommended to first use ' - 'a peak caller to identify and filter out the identified peaks.\n\n' - 'Mappability tracks for different read lengths can be ' - 'found at: http://hgdownload.cse.ucsc.edu/gbdb/mm9/bbi/ ' - 'and http://hgdownload.cse.ucsc.edu/gbdb/hg19/bbi \n\n' - 'The script: scripts/mappabilityBigWig_to_unmappableBed.sh ' - 'can be used to get a BED file from the mappability ' - 'bigWig.', - type=argparse.FileType('r'), - metavar='BED file') - optional.add_argument('--extraSampling', help='BED file containing genomic regions for which ' 'extra sampling is required because they are ' @@ -632,13 +612,14 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N ax2.set_xlim(0.2, 0.7) plt.tight_layout() plt.savefig(file_name, bbox_inches='tight', dpi=100, format=image_format) + plt.close() def main(args=None): args = parse_arguments().parse_args(args) # check if directory is writable - if args.filterOut: - filter_out_file = args.filterOut.name + if args.blackListFileName: + filter_out_file = args.BlackListFileName args.filterOut.close() else: filter_out_file = None diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 3ff40a899d..7d444f48ec 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -176,6 +176,17 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): 'of the given regions. If the ' 'regions are genes, this would be the distance ' 'downstream of the transcription end site.') + optional.add_argument("--unscaled5prime", + default=0, + type=int, + help='Number of bases at the 5-prime end of the ' + 'region to exclude from scaling. By default, ' + 'each region is scaled to a given length (see the --regionBodyLength option). In some cases it is useful to look at unscaled signals around region boundaries, so this setting specifies the number of unscaled bases on the 5-prime end of each boundary.') + optional.add_argument("--unscaled3prime", + default=0, + type=int, + help='Like --unscaled3prime, but for the 3-prime ' + 'end.') elif case == 'reference-point': optional.add_argument('--referencePoint', @@ -191,6 +202,8 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): # set region body length to zero for reference point mode optional.add_argument('--regionBodyLength', help=argparse.SUPPRESS, default=0, type=int) + optional.add_argument('--unscaled5prime', default=0, type=int, help=argparse.SUPPRESS) + optional.add_argument('--unscaled3prime', default=0, type=int, help=argparse.SUPPRESS) optional.add_argument('--beforeRegionStartLength', '-b', '--upstream', default=500, type=int, @@ -279,6 +292,11 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): '(e.g. micro satellites) that may bias the average ' 'values.') + optional.add_argument('--blackListFileName', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", + metavar="BED file", + required=False) + # in contrast to other tools, # computeMatrix by default outputs # messages and the --quiet flag supresses them @@ -360,13 +378,15 @@ def main(args=None): 'nan after end': args.nanAfterEnd, 'proc number': args.numberOfProcessors, 'sort regions': args.sortRegions, - 'sort using': args.sortUsing + 'sort using': args.sortUsing, + 'unscaled 5 prime': args.unscaled5prime, + 'unscaled 3 prime': args.unscaled3prime } hm = heatmapper.heatmapper() scores_file_list = args.scoreFileName - hm.computeMatrix(scores_file_list, bed_file, parameters, verbose=args.verbose) + hm.computeMatrix(scores_file_list, bed_file, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose) if args.sortRegions != 'no': hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions) diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py index cec5fd2f4e..e6dbbf821e 100644 --- a/deeptools/correctGCBias.py +++ b/deeptools/correctGCBias.py @@ -21,7 +21,7 @@ def parse_arguments(args=None): - parentParser = parserCommon.getParentArgParse(binSize=True) + parentParser = parserCommon.getParentArgParse(binSize=True, blackList=False) requiredArgs = getRequiredArgs() parser = argparse.ArgumentParser( parents=[requiredArgs, parentParser], diff --git a/deeptools/correctReadCounts.py b/deeptools/correctReadCounts.py index 9489fbb24e..81bf11246f 100644 --- a/deeptools/correctReadCounts.py +++ b/deeptools/correctReadCounts.py @@ -64,7 +64,7 @@ def computeCorrectedReadcounts(tileCoverage, args): def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentLength, outFileName, outFileFormat, outFileNameCorr=None, region=None, extendPairedEnds=True, - numberOfProcessors=1, Nsigmas=2, maxSignalRatio=10, verbose=False): + numberOfProcessors=1, Nsigmas=2, maxSignalRatio=10, blackListFileName=None, verbose=False): bam1 = writeBedGraph.openBam(bamFilesList[0]) bam2 = writeBedGraph.openBam(bamFilesList[1]) @@ -81,6 +81,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL numberOfSamples, defaultFragmentLength, 1, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose) @@ -152,6 +153,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL funcArgs, tileSize=binLength, region=region, format=outFileFormat, zerosToNans=False, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, extendPairedEnds=extendPairedEnds) @@ -162,6 +164,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL funcArgs, tileSize=binLength, region=region, format=outFileFormat, zerosToNans=False, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, extendPairedEnds=extendPairedEnds) diff --git a/deeptools/correlation.py b/deeptools/correlation.py index dd99212562..10c994179b 100644 --- a/deeptools/correlation.py +++ b/deeptools/correlation.py @@ -411,6 +411,7 @@ def plot_scatter(self, plot_fiilename, plot_title='', image_format=None, log1p=F ax.set_xlim(min_value, ax.get_xlim()[1]) plt.savefig(plot_fiilename, format=image_format) + plt.close() def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False): """ @@ -440,7 +441,7 @@ def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False) prop={'size': 12}, markerscale=0.9) # Scree plot - eigenvalues = map(lambda x: x * n, mlab_pca.fracs) + eigenvalues = mlab_pca.s cumulative = [] c = 0 @@ -468,3 +469,4 @@ def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False) plt.subplots_adjust(top=3.85) plt.tight_layout() plt.savefig(plot_filename, format=image_format, bbox_extra_artists=(lgd,), bbox_inches='tight') + plt.close() diff --git a/deeptools/correlation_heatmap.py b/deeptools/correlation_heatmap.py index 000c83776a..47e34fc597 100644 --- a/deeptools/correlation_heatmap.py +++ b/deeptools/correlation_heatmap.py @@ -100,3 +100,4 @@ def plot_correlation(corr_matrix, labels, plotFileName, vmax=None, ha='center', va='center') fig.savefig(plotFileName, format=image_format) + fig.close() diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index c420f13d37..859669f314 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -59,6 +59,9 @@ class CountReadsPerBin(object): File handle of a bed file containing the regions for wich to compute the coverage. This option overrules ``binLength``, ``numberOfSamples`` and ``stepSize``. + blackListFileName : str + A string containing a BED file with blacklist regions. + extendReads : bool, int Whether coverage should be computed for the extended read length (i.e. the region covered @@ -136,6 +139,7 @@ class CountReadsPerBin(object): def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfProcessors=1, verbose=False, region=None, bedFile=None, extendReads=False, + blackListFileName=None, minMappingQuality=None, ignoreDuplicates=False, chrsToSkip=[], @@ -150,11 +154,16 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro self.bamFilesList = bamFilesList self.binLength = binLength self.numberOfSamples = numberOfSamples + self.blackList = None + self.blackListFileName = blackListFileName + if blackListFileName: + self.blackList = mapReduce.BED_to_interval_tree(open(blackListFileName, "r")) if extendReads and len(bamFilesList): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(bamFilesList[0], return_lengths=False, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose) if extendReads is True: @@ -266,6 +275,7 @@ def run(self): self_=self, genomeChunkLength=chunkSize, bedFile=self.bedFile, + blackListFileName=self.blackListFileName, region=self.region, numberOfProcessors=self.numberOfProcessors) @@ -364,11 +374,15 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None): regionsToConsider = [] if bed_regions_list is not None: for chrom, start, end in bed_regions_list: + if mapReduce.blOverlap(self.blackList, chrom, [start, end]): + continue regionsToConsider.append((chrom, start, end, end - start)) else: for i in xrange(start, end, self.stepSize): if i + self.binLength > end: break + if mapReduce.blOverlap(self.blackList, chrom, [i, i + self.binLength]): + continue regionsToConsider.append((chrom, i, i + self.binLength, self.binLength)) if self.save_data: @@ -451,6 +465,10 @@ def get_coverage_of_region(self, bamHandle, chrom, start, end, tileSize, vector_length = length / tileSize coverage = np.zeros(vector_length, dtype='float64') + # Return 0 for overlap with a blacklisted region + if mapReduce.blOverlap(self.blackList, chrom, [start, end]): + return coverge + start_time = time.time() # caching seems faster. TODO: profile the function c = 0 diff --git a/deeptools/getFragmentAndReadSize.py b/deeptools/getFragmentAndReadSize.py index 5a6522e7e6..fb6c26fcb5 100644 --- a/deeptools/getFragmentAndReadSize.py +++ b/deeptools/getFragmentAndReadSize.py @@ -54,9 +54,9 @@ def getFragmentLength_worker(chrom, start, end, bamFile, distanceBetweenBins): return reads -def get_read_and_fragment_length(bamFile, return_lengths=False, - numberOfProcessors=None, verbose=False, - binSize=50000, distanceBetweenBins=1000000): +def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFileName=None, + binSize=50000, distanceBetweenBins=1000000, + numberOfProcessors=None, verbose=False): """ Estimates the fragment length and read length through sampling @@ -89,6 +89,7 @@ def get_read_and_fragment_length(bamFile, return_lengths=False, getFragmentLength_wrapper, chrom_sizes, genomeChunkLength=stepsize, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose) diff --git a/deeptools/getScorePerBigWigBin.py b/deeptools/getScorePerBigWigBin.py index 52ffaef982..0ccabac1e8 100644 --- a/deeptools/getScorePerBigWigBin.py +++ b/deeptools/getScorePerBigWigBin.py @@ -20,7 +20,7 @@ def countFragmentsInRegions_worker(chrom, start, end, bigWigFiles, stepSize, binLength, save_data, - bedRegions=None, + bedRegions=None ): """ returns the average score in each bigwig file at each 'stepSize' position within the interval start, end for a 'binLength' window. @@ -179,6 +179,7 @@ def getScorePerBin(bigWigFiles, binLength, numberOfProcessors=1, verbose=False, region=None, bedFile=None, + blackListFileName=None, stepSize=None, chrsToSkip=[], out_file_for_raw_data=None): @@ -236,6 +237,7 @@ def getScorePerBin(bigWigFiles, binLength, chrom_sizes, genomeChunkLength=chunkSize, bedFile=bedFile, + blackListFileName=blackListFileName, region=region, numberOfProcessors=numberOfProcessors) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index a7b65c6a11..56fb34a78f 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -10,6 +10,7 @@ import pyBigWig import deeptools.readBed +from deeptools import mapReduce def compute_sub_matrix_wrapper(args): @@ -27,8 +28,9 @@ def __init__(self): self.lengthDict = None self.matrix = None self.regions = None + self.blackList = None - def computeMatrix(self, score_file_list, regions_file, parameters, verbose=False): + def computeMatrix(self, score_file_list, regions_file, parameters, blackListFileName=None, verbose=False): """ Splits into multiple cores the computation of the scores @@ -54,7 +56,15 @@ def computeMatrix(self, score_file_list, regions_file, parameters, verbose=False exit("Length of region before the body has to be a multiple of " "--binSize\nCurrent value is {}\n".format(parameters['upstream'])) - regions, group_labels = self.get_regions_and_groups(regions_file, verbose=verbose) + if parameters['unscaled 5 prime'] % parameters['bin size'] > 0: + exit("Length of the unscaled 5 prime region has to be a multiple of " + "--binSize\nCurrent value is {}\n".format(parameters['unscaled 5 prime'])) + + if parameters['unscaled 3 prime'] % parameters['bin size'] > 0: + exit("Length of the unscaled 5 prime region has to be a multiple of " + "--binSize\nCurrent value is {}\n".format(parameters['unscaled 3 prime'])) + + regions, group_labels = self.get_regions_and_groups(regions_file, blackListFileName=blackListFileName, verbose=verbose) # args to pass to the multiprocessing workers mp_args = [] @@ -199,6 +209,7 @@ def compute_sub_matrix_worker(score_file_list, regions, # given by the user, times the number of score files matrix_cols = len(score_file_list) * \ ((parameters['downstream'] + + parameters['unscaled 5 prime'] + parameters['unscaled 3 prime'] + parameters['upstream'] + parameters['body']) / parameters['bin size']) @@ -212,38 +223,43 @@ def compute_sub_matrix_worker(score_file_list, regions, for feature in regions: # print some information if parameters['body'] > 0 and \ - feature.end - feature.start < parameters['bin size']: + feature.end - feature.start - parameters['unscaled 5 prime'] - parameters['unscaled 3 prime'] < parameters['bin size']: if parameters['verbose']: - sys.stderr.write("A region that is shorter than " - "then bin size was found: " - "({}) {} {}:{}:{}. Skipping...\n".format((feature.end - feature.start), + sys.stderr.write("A region that is shorter than the bin size (possibly only after accounting for unscaled regions) was found: " + "({}) {} {}:{}:{}. Skipping...\n".format((feature.end - feature.start - parameters['unscaled 5 prime'] - parameters['unscaled 3 prime']), feature.name, feature.chrom, feature.start, feature.end)) - coverage = np.zeros(matrix_cols) coverage[:] = np.nan - else: if feature.strand == '-': a = parameters['upstream'] / parameters['bin size'] b = parameters['downstream'] / parameters['bin size'] - start = feature.end - end = feature['start'] + d = parameters['unscaled 5 prime'] / parameters['bin size'] + c = parameters['unscaled 3 prime'] / parameters['bin size'] + start = feature.end - parameters['unscaled 5 prime'] + end = feature['start'] + parameters['unscaled 3 prime'] else: b = parameters['upstream'] / parameters['bin size'] a = parameters['downstream'] / parameters['bin size'] - start = feature['start'] - end = feature.end + c = parameters['unscaled 5 prime'] / parameters['bin size'] + d = parameters['unscaled 3 prime'] / parameters['bin size'] + start = feature['start'] + parameters['unscaled 5 prime'] + end = feature.end - parameters['unscaled 3 prime'] # build zones: # zone0: region before the region start, - # zone1: the body of the region (not always present) - # zone2: the region from the end of the region downstream + # zone1: unscaled 5 prime region + # zone2: the body of the region (not always present) + # zone3: unscaled 3 prime region + # zone4: the region from the end of the region downstream # the format for each zone is: start, end, number of bins if parameters['body'] > 0: zones = \ [(feature.start - b * parameters['bin size'], feature.start, b), - (feature.start, feature.end, parameters['body'] / parameters['bin size']), + (feature.start, feature.start + c * parameters['bin size'], c), + (feature.start + c * parameters['bin size'], feature.end - d * parameters['bin size'], parameters['body'] / parameters['bin size']), + (feature.end - d * parameters['bin size'], feature.end, d), (feature.end, feature.end + a * parameters['bin size'], a)] elif parameters['ref point'] == 'TES': # around TES zones = [(end - b * parameters['bin size'], end, b), @@ -483,8 +499,10 @@ def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=Fa no regions are skipped. zones: array as follows zone0: region before the region start, - zone1: the body of the region (not always present) - zone2: the region from the end of the region downstream + zone1: 5' unscaled region (if present) + zone2: the body of the region (not always present) + zone3: 3' unscaled region (if present) + zone4: the region from the end of the region downstream each zone is a tuple containing start, end, and number of bins @@ -665,6 +683,8 @@ def save_tabulated_values(self, file_handle, reference_point_label='TSS', start_ w = self.parameters['bin size'] b = self.parameters['upstream'] a = self.parameters['downstream'] + c = self.parameters['unscaled 5 prime'] + d = self.parameters['unscaled 3 prime'] m = self.parameters['body'] if b < 1e5: @@ -684,17 +704,25 @@ def save_tabulated_values(self, file_handle, reference_point_label='TSS', start_ xtickslabel = [] # only if upstream region is set, add a x tick - if self.parameters['upstream'] > 0: + if b > 0: xticks_values.append(b) xtickslabel.append('{0:.1f}{1}'.format(-(float(b) / quotient), symbol)) - # set the x tick for the body parameter, regardless if - # upstream is 0 (not set) - xticks_values.append(b + m) xtickslabel.append(start_label) + + if c > 0: + xticks_values.append(b + c) + xtickslabel.append("") + + if d > 0: + xticks_values.append(b + c + m) + xtickslabel.append("") + + xticks_values.append(b + c + m + d) xtickslabel.append(end_label) + if a > 0: - xticks_values.append(b + m + a) + xticks_values.append(b + c + m + d + a) xtickslabel.append('{0:.1f}{1}'.format(float(a) / quotient, symbol)) xticks = [(k / w) for k in xticks_values] @@ -727,11 +755,13 @@ def save_matrix_values(self, file_name): groups_len[i])) fh.write("#{}\n".format("\t".join(info))) # add to header the x axis values - fh.write("#downstream:{}\tupstream:{}\tbody:{}\tbin size:{}\n".format( + fh.write("#downstream:{}\tupstream:{}\tbody:{}\tbin size:{}\tunscaled 5 prime:{}\tunscaled 3 prime:{}\n".format( self.parameters['downstream'], self.parameters['upstream'], self.parameters['body'], - self.parameters['bin size'])) + self.parameters['bin size'], + self.parameters['unscaled 5 prime'], + self.parameters['unscaled 3 prime'])) fh.close() # reopen again using append mode @@ -771,6 +801,7 @@ def matrix_avg(matrix, avgType='mean'): @staticmethod def get_regions_and_groups(regions_file, onlyMultiplesOf=1, default_group_name='genes', + blackListFileName=None, verbose=None): """ Reads a bed file. @@ -791,6 +822,10 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, group_labels = [] group_idx = 0 bed_file = deeptools.readBed.ReadBed(regions_file) + blackList = None + if blackListFileName is not None: + blackList = mapReduce.BED_to_interval_tree(open(blackListFileName, "r")) + for ginterval in bed_file: if ginterval.line.startswith("track") or ginterval.line.startswith("browser"): continue @@ -814,6 +849,11 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, group_labels.append(label) continue + + # Exclude blacklist overlaps + if mapReduce.blOverlap(blackList, ginterval.chrom, [ginterval.start, ginterval.end]): + continue + # if the list of regions is to big, only # consider a fraction of the data # if totalintervals % onlyMultiplesOf != 0: @@ -904,7 +944,7 @@ def get_num_individual_matrix_cols(self): of smaller matrices that are merged one after the other. """ - matrixCols = ((self.parameters['downstream'] + self.parameters['upstream'] + self.parameters['body']) / + matrixCols = ((self.parameters['downstream'] + self.parameters['upstream'] + self.parameters['body'] + self.parameters['unscaled 5 prime'] + self.parameters['unscaled 3 prime']) / self.parameters['bin size']) return matrixCols diff --git a/deeptools/heatmapper_utilities.py b/deeptools/heatmapper_utilities.py index 393ba15857..8e516fcb68 100644 --- a/deeptools/heatmapper_utilities.py +++ b/deeptools/heatmapper_utilities.py @@ -48,6 +48,7 @@ def plot_single(ax, ma, average_type, color, label, plot_type='simple'): >>> ax = plot_single(ax, matrix + 30, 'mean', color=(0.9, 0.5, 0.9, 0.5), label='violet with alpha', plot_type='std') >>> leg = ax.legend() >>> plt.savefig("/tmp/test.pdf") + >>> plt.close() >>> fig = plt.figure() @@ -87,6 +88,14 @@ def getProfileTicks(hm, referencePointLabel, startLabel, endLabel): w = hm.parameters['bin size'] b = hm.parameters['upstream'] a = hm.parameters['downstream'] + try: + c = hm.parameters['unscaled 5 prime'] + except: + c = 0 + try: + d = hm.parameters['unscaled 3 prime'] + except: + d = 0 m = hm.parameters['body'] tickPlotAdj = 0.5 @@ -108,17 +117,27 @@ def getProfileTicks(hm, referencePointLabel, startLabel, endLabel): xtickslabel = [] # only if upstream region is set, add a x tick - if hm.parameters['upstream'] > 0: + if b > 0: xticks_values.append(b) xtickslabel.append('{0:.1f}'.format(-(float(b) / quotient))) + xtickslabel.append(startLabel) + # set the x tick for the body parameter, regardless if # upstream is 0 (not set) - xticks_values.append(b + m) - xtickslabel.append(startLabel) + if c > 0: + xticks_values.append(b + c) + xtickslabel.append("") + + if d > 0: + xticks_values.append(b + c + m) + xtickslabel.append("") + + xticks_values.append(b + c + m + d) xtickslabel.append(endLabel) + if a > 0: - xticks_values.append(b + m + a) + xticks_values.append(b + c + m + d + a) xtickslabel.append('{0:.1f}{1}'.format(float(a) / quotient, symbol)) xticks = [(k / w) - tickPlotAdj for k in xticks_values] diff --git a/deeptools/mapReduce.py b/deeptools/mapReduce.py index de7aa9e17a..360701be56 100644 --- a/deeptools/mapReduce.py +++ b/deeptools/mapReduce.py @@ -7,6 +7,7 @@ def mapReduce(staticArgs, func, chromSize, genomeChunkLength=None, region=None, bedFile=None, + blackListFileName=None, numberOfProcessors=4, verbose=False, self_=None): @@ -39,6 +40,8 @@ def mapReduce(staticArgs, func, chromSize, :param bedFile: Is a bed file is given, the args to the func to be called are extended to include a list of bed defined regions. + :param blackListFileName: A list of regions to exclude from all computations. + Note that this has genomeChunkLength resolution... :param self_: In case mapreduce should make a call to an object the self variable has to be passed. """ @@ -81,6 +84,9 @@ def mapReduce(staticArgs, func, chromSize, if bed_in_region[-1].end > chromSize[0][1]: chromSize[0] = (chromSize[0][0], bed_in_region[-1].end) + if blackListFileName: + blackList = BED_to_interval_tree(open(blackListFileName, "r")) + TASKS = [] # iterate over all chromosomes for chrom, size in chromSize: @@ -88,42 +94,51 @@ def mapReduce(staticArgs, func, chromSize, start = 0 if region_start == 0 else region_start for startPos in xrange(start, size, genomeChunkLength): endPos = min(size, startPos + genomeChunkLength) - if self_ is not None: - argsList = [self_] + + # Reject a chunk if it overlaps + if blackListFileName: + regions = blSubtract(blackList, chrom, [startPos, endPos]) else: - argsList = [] - argsList.extend([chrom, startPos, endPos]) - # add to argument list the static list received the the function - argsList.extend(staticArgs) - - # if a bed file is given, append to the TASK list, - # a list of bed regions that overlap with the - # current genomeChunk. - if bedFile: - # this method to get the bedFile regions may seem - # cumbersome but I (fidel) think is better to - # balance the load between multiple processors. - # This method first partitions the genome into smaller - # chunks and then, for each chunk, the list of - # regions overlapping the chunk interval is added. - # This is preferable to sending each worker a - # single region because of the overhead of initiating - # the data. - bed_regions_list = [] - for bed_region in bed_interval_tree[chrom].find(startPos, endPos): - # start + 1 is used to avoid regions that may overlap - # with two genomeChunks to be counted twice. Such region - # is only added for the genomeChunk that contains the start - # of the bed region. - if bed_region.start < endPos < bed_region.end: + regions = [[startPos, endPos]] + + for reg in regions: + if self_ is not None: + argsList = [self_] + else: + argsList = [] + + argsList.extend([chrom, reg[0], reg[1]]) + # add to argument list the static list received the the function + argsList.extend(staticArgs) + + # if a bed file is given, append to the TASK list, + # a list of bed regions that overlap with the + # current genomeChunk. + if bedFile: + # this method to get the bedFile regions may seem + # cumbersome but I (fidel) think is better to + # balance the load between multiple processors. + # This method first partitions the genome into smaller + # chunks and then, for each chunk, the list of + # regions overlapping the chunk interval is added. + # This is preferable to sending each worker a + # single region because of the overhead of initiating + # the data. + bed_regions_list = [] + for bed_region in bed_interval_tree[chrom].find(reg[0], reg[1]): + # start + 1 is used to avoid regions that may overlap + # with two genomeChunks to be counted twice. Such region + # is only added for the genomeChunk that contains the start + # of the bed region. + if bed_region.start < endPos < bed_region.end: + continue + bed_regions_list.append([chrom, bed_region.start, bed_region.end]) + if len(bed_regions_list) == 0: continue - bed_regions_list.append([chrom, bed_region.start, bed_region.end]) - if len(bed_regions_list) == 0: - continue - # add to argument list, the position of the bed regions to use - argsList.append(bed_regions_list) + # add to argument list, the position of the bed regions to use + argsList.append(bed_regions_list) - TASKS.append(tuple(argsList)) + TASKS.append(tuple(argsList)) if len(TASKS) > 1 and numberOfProcessors > 1: if verbose: @@ -225,7 +240,7 @@ def BED_to_interval_tree(BED_file): fields = line.strip().split() chrom, start_bed, end_bed, = fields[0], int(fields[1]), int(fields[2]) - if chrom not in bed_interval_tree: + if chrom not in bed_interval_tree.keys(): bed_interval_tree[chrom] = IntervalTree() # skip if a region overlaps with a region already seen @@ -236,3 +251,56 @@ def BED_to_interval_tree(BED_file): bed_interval_tree[chrom].add_interval(Interval(start_bed, end_bed)) return bed_interval_tree + + +def blOverlap(t, chrom, chunk): + """ + Test for an overlap between an IntervalTree and a given genomic chunk. + + This attempts to account for differences in chromosome naming. + + Returns the overlaps + """ + + if t is None: + return [] + + if chrom not in t.keys(): + if chrom.startswith("chr"): + chrom = chrom[3:] + elif chrom == "MT": + chrom = "chrM" + else: + chrom = "chr" + chrom + + if chrom not in t.keys(): + return [] + + return t[chrom].find(chunk[0], chunk[1]) + + +def blSubtract(t, chrom, chunk): + """ + If a genomic region overlaps with a blacklisted region, then subtract that region out + + returns a list of lists + """ + + if t is None: + return [chunk] + + overlaps = blOverlap(t, chrom, chunk) + if len(overlaps) > 0: + output = [] + for o in overlaps: + if chunk[1] <= chunk[0]: + break + if chunk[0] < o.start: + output.append([chunk[0], o.start]) + chunk[0] = o.end + if chunk[0] < chunk[1]: + output.append([chunk[0], chunk[1]]) + else: + output = [chunk] + + return output diff --git a/deeptools/multiBamSummary.py b/deeptools/multiBamSummary.py index 760f2c67cd..153e18aab4 100644 --- a/deeptools/multiBamSummary.py +++ b/deeptools/multiBamSummary.py @@ -206,6 +206,7 @@ def main(args=None): verbose=args.verbose, region=args.region, bedFile=bed_regions, + blackListFileName=args.blackListFileName, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 8946cab5b8..758dd066b4 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -206,6 +206,7 @@ def main(args=None): num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, stepSize=args.binSize + args.distanceBetweenBins, verbose=args.verbose, diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index ef33207b13..97370ce6d3 100644 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -177,7 +177,7 @@ def normalization_options(): return parser -def getParentArgParse(args=None, binSize=True): +def getParentArgParse(args=None, binSize=True, blackList=True): """ Typical arguments for several tools """ @@ -206,6 +206,12 @@ def getParentArgParse(args=None, binSize=True): required=False, type=genomicRegion) + if blackList: + optional.add_argument('--blackListFileName', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", + metavar="BED file", + required=False) + optional.add_argument('--numberOfProcessors', '-p', help='Number of processors to use. Type "max/2" to ' 'use half the maximum number of processors or "max" ' @@ -643,3 +649,26 @@ def bam_total_reads(bam_handle, chroms_to_ignore): tot_mapped_reads = bam_handle.mapped return tot_mapped_reads + + +def bam_blacklisted_reads(bam_handle, chroms_to_ignore, blackListFileName=None): + blacklisted = 0 + if blackListFileName is None: + return blacklisted + + import pysam + import deeptools.mapReduce as mapReduce + + # Get the chromosome lengths + chromLens = {} + for line in pysam.idxstats(bam_handle.filename): + chrom, _len, nmapped, _nunmapped = line.split('\t') + chromLens[chrom] = int(_len) + + bl = mapReduce.BED_to_interval_tree(open(blackListFileName, "r")) + for chrom in bl.keys(): + if not chroms_to_ignore or chrom not in chroms_to_ignore: + for reg in bl[chrom].find(0, chromLens[chrom]): + blacklisted += bam_handle.count(reference=chrom, start=reg.start, end=reg.end) + + return blacklisted diff --git a/deeptools/plotCoverage.py b/deeptools/plotCoverage.py index ff98df21c6..dbdc00d105 100644 --- a/deeptools/plotCoverage.py +++ b/deeptools/plotCoverage.py @@ -128,6 +128,7 @@ def main(args=None): numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, region=args.region, + blackListFileName=args.blackListFileName, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py index a4ec949210..16cf67fa47 100644 --- a/deeptools/plotFingerprint.py +++ b/deeptools/plotFingerprint.py @@ -139,6 +139,7 @@ def main(args=None): args.bamfiles, args.binSize, args.numberOfSamples, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, region=args.region, @@ -166,13 +167,15 @@ def main(args=None): x = np.arange(total).astype('float') / total # normalize from 0 to 1 i = 0 - for reads in num_reads_per_bin.T: + # matplotlib won't iterate through line styles by itself + pyplot_line_styles = sum([7 * ["-"], 7 * ["--"], 7 * ["-."], 7 * [":"], 7 * ["."]], []) + for i, reads in enumerate(num_reads_per_bin.T): count = np.cumsum(np.sort(reads)) - count = count / count[-1] # to normalyze y from 0 to 1 - plt.plot(x, count, label=args.labels[i]) + count = count / count[-1] # to normalize y from 0 to 1 + j = i % 35 + plt.plot(x, count, label=args.labels[i], linestyle=pyplot_line_styles[j]) plt.xlabel('rank') plt.ylabel('fraction w.r.t. bin with highest coverage') - i += 1 plt.legend(loc='upper left') plt.suptitle(args.plotTitle) # set the plotFileFormat explicitly to None to trigger the diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 451fd015c8..8962627ad6 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -67,6 +67,8 @@ def get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel): w = hm.parameters['bin size'] b = hm.parameters['upstream'] a = hm.parameters['downstream'] + c = hm.parameters.get('unscaled 5 prime', 0) + d = hm.parameters.get('unscaled 3 prime', 0) m = hm.parameters['body'] if b < 1e5: @@ -86,17 +88,28 @@ def get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel): xticks_label = [] # only if upstream region is set, add a x tick - if hm.parameters['upstream'] > 0: + if b > 0: xticks_values.append(b) xticks_label.append('{0:.1f}'.format(-(float(b) / quotient))) + xticks_label.append(startLabel) + + # 5 prime unscaled tick/label, if needed + if c > 0: + xticks_values.append(b + c) + xticks_label.append('') + # set the x tick for the body parameter, regardless if # upstream is 0 (not set) - xticks_values.append(b + m) - xticks_label.append(startLabel) + if d > 0: + xticks_values.append(b + c + m) + xticks_label.append('') + + xticks_values.append(b + c + m + d) xticks_label.append(endLabel) + if a > 0: - xticks_values.append(b + m + a) + xticks_values.append(b + c + m + d + a) xticks_label.append('{0:.1f}{1}'.format(float(a) / quotient, symbol)) xticks = [k / w for k in xticks_values] @@ -116,9 +129,7 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro numcols = hm_matrix.get_num_samples() numrows = hm_matrix.get_num_groups() if perGroup: - temp = numcols - numcols = numrows - numrows = temp + numcols, numrows = numrows, numcols # the rows have different size depending # on the number of regions contained in the @@ -214,16 +225,15 @@ def plotMatrix(hm, outFileName, figheight += figwidth numsamples = hm.matrix.get_num_samples() - total_figwidth = figwidth * numsamples + if perGroup: + num_cols = hm.matrix.get_num_groups() + else: + num_cols = numsamples + total_figwidth = figwidth * num_cols if showColorbar: total_figwidth += 1 / 2.54 fig = plt.figure(figsize=(total_figwidth, figheight)) - hm.parameters['upstream'] - hm.parameters['downstream'] - hm.parameters['body'] - hm.parameters['bin size'] - xticks, xtickslabel = getProfileTicks(hm, reference_point_label, startLabel, endLabel) xticks_heat, xtickslabel_heat = get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel) @@ -322,22 +332,26 @@ def plotMatrix(hm, outFileName, elif not perGroup and sample == 0: ax.axes.set_ylabel(sub_matrix['group']) - # add xticks to the bottom heatmap (last group) - ax.axes.get_xaxis().set_visible(True) - ax.axes.set_xticks(xticks_heat) - ax.axes.set_xticklabels(xtickslabel_heat, size=8) + # add labels to last block in a column + if (perGroup and sample == hm.matrix.get_num_samples() - 1) or \ + (not perGroup and group_idx == numgroups - 1): + + # add xticks to the bottom heatmap (last group) + ax.axes.get_xaxis().set_visible(True) + ax.axes.set_xticks(xticks_heat) + ax.axes.set_xticklabels(xtickslabel_heat, size=8) - # align the first and last label - # such that they don't fall off - # the heatmap sides - ticks = ax.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment('left') - ticks[-1].label1.set_horizontalalignment('right') + # align the first and last label + # such that they don't fall off + # the heatmap sides + ticks = ax.xaxis.get_major_ticks() + ticks[0].label1.set_horizontalalignment('left') + ticks[-1].label1.set_horizontalalignment('right') - ax.get_xaxis().set_tick_params( - which='both', - top='off', - direction='out') + ax.get_xaxis().set_tick_params( + which='both', + top='off', + direction='out') # plot the profiles on top of the heatmaps if showSummaryPlot: @@ -489,8 +503,8 @@ def main(args=None): if args.outFileNameMatrix: hm.save_matrix_values(args.outFileNameMatrix) - # if args.outFileNameData: - # hm.saveTabulatedValues(args.outFileNameData) + if args.outFileNameData: + hm.save_tabulated_values(args.outFileNameData) if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py index 6c3d96f6f4..58c3ecea1a 100644 --- a/deeptools/plotPCA.py +++ b/deeptools/plotPCA.py @@ -72,6 +72,12 @@ def plotCorrelationArgs(): 'eps, pdf and svg.', choices=['png', 'pdf', 'svg', 'eps']) + optional.add_argument('--outFileNameData', + help='File name to save the data ' + 'underlying data for the average profile, e.g., ' + 'myProfile.tab.', + type=argparse.FileType('w')) + optional.add_argument('--version', action='version', version='%(prog)s {}'.format(__version__)) @@ -90,6 +96,19 @@ def main(args=None): plot_title=args.plotTitle, image_format=args.plotFileFormat) + if args.outFileNameData is not None: + import matplotlib + mlab_pca = matplotlib.mlab.PCA(corr.matrix) + n = len(corr.labels) + of = args.outFileNameData + of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels))) + for i in xrange(n): + of.write("{}".format(i + 1)) + for v in mlab_pca.Wt[i, :]: + of.write("\t{}".format(v)) + of.write("\t{}\n".format(mlab_pca.s[i])) + args.outFileNameData.close() + if __name__ == "__main__": main() diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index c54ba274be..b591fa1937 100644 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -344,7 +344,9 @@ def plot_heatmap(self): yticks = [x + d_half for x in pos] ax.axes.set_yticks(yticks) - ax.axes.set_yticklabels(labels[::-1], rotation='vertical') + # TODO: make rotation a parameter + # ax.axes.set_yticklabels(labels[::-1], rotation='vertical') + ax.axes.set_yticklabels(labels[::-1]) ax_list.append(ax) diff --git a/deeptools/test/test_heatmapper.py b/deeptools/test/test_heatmapper.py index ca77daae1e..719deb2913 100644 --- a/deeptools/test/test_heatmapper.py +++ b/deeptools/test/test_heatmapper.py @@ -67,6 +67,14 @@ def test_computeMatrix_region_extend_over_chr_end(self): assert filecmp.cmp(ROOT + '/master_extend_beyond_chr_size.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') + def test_computeMatrix_unscaled(self): + args = "scale-regions -S {0}/unscaled.bigWig -R {0}/unscaled.bed -a 300 -b 500 --unscaled5prime 100 --unscaled3prime 50 " \ + "--outFileName /tmp/_test.mat.gz -bs 10 -p 1".format(ROOT).split() + deeptools.computeMatrix.main(args) + os.system('gunzip -f /tmp/_test.mat.gz') + assert filecmp.cmp(ROOT + '/master_unscaled.mat', '/tmp/_test.mat') is True + os.remove('/tmp/_test.mat') + def test_plotHeatmap_simple_plot(self): """ Test a simple plot generated using a matrix from diff --git a/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg b/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg index c708824592..3c978bf2fb 100644 --- a/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg +++ b/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg @@ -2,7 +2,7 @@ - + + + + + + + - + - + + + + + + + + @@ -978,8 +717,8 @@ z - - + + - - - + + - - - - - - - + + + - + - + - + @@ -1060,30 +799,30 @@ L 235.844373 627.338519 - - + + - + - + - + - - + + - + - + - + @@ -1092,9 +831,9 @@ L 235.844373 627.338519 - + - + @@ -1104,102 +843,102 @@ L 235.844373 627.338519 - + - - + - - + - - + - - + - @@ -1207,19 +946,24 @@ L 221.990042 112.739987 L 23.9225 19.4945 " style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;"/> - - - + + + + + + - + - + - + - + @@ -1229,40 +973,40 @@ L 23.9225 19.4945 - - + + - + - + - + - + - + - - + + - + - + - + - + - + @@ -1272,29 +1016,29 @@ L 23.9225 19.4945 - + - + +" id="mbfe9c3911b" style="stroke:#000000;stroke-width:0.5;"/> - + - + +" id="m5b98145044" style="stroke:#000000;stroke-width:0.5;"/> - + - + @@ -1304,17 +1048,17 @@ L -4 0 - + - + - + - + - + - + - + - + - + - + @@ -1374,17 +1118,17 @@ Q 48.484375 72.75 52.59375 71.296875 - + - + - + - + - + @@ -1394,9 +1138,9 @@ Q 48.484375 72.75 52.59375 71.296875 - + - + @@ -1408,112 +1152,112 @@ Q 48.484375 72.75 52.59375 71.296875 - - - + - - + - - + - - + - - - - - - - + + + - + - + - + - + - + @@ -1521,40 +1265,40 @@ L 235.844373 19.4945 - - + + - + - + - + - + - + - - + + - + - + - + - + - + @@ -1564,59 +1308,59 @@ L 235.844373 19.4945 - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -1626,15 +1370,15 @@ L 235.844373 19.4945 - - + - - + + - + @@ -1642,15 +1386,15 @@ L 256.644373 29.97325 - - + - - + + - + @@ -1658,15 +1402,15 @@ L 256.644373 41.71575 - - + - - + + - + @@ -1674,15 +1418,15 @@ L 256.644373 53.45825 - - + - - + + - + @@ -1694,43 +1438,43 @@ L 256.644373 65.20075 - - + - - - + + - + - + - + - + @@ -1738,14 +1482,14 @@ z - + - + - + - + @@ -1753,14 +1497,14 @@ z - + - + - + - + @@ -1768,12 +1512,12 @@ z - + - + - + - + @@ -1813,14 +1557,14 @@ Q 23.96875 32.421875 30.609375 32.421875 - + - + - + - + @@ -1828,14 +1572,40 @@ Q 23.96875 32.421875 30.609375 32.421875 - + - + - + - + + + + @@ -1843,12 +1613,12 @@ Q 23.96875 32.421875 30.609375 32.421875 - + - + - + - + @@ -1896,14 +1666,14 @@ Q 18.3125 60.0625 18.3125 54.390625 - + - + - + - + @@ -1911,14 +1681,14 @@ Q 18.3125 60.0625 18.3125 54.390625 - + - + - + - + @@ -1926,12 +1696,12 @@ Q 18.3125 60.0625 18.3125 54.390625 - + - + - + - + @@ -1952,14 +1722,14 @@ z - + - + - + - + @@ -1970,38 +1740,38 @@ z - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + diff --git a/deeptools/test/test_heatmapper/master.mat b/deeptools/test/test_heatmapper/master.mat index 4a78d9957b..c7f15c40da 100644 --- a/deeptools/test/test_heatmapper/master.mat +++ b/deeptools/test/test_heatmapper/master.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":"TSS","group_labels":["Group 1","Group 2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,200],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["Group 1","Group 2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat b/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat index e1a401f1a4..7fd48a60fb 100644 --- a/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat +++ b/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":500,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":"TSS","group_labels":["group1","group2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,600],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":500,"unscaled 3 prime":0,"group_labels":["group1","group2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,600],"missing data as zero":false,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/deeptools/test/test_heatmapper/master_multibed.mat b/deeptools/test/test_heatmapper/master_multibed.mat index 9a6e311b9a..d4d85e46e5 100644 --- a/deeptools/test/test_heatmapper/master_multibed.mat +++ b/deeptools/test/test_heatmapper/master_multibed.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":"TSS","group_labels":["group1","group2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,200],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["group1","group2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/deeptools/test/test_heatmapper/master_nan_to_zero.mat b/deeptools/test/test_heatmapper/master_nan_to_zero.mat index 276233d24f..62d14e509d 100644 --- a/deeptools/test/test_heatmapper/master_nan_to_zero.mat +++ b/deeptools/test/test_heatmapper/master_nan_to_zero.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":true,"skip zeros":false,"ref point":"TSS","group_labels":["Group 1","Group 2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,200],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["Group 1","Group 2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":true,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/deeptools/test/test_heatmapper/master_scale_reg.mat b/deeptools/test/test_heatmapper/master_scale_reg.mat index 4f92906977..c0e71f6b8d 100644 --- a/deeptools/test/test_heatmapper/master_scale_reg.mat +++ b/deeptools/test/test_heatmapper/master_scale_reg.mat @@ -1,4 +1,4 @@ -@{"body":100,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":null,"group_labels":["Group 1","Group 2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":10,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,30],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":100,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["Group 1","Group 2"],"bin size":10,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,30],"missing data as zero":false,"ref point":null,"min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.500000 3.000000 3.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 1.500000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.500000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/deeptools/test/test_heatmapper/master_unscaled.mat b/deeptools/test/test_heatmapper/master_unscaled.mat new file mode 100644 index 0000000000..2b94c0d80d --- /dev/null +++ b/deeptools/test/test_heatmapper/master_unscaled.mat @@ -0,0 +1,2 @@ +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":100,"body":1000,"sample_labels":["unscaled"],"downstream":300,"unscaled 3 prime":50,"group_labels":["genes"],"bin size":10,"upstream":500,"group_boundaries":[0,1],"sample_boundaries":[0,195],"missing data as zero":false,"ref point":null,"min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} +1 500 1650 . foo . 1.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 4.000000 4.000000 4.000000 4.000000 4.000000 5.000000 5.000000 5.000000 5.000000 5.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 8.000000 8.000000 9.000000 9.000000 9.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 diff --git a/deeptools/test/test_heatmapper/profile_master_heatmap.svg b/deeptools/test/test_heatmapper/profile_master_heatmap.svg index ad007d22a5..b77a97a5bd 100644 --- a/deeptools/test/test_heatmapper/profile_master_heatmap.svg +++ b/deeptools/test/test_heatmapper/profile_master_heatmap.svg @@ -19,35 +19,35 @@ z - - - + + - - - - @@ -56,20 +56,20 @@ L 23.8 24.8 +" id="mafb02c7a5a" style="stroke:#000000;stroke-width:0.5;"/> - + +" id="m9783c3455a" style="stroke:#000000;stroke-width:0.5;"/> - + @@ -120,7 +120,7 @@ L 4.890625 23.390625 z " id="BitstreamVeraSans-Roman-2d"/> - + @@ -131,12 +131,12 @@ z - + - + @@ -183,7 +183,7 @@ L -0.296875 64.59375 z " id="BitstreamVeraSans-Roman-54"/> - + @@ -193,12 +193,12 @@ z - + - + @@ -242,7 +242,7 @@ L 18.109375 75.984375 z " id="BitstreamVeraSans-Roman-62"/> - + @@ -383,7 +383,7 @@ Q 40.53125 6.109375 44.609375 11.75 Q 48.6875 17.390625 48.6875 27.296875 " id="BitstreamVeraSans-Roman-70"/> - + @@ -397,7 +397,7 @@ Q 48.6875 17.390625 48.6875 27.296875 - + @@ -486,7 +486,7 @@ Q 31.78125 56 36.171875 55.265625 Q 40.578125 54.546875 44.28125 53.078125 " id="BitstreamVeraSans-Roman-73"/> - + @@ -496,10 +496,10 @@ Q 40.578125 54.546875 44.28125 53.078125 - - + - @@ -529,10 +529,10 @@ z +" id="me225bad64f" style="stroke:#000000;stroke-width:0.5;"/> - + @@ -547,7 +547,7 @@ L -4 0 - + @@ -595,7 +595,7 @@ Q 46.96875 40.921875 40.578125 39.3125 - + @@ -640,7 +640,7 @@ Q 48.484375 72.75 52.59375 71.296875 - + @@ -685,7 +685,7 @@ Q 23.96875 32.421875 30.609375 32.421875 - + @@ -700,7 +700,7 @@ Q 23.96875 32.421875 30.609375 32.421875 - + @@ -741,7 +741,7 @@ z - + @@ -794,7 +794,7 @@ Q 18.3125 60.0625 18.3125 54.390625 - + @@ -809,7 +809,7 @@ Q 18.3125 60.0625 18.3125 54.390625 - + @@ -843,7 +843,7 @@ z - + @@ -869,7 +869,7 @@ z - + @@ -885,11 +885,11 @@ z - - + + - - + + diff --git a/deeptools/test/test_heatmapper/unscaled.bed b/deeptools/test/test_heatmapper/unscaled.bed new file mode 100644 index 0000000000..440c97ce40 --- /dev/null +++ b/deeptools/test/test_heatmapper/unscaled.bed @@ -0,0 +1 @@ +1 500 1650 . foo . diff --git a/deeptools/test/test_heatmapper/unscaled.bigWig b/deeptools/test/test_heatmapper/unscaled.bigWig new file mode 100644 index 0000000000..6574191526 Binary files /dev/null and b/deeptools/test/test_heatmapper/unscaled.bigWig differ diff --git a/deeptools/writeBedGraph.py b/deeptools/writeBedGraph.py index e7d315e061..d5ccb474df 100644 --- a/deeptools/writeBedGraph.py +++ b/deeptools/writeBedGraph.py @@ -88,7 +88,7 @@ class WriteBedGraph(cr.CountReadsPerBin): """ - def run(self, func_to_call, func_args, out_file_name, format="bedgraph", smoothLength=0): + def run(self, func_to_call, func_args, out_file_name, blackListFileName=None, format="bedgraph", smoothLength=0): r""" Given a list of bamfiles, a function and a function arguments, this method writes a bedgraph file (or bigwig) file @@ -134,6 +134,7 @@ def run(self, func_to_call, func_args, out_file_name, format="bedgraph", smoothL self_=self, genomeChunkLength=genome_chunk_length, region=self.region, + blackListFileName=blackListFileName, numberOfProcessors=self.numberOfProcessors) # concatenate intermediary bedgraph files diff --git a/deeptools/writeBedGraph_bam_and_bw.py b/deeptools/writeBedGraph_bam_and_bw.py index 4aba61c838..250bc3733f 100644 --- a/deeptools/writeBedGraph_bam_and_bw.py +++ b/deeptools/writeBedGraph_bam_and_bw.py @@ -147,7 +147,7 @@ def writeBedGraph_worker( def writeBedGraph( bamOrBwFileList, outputFileName, fragmentLength, - func, funcArgs, tileSize=25, region=None, numberOfProcessors=None, + func, funcArgs, tileSize=25, region=None, blackListFileName=None, numberOfProcessors=None, format="bedgraph", extendPairedEnds=True, missingDataAsZero=False, smoothLength=0, fixed_step=False): r""" @@ -207,6 +207,7 @@ def writeBedGraph( chromNamesAndSize, genomeChunkLength=genomeChunkLength, region=region, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors) # concatenate intermediary bedgraph files diff --git a/docs/content/tools/bamCoverage.rst b/docs/content/tools/bamCoverage.rst index 2c038d9807..0d797fb4d7 100644 --- a/docs/content/tools/bamCoverage.rst +++ b/docs/content/tools/bamCoverage.rst @@ -76,7 +76,10 @@ Regular bigWig track Separate tracks for each strand ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sometimes it makes sense to generate two independent :ref:`bigWig` files for all reads on the forward and reverse strand, respectively. +Sometimes it makes sense to generate two independent :ref:`bigWig` files for all reads on the forward and reverse strand, respectively. As of deepTools version 2.2, one can simply use the ``--filterRNAstrand`` option, such as ``--filterRNAstrand forward`` or ``--filterRNAstrand reverse``. This handles paired-end and single-end datasets. For older versions of deepTools, please see the instructions below. + +Versions before 2.2 +******************* To follow the examples, you need to know that ``-f`` will tell ``samtools view`` to **include** reads with the indicated flag, while ``-F`` will lead to the **exclusion** of reads with the respective flag. diff --git a/galaxy/wrapper/bamCoverage.xml b/galaxy/wrapper/bamCoverage.xml index 1bcca7e4a3..58fb84ddb2 100644 --- a/galaxy/wrapper/bamCoverage.xml +++ b/galaxy/wrapper/bamCoverage.xml @@ -46,6 +46,10 @@ #if str($advancedOpt.ignoreForNormalization).strip() != '': --ignoreForNormalization $advancedOpt.ignoreForNormalization #end if + + #if str($advancedOpt.filterRNAstrand) != 'no': + --filterRNAstrand '$advancedOpt.filterRNAstrand' + #end if #end if ]]> @@ -105,6 +109,14 @@ label="Determine nucleosome positions from MNase-seq data" help="Only the 3 nucleotides at the center of each fragment are counted. The fragment ends are defined by the two mate reads. *NOTE*: Requires paired-end data." /> + + + + + + @@ -153,6 +165,14 @@ + + + + + + + + @@ -118,6 +120,14 @@ label="Distance downstream of the region end position" help="If the regions are genes, this would be the distance downstream of the transcription end site."/> + + diff --git a/galaxy/wrapper/plotHeatmap.xml b/galaxy/wrapper/plotHeatmap.xml index d178c9cb1a..021d794024 100644 --- a/galaxy/wrapper/plotHeatmap.xml +++ b/galaxy/wrapper/plotHeatmap.xml @@ -83,6 +83,10 @@ --plotTitle '$advancedOpt.plotTitle' #end if + #if $advancedOpt.outFileNameData: + --outFileNameData $outFileNameData + #end if + $advancedOpt.perGroup @KMEANS_CLUSTERING@ @@ -126,6 +130,8 @@ + + + outFileNameData is True + diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index dfdabf38ba..d7c731d171 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -12,15 +12,22 @@ --plotTitle "$plotTitle" --plotFile "$outFileName" --plotFileFormat "$outFileFormat" + #if $outFileNameDataBool + --outFileNameData "$outFileNameData" + #end if ]]> + + + outFileNameDataBool is True + @@ -29,6 +36,14 @@ + + + + + + + +