From 304c261af684889ef6e57ca95620398c3f27c1a0 Mon Sep 17 00:00:00 2001 From: Fidel Ramirez Date: Tue, 26 Jan 2016 23:46:58 +0100 Subject: [PATCH 01/55] added support to filter rna reads by strand --- deeptools/bamCoverage.py | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index 7d5a2aa0c..72f55b805 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -76,6 +76,12 @@ def get_optional_args(): '*NOTE*: Requires paired-end data. A bin size of 1 is recommended.', action='store_true') + optional.add_argument('--filterRNAstrand', + help='Selects RNA-seq reads (single-end or paired-end) in ' + 'the given strand.', + choices=['forward', 'reverse'], + default=None) + return parser @@ -173,6 +179,7 @@ def main(args=None): debug = 0 func_args = {'scaleFactor': get_scale_factor(args)} + if args.MNase: # check that library is paired end # using getFragmentAndReadSize @@ -199,6 +206,23 @@ def main(args=None): verbose=args.verbose, ) + elif args.filterRNAstrand: + wr = filter_rna_strand([args.bam], + binLength=args.binSize, + stepSize=args.binSize, + region=args.region, + numberOfProcessors=args.numberOfProcessors, + extendReads=args.extendReads, + minMappingQuality=args.minMappingQuality, + ignoreDuplicates=args.ignoreDuplicates, + center_read=args.centerReads, + zerosToNans=args.skipNonCoveredRegions, + samFlag_include=args.samFlagInclude, + samFlag_exclude=args.samFlagExclude, + verbose=args.verbose, + ) + + wr.filter_strand = args.filterRNAstrand else: wr = writeBedGraph.WriteBedGraph([args.bam], binLength=args.binSize, @@ -248,3 +272,57 @@ def get_fragment_from_read(self, read): fragment_end = fragment_start + 3 return [(fragment_start, fragment_end)] + + +class filterRnaStrand(writeBedGraph.WriteBedGraph): + """ + Class to redefine the get_fragment_from_read for the --filterRNAstrand case + + Only reads either forward or reverse are kept as follows: + + For paired-end + -------------- + reads forward: + + 1. alignments of the second in pair (128) if they map to the forward strand (~16) + 2. alignments of the first in pair (64) if they map to the reverse strand (~32) + + 1. include 128, exclude 16 + or + 2. include 64 exclude 32 + + reads reverse: + 1. alignments of the second in pair (128) if it maps to the reverse strand (16) 128 & 16 = 144 + 2. alignments of the first in pair (64) if their mates map to the reverse strand (32) 64 & 32 = 96 + + 1. include 144 + or + 2. include 96 + + For single-end + -------------- + forward: include 16 (map forward strand) + reverse: exclude 16 + + """ + + def get_fragment_from_read(self, read): + """ + Gets only reads for the given strand + """ + fragment_start = fragment_end = None + + # only paired forward reads are considered + if read.is_paired: + if self.filter_strand == 'forward': + if (read.flag & 128 == 128 and read.flag & 16 == 0) or (read.flag & 64 == 64 and read.flag & 32 == 0): + return read.get_blocks() + else: + if read.flag & 144 == 144 or read.flag & 96 == 96: + return read.get_blocks() + else: + if self.filter_strand == 'forward' and read.flag & 16 == 16: + return read.get_blocks() + elif self.filter_strand == 'reverse' and read.flag & 16 == 0: + return read.get_blocks() + From 688aff342c9b4574c72fa8d9b86f2eee9599a16e Mon Sep 17 00:00:00 2001 From: Fidel Ramirez Date: Tue, 26 Jan 2016 23:52:09 +0100 Subject: [PATCH 02/55] typos --- deeptools/bamCoverage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index 72f55b805..5efce855e 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -207,7 +207,7 @@ def main(args=None): ) elif args.filterRNAstrand: - wr = filter_rna_strand([args.bam], + wr = filterRnaStrand([args.bam], binLength=args.binSize, stepSize=args.binSize, region=args.region, @@ -326,3 +326,4 @@ def get_fragment_from_read(self, read): elif self.filter_strand == 'reverse' and read.flag & 16 == 0: return read.get_blocks() + return [(fragment_start, fragment_end)] \ No newline at end of file From dea7a3acbc61f0bcb21c5c7f9164fb6338f8e746 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Fri, 29 Jan 2016 21:07:20 +0100 Subject: [PATCH 03/55] Oh PEP8, how I loathe thee --- deeptools/bamCoverage.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index 5efce855e..8579f7913 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -208,19 +208,19 @@ def main(args=None): elif args.filterRNAstrand: wr = filterRnaStrand([args.bam], - binLength=args.binSize, - stepSize=args.binSize, - region=args.region, - numberOfProcessors=args.numberOfProcessors, - extendReads=args.extendReads, - minMappingQuality=args.minMappingQuality, - ignoreDuplicates=args.ignoreDuplicates, - center_read=args.centerReads, - zerosToNans=args.skipNonCoveredRegions, - samFlag_include=args.samFlagInclude, - samFlag_exclude=args.samFlagExclude, - verbose=args.verbose, - ) + binLength=args.binSize, + stepSize=args.binSize, + region=args.region, + numberOfProcessors=args.numberOfProcessors, + extendReads=args.extendReads, + minMappingQuality=args.minMappingQuality, + ignoreDuplicates=args.ignoreDuplicates, + center_read=args.centerReads, + zerosToNans=args.skipNonCoveredRegions, + samFlag_include=args.samFlagInclude, + samFlag_exclude=args.samFlagExclude, + verbose=args.verbose, + ) wr.filter_strand = args.filterRNAstrand else: @@ -326,4 +326,4 @@ def get_fragment_from_read(self, read): elif self.filter_strand == 'reverse' and read.flag & 16 == 0: return read.get_blocks() - return [(fragment_start, fragment_end)] \ No newline at end of file + return [(fragment_start, fragment_end)] From 57bfa5aade79e2f2457283af33a12d97680977c6 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 9 Feb 2016 15:25:07 +0100 Subject: [PATCH 04/55] Add initial support for a BED file with blacklist regions. computeGCBias and correctGCBias have yet to be updated. None of this has been tested, of course. --- deeptools/SES_scaleFactor.py | 5 +++- deeptools/bamCompare.py | 3 ++ deeptools/bamCoverage.py | 4 +++ deeptools/bamPEFragmentSize.py | 6 ++++ deeptools/bigwigCompare.py | 1 + deeptools/computeMatrix.py | 7 ++++- deeptools/correctReadCounts.py | 5 +++- deeptools/countReadsPerBin.py | 17 ++++++++++++ deeptools/getFragmentAndReadSize.py | 3 +- deeptools/getScorePerBigWigBin.py | 4 ++- deeptools/heatmapper.py | 15 ++++++++-- deeptools/mapReduce.py | 40 +++++++++++++++++++++++++++ deeptools/multiBamSummary.py | 1 + deeptools/multiBigwigSummary.py | 1 + deeptools/parserCommon.py | 8 +++++- deeptools/plotCoverage.py | 1 + deeptools/plotFingerprint.py | 1 + deeptools/writeBedGraph.py | 3 +- deeptools/writeBedGraph_bam_and_bw.py | 3 +- 19 files changed, 118 insertions(+), 10 deletions(-) diff --git a/deeptools/SES_scaleFactor.py b/deeptools/SES_scaleFactor.py index af602a889..743434634 100644 --- a/deeptools/SES_scaleFactor.py +++ b/deeptools/SES_scaleFactor.py @@ -13,7 +13,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, normalizationLength, - avg_method='median', numberOfProcessors=1, + avg_method='median', blackListFile=None, numberOfProcessors=1, verbose=False, chrsToSkip=[]): r""" Subdivides the genome into chunks to be analyzed in parallel @@ -41,6 +41,8 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, chrsToSkip : list name of the chromosomes to be excluded from the scale estimation. Usually the chrX is included. + blackListFile : str + BED file containing blacklisted regions Returns ------- @@ -84,6 +86,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, binLength=binLength, numberOfSamples=numberOfSamples, extendReads=False, + blackListFile=blackListFile, numberOfProcessors=numberOfProcessors, verbose=verbose, chrsToSkip=chrsToSkip) diff --git a/deeptools/bamCompare.py b/deeptools/bamCompare.py index f411f706b..7397c227f 100644 --- a/deeptools/bamCompare.py +++ b/deeptools/bamCompare.py @@ -167,6 +167,7 @@ def get_scale_factors(args): [bam1.filename, bam2.filename], args.sampleLength, args.numberOfSamples, 1, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, chrsToSkip=args.ignoreForNormalization) @@ -214,6 +215,7 @@ def get_scale_factors(args): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(bamfile, return_lengths=False, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if args.extendReads: @@ -295,6 +297,7 @@ def main(args=None): region=args.region, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, + blackListFile=args.blackListFile, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, center_read=args.centerReads, diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index ef4cc3764..01da07f82 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -116,6 +116,7 @@ def get_scale_factor(args): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=False, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if args.extendReads: @@ -180,6 +181,7 @@ def main(args=None): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=False, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if frag_len_dict is None: @@ -189,6 +191,7 @@ def main(args=None): binLength=args.binSize, stepSize=args.binSize, region=args.region, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, @@ -205,6 +208,7 @@ def main(args=None): binLength=args.binSize, stepSize=args.binSize, region=args.region, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index bfb200849..e0b442120 100644 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -38,6 +38,11 @@ def parse_arguments(): help='Title of the plot, to be printed on top of ' 'the generated image. Leave blank for no title.', default='') + parser.add_argument('--blackList', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered." + metavar="BED file", + required=False) + parser.add_argument('--verbose', help='Set if processing data messages are wanted.', action='store_true', @@ -52,6 +57,7 @@ def parse_arguments(): def main(args=None): args = parse_arguments().parse_args(args) fragment_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=True, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index ad501f81d..f7bafa154 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -98,6 +98,7 @@ def main(args=None): (args.bigwig2, 'bigwig')], args.outFileName, 0, FUNC, function_args, tileSize=args.binSize, region=args.region, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, format=args.outFileFormat, smoothLength=False, diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 3ff40a899..6bda61e55 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -279,6 +279,11 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): '(e.g. micro satellites) that may bias the average ' 'values.') + optional.add_argument('--blackList', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered." + metavar="BED file", + required=False) + # in contrast to other tools, # computeMatrix by default outputs # messages and the --quiet flag supresses them @@ -366,7 +371,7 @@ def main(args=None): hm = heatmapper.heatmapper() scores_file_list = args.scoreFileName - hm.computeMatrix(scores_file_list, bed_file, parameters, verbose=args.verbose) + hm.computeMatrix(scores_file_list, bed_file, parameters, blackListFile=args.blackListFile, verbose=args.verbose) if args.sortRegions != 'no': hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions) diff --git a/deeptools/correctReadCounts.py b/deeptools/correctReadCounts.py index 9489fbb24..7ea53df0b 100644 --- a/deeptools/correctReadCounts.py +++ b/deeptools/correctReadCounts.py @@ -64,7 +64,7 @@ def computeCorrectedReadcounts(tileCoverage, args): def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentLength, outFileName, outFileFormat, outFileNameCorr=None, region=None, extendPairedEnds=True, - numberOfProcessors=1, Nsigmas=2, maxSignalRatio=10, verbose=False): + numberOfProcessors=1, Nsigmas=2, maxSignalRatio=10, blackListFile=None, verbose=False): bam1 = writeBedGraph.openBam(bamFilesList[0]) bam2 = writeBedGraph.openBam(bamFilesList[1]) @@ -81,6 +81,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL numberOfSamples, defaultFragmentLength, 1, + blackListFile=blackListFile, numberOfProcessors=numberOfProcessors, verbose=verbose) @@ -152,6 +153,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL funcArgs, tileSize=binLength, region=region, format=outFileFormat, zerosToNans=False, + blackListFile=blackListFile, numberOfProcessors=numberOfProcessors, extendPairedEnds=extendPairedEnds) @@ -162,6 +164,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL funcArgs, tileSize=binLength, region=region, format=outFileFormat, zerosToNans=False, + blackListFile=blackListFile, numberOfProcessors=numberOfProcessors, extendPairedEnds=extendPairedEnds) diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index c420f13d3..24f4ebc4e 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -59,6 +59,9 @@ class CountReadsPerBin(object): File handle of a bed file containing the regions for wich to compute the coverage. This option overrules ``binLength``, ``numberOfSamples`` and ``stepSize``. + blackListFile : str + A string containing a BED file with blacklist regions. + extendReads : bool, int Whether coverage should be computed for the extended read length (i.e. the region covered @@ -136,6 +139,7 @@ class CountReadsPerBin(object): def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfProcessors=1, verbose=False, region=None, bedFile=None, extendReads=False, + blackListFile=None, minMappingQuality=None, ignoreDuplicates=False, chrsToSkip=[], @@ -150,11 +154,15 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro self.bamFilesList = bamFilesList self.binLength = binLength self.numberOfSamples = numberOfSamples + self.blackList = None + if blackListFile: + self.blackList = mapReduce.BED_to_interval_tree(blackListFile) if extendReads and len(bamFilesList): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(bamFilesList[0], return_lengths=False, + blackListFile=blackListFile, numberOfProcessors=numberOfProcessors, verbose=verbose) if extendReads is True: @@ -266,6 +274,7 @@ def run(self): self_=self, genomeChunkLength=chunkSize, bedFile=self.bedFile, + blackListFile=blackListFile, region=self.region, numberOfProcessors=self.numberOfProcessors) @@ -364,11 +373,15 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None): regionsToConsider = [] if bed_regions_list is not None: for chrom, start, end in bed_regions_list: + if mapReduce.blOverlap(self.blackList, [chrom, start, end]): + continue regionsToConsider.append((chrom, start, end, end - start)) else: for i in xrange(start, end, self.stepSize): if i + self.binLength > end: break + if mapReduce.blOverlap(self.blackList, [chrom, i, i + self.binLength]): + continue regionsToConsider.append((chrom, i, i + self.binLength, self.binLength)) if self.save_data: @@ -451,6 +464,10 @@ def get_coverage_of_region(self, bamHandle, chrom, start, end, tileSize, vector_length = length / tileSize coverage = np.zeros(vector_length, dtype='float64') + # Return 0 for overlap with a blacklisted region + if mapReduce.blOverlap(self.blackList, [chrom, start, end]): + return coverge + start_time = time.time() # caching seems faster. TODO: profile the function c = 0 diff --git a/deeptools/getFragmentAndReadSize.py b/deeptools/getFragmentAndReadSize.py index 21656b0f9..73cd6967f 100644 --- a/deeptools/getFragmentAndReadSize.py +++ b/deeptools/getFragmentAndReadSize.py @@ -52,7 +52,7 @@ def getFragmentLength_worker(chrom, start, end, bamFile): return reads -def get_read_and_fragment_length(bamFile, return_lengths=False, +def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFile=None numberOfProcessors=None, verbose=False): """ Estimates the fragment length and read length through sampling @@ -82,6 +82,7 @@ def get_read_and_fragment_length(bamFile, return_lengths=False, getFragmentLength_wrapper, chrom_sizes, genomeChunkLength=chunk_size, + blackListFile=blackListFile, numberOfProcessors=numberOfProcessors, verbose=verbose) diff --git a/deeptools/getScorePerBigWigBin.py b/deeptools/getScorePerBigWigBin.py index 52ffaef98..419f15b47 100644 --- a/deeptools/getScorePerBigWigBin.py +++ b/deeptools/getScorePerBigWigBin.py @@ -20,7 +20,7 @@ def countFragmentsInRegions_worker(chrom, start, end, bigWigFiles, stepSize, binLength, save_data, - bedRegions=None, + bedRegions=None ): """ returns the average score in each bigwig file at each 'stepSize' position within the interval start, end for a 'binLength' window. @@ -179,6 +179,7 @@ def getScorePerBin(bigWigFiles, binLength, numberOfProcessors=1, verbose=False, region=None, bedFile=None, + blackListFile=None, stepSize=None, chrsToSkip=[], out_file_for_raw_data=None): @@ -236,6 +237,7 @@ def getScorePerBin(bigWigFiles, binLength, chrom_sizes, genomeChunkLength=chunkSize, bedFile=bedFile, + blackListFile=blackListFile, region=region, numberOfProcessors=numberOfProcessors) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index bfcf9b7fa..4c82b3911 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -27,8 +27,9 @@ def __init__(self): self.lengthDict = None self.matrix = None self.regions = None + self.blackList = None - def computeMatrix(self, score_file_list, regions_file, parameters, verbose=False): + def computeMatrix(self, score_file_list, regions_file, parameters, blackListFile=None, verbose=False): """ Splits into multiple cores the computation of the scores @@ -54,7 +55,7 @@ def computeMatrix(self, score_file_list, regions_file, parameters, verbose=False exit("Length of region before the body has to be a multiple of " "--binSize\nCurrent value is {}\n".format(parameters['upstream'])) - regions, group_labels = self.get_regions_and_groups(regions_file, verbose=verbose) + regions, group_labels = self.get_regions_and_groups(regions_file, blackListFile=blackListFile, verbose=verbose) # args to pass to the multiprocessing workers mp_args = [] @@ -771,6 +772,7 @@ def matrix_avg(matrix, avgType='mean'): @staticmethod def get_regions_and_groups(regions_file, onlyMultiplesOf=1, default_group_name='genes', + blackListFile=None, verbose=None): """ Reads a bed file. @@ -790,6 +792,10 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, group_labels = [] group_idx = 0 bed_file = deeptools.readBed.ReadBed(regions_file) + blackList = None + if blackListFile is not None: + blackList = BED_to_interval_tree(blackListFile) + for ginterval in bed_file: totalintervals += 1 if ginterval.line.startswith("track") or ginterval.line.startswith("browser"): @@ -809,6 +815,11 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, group_labels.append(label) continue + + # Exclude blacklist overlaps + if mapReduce.blOverlap(blackList, [ginterval.chrom, ginterval.start, ginterval.end]): + continue + # if the list of regions is to big, only # consider a fraction of the data # if totalintervals % onlyMultiplesOf != 0: diff --git a/deeptools/mapReduce.py b/deeptools/mapReduce.py index de7aa9e17..fa93db45b 100644 --- a/deeptools/mapReduce.py +++ b/deeptools/mapReduce.py @@ -7,6 +7,7 @@ def mapReduce(staticArgs, func, chromSize, genomeChunkLength=None, region=None, bedFile=None, + blackListFile=None, numberOfProcessors=4, verbose=False, self_=None): @@ -39,6 +40,7 @@ def mapReduce(staticArgs, func, chromSize, :param bedFile: Is a bed file is given, the args to the func to be called are extended to include a list of bed defined regions. + :param blackListFile: A list of regions to exclude from all computations. :param self_: In case mapreduce should make a call to an object the self variable has to be passed. """ @@ -81,6 +83,9 @@ def mapReduce(staticArgs, func, chromSize, if bed_in_region[-1].end > chromSize[0][1]: chromSize[0] = (chromSize[0][0], bed_in_region[-1].end) + if blackListFile: + blacklist = BED_to_interval_tree(blackListFile) + TASKS = [] # iterate over all chromosomes for chrom, size in chromSize: @@ -88,6 +93,12 @@ def mapReduce(staticArgs, func, chromSize, start = 0 if region_start == 0 else region_start for startPos in xrange(start, size, genomeChunkLength): endPos = min(size, startPos + genomeChunkLength) + + # Reject a chunk if it overlaps + if blackListFile: + if blOverlap(blackList, [chrom, startPos, endPos]): + continue + if self_ is not None: argsList = [self_] else: @@ -236,3 +247,32 @@ def BED_to_interval_tree(BED_file): bed_interval_tree[chrom].add_interval(Interval(start_bed, end_bed)) return bed_interval_tree + +def blOverlap(t, chunk): + """ + Test for an overlap between an IntervalTree and a given genomic chunk. + + This attempts to account for differences in chromosome naming. + + Returns True on an overlap, otherwise false + """ + + if t is None: + return False + + chrom = chunk[0] + if chrom not in t.keys(): + chrom2 = "chr" + chrom + if chrom2 in t.keys(): + chrom = chrom2 + elif len(chrom)>3: + chrom2 = chrom[3:] + if chrom2 in t.keys(): + chrom = chrom2 + else: + return False + + if len(t[chrom].find(chunk[1], chunk[2])): + return True + + return False diff --git a/deeptools/multiBamSummary.py b/deeptools/multiBamSummary.py index 760f2c67c..2ecf30400 100644 --- a/deeptools/multiBamSummary.py +++ b/deeptools/multiBamSummary.py @@ -206,6 +206,7 @@ def main(args=None): verbose=args.verbose, region=args.region, bedFile=bed_regions, + blackListFile=args.blackListFile, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 8946cab5b..da0403b19 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -206,6 +206,7 @@ def main(args=None): num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, stepSize=args.binSize + args.distanceBetweenBins, verbose=args.verbose, diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index ef33207b1..37dcd9752 100644 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -177,7 +177,7 @@ def normalization_options(): return parser -def getParentArgParse(args=None, binSize=True): +def getParentArgParse(args=None, binSize=True, blackList=True): """ Typical arguments for several tools """ @@ -206,6 +206,12 @@ def getParentArgParse(args=None, binSize=True): required=False, type=genomicRegion) + if blackList: + optional.add_argument('--blackList', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered." + metavar="BED file", + required=False) + optional.add_argument('--numberOfProcessors', '-p', help='Number of processors to use. Type "max/2" to ' 'use half the maximum number of processors or "max" ' diff --git a/deeptools/plotCoverage.py b/deeptools/plotCoverage.py index a49512bd1..eb4adb16c 100644 --- a/deeptools/plotCoverage.py +++ b/deeptools/plotCoverage.py @@ -128,6 +128,7 @@ def main(args=None): numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, region=args.region, + blackListFile=args.blackListFile, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py index 4ea2cb8ab..9d46dadde 100644 --- a/deeptools/plotFingerprint.py +++ b/deeptools/plotFingerprint.py @@ -139,6 +139,7 @@ def main(args=None): args.bamfiles, args.binSize, args.numberOfSamples, + blackListFile=args.blackListFile, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, region=args.region, diff --git a/deeptools/writeBedGraph.py b/deeptools/writeBedGraph.py index e7d315e06..e32bc7dcb 100644 --- a/deeptools/writeBedGraph.py +++ b/deeptools/writeBedGraph.py @@ -88,7 +88,7 @@ class WriteBedGraph(cr.CountReadsPerBin): """ - def run(self, func_to_call, func_args, out_file_name, format="bedgraph", smoothLength=0): + def run(self, func_to_call, func_args, out_file_name, blackListFile=None, format="bedgraph", smoothLength=0): r""" Given a list of bamfiles, a function and a function arguments, this method writes a bedgraph file (or bigwig) file @@ -134,6 +134,7 @@ def run(self, func_to_call, func_args, out_file_name, format="bedgraph", smoothL self_=self, genomeChunkLength=genome_chunk_length, region=self.region, + blackListFile=blackListFile, numberOfProcessors=self.numberOfProcessors) # concatenate intermediary bedgraph files diff --git a/deeptools/writeBedGraph_bam_and_bw.py b/deeptools/writeBedGraph_bam_and_bw.py index 4aba61c83..1a73dc70f 100644 --- a/deeptools/writeBedGraph_bam_and_bw.py +++ b/deeptools/writeBedGraph_bam_and_bw.py @@ -147,7 +147,7 @@ def writeBedGraph_worker( def writeBedGraph( bamOrBwFileList, outputFileName, fragmentLength, - func, funcArgs, tileSize=25, region=None, numberOfProcessors=None, + func, funcArgs, tileSize=25, region=None, blackListFile=None, numberOfProcessors=None, format="bedgraph", extendPairedEnds=True, missingDataAsZero=False, smoothLength=0, fixed_step=False): r""" @@ -207,6 +207,7 @@ def writeBedGraph( chromNamesAndSize, genomeChunkLength=genomeChunkLength, region=region, + blackListFile=blackListFile, numberOfProcessors=numberOfProcessors) # concatenate intermediary bedgraph files From 53bf02a8c41848d3eebf94217778addeeeaaaa11 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 9 Feb 2016 16:12:31 +0100 Subject: [PATCH 05/55] PEP8 and bug fixes --- deeptools/bamPEFragmentSize.py | 4 ++-- deeptools/computeMatrix.py | 4 ++-- deeptools/countReadsPerBin.py | 3 ++- deeptools/getFragmentAndReadSize.py | 2 +- deeptools/heatmapper.py | 3 ++- deeptools/mapReduce.py | 7 ++++--- deeptools/parserCommon.py | 4 ++-- 7 files changed, 15 insertions(+), 12 deletions(-) diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index e0b442120..7d612e6eb 100644 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -38,8 +38,8 @@ def parse_arguments(): help='Title of the plot, to be printed on top of ' 'the generated image. Leave blank for no title.', default='') - parser.add_argument('--blackList', '-bl', - help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered." + parser.add_argument('--blackListFile', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", metavar="BED file", required=False) diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 6bda61e55..29f8da011 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -279,8 +279,8 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): '(e.g. micro satellites) that may bias the average ' 'values.') - optional.add_argument('--blackList', '-bl', - help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered." + optional.add_argument('--blackListFile', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", metavar="BED file", required=False) diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index 24f4ebc4e..f03bba053 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -155,6 +155,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro self.binLength = binLength self.numberOfSamples = numberOfSamples self.blackList = None + self.blackListFile = blackListFile if blackListFile: self.blackList = mapReduce.BED_to_interval_tree(blackListFile) @@ -274,7 +275,7 @@ def run(self): self_=self, genomeChunkLength=chunkSize, bedFile=self.bedFile, - blackListFile=blackListFile, + blackListFile=self.blackListFile, region=self.region, numberOfProcessors=self.numberOfProcessors) diff --git a/deeptools/getFragmentAndReadSize.py b/deeptools/getFragmentAndReadSize.py index 73cd6967f..576d634aa 100644 --- a/deeptools/getFragmentAndReadSize.py +++ b/deeptools/getFragmentAndReadSize.py @@ -52,7 +52,7 @@ def getFragmentLength_worker(chrom, start, end, bamFile): return reads -def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFile=None +def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFile=None, numberOfProcessors=None, verbose=False): """ Estimates the fragment length and read length through sampling diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 4c82b3911..3a059bf40 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -10,6 +10,7 @@ import pyBigWig import deeptools.readBed +from deeptools import mapReduce def compute_sub_matrix_wrapper(args): @@ -794,7 +795,7 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, bed_file = deeptools.readBed.ReadBed(regions_file) blackList = None if blackListFile is not None: - blackList = BED_to_interval_tree(blackListFile) + blackList = mapReduce.BED_to_interval_tree(blackListFile) for ginterval in bed_file: totalintervals += 1 diff --git a/deeptools/mapReduce.py b/deeptools/mapReduce.py index fa93db45b..9362c5374 100644 --- a/deeptools/mapReduce.py +++ b/deeptools/mapReduce.py @@ -84,7 +84,7 @@ def mapReduce(staticArgs, func, chromSize, chromSize[0] = (chromSize[0][0], bed_in_region[-1].end) if blackListFile: - blacklist = BED_to_interval_tree(blackListFile) + blackList = BED_to_interval_tree(blackListFile) TASKS = [] # iterate over all chromosomes @@ -94,7 +94,7 @@ def mapReduce(staticArgs, func, chromSize, for startPos in xrange(start, size, genomeChunkLength): endPos = min(size, startPos + genomeChunkLength) - # Reject a chunk if it overlaps + # Reject a chunk if it overlaps if blackListFile: if blOverlap(blackList, [chrom, startPos, endPos]): continue @@ -248,6 +248,7 @@ def BED_to_interval_tree(BED_file): return bed_interval_tree + def blOverlap(t, chunk): """ Test for an overlap between an IntervalTree and a given genomic chunk. @@ -265,7 +266,7 @@ def blOverlap(t, chunk): chrom2 = "chr" + chrom if chrom2 in t.keys(): chrom = chrom2 - elif len(chrom)>3: + elif len(chrom) > 3: chrom2 = chrom[3:] if chrom2 in t.keys(): chrom = chrom2 diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 37dcd9752..daeb48a1c 100644 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -207,8 +207,8 @@ def getParentArgParse(args=None, binSize=True, blackList=True): type=genomicRegion) if blackList: - optional.add_argument('--blackList', '-bl', - help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered." + optional.add_argument('--blackListFile', '-bl', + help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", metavar="BED file", required=False) From 4439c59d69ba1014d5595118e519801ae0c85621 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Tue, 9 Feb 2016 19:43:46 +0100 Subject: [PATCH 06/55] bug fix --- deeptools/getFragmentAndReadSize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deeptools/getFragmentAndReadSize.py b/deeptools/getFragmentAndReadSize.py index 374ae86c6..47c8f75be 100644 --- a/deeptools/getFragmentAndReadSize.py +++ b/deeptools/getFragmentAndReadSize.py @@ -55,8 +55,8 @@ def getFragmentLength_worker(chrom, start, end, bamFile, distanceBetweenBins): def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFile=None, - numberOfProcessors=None, verbose=False - binSize=50000, distanceBetweenBins=1000000): + binSize=50000, distanceBetweenBins=1000000, + numberOfProcessors=None, verbose=False): """ Estimates the fragment length and read length through sampling From 28215c8b5f7fd994202144632a63ebbe3536f96a Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Wed, 10 Feb 2016 14:06:49 +0100 Subject: [PATCH 07/55] computeGCBias already has a blacklist option, correctGCBias doesn't really need one. --- deeptools/computeGCBias.py | 2 +- deeptools/correctGCBias.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py index 8011cfe2e..0c6988766 100644 --- a/deeptools/computeGCBias.py +++ b/deeptools/computeGCBias.py @@ -22,7 +22,7 @@ def parse_arguments(args=None): - parentParser = parserCommon.getParentArgParse(binSize=False) + parentParser = parserCommon.getParentArgParse(binSize=False, blackList=False) requiredArgs = getRequiredArgs() parser = argparse.ArgumentParser( parents=[requiredArgs, parentParser], diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py index cec5fd2f4..e6dbbf821 100644 --- a/deeptools/correctGCBias.py +++ b/deeptools/correctGCBias.py @@ -21,7 +21,7 @@ def parse_arguments(args=None): - parentParser = parserCommon.getParentArgParse(binSize=True) + parentParser = parserCommon.getParentArgParse(binSize=True, blackList=False) requiredArgs = getRequiredArgs() parser = argparse.ArgumentParser( parents=[requiredArgs, parentParser], From 2e795e9ae9387ed4c4f468068bc2dc9eccab84b4 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 11 Feb 2016 10:48:22 +0100 Subject: [PATCH 08/55] All tools that need it now accept --blackListFileName --- deeptools/SES_scaleFactor.py | 6 +++--- deeptools/bamCompare.py | 6 +++--- deeptools/bamCoverage.py | 8 ++++---- deeptools/bamPEFragmentSize.py | 4 ++-- deeptools/bigwigCompare.py | 2 +- deeptools/computeGCBias.py | 26 +++---------------------- deeptools/computeMatrix.py | 4 ++-- deeptools/correctReadCounts.py | 8 ++++---- deeptools/countReadsPerBin.py | 14 +++++++------- deeptools/getFragmentAndReadSize.py | 4 ++-- deeptools/getScorePerBigWigBin.py | 4 ++-- deeptools/heatmapper.py | 10 +++++----- deeptools/mapReduce.py | 28 +++++++++++++-------------- deeptools/multiBamSummary.py | 2 +- deeptools/multiBigwigSummary.py | 2 +- deeptools/parserCommon.py | 2 +- deeptools/plotCoverage.py | 2 +- deeptools/plotFingerprint.py | 2 +- deeptools/writeBedGraph.py | 4 ++-- deeptools/writeBedGraph_bam_and_bw.py | 4 ++-- 20 files changed, 61 insertions(+), 81 deletions(-) diff --git a/deeptools/SES_scaleFactor.py b/deeptools/SES_scaleFactor.py index 743434634..12672bdaf 100644 --- a/deeptools/SES_scaleFactor.py +++ b/deeptools/SES_scaleFactor.py @@ -13,7 +13,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, normalizationLength, - avg_method='median', blackListFile=None, numberOfProcessors=1, + avg_method='median', blackListFileName=None, numberOfProcessors=1, verbose=False, chrsToSkip=[]): r""" Subdivides the genome into chunks to be analyzed in parallel @@ -41,7 +41,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, chrsToSkip : list name of the chromosomes to be excluded from the scale estimation. Usually the chrX is included. - blackListFile : str + blackListFileName : str BED file containing blacklisted regions Returns @@ -86,7 +86,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples, binLength=binLength, numberOfSamples=numberOfSamples, extendReads=False, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose, chrsToSkip=chrsToSkip) diff --git a/deeptools/bamCompare.py b/deeptools/bamCompare.py index 7397c227f..f70c61f16 100644 --- a/deeptools/bamCompare.py +++ b/deeptools/bamCompare.py @@ -167,7 +167,7 @@ def get_scale_factors(args): [bam1.filename, bam2.filename], args.sampleLength, args.numberOfSamples, 1, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, chrsToSkip=args.ignoreForNormalization) @@ -215,7 +215,7 @@ def get_scale_factors(args): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(bamfile, return_lengths=False, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if args.extendReads: @@ -297,7 +297,7 @@ def main(args=None): region=args.region, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, center_read=args.centerReads, diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index 01da07f82..3704f3d4e 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -116,7 +116,7 @@ def get_scale_factor(args): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=False, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if args.extendReads: @@ -181,7 +181,7 @@ def main(args=None): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=False, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose) if frag_len_dict is None: @@ -191,7 +191,7 @@ def main(args=None): binLength=args.binSize, stepSize=args.binSize, region=args.region, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, @@ -208,7 +208,7 @@ def main(args=None): binLength=args.binSize, stepSize=args.binSize, region=args.region, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index 3f5b6804b..98d933ac9 100644 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -54,7 +54,7 @@ def parse_arguments(): 'decreased. (default 1000000)', default=1000000, type=int) - parser.add_argument('--blackListFile', '-bl', + parser.add_argument('--blackListFileName', '-bl', help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", metavar="BED file", required=False) @@ -71,7 +71,7 @@ def parse_arguments(): def main(args=None): args = parse_arguments().parse_args(args) fragment_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=True, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, binSize=args.binSize, diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index f7bafa154..c5c9abc7b 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -98,7 +98,7 @@ def main(args=None): (args.bigwig2, 'bigwig')], args.outFileName, 0, FUNC, function_args, tileSize=args.binSize, region=args.region, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, format=args.outFileFormat, smoothLength=False, diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py index 0c6988766..fef9fd7ee 100644 --- a/deeptools/computeGCBias.py +++ b/deeptools/computeGCBias.py @@ -22,7 +22,7 @@ def parse_arguments(args=None): - parentParser = parserCommon.getParentArgParse(binSize=False, blackList=False) + parentParser = parserCommon.getParentArgParse(binSize=False, blackList=True) requiredArgs = getRequiredArgs() parser = argparse.ArgumentParser( parents=[requiredArgs, parentParser], @@ -96,26 +96,6 @@ def getRequiredArgs(): help='Number of sampling points to be considered.', type=int) - optional.add_argument( - '--filterOut', '-fo', - help='BED file containing genomic regions to be excluded ' - 'from the estimation of the correction. Such regions ' - 'usually contain repetitive regions and peaks that, if ' - 'included, would bias the correction. ' - 'It is recommended to filter out known repetitive regions ' - 'if multimappers (reads that map to more than one genomic ' - 'position) were excluded. In the case of ChIP-seq data, ' - 'it is recommended to first use ' - 'a peak caller to identify and filter out the identified peaks.\n\n' - 'Mappability tracks for different read lengths can be ' - 'found at: http://hgdownload.cse.ucsc.edu/gbdb/mm9/bbi/ ' - 'and http://hgdownload.cse.ucsc.edu/gbdb/hg19/bbi \n\n' - 'The script: scripts/mappabilityBigWig_to_unmappableBed.sh ' - 'can be used to get a BED file from the mappability ' - 'bigWig.', - type=argparse.FileType('r'), - metavar='BED file') - optional.add_argument('--extraSampling', help='BED file containing genomic regions for which ' 'extra sampling is required because they are ' @@ -637,8 +617,8 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N def main(args=None): args = parse_arguments().parse_args(args) # check if directory is writable - if args.filterOut: - filter_out_file = args.filterOut.name + if args.blackListFileName: + filter_out_file = args.BlackListFileName args.filterOut.close() else: filter_out_file = None diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 29f8da011..585d78de2 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -279,7 +279,7 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): '(e.g. micro satellites) that may bias the average ' 'values.') - optional.add_argument('--blackListFile', '-bl', + optional.add_argument('--blackListFileName', '-bl', help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", metavar="BED file", required=False) @@ -371,7 +371,7 @@ def main(args=None): hm = heatmapper.heatmapper() scores_file_list = args.scoreFileName - hm.computeMatrix(scores_file_list, bed_file, parameters, blackListFile=args.blackListFile, verbose=args.verbose) + hm.computeMatrix(scores_file_list, bed_file, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose) if args.sortRegions != 'no': hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions) diff --git a/deeptools/correctReadCounts.py b/deeptools/correctReadCounts.py index 7ea53df0b..81bf11246 100644 --- a/deeptools/correctReadCounts.py +++ b/deeptools/correctReadCounts.py @@ -64,7 +64,7 @@ def computeCorrectedReadcounts(tileCoverage, args): def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentLength, outFileName, outFileFormat, outFileNameCorr=None, region=None, extendPairedEnds=True, - numberOfProcessors=1, Nsigmas=2, maxSignalRatio=10, blackListFile=None, verbose=False): + numberOfProcessors=1, Nsigmas=2, maxSignalRatio=10, blackListFileName=None, verbose=False): bam1 = writeBedGraph.openBam(bamFilesList[0]) bam2 = writeBedGraph.openBam(bamFilesList[1]) @@ -81,7 +81,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL numberOfSamples, defaultFragmentLength, 1, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose) @@ -153,7 +153,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL funcArgs, tileSize=binLength, region=region, format=outFileFormat, zerosToNans=False, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, extendPairedEnds=extendPairedEnds) @@ -164,7 +164,7 @@ def correctReadCounts(bamFilesList, binLength, numberOfSamples, defaultFragmentL funcArgs, tileSize=binLength, region=region, format=outFileFormat, zerosToNans=False, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, extendPairedEnds=extendPairedEnds) diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index f03bba053..9b5479f2b 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -59,7 +59,7 @@ class CountReadsPerBin(object): File handle of a bed file containing the regions for wich to compute the coverage. This option overrules ``binLength``, ``numberOfSamples`` and ``stepSize``. - blackListFile : str + blackListFileName : str A string containing a BED file with blacklist regions. extendReads : bool, int @@ -139,7 +139,7 @@ class CountReadsPerBin(object): def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfProcessors=1, verbose=False, region=None, bedFile=None, extendReads=False, - blackListFile=None, + blackListFileName=None, minMappingQuality=None, ignoreDuplicates=False, chrsToSkip=[], @@ -155,15 +155,15 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro self.binLength = binLength self.numberOfSamples = numberOfSamples self.blackList = None - self.blackListFile = blackListFile - if blackListFile: - self.blackList = mapReduce.BED_to_interval_tree(blackListFile) + self.blackListFileName = blackListFileName + if blackListFileName: + self.blackList = mapReduce.BED_to_interval_tree(blackListFileName) if extendReads and len(bamFilesList): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length frag_len_dict, read_len_dict = get_read_and_fragment_length(bamFilesList[0], return_lengths=False, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose) if extendReads is True: @@ -275,7 +275,7 @@ def run(self): self_=self, genomeChunkLength=chunkSize, bedFile=self.bedFile, - blackListFile=self.blackListFile, + blackListFileName=self.blackListFileName, region=self.region, numberOfProcessors=self.numberOfProcessors) diff --git a/deeptools/getFragmentAndReadSize.py b/deeptools/getFragmentAndReadSize.py index 47c8f75be..fb6c26fcb 100644 --- a/deeptools/getFragmentAndReadSize.py +++ b/deeptools/getFragmentAndReadSize.py @@ -54,7 +54,7 @@ def getFragmentLength_worker(chrom, start, end, bamFile, distanceBetweenBins): return reads -def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFile=None, +def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFileName=None, binSize=50000, distanceBetweenBins=1000000, numberOfProcessors=None, verbose=False): """ @@ -89,7 +89,7 @@ def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFile=No getFragmentLength_wrapper, chrom_sizes, genomeChunkLength=stepsize, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors, verbose=verbose) diff --git a/deeptools/getScorePerBigWigBin.py b/deeptools/getScorePerBigWigBin.py index 419f15b47..0ccabac1e 100644 --- a/deeptools/getScorePerBigWigBin.py +++ b/deeptools/getScorePerBigWigBin.py @@ -179,7 +179,7 @@ def getScorePerBin(bigWigFiles, binLength, numberOfProcessors=1, verbose=False, region=None, bedFile=None, - blackListFile=None, + blackListFileName=None, stepSize=None, chrsToSkip=[], out_file_for_raw_data=None): @@ -237,7 +237,7 @@ def getScorePerBin(bigWigFiles, binLength, chrom_sizes, genomeChunkLength=chunkSize, bedFile=bedFile, - blackListFile=blackListFile, + blackListFileName=blackListFileName, region=region, numberOfProcessors=numberOfProcessors) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 3a059bf40..1fbaae690 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -30,7 +30,7 @@ def __init__(self): self.regions = None self.blackList = None - def computeMatrix(self, score_file_list, regions_file, parameters, blackListFile=None, verbose=False): + def computeMatrix(self, score_file_list, regions_file, parameters, blackListFileName=None, verbose=False): """ Splits into multiple cores the computation of the scores @@ -56,7 +56,7 @@ def computeMatrix(self, score_file_list, regions_file, parameters, blackListFile exit("Length of region before the body has to be a multiple of " "--binSize\nCurrent value is {}\n".format(parameters['upstream'])) - regions, group_labels = self.get_regions_and_groups(regions_file, blackListFile=blackListFile, verbose=verbose) + regions, group_labels = self.get_regions_and_groups(regions_file, blackListFileName=blackListFileName, verbose=verbose) # args to pass to the multiprocessing workers mp_args = [] @@ -773,7 +773,7 @@ def matrix_avg(matrix, avgType='mean'): @staticmethod def get_regions_and_groups(regions_file, onlyMultiplesOf=1, default_group_name='genes', - blackListFile=None, + blackListFileName=None, verbose=None): """ Reads a bed file. @@ -794,8 +794,8 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, group_idx = 0 bed_file = deeptools.readBed.ReadBed(regions_file) blackList = None - if blackListFile is not None: - blackList = mapReduce.BED_to_interval_tree(blackListFile) + if blackListFileName is not None: + blackList = mapReduce.BED_to_interval_tree(blackListFileName) for ginterval in bed_file: totalintervals += 1 diff --git a/deeptools/mapReduce.py b/deeptools/mapReduce.py index 9362c5374..3dcb01826 100644 --- a/deeptools/mapReduce.py +++ b/deeptools/mapReduce.py @@ -7,7 +7,7 @@ def mapReduce(staticArgs, func, chromSize, genomeChunkLength=None, region=None, bedFile=None, - blackListFile=None, + blackListFileName=None, numberOfProcessors=4, verbose=False, self_=None): @@ -40,7 +40,7 @@ def mapReduce(staticArgs, func, chromSize, :param bedFile: Is a bed file is given, the args to the func to be called are extended to include a list of bed defined regions. - :param blackListFile: A list of regions to exclude from all computations. + :param blackListFileName: A list of regions to exclude from all computations. :param self_: In case mapreduce should make a call to an object the self variable has to be passed. """ @@ -83,8 +83,8 @@ def mapReduce(staticArgs, func, chromSize, if bed_in_region[-1].end > chromSize[0][1]: chromSize[0] = (chromSize[0][0], bed_in_region[-1].end) - if blackListFile: - blackList = BED_to_interval_tree(blackListFile) + if blackListFileName: + blackList = BED_to_interval_tree(blackListFileName) TASKS = [] # iterate over all chromosomes @@ -95,7 +95,7 @@ def mapReduce(staticArgs, func, chromSize, endPos = min(size, startPos + genomeChunkLength) # Reject a chunk if it overlaps - if blackListFile: + if blackListFileName: if blOverlap(blackList, [chrom, startPos, endPos]): continue @@ -263,15 +263,15 @@ def blOverlap(t, chunk): chrom = chunk[0] if chrom not in t.keys(): - chrom2 = "chr" + chrom - if chrom2 in t.keys(): - chrom = chrom2 - elif len(chrom) > 3: - chrom2 = chrom[3:] - if chrom2 in t.keys(): - chrom = chrom2 - else: - return False + if chrom.startswith("chr"): + chrom = chrom[3:] + elif chrom == "MT": + chrom = "chrM" + else: + chrom = "chr" + chrom + + if chrom not in t.keys(): + return False if len(t[chrom].find(chunk[1], chunk[2])): return True diff --git a/deeptools/multiBamSummary.py b/deeptools/multiBamSummary.py index 2ecf30400..153e18aab 100644 --- a/deeptools/multiBamSummary.py +++ b/deeptools/multiBamSummary.py @@ -206,7 +206,7 @@ def main(args=None): verbose=args.verbose, region=args.region, bedFile=bed_regions, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index da0403b19..758dd066b 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -206,7 +206,7 @@ def main(args=None): num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, stepSize=args.binSize + args.distanceBetweenBins, verbose=args.verbose, diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index daeb48a1c..596ef6b12 100644 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -207,7 +207,7 @@ def getParentArgParse(args=None, binSize=True, blackList=True): type=genomicRegion) if blackList: - optional.add_argument('--blackListFile', '-bl', + optional.add_argument('--blackListFileName', '-bl', help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.", metavar="BED file", required=False) diff --git a/deeptools/plotCoverage.py b/deeptools/plotCoverage.py index eb4adb16c..cba94b3de 100644 --- a/deeptools/plotCoverage.py +++ b/deeptools/plotCoverage.py @@ -128,7 +128,7 @@ def main(args=None): numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, region=args.region, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, ignoreDuplicates=args.ignoreDuplicates, diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py index 9d46dadde..5ed792345 100644 --- a/deeptools/plotFingerprint.py +++ b/deeptools/plotFingerprint.py @@ -139,7 +139,7 @@ def main(args=None): args.bamfiles, args.binSize, args.numberOfSamples, - blackListFile=args.blackListFile, + blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, region=args.region, diff --git a/deeptools/writeBedGraph.py b/deeptools/writeBedGraph.py index e32bc7dcb..d5ccb474d 100644 --- a/deeptools/writeBedGraph.py +++ b/deeptools/writeBedGraph.py @@ -88,7 +88,7 @@ class WriteBedGraph(cr.CountReadsPerBin): """ - def run(self, func_to_call, func_args, out_file_name, blackListFile=None, format="bedgraph", smoothLength=0): + def run(self, func_to_call, func_args, out_file_name, blackListFileName=None, format="bedgraph", smoothLength=0): r""" Given a list of bamfiles, a function and a function arguments, this method writes a bedgraph file (or bigwig) file @@ -134,7 +134,7 @@ def run(self, func_to_call, func_args, out_file_name, blackListFile=None, format self_=self, genomeChunkLength=genome_chunk_length, region=self.region, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=self.numberOfProcessors) # concatenate intermediary bedgraph files diff --git a/deeptools/writeBedGraph_bam_and_bw.py b/deeptools/writeBedGraph_bam_and_bw.py index 1a73dc70f..250bc3733 100644 --- a/deeptools/writeBedGraph_bam_and_bw.py +++ b/deeptools/writeBedGraph_bam_and_bw.py @@ -147,7 +147,7 @@ def writeBedGraph_worker( def writeBedGraph( bamOrBwFileList, outputFileName, fragmentLength, - func, funcArgs, tileSize=25, region=None, blackListFile=None, numberOfProcessors=None, + func, funcArgs, tileSize=25, region=None, blackListFileName=None, numberOfProcessors=None, format="bedgraph", extendPairedEnds=True, missingDataAsZero=False, smoothLength=0, fixed_step=False): r""" @@ -207,7 +207,7 @@ def writeBedGraph( chromNamesAndSize, genomeChunkLength=genomeChunkLength, region=region, - blackListFile=blackListFile, + blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors) # concatenate intermediary bedgraph files From 90a2787e37f6286e2b0f2c8c05ac4c8081a21f00 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 11 Feb 2016 11:42:26 +0100 Subject: [PATCH 09/55] bug fix --- deeptools/countReadsPerBin.py | 2 +- deeptools/heatmapper.py | 2 +- deeptools/mapReduce.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index 9b5479f2b..871ccb17f 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -157,7 +157,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro self.blackList = None self.blackListFileName = blackListFileName if blackListFileName: - self.blackList = mapReduce.BED_to_interval_tree(blackListFileName) + self.blackList = mapReduce.BED_to_interval_tree(open(blackListFileName, "r")) if extendReads and len(bamFilesList): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 1fbaae690..2ba18a74a 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -795,7 +795,7 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, bed_file = deeptools.readBed.ReadBed(regions_file) blackList = None if blackListFileName is not None: - blackList = mapReduce.BED_to_interval_tree(blackListFileName) + blackList = mapReduce.BED_to_interval_tree(open(blackListFileName, "r")) for ginterval in bed_file: totalintervals += 1 diff --git a/deeptools/mapReduce.py b/deeptools/mapReduce.py index 3dcb01826..d3359fe9d 100644 --- a/deeptools/mapReduce.py +++ b/deeptools/mapReduce.py @@ -84,7 +84,7 @@ def mapReduce(staticArgs, func, chromSize, chromSize[0] = (chromSize[0][0], bed_in_region[-1].end) if blackListFileName: - blackList = BED_to_interval_tree(blackListFileName) + blackList = BED_to_interval_tree(open(blackListFileName, "r")) TASKS = [] # iterate over all chromosomes From 0620351db823ae599050cba197dfeeb8281cc23b Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 11 Feb 2016 16:13:51 +0100 Subject: [PATCH 10/55] Need to subtract blacklist regions, not just reject on overlaps. This is due to the genomic chunk length --- deeptools/bamCoverage.py | 1 + deeptools/countReadsPerBin.py | 6 +- deeptools/heatmapper.py | 2 +- deeptools/mapReduce.py | 115 +++++++++++++++++++++------------- 4 files changed, 76 insertions(+), 48 deletions(-) diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index 3704f3d4e..5560c1dd6 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -221,6 +221,7 @@ def main(args=None): ) wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName, + blackListFileName=args.blackListFileName, format=args.outFileFormat, smoothLength=args.smoothLength) diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index 871ccb17f..859669f31 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -374,14 +374,14 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None): regionsToConsider = [] if bed_regions_list is not None: for chrom, start, end in bed_regions_list: - if mapReduce.blOverlap(self.blackList, [chrom, start, end]): + if mapReduce.blOverlap(self.blackList, chrom, [start, end]): continue regionsToConsider.append((chrom, start, end, end - start)) else: for i in xrange(start, end, self.stepSize): if i + self.binLength > end: break - if mapReduce.blOverlap(self.blackList, [chrom, i, i + self.binLength]): + if mapReduce.blOverlap(self.blackList, chrom, [i, i + self.binLength]): continue regionsToConsider.append((chrom, i, i + self.binLength, self.binLength)) @@ -466,7 +466,7 @@ def get_coverage_of_region(self, bamHandle, chrom, start, end, tileSize, coverage = np.zeros(vector_length, dtype='float64') # Return 0 for overlap with a blacklisted region - if mapReduce.blOverlap(self.blackList, [chrom, start, end]): + if mapReduce.blOverlap(self.blackList, chrom, [start, end]): return coverge start_time = time.time() diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 2ba18a74a..f22b82f16 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -818,7 +818,7 @@ def get_regions_and_groups(regions_file, onlyMultiplesOf=1, continue # Exclude blacklist overlaps - if mapReduce.blOverlap(blackList, [ginterval.chrom, ginterval.start, ginterval.end]): + if mapReduce.blOverlap(blackList, ginterval.chrom, [ginterval.start, ginterval.end]): continue # if the list of regions is to big, only diff --git a/deeptools/mapReduce.py b/deeptools/mapReduce.py index d3359fe9d..33cf7285f 100644 --- a/deeptools/mapReduce.py +++ b/deeptools/mapReduce.py @@ -41,6 +41,7 @@ def mapReduce(staticArgs, func, chromSize, called are extended to include a list of bed defined regions. :param blackListFileName: A list of regions to exclude from all computations. + Note that this has genomeChunkLength resolution... :param self_: In case mapreduce should make a call to an object the self variable has to be passed. """ @@ -96,45 +97,48 @@ def mapReduce(staticArgs, func, chromSize, # Reject a chunk if it overlaps if blackListFileName: - if blOverlap(blackList, [chrom, startPos, endPos]): - continue - - if self_ is not None: - argsList = [self_] + regions = blSubtract(blackList, chrom, [startPos, endPos]) else: - argsList = [] - argsList.extend([chrom, startPos, endPos]) - # add to argument list the static list received the the function - argsList.extend(staticArgs) - - # if a bed file is given, append to the TASK list, - # a list of bed regions that overlap with the - # current genomeChunk. - if bedFile: - # this method to get the bedFile regions may seem - # cumbersome but I (fidel) think is better to - # balance the load between multiple processors. - # This method first partitions the genome into smaller - # chunks and then, for each chunk, the list of - # regions overlapping the chunk interval is added. - # This is preferable to sending each worker a - # single region because of the overhead of initiating - # the data. - bed_regions_list = [] - for bed_region in bed_interval_tree[chrom].find(startPos, endPos): - # start + 1 is used to avoid regions that may overlap - # with two genomeChunks to be counted twice. Such region - # is only added for the genomeChunk that contains the start - # of the bed region. - if bed_region.start < endPos < bed_region.end: + regions = [[startPos, endPos]] + + for reg in regions: + if self_ is not None: + argsList = [self_] + else: + argsList = [] + + argsList.extend([chrom, reg[0], reg[1]]) + # add to argument list the static list received the the function + argsList.extend(staticArgs) + + # if a bed file is given, append to the TASK list, + # a list of bed regions that overlap with the + # current genomeChunk. + if bedFile: + # this method to get the bedFile regions may seem + # cumbersome but I (fidel) think is better to + # balance the load between multiple processors. + # This method first partitions the genome into smaller + # chunks and then, for each chunk, the list of + # regions overlapping the chunk interval is added. + # This is preferable to sending each worker a + # single region because of the overhead of initiating + # the data. + bed_regions_list = [] + for bed_region in bed_interval_tree[chrom].find(reg[0], reg[1]): + # start + 1 is used to avoid regions that may overlap + # with two genomeChunks to be counted twice. Such region + # is only added for the genomeChunk that contains the start + # of the bed region. + if bed_region.start < endPos < bed_region.end: + continue + bed_regions_list.append([chrom, bed_region.start, bed_region.end]) + if len(bed_regions_list) == 0: continue - bed_regions_list.append([chrom, bed_region.start, bed_region.end]) - if len(bed_regions_list) == 0: - continue - # add to argument list, the position of the bed regions to use - argsList.append(bed_regions_list) + # add to argument list, the position of the bed regions to use + argsList.append(bed_regions_list) - TASKS.append(tuple(argsList)) + TASKS.append(tuple(argsList)) if len(TASKS) > 1 and numberOfProcessors > 1: if verbose: @@ -236,7 +240,7 @@ def BED_to_interval_tree(BED_file): fields = line.strip().split() chrom, start_bed, end_bed, = fields[0], int(fields[1]), int(fields[2]) - if chrom not in bed_interval_tree: + if chrom not in bed_interval_tree.keys(): bed_interval_tree[chrom] = IntervalTree() # skip if a region overlaps with a region already seen @@ -249,19 +253,18 @@ def BED_to_interval_tree(BED_file): return bed_interval_tree -def blOverlap(t, chunk): +def blOverlap(t, chrom, chunk): """ Test for an overlap between an IntervalTree and a given genomic chunk. This attempts to account for differences in chromosome naming. - Returns True on an overlap, otherwise false + Returns the overlaps """ if t is None: return False - chrom = chunk[0] if chrom not in t.keys(): if chrom.startswith("chr"): chrom = chrom[3:] @@ -271,9 +274,33 @@ def blOverlap(t, chunk): chrom = "chr" + chrom if chrom not in t.keys(): - return False + return None + + return t[chrom].find(chunk[0], chunk[1]) + + +def blSubtract(t, chrom, chunk): + """ + If a genomic region overlaps with a blacklisted region, then subtract that region out - if len(t[chrom].find(chunk[1], chunk[2])): - return True + returns a list of lists + """ + + if t is None: + return [chunk] + + overlaps = blOverlap(t, chrom, chunk) + if len(overlaps) > 0: + output = [] + for o in overlaps: + if chunk[1] <= chunk[0]: + break + if chunk[0] < o.start: + output.append([chunk[0], o.start]) + chunk[0] = o.end + if chunk[0] < chunk[1]: + output.append([chunk[0], chunk[1]]) + else: + output = [chunk] - return False + return output From f9684353a04c15d52af701c7bacdd4582b469b8f Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 12 Feb 2016 15:14:39 +0100 Subject: [PATCH 11/55] Right, we're returning a list of a given length... --- deeptools/mapReduce.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deeptools/mapReduce.py b/deeptools/mapReduce.py index 33cf7285f..360701be5 100644 --- a/deeptools/mapReduce.py +++ b/deeptools/mapReduce.py @@ -263,7 +263,7 @@ def blOverlap(t, chrom, chunk): """ if t is None: - return False + return [] if chrom not in t.keys(): if chrom.startswith("chr"): @@ -274,7 +274,7 @@ def blOverlap(t, chrom, chunk): chrom = "chr" + chrom if chrom not in t.keys(): - return None + return [] return t[chrom].find(chunk[0], chunk[1]) From a72e85508002c042995ce483efbc49112cc9abac Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 12 Feb 2016 16:18:20 +0100 Subject: [PATCH 12/55] For bamCoverage, the number of reads in blacklisted regions is now subtracted from the total read count --- deeptools/bamCoverage.py | 2 ++ deeptools/parserCommon.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index 5560c1dd6..0f7d95397 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -110,6 +110,8 @@ def get_scale_factor(args): scale_factor = args.scaleFactor bam_handle = bamHandler.openBam(args.bam) bam_mapped = parserCommon.bam_total_reads(bam_handle, args.ignoreForNormalization) + blacklisted = parserCommon.bam_blacklisted_reads(bam_handle, args.ignoreForNormalization, args.blackListFileName) + bam_mapped -= blacklisted if args.normalizeTo1x: # try to guess fragment length if the bam file contains paired end reads diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 596ef6b12..97370ce6d 100644 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -649,3 +649,26 @@ def bam_total_reads(bam_handle, chroms_to_ignore): tot_mapped_reads = bam_handle.mapped return tot_mapped_reads + + +def bam_blacklisted_reads(bam_handle, chroms_to_ignore, blackListFileName=None): + blacklisted = 0 + if blackListFileName is None: + return blacklisted + + import pysam + import deeptools.mapReduce as mapReduce + + # Get the chromosome lengths + chromLens = {} + for line in pysam.idxstats(bam_handle.filename): + chrom, _len, nmapped, _nunmapped = line.split('\t') + chromLens[chrom] = int(_len) + + bl = mapReduce.BED_to_interval_tree(open(blackListFileName, "r")) + for chrom in bl.keys(): + if not chroms_to_ignore or chrom not in chroms_to_ignore: + for reg in bl[chrom].find(0, chromLens[chrom]): + blacklisted += bam_handle.count(reference=chrom, start=reg.start, end=reg.end) + + return blacklisted From a427662a691d918ce51996d61920cb2996ccd9b0 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 12 Feb 2016 16:21:27 +0100 Subject: [PATCH 13/55] As with bamCoverage, bamCompare now excludes reads from blacklisted regions before normalization --- deeptools/bamCompare.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deeptools/bamCompare.py b/deeptools/bamCompare.py index f70c61f16..def1c13e5 100644 --- a/deeptools/bamCompare.py +++ b/deeptools/bamCompare.py @@ -157,7 +157,12 @@ def get_scale_factors(args): bam2 = bamHandler.openBam(args.bamfile2) bam1_mapped = parserCommon.bam_total_reads(bam1, args.ignoreForNormalization) + blacklisted = parserCommon.bam_blacklisted_reads(bam1, args.ignoreForNormalization, args.blackListFileName) + bam1_mapped -= blacklisted + bam2_mapped = parserCommon.bam_total_reads(bam2, args.ignoreForNormalization) + blacklisted = parserCommon.bam_blacklisted_reads(bam2, args.ignoreForNormalization, args.blackListFileName) + bam2_mapped -= blacklisted if args.scaleFactors: scale_factors = map(float, args.scaleFactors.split(":")) From ac0a9e9674c8ee24d042d93e13bf407a4019c854 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Mon, 15 Feb 2016 23:36:17 +0100 Subject: [PATCH 14/55] add an --outFileNameData option to plotPCA. This should fix #231, though (A) this hasn't been tested in Galaxy and (B) I can't test the galaxy wrapper with planemo currently --- deeptools/plotPCA.py | 18 ++++++++++++++++++ galaxy/wrapper/plotPCA.xml | 15 +++++++++++++++ .../wrapper/test-data/plotPCA_result2.tabular | 3 +++ 3 files changed, 36 insertions(+) create mode 100644 galaxy/wrapper/test-data/plotPCA_result2.tabular diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py index 6c3d96f6f..e209bd8bd 100644 --- a/deeptools/plotPCA.py +++ b/deeptools/plotPCA.py @@ -72,6 +72,12 @@ def plotCorrelationArgs(): 'eps, pdf and svg.', choices=['png', 'pdf', 'svg', 'eps']) + optional.add_argument('--outFileNameData', + help='File name to save the data ' + 'underlying data for the average profile, e.g., ' + 'myProfile.tab.', + type=argparse.FileType('w')) + optional.add_argument('--version', action='version', version='%(prog)s {}'.format(__version__)) @@ -90,6 +96,18 @@ def main(args=None): plot_title=args.plotTitle, image_format=args.plotFileFormat) + if args.outFileNameData is not None: + import matplotlib + mlab_pca = matplotlib.mlab.PCA(corr.matrix) + n = len(corr.labels) + of = args.outFileNameData + of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels))) + for i in xrange(n): + of.write("{}".format(i+1)) + for v in mlab_pca.Wt[i,:]: + of.write("\t{}".format(v)) + of.write("\t{}\n".format(n * mlab_pca.fracs[i])) + if __name__ == "__main__": main() diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 5018ea8a4..13db76011 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -12,15 +12,22 @@ --plotTitle "$plotTitle" --plotFile "$outFileName" --plotFileFormat "$outFileFormat" + #if $outFileNameData + --outFileNameData "$matrix" + #end if ]]> + + + outFileNameData is True + @@ -29,6 +36,14 @@ + + + + + + + + Date: Tue, 16 Feb 2016 10:14:36 +0100 Subject: [PATCH 15/55] PEP8 and just use the eigenvalues from numpy (N.B., the underlying matrix is scaled beforehand) --- deeptools/correlation.py | 2 +- deeptools/plotPCA.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deeptools/correlation.py b/deeptools/correlation.py index dd9921256..0d53336c1 100644 --- a/deeptools/correlation.py +++ b/deeptools/correlation.py @@ -440,7 +440,7 @@ def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False) prop={'size': 12}, markerscale=0.9) # Scree plot - eigenvalues = map(lambda x: x * n, mlab_pca.fracs) + eigenvalues = mlab_pca.s cumulative = [] c = 0 diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py index e209bd8bd..a19335eff 100644 --- a/deeptools/plotPCA.py +++ b/deeptools/plotPCA.py @@ -103,10 +103,10 @@ def main(args=None): of = args.outFileNameData of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels))) for i in xrange(n): - of.write("{}".format(i+1)) - for v in mlab_pca.Wt[i,:]: + of.write("{}".format(i + 1)) + for v in mlab_pca.Wt[i, :]: of.write("\t{}".format(v)) - of.write("\t{}\n".format(n * mlab_pca.fracs[i])) + of.write("\t{}\n".format(mlab_pca.s[i])) if __name__ == "__main__": From 4cc5032c2b5dca99987d92717b9c14b9a3481413 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 16 Feb 2016 14:46:39 +0100 Subject: [PATCH 16/55] Add the --filterRNAstrand option to the bamCoverage wrapper --- galaxy/wrapper/bamCoverage.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/galaxy/wrapper/bamCoverage.xml b/galaxy/wrapper/bamCoverage.xml index f3eb2d166..89f5292c0 100755 --- a/galaxy/wrapper/bamCoverage.xml +++ b/galaxy/wrapper/bamCoverage.xml @@ -46,6 +46,10 @@ #if str($advancedOpt.ignoreForNormalization).strip() != '': --ignoreForNormalization $advancedOpt.ignoreForNormalization #end if + + #if $advancedOpt.filterRNAstrand != "no": + --filterRNAstrand '$advancedOpt.filterRNAstrand' + #end of #end if ]]> @@ -105,6 +109,14 @@ label="Determine nucleosome positions from MNase-seq data" help="Only the 3 nucleotides at the center of each fragment are counted. The fragment ends are defined by the two mate reads. *NOTE*: Requires paired-end data." /> + + + + + + From a0c2a189c8df081fa35c4dd8761027503b70ef09 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 16 Feb 2016 15:05:02 +0100 Subject: [PATCH 17/55] It helps if one spells 'if' correctly :( --- galaxy/wrapper/bamCoverage.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/bamCoverage.xml b/galaxy/wrapper/bamCoverage.xml index 89f5292c0..6ec09085b 100755 --- a/galaxy/wrapper/bamCoverage.xml +++ b/galaxy/wrapper/bamCoverage.xml @@ -49,7 +49,7 @@ #if $advancedOpt.filterRNAstrand != "no": --filterRNAstrand '$advancedOpt.filterRNAstrand' - #end of + #end if #end if ]]> From 9d83f16f71cde685084657d51a948b9e2d1f79fc Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 16 Feb 2016 15:18:15 +0100 Subject: [PATCH 18/55] Add a test for the filterRNAstrand option to galaxy --- galaxy/wrapper/bamCoverage.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/galaxy/wrapper/bamCoverage.xml b/galaxy/wrapper/bamCoverage.xml index 6ec09085b..b29ba026e 100755 --- a/galaxy/wrapper/bamCoverage.xml +++ b/galaxy/wrapper/bamCoverage.xml @@ -165,6 +165,14 @@ + + + + + + + + Date: Tue, 16 Feb 2016 16:01:32 +0100 Subject: [PATCH 19/55] Update bamCoverage wrapper and include the bigWig file for the last test --- galaxy/wrapper/bamCoverage.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/galaxy/wrapper/bamCoverage.xml b/galaxy/wrapper/bamCoverage.xml index 8c99dcae0..58fb84ddb 100644 --- a/galaxy/wrapper/bamCoverage.xml +++ b/galaxy/wrapper/bamCoverage.xml @@ -47,7 +47,7 @@ --ignoreForNormalization $advancedOpt.ignoreForNormalization #end if - #if $advancedOpt.filterRNAstrand != "no": + #if str($advancedOpt.filterRNAstrand) != 'no': --filterRNAstrand '$advancedOpt.filterRNAstrand' #end if #end if @@ -169,7 +169,7 @@ - + From fdc7e4aebb42f2ff11ae04cc7ae7906c3bda3cdc Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 16 Feb 2016 16:09:26 +0100 Subject: [PATCH 20/55] Forgot to add the file... --- galaxy/wrapper/test-data/bamCoverage_result5.bw | Bin 0 -> 2892 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 galaxy/wrapper/test-data/bamCoverage_result5.bw diff --git a/galaxy/wrapper/test-data/bamCoverage_result5.bw b/galaxy/wrapper/test-data/bamCoverage_result5.bw new file mode 100644 index 0000000000000000000000000000000000000000..ab702859ec7728adfc3627542582ce90edc52c01 GIT binary patch literal 2892 zcmds3dsNcd7S_}l^QtrBgYk+ujizIo)~JCgpfxkqD$_KbCea#oe2xz?6cr@RXF4!u zW}0YOM)|VJ|$MoN)67CD+ zdTXNdGj^Z*XleC4wCNi1IsIhky^Bk#{iP*OF`MNAXx3(F{w$rw{oS-RndF$*lY1a; zoAEZYzbKe$*obj%`Fk)+nIc)mb5d#+s#6|6O$>%i6H3mN?Jc*hOn=yU$uso1vZERU-c?mBv@sh2@;nv`5x5lC#G+s*_eaJ3PY`s+CU}Mw4&4gP+z%5uDSEX9R z;kgUvj#ZYdBa^tR8-9CJzADbmvYJsmqP$y~VP8r_mW2@uRyt8P#Uo-U7!7iP5^hfO zP#H8!1)0xJ%91|@PP^~^#18scw6PqYQ?^Q#CFx7rUb`GlJ{9Z%9Yo2W2jDYu3){!4%+Ff9vGOWMm9m1Jc2lwh4Rb5yNH!^E=5TgG$y*aP#Rs8&*qdtcn zABw!pr_&z2r@_0|u^CY2u5=!2qDzvGOJB@acRPboa4@$q+N5He?| zphK_{5cbUqbQ1gA^fJ-M=lyc5k1YJ44^D)Y3Ika88?V>gXVngn^k#u7^LI^?dQ!2O zRJ@q8RCsgU3GOfj?+4Clv#N<@dL+GXzai&fi2fj#OZoLB&jnzfvSYNEN zzjCC?{+Npcq3y`10mm^m6j>Q&f{QjmlEOAt)C?2BsQ?76?>&utD{J2SaCElLj>QWX z!VI{c02fa{OSzl%W&>LKVe=_D#BpPI8e(}Ag@y3A&veJ0#Khv~G3P1NKvF^!E91Pxb)GvY-zTIUSCjt@7@AN@YMz-G>n;SwLb+>4$^ zaG1*08egoU>SWVY*dtmUwmRr#rATmbsjwKtGBacAGLo$X0ZR2wYI~jZ6ZOtf;9MGr z2qmx_M-7ZR;Wdphi5O~~>!T1elFbC#2;LB5wEP?@ZwFkk2P?jwzdfyNyde9)rcWs<8^lY2~0E4M6$goAv-RPho zS0ri z9R&^8z&L0Y57kvq{j|$++G)O2mBSU-_gs936y|339u3XxzfR$XXNIZ}sY5~5Qa;t$ zxw$b~e%6R`SB6DJXz&{P3X_D!vyP0*!1H&+%OpfGSw^o_UR+SpGanysDOSlcD9WOg z`8-5%1e)Y7iFk9yv=LBk9-jzx9ZuROFL6JB{H)OrIR~X&q?%JwdMYcY!iGTnlZ^fsoD-NK`SN!(uj^pMb=$*{tD|?L$-y~eMrYSaVniz zZb@z8ye!c%x1$i%UsWYX7DG$QlT~rb^V5V8^DrYF@NHKv`{*3AC|g-VWYZc z*OfKmW%`Yd$4i^^kxu1o-jiINI4`ONSmvT(tx38nbTjIim-3bSJu z+Ym@~i$+dDDM_(4v;mKiF-^a4yI`I)MtwAZ=t<~XcdALjb-iA}@L=M$E5F6wZ8=4k zqU!21!52~o?stdQ4iHi`B_}DMZ)-!IZ-eXuWL94QjPZwD#Mq8 zI){W!F}B_s<@--z+*HF&kp*o!8v7I96wJTdCa3H}O0xA2WHZE62MziiGCdCJRg&wk zbx@a#ZExZW?dS$p!@V#!+vlqf*FpskUL4BZa(t)$-jZG8*`G7%AANfi8sk5D!|SIb zz$dN0?BAg#J7>th2smSE&}@~#c+Y+tcjjtefvQIVoA`*ybyS76idITXw?_;yAfk;o z9_6mCM8>mV!M(+{kLBy8u`#B0LpSfiu z?QL?IiM^Ud?(-9d>^&dBeQIUz3Tg8zh(D8z-?p_uw&xsd23i?>y;`zy)~>K13jne= z$&;^Hw771}Pk~n#z7#ic_wWzMNM~e@ztEFJtgkPrUmYd>6M=u@ip3L__hpo;Up?3r zw{~OEJ;sueUG&yeRbTW)>ORk^joA~Pc2Qd~8<7+P&n>`bJ^OKKz@9^+f{LibHAd}k z6S;r}3hH!KaFZm->k1Hle5{}%JVj7p*A?eQI^BAVRZlmcD@zZm9cH{dYfv0Qrn#^M zw^ye+Aum8(=Pl#gJ6wFnC`*2_khK>@3uWf2di`3u_3Mh8R80Gno9YQRuIpa(?cpZ~|L|e@BEU|~!XCGTf~wjj!1R9k&hLe4dtdnp)dh}J*=>aMQk_>v zQU=HC7x<1D%IOiPs=l&wXti?Wz?ZXkn5`8&LB;LaD8xp}qbaT+n|Bq^ZKCLKNeJla p=4_Pnp5&=l^&%qY@c--RY3cqsR{uSEFF(9@ZMOWuvD3Vx{{_00Ax;1Q literal 0 HcmV?d00001 From 490c883bbc439c0e178e45c611802900c01af702 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Wed, 17 Feb 2016 14:19:33 +0100 Subject: [PATCH 21/55] Initial support for unscaled 5 and 3 prime regions in computeMatrix. This is still unsupported in plotProfile and plotHeatmap --- deeptools/computeMatrix.py | 17 ++++++++++++++- deeptools/heatmapper.py | 44 +++++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 3ff40a899..c98f2489f 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -176,6 +176,17 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): 'of the given regions. If the ' 'regions are genes, this would be the distance ' 'downstream of the transcription end site.') + optional.add_argument("--unscaled5prime", + default=0, + type=int, + help='Number of bases at the 5-prime end of the ' + 'region to exclude from scaling. By default, ' + 'each region is scaled to a given length (see the --regionBodyLength option). In some cases it is useful to look at unscaled signals around region boundaries, so this setting specifies the number of unscaled bases on the 5-prime end of each boundary.') + optional.add_argument("--unscaled3prime", + default=0, + type=int, + help='Like --unscaled3prime, but for the 3-prime ' + 'end.') elif case == 'reference-point': optional.add_argument('--referencePoint', @@ -191,6 +202,8 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]): # set region body length to zero for reference point mode optional.add_argument('--regionBodyLength', help=argparse.SUPPRESS, default=0, type=int) + optional.add_argument('--unscaled5prime', default=0, type=int, help=argparse.SUPPRESS) + optional.add_argument('--unscaled3prime', default=0, type=int, help=argparse.SUPPRESS) optional.add_argument('--beforeRegionStartLength', '-b', '--upstream', default=500, type=int, @@ -360,7 +373,9 @@ def main(args=None): 'nan after end': args.nanAfterEnd, 'proc number': args.numberOfProcessors, 'sort regions': args.sortRegions, - 'sort using': args.sortUsing + 'sort using': args.sortUsing, + 'unscaled 5 prime': args.unscaled5prime, + 'unscaled 3 prime': args.unscaled3prime } hm = heatmapper.heatmapper() diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index fb9b9139c..198171ce3 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -54,6 +54,14 @@ def computeMatrix(self, score_file_list, regions_file, parameters, verbose=False exit("Length of region before the body has to be a multiple of " "--binSize\nCurrent value is {}\n".format(parameters['upstream'])) + if parameters['unscaled 5 prime'] % parameters['bin size'] > 0: + exit("Length of the unscaled 5 prime region has to be a multiple of " + "--binSize\nCurrent value is {}\n".format(parameters['unscaled 5 prime'])) + + if parameters['unscaled 3 prime'] % parameters['bin size'] > 0: + exit("Length of the unscaled 5 prime region has to be a multiple of " + "--binSize\nCurrent value is {}\n".format(parameters['unscaled 3 prime'])) + regions, group_labels = self.get_regions_and_groups(regions_file, verbose=verbose) # args to pass to the multiprocessing workers @@ -199,6 +207,7 @@ def compute_sub_matrix_worker(score_file_list, regions, # given by the user, times the number of score files matrix_cols = len(score_file_list) * \ ((parameters['downstream'] + + parameters['unscaled 5 prime'] + parameters['unscaled 3 prime'] + parameters['upstream'] + parameters['body']) / parameters['bin size']) @@ -212,38 +221,43 @@ def compute_sub_matrix_worker(score_file_list, regions, for feature in regions: # print some information if parameters['body'] > 0 and \ - feature.end - feature.start < parameters['bin size']: + feature.end-feature.start-(parameters['unscaled 5 prime'] + parameters['unscaled 3 prime']) < parameters['bin size']: if parameters['verbose']: - sys.stderr.write("A region that is shorter than " - "then bin size was found: " + sys.stderr.write("A region that is shorter than the bin size (possibly only after accounting for unscaled regions) was found: " "({}) {} {}:{}:{}. Skipping...\n".format((feature.end - feature.start), feature.name, feature.chrom, feature.start, feature.end)) - coverage = np.zeros(matrix_cols) coverage[:] = np.nan - else: if feature.strand == '-': a = parameters['upstream'] / parameters['bin size'] b = parameters['downstream'] / parameters['bin size'] - start = feature.end - end = feature['start'] + d = parameters['unscaled 5 prime'] / parameters['bin size'] + c = parameters['unscaled 3 prime'] / parameters['bin size'] + start = feature.end - parameters['unscaled 5 prime'] + end = feature['start'] + parameters['unscaled 3 prime'] else: b = parameters['upstream'] / parameters['bin size'] a = parameters['downstream'] / parameters['bin size'] - start = feature['start'] - end = feature.end + c = parameters['unscaled 5 prime'] / parameters['bin size'] + d = parameters['unscaled 3 prime'] / parameters['bin size'] + start = feature['start'] + parameters['unscaled 5 prime'] + end = feature.end - parameters['unscaled 3 prime'] # build zones: # zone0: region before the region start, - # zone1: the body of the region (not always present) - # zone2: the region from the end of the region downstream + # zone1: unscaled 5 prime region + # zone2: the body of the region (not always present) + # zone3: unscaled 3 prime region + # zone4: the region from the end of the region downstream # the format for each zone is: start, end, number of bins if parameters['body'] > 0: zones = \ [(feature.start - b * parameters['bin size'], feature.start, b), + (feature.start, feature.start + c * parameters['bin size'], c), (feature.start, feature.end, parameters['body'] / parameters['bin size']), + (feature.end - d * parameters['bin size'], feature.end, d), (feature.end, feature.end + a * parameters['bin size'], a)] elif parameters['ref point'] == 'TES': # around TES zones = [(end - b * parameters['bin size'], end, b), @@ -483,8 +497,10 @@ def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=Fa no regions are skipped. zones: array as follows zone0: region before the region start, - zone1: the body of the region (not always present) - zone2: the region from the end of the region downstream + zone1: 5' unscaled region (if present) + zone2: the body of the region (not always present) + zone3: 3' unscaled region (if present) + zone4: the region from the end of the region downstream each zone is a tuple containing start, end, and number of bins @@ -665,6 +681,8 @@ def save_tabulated_values(self, file_handle, reference_point_label='TSS', start_ w = self.parameters['bin size'] b = self.parameters['upstream'] a = self.parameters['downstream'] + c = self.parameters['unscaled 5 prime'] + d = self.parameters['unscaled 3 prime'] m = self.parameters['body'] if b < 1e5: From 0031306b49abdaa3dc8c4635db7d5050f6fdfb8b Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Wed, 17 Feb 2016 15:59:12 +0100 Subject: [PATCH 22/55] Fix labels in plotProfile and add support to plotHeatmap --- deeptools/heatmapper.py | 12 ++++++----- deeptools/heatmapper_utilities.py | 26 +++++++++++++++++++---- deeptools/plotHeatmap.py | 35 ++++++++++++++++++++++++------- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 198171ce3..8e04b7f04 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -221,10 +221,10 @@ def compute_sub_matrix_worker(score_file_list, regions, for feature in regions: # print some information if parameters['body'] > 0 and \ - feature.end-feature.start-(parameters['unscaled 5 prime'] + parameters['unscaled 3 prime']) < parameters['bin size']: + feature.end-feature.start-parameters['unscaled 5 prime']-parameters['unscaled 3 prime'] < parameters['bin size']: if parameters['verbose']: sys.stderr.write("A region that is shorter than the bin size (possibly only after accounting for unscaled regions) was found: " - "({}) {} {}:{}:{}. Skipping...\n".format((feature.end - feature.start), + "({}) {} {}:{}:{}. Skipping...\n".format((feature.end - feature.start - parameters['unscaled 5 prime'] - parameters['unscaled 3 prime']), feature.name, feature.chrom, feature.start, feature.end)) coverage = np.zeros(matrix_cols) @@ -745,11 +745,13 @@ def save_matrix_values(self, file_name): groups_len[i])) fh.write("#{}\n".format("\t".join(info))) # add to header the x axis values - fh.write("#downstream:{}\tupstream:{}\tbody:{}\tbin size:{}\n".format( + fh.write("#downstream:{}\tupstream:{}\tbody:{}\tbin size:{}\tunscaled 5 prime:{}\tunscaled 3 prime:{}\n".format( self.parameters['downstream'], self.parameters['upstream'], self.parameters['body'], - self.parameters['bin size'])) + self.parameters['bin size'], + self.parameters['unscaled 5 prime'], + self.parameters['unscaled 3 prime'])) fh.close() # reopen again using append mode @@ -922,7 +924,7 @@ def get_num_individual_matrix_cols(self): of smaller matrices that are merged one after the other. """ - matrixCols = ((self.parameters['downstream'] + self.parameters['upstream'] + self.parameters['body']) / + matrixCols = ((self.parameters['downstream'] + self.parameters['upstream'] + self.parameters['body'] + self.parameters['unscaled 5 prime'] + self.parameters['unscaled 3 prime']) / self.parameters['bin size']) return matrixCols diff --git a/deeptools/heatmapper_utilities.py b/deeptools/heatmapper_utilities.py index 393ba1585..872e98b16 100644 --- a/deeptools/heatmapper_utilities.py +++ b/deeptools/heatmapper_utilities.py @@ -87,6 +87,14 @@ def getProfileTicks(hm, referencePointLabel, startLabel, endLabel): w = hm.parameters['bin size'] b = hm.parameters['upstream'] a = hm.parameters['downstream'] + try: + c = hm.parameters['unscaled 5 prime'] + except: + c = 0 + try: + d = hm.parameters['unscaled 3 prime'] + except: + d = 0 m = hm.parameters['body'] tickPlotAdj = 0.5 @@ -108,17 +116,27 @@ def getProfileTicks(hm, referencePointLabel, startLabel, endLabel): xtickslabel = [] # only if upstream region is set, add a x tick - if hm.parameters['upstream'] > 0: + if b > 0: xticks_values.append(b) xtickslabel.append('{0:.1f}'.format(-(float(b) / quotient))) + xtickslabel.append(startLabel) + # set the x tick for the body parameter, regardless if # upstream is 0 (not set) - xticks_values.append(b + m) - xtickslabel.append(startLabel) + if c > 0: + xticks_values.append(b + c) + xtickslabel.append("") + + if d > 0: + xticks_values.append(b + c + m) + xtickslabel.append("") + + xticks_values.append(b + c + m + d) xtickslabel.append(endLabel) + if a > 0: - xticks_values.append(b + m + a) + xticks_values.append(b + c + m + d + a) xtickslabel.append('{0:.1f}{1}'.format(float(a) / quotient, symbol)) xticks = [(k / w) - tickPlotAdj for k in xticks_values] diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index a964e7409..ed8d98a61 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -67,6 +67,14 @@ def get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel): w = hm.parameters['bin size'] b = hm.parameters['upstream'] a = hm.parameters['downstream'] + try: + c = hm.parameters['unscaled 5 prime'] + except: + c = 0 + try: + d = hm.parameters['unscaled 3 prime'] + except: + d = 0 m = hm.parameters['body'] if b < 1e5: @@ -86,17 +94,28 @@ def get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel): xticks_label = [] # only if upstream region is set, add a x tick - if hm.parameters['upstream'] > 0: + if b > 0: xticks_values.append(b) xticks_label.append('{0:.1f}'.format(-(float(b) / quotient))) + xticks_label.append(startLabel) + + # 5 prime unscaled tick/label, if needed + if c > 0: + xticks_values.append(b + c) + xticks_label.append('') + # set the x tick for the body parameter, regardless if # upstream is 0 (not set) - xticks_values.append(b + m) - xticks_label.append(startLabel) + if d > 0: + xticks_values.append(b + c + m) + xticks_label.append('') + + xticks_values.append(b + c + m + d) xticks_label.append(endLabel) + if a > 0: - xticks_values.append(b + m + a) + xticks_values.append(b + c + m + d + a) xticks_label.append('{0:.1f}{1}'.format(float(a) / quotient, symbol)) xticks = [k / w for k in xticks_values] @@ -219,10 +238,10 @@ def plotMatrix(hm, outFileName, total_figwidth += 1 / 2.54 fig = plt.figure(figsize=(total_figwidth, figheight)) - hm.parameters['upstream'] - hm.parameters['downstream'] - hm.parameters['body'] - hm.parameters['bin size'] + # hm.parameters['upstream'] + # hm.parameters['downstream'] + # hm.parameters['body'] + # hm.parameters['bin size'] xticks, xtickslabel = getProfileTicks(hm, reference_point_label, startLabel, endLabel) From 9497cb7660cca6933b64a01fe209b59edab86cec Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Wed, 17 Feb 2016 16:20:46 +0100 Subject: [PATCH 23/55] PEP8 and start fixing text output from plotProfile/plotHeatmap --- deeptools/heatmapper.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 8e04b7f04..7486a544c 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -221,7 +221,7 @@ def compute_sub_matrix_worker(score_file_list, regions, for feature in regions: # print some information if parameters['body'] > 0 and \ - feature.end-feature.start-parameters['unscaled 5 prime']-parameters['unscaled 3 prime'] < parameters['bin size']: + feature.end - feature.start - parameters['unscaled 5 prime'] - parameters['unscaled 3 prime'] < parameters['bin size']: if parameters['verbose']: sys.stderr.write("A region that is shorter than the bin size (possibly only after accounting for unscaled regions) was found: " "({}) {} {}:{}:{}. Skipping...\n".format((feature.end - feature.start - parameters['unscaled 5 prime'] - parameters['unscaled 3 prime']), @@ -242,7 +242,7 @@ def compute_sub_matrix_worker(score_file_list, regions, a = parameters['downstream'] / parameters['bin size'] c = parameters['unscaled 5 prime'] / parameters['bin size'] d = parameters['unscaled 3 prime'] / parameters['bin size'] - start = feature['start'] + parameters['unscaled 5 prime'] + start = feature['start'] + parameters['unscaled 5 prime'] end = feature.end - parameters['unscaled 3 prime'] # build zones: @@ -702,17 +702,25 @@ def save_tabulated_values(self, file_handle, reference_point_label='TSS', start_ xtickslabel = [] # only if upstream region is set, add a x tick - if self.parameters['upstream'] > 0: + if b > 0: xticks_values.append(b) xtickslabel.append('{0:.1f}{1}'.format(-(float(b) / quotient), symbol)) - # set the x tick for the body parameter, regardless if - # upstream is 0 (not set) - xticks_values.append(b + m) xtickslabel.append(start_label) + + if c > 0: + xticks_values.append(b + c) + xtickslabel.append("") + + if d > 0: + xticks_values.append(b + c + m) + xtickslabel.append("") + + xticks_values.append(b + c + m + d) xtickslabel.append(end_label) + if a > 0: - xticks_values.append(b + m + a) + xticks_values.append(b + c + m + d + a) xtickslabel.append('{0:.1f}{1}'.format(float(a) / quotient, symbol)) xticks = [(k / w) for k in xticks_values] From 07be2214b028779624d26c1d8760ea78beb47c89 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 18 Feb 2016 10:42:35 +0100 Subject: [PATCH 24/55] Update nosetests --- deeptools/test/test_heatmapper/master.mat | 2 +- .../test/test_heatmapper/master_extend_beyond_chr_size.mat | 2 +- deeptools/test/test_heatmapper/master_multibed.mat | 2 +- deeptools/test/test_heatmapper/master_nan_to_zero.mat | 2 +- deeptools/test/test_heatmapper/master_scale_reg.mat | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deeptools/test/test_heatmapper/master.mat b/deeptools/test/test_heatmapper/master.mat index 4a78d9957..c7f15c40d 100644 --- a/deeptools/test/test_heatmapper/master.mat +++ b/deeptools/test/test_heatmapper/master.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":"TSS","group_labels":["Group 1","Group 2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,200],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["Group 1","Group 2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat b/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat index e1a401f1a..7fd48a60f 100644 --- a/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat +++ b/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":500,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":"TSS","group_labels":["group1","group2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,600],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":500,"unscaled 3 prime":0,"group_labels":["group1","group2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,600],"missing data as zero":false,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/deeptools/test/test_heatmapper/master_multibed.mat b/deeptools/test/test_heatmapper/master_multibed.mat index 9a6e311b9..d4d85e46e 100644 --- a/deeptools/test/test_heatmapper/master_multibed.mat +++ b/deeptools/test/test_heatmapper/master_multibed.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":"TSS","group_labels":["group1","group2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,200],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["group1","group2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/deeptools/test/test_heatmapper/master_nan_to_zero.mat b/deeptools/test/test_heatmapper/master_nan_to_zero.mat index 276233d24..62d14e509 100644 --- a/deeptools/test/test_heatmapper/master_nan_to_zero.mat +++ b/deeptools/test/test_heatmapper/master_nan_to_zero.mat @@ -1,4 +1,4 @@ -@{"body":0,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":true,"skip zeros":false,"ref point":"TSS","group_labels":["Group 1","Group 2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":1,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,200],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":0,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["Group 1","Group 2"],"bin size":1,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":true,"ref point":"TSS","min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/deeptools/test/test_heatmapper/master_scale_reg.mat b/deeptools/test/test_heatmapper/master_scale_reg.mat index 4f9290697..c0e71f6b8 100644 --- a/deeptools/test/test_heatmapper/master_scale_reg.mat +++ b/deeptools/test/test_heatmapper/master_scale_reg.mat @@ -1,4 +1,4 @@ -@{"body":100,"sample_labels":["test"],"scale":1,"verbose":true,"downstream":100,"nan after end":false,"missing data as zero":false,"skip zeros":false,"ref point":null,"group_labels":["Group 1","Group 2"],"min threshold":null,"sort using":"mean","sort regions":"no","bin size":10,"upstream":100,"proc number":1,"bin avg type":"mean","sample_boundaries":[0,30],"max threshold":null,"group_boundaries":[0,3,6]} +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":0,"body":100,"sample_labels":["test"],"downstream":100,"unscaled 3 prime":0,"group_labels":["Group 1","Group 2"],"bin size":10,"upstream":100,"group_boundaries":[0,3,6],"sample_boundaries":[0,30],"missing data as zero":false,"ref point":null,"min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.500000 3.000000 3.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 1.500000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.500000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 From b72de6fd0f0e151782267fffc68971df9aa998a1 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 18 Feb 2016 14:10:13 +0100 Subject: [PATCH 25/55] Fix a bug in the unscaled processing and add a test for it --- deeptools/heatmapper.py | 2 +- deeptools/test/test_heatmapper.py | 8 ++++++++ .../test/test_heatmapper/master_unscaled.mat | 2 ++ deeptools/test/test_heatmapper/unscaled.bed | 1 + deeptools/test/test_heatmapper/unscaled.bigWig | Bin 0 -> 689 bytes 5 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 deeptools/test/test_heatmapper/master_unscaled.mat create mode 100644 deeptools/test/test_heatmapper/unscaled.bed create mode 100644 deeptools/test/test_heatmapper/unscaled.bigWig diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 7486a544c..b5718278f 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -256,7 +256,7 @@ def compute_sub_matrix_worker(score_file_list, regions, zones = \ [(feature.start - b * parameters['bin size'], feature.start, b), (feature.start, feature.start + c * parameters['bin size'], c), - (feature.start, feature.end, parameters['body'] / parameters['bin size']), + (feature.start + c * parameters['bin size'], feature.end - d * parameters['bin size'], parameters['body'] / parameters['bin size']), (feature.end - d * parameters['bin size'], feature.end, d), (feature.end, feature.end + a * parameters['bin size'], a)] elif parameters['ref point'] == 'TES': # around TES diff --git a/deeptools/test/test_heatmapper.py b/deeptools/test/test_heatmapper.py index ca77daae1..29b953aae 100644 --- a/deeptools/test/test_heatmapper.py +++ b/deeptools/test/test_heatmapper.py @@ -67,6 +67,14 @@ def test_computeMatrix_region_extend_over_chr_end(self): assert filecmp.cmp(ROOT + '/master_extend_beyond_chr_size.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') + def test_computeMatrix_unscaled(self): + args = "scale-regions -S {0}/unscaled.bigWig -R {0}/unscaled.bed -a 300 -b 500 --unscaled5prime 100 --unscaled3prime 50 " \ + "--outFileName /tmp/_test.mat.gz -bs 10 -p 1".format(ROOT).split() + deeptools.computeMatrix.main(args) + os.system('gunzip -f /tmp/_test.mat.gz') + assert filecmp.cmp(ROOT + '/master_unscaled.mat', '/tmp/_test.mat') is True + #os.remove('/tmp/_test.mat') + def test_plotHeatmap_simple_plot(self): """ Test a simple plot generated using a matrix from diff --git a/deeptools/test/test_heatmapper/master_unscaled.mat b/deeptools/test/test_heatmapper/master_unscaled.mat new file mode 100644 index 000000000..2b94c0d80 --- /dev/null +++ b/deeptools/test/test_heatmapper/master_unscaled.mat @@ -0,0 +1,2 @@ +@{"verbose":true,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":100,"body":1000,"sample_labels":["unscaled"],"downstream":300,"unscaled 3 prime":50,"group_labels":["genes"],"bin size":10,"upstream":500,"group_boundaries":[0,1],"sample_boundaries":[0,195],"missing data as zero":false,"ref point":null,"min threshold":null,"sort regions":"no","proc number":1,"bin avg type":"mean","max threshold":null} +1 500 1650 . foo . 1.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 4.000000 4.000000 4.000000 4.000000 4.000000 5.000000 5.000000 5.000000 5.000000 5.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 8.000000 8.000000 9.000000 9.000000 9.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 diff --git a/deeptools/test/test_heatmapper/unscaled.bed b/deeptools/test/test_heatmapper/unscaled.bed new file mode 100644 index 000000000..440c97ce4 --- /dev/null +++ b/deeptools/test/test_heatmapper/unscaled.bed @@ -0,0 +1 @@ +1 500 1650 . foo . diff --git a/deeptools/test/test_heatmapper/unscaled.bigWig b/deeptools/test/test_heatmapper/unscaled.bigWig new file mode 100644 index 0000000000000000000000000000000000000000..6574191526f4dcdc3c1de5c732a6f768324f0fac GIT binary patch literal 689 zcmY%U)8E0uz{n86$N&bdQ2IHPhKNE)1DIF?lBghrZOsIsBcU{siebgv!Hw*l5B3nA zngf{Dx(=or+_@Yl_MEB!1s4c&KxrTa2aG@&Ll6&SAS_%f<|HRHG|1lJDL8O~;bB4J zNw#W%xl9v_7%s+39Q0uqSbK~m>@mY4?XV8k9S>C9!W5m2osVqRZCD!2go($AdK)Dh@!~U^anlh5?WZWS|_FJji*-0HmfD xDinpJ7Up7sGY2jtoIjB8fI~EmfkFQj({^HQWMF|9hs{QNXkwCr+NcIhtN@%yNDKe~ literal 0 HcmV?d00001 From 858a70d4afacc6109a76c61da76a8b7b358f07e3 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 18 Feb 2016 14:30:34 +0100 Subject: [PATCH 26/55] --outFileNameData now works for plotHeatmap --- deeptools/plotHeatmap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index ed8d98a61..a1871da61 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -507,8 +507,8 @@ def main(args=None): if args.outFileNameMatrix: hm.save_matrix_values(args.outFileNameMatrix) - # if args.outFileNameData: - # hm.saveTabulatedValues(args.outFileNameData) + if args.outFileNameData: + hm.save_tabulated_values(args.outFileNameData) if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) From d86c3d1488da538f37bbc5ca2ec36f97aa1b738d Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 18 Feb 2016 14:34:58 +0100 Subject: [PATCH 27/55] PEP8 --- deeptools/test/test_heatmapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeptools/test/test_heatmapper.py b/deeptools/test/test_heatmapper.py index 29b953aae..719deb291 100644 --- a/deeptools/test/test_heatmapper.py +++ b/deeptools/test/test_heatmapper.py @@ -73,7 +73,7 @@ def test_computeMatrix_unscaled(self): deeptools.computeMatrix.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert filecmp.cmp(ROOT + '/master_unscaled.mat', '/tmp/_test.mat') is True - #os.remove('/tmp/_test.mat') + os.remove('/tmp/_test.mat') def test_plotHeatmap_simple_plot(self): """ From 4a565d36276c17ab525a914fb6a8110a3cec87f9 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 18 Feb 2016 14:48:45 +0100 Subject: [PATCH 28/55] I hope PEP8 dies in a fire --- deeptools/plotHeatmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index a1871da61..0435f51eb 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -508,7 +508,7 @@ def main(args=None): hm.save_matrix_values(args.outFileNameMatrix) if args.outFileNameData: - hm.save_tabulated_values(args.outFileNameData) + hm.save_tabulated_values(args.outFileNameData) if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) From 300435c1ca3a1f31d55cd1c97d6f25c8e57d4167 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Thu, 18 Feb 2016 15:12:45 +0100 Subject: [PATCH 29/55] Add --unscaled5prime and --unscaled3prime to the Galaxy wrappers --- galaxy/wrapper/computeMatrix.xml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/galaxy/wrapper/computeMatrix.xml b/galaxy/wrapper/computeMatrix.xml index 0abf30546..a03a77f28 100644 --- a/galaxy/wrapper/computeMatrix.xml +++ b/galaxy/wrapper/computeMatrix.xml @@ -52,6 +52,9 @@ --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength #end if + + --unscaled5prime $mode.unscaled5prime + --unscaled3prime $mode.unscaled3prime #end if #if $advancedOpt.showAdvancedOpt == "yes": @@ -71,7 +74,6 @@ #if $advancedOpt.scale is not None and str($advancedOpt.scale) != '': --scale $advancedOpt.scale #end if - #end if ]]> @@ -118,6 +120,14 @@ label="Distance downstream of the region end position" help="If the regions are genes, this would be the distance downstream of the transcription end site."/> + + From 060e94eace743bdcadfcc000cf4c0bfad6500d74 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 19 Feb 2016 14:28:56 +0100 Subject: [PATCH 30/55] Allow plotFingerprint to iterate through line styles and not just colors, since matplotlib doesn't do this by itself --- deeptools/plotFingerprint.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py index 4ea2cb8ab..519c624c1 100644 --- a/deeptools/plotFingerprint.py +++ b/deeptools/plotFingerprint.py @@ -166,10 +166,12 @@ def main(args=None): x = np.arange(total).astype('float') / total # normalize from 0 to 1 i = 0 + # matplotlib won't iterate through line styles by itself + pyplot_line_styles= sum([7 * ["-"], 7 * ["--"], 7 * ["-."], 7 * [":"], 7 * ["."]], []) for reads in num_reads_per_bin.T: count = np.cumsum(np.sort(reads)) - count = count / count[-1] # to normalyze y from 0 to 1 - plt.plot(x, count, label=args.labels[i]) + count = count / count[-1] # to normalize y from 0 to 1 + plt.plot(x, count, label=args.labels[i], linestyle=pyplot_line_styles[i]) plt.xlabel('rank') plt.ylabel('fraction w.r.t. bin with highest coverage') i += 1 From 0061ab292dbd36721fbf90af65290a7d2af48da9 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 19 Feb 2016 14:34:58 +0100 Subject: [PATCH 31/55] PEP8. Anyway, this should fix #80 --- deeptools/plotFingerprint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py index 519c624c1..da6037973 100644 --- a/deeptools/plotFingerprint.py +++ b/deeptools/plotFingerprint.py @@ -167,7 +167,7 @@ def main(args=None): i = 0 # matplotlib won't iterate through line styles by itself - pyplot_line_styles= sum([7 * ["-"], 7 * ["--"], 7 * ["-."], 7 * [":"], 7 * ["."]], []) + pyplot_line_styles = sum([7 * ["-"], 7 * ["--"], 7 * ["-."], 7 * [":"], 7 * ["."]], []) for reads in num_reads_per_bin.T: count = np.cumsum(np.sort(reads)) count = count / count[-1] # to normalize y from 0 to 1 From 48b267ad782963c627f5eb023afc96b935fbba50 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 19 Feb 2016 16:11:57 +0100 Subject: [PATCH 32/55] Allow --outFileNameData to work for plotHeatmap in Galaxy --- galaxy/wrapper/plotHeatmap.xml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/galaxy/wrapper/plotHeatmap.xml b/galaxy/wrapper/plotHeatmap.xml index d178c9cb1..021d79402 100644 --- a/galaxy/wrapper/plotHeatmap.xml +++ b/galaxy/wrapper/plotHeatmap.xml @@ -83,6 +83,10 @@ --plotTitle '$advancedOpt.plotTitle' #end if + #if $advancedOpt.outFileNameData: + --outFileNameData $outFileNameData + #end if + $advancedOpt.perGroup @KMEANS_CLUSTERING@ @@ -126,6 +130,8 @@ + + + outFileNameData is True + From d25f80ebaa5cbbfeea2a83d72510ebcf38bc2138 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 09:07:15 +0100 Subject: [PATCH 33/55] Ensure that all file handles are closed before python quits --- deeptools/bamPEFragmentSize.py | 1 + deeptools/computeGCBias.py | 1 + deeptools/correlation.py | 3 +++ deeptools/correlation_heatmap.py | 1 + deeptools/heatmapper_utilities.py | 1 + 5 files changed, 7 insertions(+) diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index 7d6bb2cb1..dc466e923 100644 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -111,6 +111,7 @@ def main(args=None): plt.ylabel('Frequency') plt.title(args.plotTitle) plt.savefig(args.histogram, bbox_inches=0) + plt.close() if __name__ == "__main__": diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py index 8011cfe2e..3cb6d592d 100644 --- a/deeptools/computeGCBias.py +++ b/deeptools/computeGCBias.py @@ -632,6 +632,7 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N ax2.set_xlim(0.2, 0.7) plt.tight_layout() plt.savefig(file_name, bbox_inches='tight', dpi=100, format=image_format) + plt.close() def main(args=None): diff --git a/deeptools/correlation.py b/deeptools/correlation.py index dd9921256..77b55eb46 100644 --- a/deeptools/correlation.py +++ b/deeptools/correlation.py @@ -307,6 +307,7 @@ def plot_correlation(self, plot_fiilename, plot_title='', vmax=None, ha='center', va='center') fig.savefig(plot_fiilename, format=image_format) + fig.close() def plot_scatter(self, plot_fiilename, plot_title='', image_format=None, log1p=False): """ @@ -411,6 +412,7 @@ def plot_scatter(self, plot_fiilename, plot_title='', image_format=None, log1p=F ax.set_xlim(min_value, ax.get_xlim()[1]) plt.savefig(plot_fiilename, format=image_format) + plt.close() def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False): """ @@ -468,3 +470,4 @@ def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False) plt.subplots_adjust(top=3.85) plt.tight_layout() plt.savefig(plot_filename, format=image_format, bbox_extra_artists=(lgd,), bbox_inches='tight') + plt.close() diff --git a/deeptools/correlation_heatmap.py b/deeptools/correlation_heatmap.py index 000c83776..47e34fc59 100644 --- a/deeptools/correlation_heatmap.py +++ b/deeptools/correlation_heatmap.py @@ -100,3 +100,4 @@ def plot_correlation(corr_matrix, labels, plotFileName, vmax=None, ha='center', va='center') fig.savefig(plotFileName, format=image_format) + fig.close() diff --git a/deeptools/heatmapper_utilities.py b/deeptools/heatmapper_utilities.py index 393ba1585..90f75b5e4 100644 --- a/deeptools/heatmapper_utilities.py +++ b/deeptools/heatmapper_utilities.py @@ -48,6 +48,7 @@ def plot_single(ax, ma, average_type, color, label, plot_type='simple'): >>> ax = plot_single(ax, matrix + 30, 'mean', color=(0.9, 0.5, 0.9, 0.5), label='violet with alpha', plot_type='std') >>> leg = ax.legend() >>> plt.savefig("/tmp/test.pdf") + >>> plt.close() >>> fig = plt.figure() From e3c9fdf7ebcce6b9ab60c117c589204bb1c7fcd5 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 09:30:03 +0100 Subject: [PATCH 34/55] Closed something that has no close() method --- deeptools/correlation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/deeptools/correlation.py b/deeptools/correlation.py index 77b55eb46..31fe13f5d 100644 --- a/deeptools/correlation.py +++ b/deeptools/correlation.py @@ -307,7 +307,6 @@ def plot_correlation(self, plot_fiilename, plot_title='', vmax=None, ha='center', va='center') fig.savefig(plot_fiilename, format=image_format) - fig.close() def plot_scatter(self, plot_fiilename, plot_title='', image_format=None, log1p=False): """ From baaaa07e19cd113d85a2f1e03d37e2d1dcad5070 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 10:07:09 +0100 Subject: [PATCH 35/55] Update test to include the actual eigenvalue --- galaxy/wrapper/test-data/plotPCA_result2.tabular | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/test-data/plotPCA_result2.tabular b/galaxy/wrapper/test-data/plotPCA_result2.tabular index ccc837744..fcfc743a9 100644 --- a/galaxy/wrapper/test-data/plotPCA_result2.tabular +++ b/galaxy/wrapper/test-data/plotPCA_result2.tabular @@ -1,3 +1,3 @@ Component bowtie2-test1.bam bowtie2-test1.bam Eigenvalue -1 -0.707106781187 -0.707106781187 2.0 +1 -0.707106781187 -0.707106781187 6.0 2 -0.707106781187 0.707106781187 0.0 From 3253ca860deac4d27fa2702c5a468b2a26e730f0 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 10:34:41 +0100 Subject: [PATCH 36/55] Try again --- galaxy/wrapper/plotPCA.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 13db76011..7737af3d0 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -41,7 +41,7 @@ - + From 5f79d2a884f63fa751a881ee3d6464753acdbc40 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 13:03:40 +0100 Subject: [PATCH 37/55] try again --- galaxy/wrapper/plotPCA.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index d4158d06a..2ebbc6703 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -41,7 +41,7 @@ - + From 3b1ed9d2fb0f584aa2b60febde2448bc5764cc76 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 13:29:54 +0100 Subject: [PATCH 38/55] This is stupid --- galaxy/wrapper/plotPCA.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 2ebbc6703..72aa0bdc7 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -42,7 +42,7 @@ - + From 011137428f4ba39232c2df5e0dca0af88264a41d Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 14:25:16 +0100 Subject: [PATCH 39/55] Ah, it's somehow copying the image to the outFileNameData --- galaxy/wrapper/plotPCA.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 72aa0bdc7..978886832 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -40,9 +40,8 @@ - - + From 4418ca527297772361ad8a84ed9b0207a8f0d5c4 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 15:20:56 +0100 Subject: [PATCH 40/55] Maybe this will work --- galaxy/wrapper/plotPCA.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 978886832..6973faccf 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -39,9 +39,9 @@ + - From aa40c76c5006ae29374cb1f520498199dd86dc2c Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 15:26:49 +0100 Subject: [PATCH 41/55] Ah, I forgot the param in the wrapper --- galaxy/wrapper/plotPCA.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 6973faccf..4d89e87c3 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -39,9 +39,10 @@ - + + From 4d92a51c7deb452cf1d9294fcf8b7bab96a25160 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 16:24:41 +0100 Subject: [PATCH 42/55] Minor fixes --- deeptools/plotHeatmap.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 38d2e582d..a0d569048 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -67,14 +67,8 @@ def get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel): w = hm.parameters['bin size'] b = hm.parameters['upstream'] a = hm.parameters['downstream'] - try: - c = hm.parameters['unscaled 5 prime'] - except: - c = 0 - try: - d = hm.parameters['unscaled 3 prime'] - except: - d = 0 + c = hm.parameters.get('unscaled 5 prime',0) + d = hm.parameters.get('unscaled 3 prime',0) m = hm.parameters['body'] if b < 1e5: @@ -238,11 +232,6 @@ def plotMatrix(hm, outFileName, total_figwidth += 1 / 2.54 fig = plt.figure(figsize=(total_figwidth, figheight)) - # hm.parameters['upstream'] - # hm.parameters['downstream'] - # hm.parameters['body'] - # hm.parameters['bin size'] - xticks, xtickslabel = getProfileTicks(hm, reference_point_label, startLabel, endLabel) xticks_heat, xtickslabel_heat = get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel) From e539bb0d56ee809cb8c518217c0c9b2e3c89597f Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 19:44:32 +0100 Subject: [PATCH 43/55] I hate you so much PEP8 --- deeptools/plotHeatmap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index a0d569048..41caa25c9 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -67,8 +67,8 @@ def get_heatmap_ticks(hm, reference_point_label, startLabel, endLabel): w = hm.parameters['bin size'] b = hm.parameters['upstream'] a = hm.parameters['downstream'] - c = hm.parameters.get('unscaled 5 prime',0) - d = hm.parameters.get('unscaled 3 prime',0) + c = hm.parameters.get('unscaled 5 prime', 0) + d = hm.parameters.get('unscaled 3 prime', 0) m = hm.parameters['body'] if b < 1e5: From b35a9752a0b1e7c864ac13fb67b5263ed2ba5bd6 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 20:28:44 +0100 Subject: [PATCH 44/55] close the file handle --- deeptools/plotPCA.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py index a19335eff..2c45606be 100644 --- a/deeptools/plotPCA.py +++ b/deeptools/plotPCA.py @@ -107,6 +107,7 @@ def main(args=None): for v in mlab_pca.Wt[i, :]: of.write("\t{}".format(v)) of.write("\t{}\n".format(mlab_pca.s[i])) + of.close() if __name__ == "__main__": From 52968df9deeee92051943599d857ab9a6d10ea8d Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 23:35:17 +0100 Subject: [PATCH 45/55] Copy over changes --- deeptools/plotPCA.py | 2 +- galaxy/wrapper/plotPCA.xml | 10 +++++----- galaxy/wrapper/test-data/plotPCA_result2.tabular | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py index 2c45606be..58c3ecea1 100644 --- a/deeptools/plotPCA.py +++ b/deeptools/plotPCA.py @@ -107,7 +107,7 @@ def main(args=None): for v in mlab_pca.Wt[i, :]: of.write("\t{}".format(v)) of.write("\t{}\n".format(mlab_pca.s[i])) - of.close() + args.outFileNameData.close() if __name__ == "__main__": diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 4d89e87c3..3e997d4ea 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -12,8 +12,8 @@ --plotTitle "$plotTitle" --plotFile "$outFileName" --plotFileFormat "$outFileFormat" - #if $outFileNameData - --outFileNameData "$matrix" + #if $outFileNameDataBool + --outFileNameData "$outFileNameData" #end if ]]> @@ -25,8 +25,8 @@ - - outFileNameData is True + + outFileNameDataBool is true @@ -40,7 +40,7 @@ - + diff --git a/galaxy/wrapper/test-data/plotPCA_result2.tabular b/galaxy/wrapper/test-data/plotPCA_result2.tabular index fcfc743a9..d6ac0ce08 100644 --- a/galaxy/wrapper/test-data/plotPCA_result2.tabular +++ b/galaxy/wrapper/test-data/plotPCA_result2.tabular @@ -1,3 +1,3 @@ Component bowtie2-test1.bam bowtie2-test1.bam Eigenvalue 1 -0.707106781187 -0.707106781187 6.0 -2 -0.707106781187 0.707106781187 0.0 +2 -0.707106781187 0.707106781187 1.23259516441e-32 From 4a371d53824390309a0553de955381e0f91e9d1e Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Wed, 24 Feb 2016 23:55:46 +0100 Subject: [PATCH 46/55] Maybe we need True rather than true --- galaxy/wrapper/plotPCA.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index 3e997d4ea..e856fbb21 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -26,7 +26,7 @@ - outFileNameDataBool is true + outFileNameDataBool is True @@ -40,7 +40,7 @@ - + From 6de60e696bc0a64bd760530393a99d3838e78391 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Thu, 25 Feb 2016 00:16:14 +0100 Subject: [PATCH 47/55] Oops, missed a Bool --- galaxy/wrapper/plotPCA.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/plotPCA.xml b/galaxy/wrapper/plotPCA.xml index e856fbb21..d7c731d17 100644 --- a/galaxy/wrapper/plotPCA.xml +++ b/galaxy/wrapper/plotPCA.xml @@ -21,7 +21,7 @@ - + From ca0dee453cfbbfb5bbafac368881603ed3d32fa0 Mon Sep 17 00:00:00 2001 From: fidelram Date: Thu, 25 Feb 2016 11:29:07 +0100 Subject: [PATCH 48/55] rotate labels in heatmap mode to avoid overlaps --- deeptools/plotProfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 737773267..51c6c06ff 100644 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -343,7 +343,8 @@ def plot_heatmap(self): yticks = [x + d_half for x in pos] ax.axes.set_yticks(yticks) - ax.axes.set_yticklabels(labels[::-1], rotation='vertical') + #ax.axes.set_yticklabels(labels[::-1], rotation='vertical') + ax.axes.set_yticklabels(labels[::-1]) ax_list.append(ax) From 62cc8b3257f20c9a7043836f0cd1bd5f7d280324 Mon Sep 17 00:00:00 2001 From: fidelram Date: Fri, 26 Feb 2016 13:03:29 +0100 Subject: [PATCH 49/55] added comment to plotProfile --- deeptools/plotProfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 51c6c06ff..eacc33342 100644 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -343,7 +343,8 @@ def plot_heatmap(self): yticks = [x + d_half for x in pos] ax.axes.set_yticks(yticks) - #ax.axes.set_yticklabels(labels[::-1], rotation='vertical') + # TODO: make rotation a parameter + # ax.axes.set_yticklabels(labels[::-1], rotation='vertical') ax.axes.set_yticklabels(labels[::-1]) ax_list.append(ax) From d84a771d714081772e6707d1db720caa5e674ee0 Mon Sep 17 00:00:00 2001 From: fidelram Date: Fri, 26 Feb 2016 13:05:55 +0100 Subject: [PATCH 50/55] Fixed #301. Also corrected heatmaps being when --perGroup was used --- deeptools/plotHeatmap.py | 46 +- .../heatmap_master_multi_pergroup.svg | 1864 ++++++++--------- 2 files changed, 843 insertions(+), 1067 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index a964e7409..26e545f8f 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -116,9 +116,7 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro numcols = hm_matrix.get_num_samples() numrows = hm_matrix.get_num_groups() if perGroup: - temp = numcols - numcols = numrows - numrows = temp + numcols, numrows = numrows, numcols # the rows have different size depending # on the number of regions contained in the @@ -214,7 +212,11 @@ def plotMatrix(hm, outFileName, figheight += figwidth numsamples = hm.matrix.get_num_samples() - total_figwidth = figwidth * numsamples + if perGroup: + num_cols = hm.matrix.get_num_groups() + else: + num_cols = numsamples + total_figwidth = figwidth * num_cols if showColorbar: total_figwidth += 1 / 2.54 fig = plt.figure(figsize=(total_figwidth, figheight)) @@ -322,22 +324,26 @@ def plotMatrix(hm, outFileName, elif not perGroup and sample == 0: ax.axes.set_ylabel(sub_matrix['group']) - # add xticks to the bottom heatmap (last group) - ax.axes.get_xaxis().set_visible(True) - ax.axes.set_xticks(xticks_heat) - ax.axes.set_xticklabels(xtickslabel_heat, size=8) - - # align the first and last label - # such that they don't fall off - # the heatmap sides - ticks = ax.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment('left') - ticks[-1].label1.set_horizontalalignment('right') - - ax.get_xaxis().set_tick_params( - which='both', - top='off', - direction='out') + # add labels to last block in a column + if (perGroup and sample == hm.matrix.get_num_samples() - 1) or \ + (not perGroup and group_idx == numgroups - 1): + + # add xticks to the bottom heatmap (last group) + ax.axes.get_xaxis().set_visible(True) + ax.axes.set_xticks(xticks_heat) + ax.axes.set_xticklabels(xtickslabel_heat, size=8) + + # align the first and last label + # such that they don't fall off + # the heatmap sides + ticks = ax.xaxis.get_major_ticks() + ticks[0].label1.set_horizontalalignment('left') + ticks[-1].label1.set_horizontalalignment('right') + + ax.get_xaxis().set_tick_params( + which='both', + top='off', + direction='out') # plot the profiles on top of the heatmaps if showSummaryPlot: diff --git a/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg b/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg index c70882459..3c978bf2f 100644 --- a/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg +++ b/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.svg @@ -2,7 +2,7 @@ - + + + + + + + - + - + + + + + + + + @@ -978,8 +717,8 @@ z - - + + - - - + + - - - - - - - + + + - + - + - + @@ -1060,30 +799,30 @@ L 235.844373 627.338519 - - + + - + - + - + - - + + - + - + - + @@ -1092,9 +831,9 @@ L 235.844373 627.338519 - + - + @@ -1104,102 +843,102 @@ L 235.844373 627.338519 - + - - + - - + - - + - - + - @@ -1207,19 +946,24 @@ L 221.990042 112.739987 L 23.9225 19.4945 " style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;"/> - - - + + + + + + - + - + - + - + @@ -1229,40 +973,40 @@ L 23.9225 19.4945 - - + + - + - + - + - + - + - - + + - + - + - + - + - + @@ -1272,29 +1016,29 @@ L 23.9225 19.4945 - + - + +" id="mbfe9c3911b" style="stroke:#000000;stroke-width:0.5;"/> - + - + +" id="m5b98145044" style="stroke:#000000;stroke-width:0.5;"/> - + - + @@ -1304,17 +1048,17 @@ L -4 0 - + - + - + - + - + - + - + - + - + - + @@ -1374,17 +1118,17 @@ Q 48.484375 72.75 52.59375 71.296875 - + - + - + - + - + @@ -1394,9 +1138,9 @@ Q 48.484375 72.75 52.59375 71.296875 - + - + @@ -1408,112 +1152,112 @@ Q 48.484375 72.75 52.59375 71.296875 - - - + - - + - - + - - + - - - - - - - + + + - + - + - + - + - + @@ -1521,40 +1265,40 @@ L 235.844373 19.4945 - - + + - + - + - + - + - + - - + + - + - + - + - + - + @@ -1564,59 +1308,59 @@ L 235.844373 19.4945 - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -1626,15 +1370,15 @@ L 235.844373 19.4945 - - + - - + + - + @@ -1642,15 +1386,15 @@ L 256.644373 29.97325 - - + - - + + - + @@ -1658,15 +1402,15 @@ L 256.644373 41.71575 - - + - - + + - + @@ -1674,15 +1418,15 @@ L 256.644373 53.45825 - - + - - + + - + @@ -1694,43 +1438,43 @@ L 256.644373 65.20075 - - + - - - + + - + - + - + - + @@ -1738,14 +1482,14 @@ z - + - + - + - + @@ -1753,14 +1497,14 @@ z - + - + - + - + @@ -1768,12 +1512,12 @@ z - + - + - + - + @@ -1813,14 +1557,14 @@ Q 23.96875 32.421875 30.609375 32.421875 - + - + - + - + @@ -1828,14 +1572,40 @@ Q 23.96875 32.421875 30.609375 32.421875 - + - + - + - + + + + @@ -1843,12 +1613,12 @@ Q 23.96875 32.421875 30.609375 32.421875 - + - + - + - + @@ -1896,14 +1666,14 @@ Q 18.3125 60.0625 18.3125 54.390625 - + - + - + - + @@ -1911,14 +1681,14 @@ Q 18.3125 60.0625 18.3125 54.390625 - + - + - + - + @@ -1926,12 +1696,12 @@ Q 18.3125 60.0625 18.3125 54.390625 - + - + - + - + @@ -1952,14 +1722,14 @@ z - + - + - + - + @@ -1970,38 +1740,38 @@ z - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + From 32d83f8957909a9a5f1d1505a714fb78f8c328c6 Mon Sep 17 00:00:00 2001 From: fidelram Date: Fri, 26 Feb 2016 13:31:41 +0100 Subject: [PATCH 51/55] updated test image --- .../profile_master_heatmap.svg | 112 +++++++++--------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/deeptools/test/test_heatmapper/profile_master_heatmap.svg b/deeptools/test/test_heatmapper/profile_master_heatmap.svg index ad007d22a..b77a97a5b 100644 --- a/deeptools/test/test_heatmapper/profile_master_heatmap.svg +++ b/deeptools/test/test_heatmapper/profile_master_heatmap.svg @@ -19,35 +19,35 @@ z - - - + + - - - - @@ -56,20 +56,20 @@ L 23.8 24.8 +" id="mafb02c7a5a" style="stroke:#000000;stroke-width:0.5;"/> - + +" id="m9783c3455a" style="stroke:#000000;stroke-width:0.5;"/> - + @@ -120,7 +120,7 @@ L 4.890625 23.390625 z " id="BitstreamVeraSans-Roman-2d"/> - + @@ -131,12 +131,12 @@ z - + - + @@ -183,7 +183,7 @@ L -0.296875 64.59375 z " id="BitstreamVeraSans-Roman-54"/> - + @@ -193,12 +193,12 @@ z - + - + @@ -242,7 +242,7 @@ L 18.109375 75.984375 z " id="BitstreamVeraSans-Roman-62"/> - + @@ -383,7 +383,7 @@ Q 40.53125 6.109375 44.609375 11.75 Q 48.6875 17.390625 48.6875 27.296875 " id="BitstreamVeraSans-Roman-70"/> - + @@ -397,7 +397,7 @@ Q 48.6875 17.390625 48.6875 27.296875 - + @@ -486,7 +486,7 @@ Q 31.78125 56 36.171875 55.265625 Q 40.578125 54.546875 44.28125 53.078125 " id="BitstreamVeraSans-Roman-73"/> - + @@ -496,10 +496,10 @@ Q 40.578125 54.546875 44.28125 53.078125 - - + - @@ -529,10 +529,10 @@ z +" id="me225bad64f" style="stroke:#000000;stroke-width:0.5;"/> - + @@ -547,7 +547,7 @@ L -4 0 - + @@ -595,7 +595,7 @@ Q 46.96875 40.921875 40.578125 39.3125 - + @@ -640,7 +640,7 @@ Q 48.484375 72.75 52.59375 71.296875 - + @@ -685,7 +685,7 @@ Q 23.96875 32.421875 30.609375 32.421875 - + @@ -700,7 +700,7 @@ Q 23.96875 32.421875 30.609375 32.421875 - + @@ -741,7 +741,7 @@ z - + @@ -794,7 +794,7 @@ Q 18.3125 60.0625 18.3125 54.390625 - + @@ -809,7 +809,7 @@ Q 18.3125 60.0625 18.3125 54.390625 - + @@ -843,7 +843,7 @@ z - + @@ -869,7 +869,7 @@ z - + @@ -885,11 +885,11 @@ z - - + + - - + + From 7aab7023571cb1198626512a31280ae45d595ce5 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Mon, 29 Feb 2016 11:14:39 +0100 Subject: [PATCH 52/55] Pythonify the code and allow more than 35 samples in plotFingerprint...though it'll look like crap --- deeptools/plotFingerprint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py index 77c8f56fa..16cf67fa4 100644 --- a/deeptools/plotFingerprint.py +++ b/deeptools/plotFingerprint.py @@ -169,13 +169,13 @@ def main(args=None): i = 0 # matplotlib won't iterate through line styles by itself pyplot_line_styles = sum([7 * ["-"], 7 * ["--"], 7 * ["-."], 7 * [":"], 7 * ["."]], []) - for reads in num_reads_per_bin.T: + for i, reads in enumerate(num_reads_per_bin.T): count = np.cumsum(np.sort(reads)) count = count / count[-1] # to normalize y from 0 to 1 - plt.plot(x, count, label=args.labels[i], linestyle=pyplot_line_styles[i]) + j = i % 35 + plt.plot(x, count, label=args.labels[i], linestyle=pyplot_line_styles[j]) plt.xlabel('rank') plt.ylabel('fraction w.r.t. bin with highest coverage') - i += 1 plt.legend(loc='upper left') plt.suptitle(args.plotTitle) # set the plotFileFormat explicitly to None to trigger the From 2c8514a340c7aceedd3b715d45c3b7804f584654 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Mon, 29 Feb 2016 11:51:12 +0100 Subject: [PATCH 53/55] Update change log --- CHANGES.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGES.txt b/CHANGES.txt index 30e015d12..4980bddf6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,14 @@ +2.2.0 + + * plotFingerprint now iterates through line styles as well as colors. This allows up to 35 samples per plot without repeating (not that that many would ever be recommended). This was issue #80. + * Fixed a number of Galaxy wrappers, which were rendered incorrectly due to including a section title of "Background". + * A number of image file handles were previously not explicitly closed, which caused occasional completion of a plot* program but without the files actually being there. This only happened on some NFS mount points. + * The Galaxy wrappers now support the `--outFileNameData` option on plotProfile and plotHeatmap. + * Added support for blacklist regions. These can be supplied as a BED file and the regions will largely be skipped in processing (they'll also be ignored during normalization). This is very useful to skip regions known to attract excess signal. This was issue #101. + * Modified plotPCA to include the actual eigenvalues rather than rescaled ones. Also, plotPCA can now output the underlying values (issue #231). + * Regions within each feature body can now be unscaled when using `computeMatrix`. Thus, if you're interested in unscaled signal around the TSS/TES then you can now use the `--unscaled5prime` and `--unscaled3prime` options. This was issue #108. + * bamCoverage now has a `--filterRNAstrand` option, that will produce coverage for only a single strand. Note that the strand referred to is the DNA strand and not sense/anti-sense. + * Issues with plotHeatmap x-axis labels were fixed (issue #301). 2.1.1 From 81bd55f4632ed24508b390ffb4b46bb5cf56f587 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Mon, 29 Feb 2016 20:31:29 +0100 Subject: [PATCH 54/55] Update RTD for the RNA strand option in bamCoverage --- docs/content/tools/bamCoverage.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/content/tools/bamCoverage.rst b/docs/content/tools/bamCoverage.rst index 2c038d980..0d797fb4d 100644 --- a/docs/content/tools/bamCoverage.rst +++ b/docs/content/tools/bamCoverage.rst @@ -76,7 +76,10 @@ Regular bigWig track Separate tracks for each strand ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sometimes it makes sense to generate two independent :ref:`bigWig` files for all reads on the forward and reverse strand, respectively. +Sometimes it makes sense to generate two independent :ref:`bigWig` files for all reads on the forward and reverse strand, respectively. As of deepTools version 2.2, one can simply use the ``--filterRNAstrand`` option, such as ``--filterRNAstrand forward`` or ``--filterRNAstrand reverse``. This handles paired-end and single-end datasets. For older versions of deepTools, please see the instructions below. + +Versions before 2.2 +******************* To follow the examples, you need to know that ``-f`` will tell ``samtools view`` to **include** reads with the indicated flag, while ``-F`` will lead to the **exclusion** of reads with the respective flag. From c449d914aa88abb3dcd753832cdb0f2a07e6ceab Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 1 Mar 2016 14:30:21 +0100 Subject: [PATCH 55/55] Update the version before adding the tag (just so it's already done once the tag is added). --- deeptools/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeptools/_version.py b/deeptools/_version.py index df71596ec..611c4e0e4 100644 --- a/deeptools/_version.py +++ b/deeptools/_version.py @@ -2,4 +2,4 @@ # This file is originally generated from Git information by running 'setup.py # version'. Distribution tarballs contain a pre-generated copy of this file. -__version__ = '2.1.0' +__version__ = '2.2.0'