Merge branch 'release/v2.2'
dpryan79 committed Mar 1, 2016
2 parents ae628ec + 0ec91d3 commit 5400ccd
Showing 47 changed files with 1,427 additions and 1,244 deletions.
11 changes: 11 additions & 0 deletions CHANGES.txt
@@ -1,3 +1,14 @@
2.2.0

* plotFingerprint now iterates through line styles as well as colors. This allows up to 35 samples per plot without repeating a style (not that that many samples would ever be recommended). This was issue #80.
* Fixed a number of Galaxy wrappers that were rendered incorrectly because they included a section titled "Background".
* A number of image file handles were not explicitly closed, which occasionally caused a plot* program to finish without its output files actually being written. This only happened on some NFS mount points.
* The Galaxy wrappers now support the `--outFileNameData` option on plotProfile and plotHeatmap.
* Added support for blacklist regions. These can be supplied as a BED file and the regions will largely be skipped during processing (they are also ignored during normalization). This is very useful for skipping regions known to attract excess signal. This was issue #101.
* Modified plotPCA to include the actual eigenvalues rather than rescaled ones. Also, plotPCA can now output the underlying values (issue #231).
* Regions within each feature body can now be left unscaled when using `computeMatrix`. If you're interested in the unscaled signal around the TSS/TES, you can now use the `--unscaled5prime` and `--unscaled3prime` options. This was issue #108.
* bamCoverage now has a `--filterRNAstrand` option that produces coverage for only a single strand. Note that the strand referred to is the DNA strand, not sense/anti-sense.
* Issues with plotHeatmap x-axis labels were fixed (issue #301).

2.1.1

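The 2.2.0 entries above introduce several new command-line options. The following is a minimal sketch, not part of this commit, of invoking bamCoverage with two of them from Python; bamCoverage.main() accepts the same argument list as the command-line tool, and all file names are placeholders.

    from deeptools import bamCoverage

    bamCoverage.main([
        "--bam", "sample.bam",                    # indexed BAM file (placeholder)
        "--outFileName", "forward_strand.bw",     # output coverage track (placeholder)
        "--outFileFormat", "bigwig",
        "--filterRNAstrand", "forward",           # new in 2.2.0: coverage for a single DNA strand
        "--blackListFileName", "blacklist.bed",   # new in 2.2.0: regions skipped during processing
    ])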
5 changes: 4 additions & 1 deletion deeptools/SES_scaleFactor.py
@@ -13,7 +13,7 @@

def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
normalizationLength,
avg_method='median', numberOfProcessors=1,
avg_method='median', blackListFileName=None, numberOfProcessors=1,
verbose=False, chrsToSkip=[]):
r"""
Subdivides the genome into chunks to be analyzed in parallel
@@ -41,6 +41,8 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
chrsToSkip : list
names of the chromosomes to be excluded from the
scale estimation. Usually chrX is included in this list.
blackListFileName : str
BED file containing blacklisted regions
Returns
-------
@@ -84,6 +86,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
binLength=binLength,
numberOfSamples=numberOfSamples,
extendReads=False,
blackListFileName=blackListFileName,
numberOfProcessors=numberOfProcessors,
verbose=verbose,
chrsToSkip=chrsToSkip)
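For illustration, a hypothetical call using the extended signature shown above; the BAM and BED file names are placeholders and the parameter values are arbitrary.

    from deeptools.SES_scaleFactor import estimateScaleFactor

    size_factors = estimateScaleFactor(
        ["treatment.bam", "input.bam"],      # BAM files to compare (placeholders)
        binLength=1000,
        numberOfSamples=10000,
        normalizationLength=1,
        avg_method="median",
        blackListFileName="blacklist.bed",   # new argument: regions excluded from the estimate
        numberOfProcessors=4,
        chrsToSkip=["chrX"],
    )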
2 changes: 1 addition & 1 deletion deeptools/_version.py
@@ -2,4 +2,4 @@
# This file is originally generated from Git information by running 'setup.py
# version'. Distribution tarballs contain a pre-generated copy of this file.

__version__ = '2.1.0'
__version__ = '2.2.0'
8 changes: 8 additions & 0 deletions deeptools/bamCompare.py
@@ -157,7 +157,12 @@ def get_scale_factors(args):
bam2 = bamHandler.openBam(args.bamfile2)

bam1_mapped = parserCommon.bam_total_reads(bam1, args.ignoreForNormalization)
blacklisted = parserCommon.bam_blacklisted_reads(bam1, args.ignoreForNormalization, args.blackListFileName)
bam1_mapped -= blacklisted

bam2_mapped = parserCommon.bam_total_reads(bam2, args.ignoreForNormalization)
blacklisted = parserCommon.bam_blacklisted_reads(bam2, args.ignoreForNormalization, args.blackListFileName)
bam2_mapped -= blacklisted

if args.scaleFactors:
scale_factors = map(float, args.scaleFactors.split(":"))
@@ -167,6 +172,7 @@
[bam1.filename, bam2.filename],
args.sampleLength, args.numberOfSamples,
1,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose,
chrsToSkip=args.ignoreForNormalization)
@@ -214,6 +220,7 @@ def get_scale_factors(args):
from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
frag_len_dict, read_len_dict = get_read_and_fragment_length(bamfile,
return_lengths=False,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose)
if args.extendReads:
@@ -295,6 +302,7 @@ def main(args=None):
region=args.region,
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
blackListFileName=args.blackListFileName,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
center_read=args.centerReads,
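A simplified sketch of the adjustment introduced above: reads overlapping blacklisted regions are subtracted from each file's mapped-read total before scale factors are computed, so blacklisted signal does not inflate the effective library size. The counts below are made up.

    def effective_mapped_reads(total_mapped, blacklisted_reads):
        # Read count used for normalization once blacklisted reads are removed.
        return total_mapped - blacklisted_reads

    bam1_effective = effective_mapped_reads(30000000, 1200000)   # 28800000
    bam2_effective = effective_mapped_reads(25000000, 900000)    # 24100000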
86 changes: 86 additions & 0 deletions deeptools/bamCoverage.py
@@ -77,6 +77,12 @@ def get_optional_args():
'*NOTE*: Requires paired-end data. A bin size of 1 is recommended.',
action='store_true')

optional.add_argument('--filterRNAstrand',
help='Selects RNA-seq reads (single-end or paired-end) '
'originating from the given strand.',
choices=['forward', 'reverse'],
default=None)

return parser


@@ -110,12 +116,15 @@ def get_scale_factor(args):
scale_factor = args.scaleFactor
bam_handle = bamHandler.openBam(args.bam)
bam_mapped = parserCommon.bam_total_reads(bam_handle, args.ignoreForNormalization)
blacklisted = parserCommon.bam_blacklisted_reads(bam_handle, args.ignoreForNormalization, args.blackListFileName)
bam_mapped -= blacklisted

if args.normalizeTo1x:
# try to guess fragment length if the bam file contains paired end reads
from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam,
return_lengths=False,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose)
if args.extendReads:
@@ -174,12 +183,14 @@ def main(args=None):
debug = 0

func_args = {'scaleFactor': get_scale_factor(args)}

if args.MNase:
# check that library is paired end
# using getFragmentAndReadSize
from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam,
return_lengths=False,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose)
if frag_len_dict is None:
@@ -189,6 +200,7 @@
binLength=args.binSize,
stepSize=args.binSize,
region=args.region,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
@@ -200,11 +212,29 @@
verbose=args.verbose,
)

elif args.filterRNAstrand:
wr = filterRnaStrand([args.bam],
binLength=args.binSize,
stepSize=args.binSize,
region=args.region,
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
center_read=args.centerReads,
zerosToNans=args.skipNonCoveredRegions,
samFlag_include=args.samFlagInclude,
samFlag_exclude=args.samFlagExclude,
verbose=args.verbose,
)

wr.filter_strand = args.filterRNAstrand
else:
wr = writeBedGraph.WriteBedGraph([args.bam],
binLength=args.binSize,
stepSize=args.binSize,
region=args.region,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
@@ -217,6 +247,7 @@
)

wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName,
blackListFileName=args.blackListFileName,
format=args.outFileFormat, smoothLength=args.smoothLength)


@@ -249,3 +280,58 @@ def get_fragment_from_read(self, read):
fragment_end = fragment_start + 3

return [(fragment_start, fragment_end)]


class filterRnaStrand(writeBedGraph.WriteBedGraph):
"""
Class to redefine the get_fragment_from_read for the --filterRNAstrand case
Only reads either forward or reverse are kept as follows:
For paired-end
--------------
reads forward:
1. alignments of the second in pair (128) if they map to the forward strand (~16)
2. alignments of the first in pair (64) if they map to the reverse strand (~32)
1. include 128, exclude 16
or
2. include 64 exclude 32
reads reverse:
1. alignments of the second in pair (128) if it maps to the reverse strand (16) 128 & 16 = 144
2. alignments of the first in pair (64) if their mates map to the reverse strand (32) 64 & 32 = 96
1. include 144
or
2. include 96
For single-end
--------------
forward: include 16 (map forward strand)
reverse: exclude 16
"""

def get_fragment_from_read(self, read):
"""
Gets only reads for the given strand
"""
fragment_start = fragment_end = None

# paired-end reads: select alignments by their SAM flag combination
if read.is_paired:
if self.filter_strand == 'forward':
if (read.flag & 128 == 128 and read.flag & 16 == 0) or (read.flag & 64 == 64 and read.flag & 32 == 0):
return read.get_blocks()
else:
if read.flag & 144 == 144 or read.flag & 96 == 96:
return read.get_blocks()
else:
if self.filter_strand == 'forward' and read.flag & 16 == 16:
return read.get_blocks()
elif self.filter_strand == 'reverse' and read.flag & 16 == 0:
return read.get_blocks()

return [(fragment_start, fragment_end)]
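To make the bit arithmetic above concrete, here is a small self-contained sketch of the same paired-end flag tests, with a few example SAM flags (16 = read reverse strand, 32 = mate reverse strand, 64 = first in pair, 128 = second in pair). It mirrors the paired-end branches of get_fragment_from_read and is not part of this commit.

    def keeps_forward(flag):
        # Paired-end test used for --filterRNAstrand forward (include 128 / exclude 16, or include 64 / exclude 32).
        return (flag & 128 == 128 and flag & 16 == 0) or (flag & 64 == 64 and flag & 32 == 0)

    def keeps_reverse(flag):
        # Paired-end test used for --filterRNAstrand reverse (include 144, or include 96).
        return flag & 144 == 144 or flag & 96 == 96

    assert keeps_forward(163)       # 1+2+32+128: second in pair, mapped to the forward strand
    assert keeps_reverse(147)       # 1+2+16+128: second in pair, mapped to the reverse strand
    assert keeps_reverse(99)        # 1+2+32+64: first in pair, mate on the reverse strand
    assert not keeps_forward(147)   # a reverse-strand second-in-pair read is excluded from "forward"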
6 changes: 6 additions & 0 deletions deeptools/bamPEFragmentSize.py
@@ -54,6 +54,10 @@ def parse_arguments():
'decreased. (default 1000000)',
default=1000000,
type=int)
parser.add_argument('--blackListFileName', '-bl',
help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.",
metavar="BED file",
required=False)
parser.add_argument('--verbose',
help='Set if processing data messages are wanted.',
action='store_true',
@@ -67,6 +71,7 @@
def main(args=None):
args = parse_arguments().parse_args(args)
fragment_len_dict, read_len_dict = get_read_and_fragment_length(args.bam, return_lengths=True,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose,
binSize=args.binSize,
@@ -111,6 +116,7 @@ def main(args=None):
plt.ylabel('Frequency')
plt.title(args.plotTitle)
plt.savefig(args.histogram, bbox_inches=0)
plt.close()


if __name__ == "__main__":
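A hypothetical call mirroring the change above, assuming a paired-end BAM; the file paths are placeholders.

    from deeptools.getFragmentAndReadSize import get_read_and_fragment_length

    frag_len_dict, read_len_dict = get_read_and_fragment_length(
        "paired_end.bam",                    # placeholder BAM file
        return_lengths=True,
        blackListFileName="blacklist.bed",   # new: fragments in these regions are ignored
        numberOfProcessors=4,
        verbose=False,
    )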
1 change: 1 addition & 0 deletions deeptools/bigwigCompare.py
@@ -98,6 +98,7 @@ def main(args=None):
(args.bigwig2, 'bigwig')],
args.outFileName, 0, FUNC,
function_args, tileSize=args.binSize, region=args.region,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
format=args.outFileFormat,
smoothLength=False,
27 changes: 4 additions & 23 deletions deeptools/computeGCBias.py
@@ -22,7 +22,7 @@


def parse_arguments(args=None):
parentParser = parserCommon.getParentArgParse(binSize=False)
parentParser = parserCommon.getParentArgParse(binSize=False, blackList=True)
requiredArgs = getRequiredArgs()
parser = argparse.ArgumentParser(
parents=[requiredArgs, parentParser],
@@ -96,26 +96,6 @@ def getRequiredArgs():
help='Number of sampling points to be considered.',
type=int)

optional.add_argument(
'--filterOut', '-fo',
help='BED file containing genomic regions to be excluded '
'from the estimation of the correction. Such regions '
'usually contain repetitive regions and peaks that, if '
'included, would bias the correction. '
'It is recommended to filter out known repetitive regions '
'if multimappers (reads that map to more than one genomic '
'position) were excluded. In the case of ChIP-seq data, '
'it is recommended to first use '
'a peak caller to identify and filter out the identified peaks.\n\n'
'Mappability tracks for different read lengths can be '
'found at: http://hgdownload.cse.ucsc.edu/gbdb/mm9/bbi/ '
'and http://hgdownload.cse.ucsc.edu/gbdb/hg19/bbi \n\n'
'The script: scripts/mappabilityBigWig_to_unmappableBed.sh '
'can be used to get a BED file from the mappability '
'bigWig.',
type=argparse.FileType('r'),
metavar='BED file')

optional.add_argument('--extraSampling',
help='BED file containing genomic regions for which '
'extra sampling is required because they are '
@@ -632,13 +612,14 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N
ax2.set_xlim(0.2, 0.7)
plt.tight_layout()
plt.savefig(file_name, bbox_inches='tight', dpi=100, format=image_format)
plt.close()


def main(args=None):
args = parse_arguments().parse_args(args)
# check if directory is writable
if args.filterOut:
filter_out_file = args.filterOut.name
if args.blackListFileName:
filter_out_file = args.blackListFileName
else:
filter_out_file = None
24 changes: 22 additions & 2 deletions deeptools/computeMatrix.py
@@ -176,6 +176,17 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]):
'of the given regions. If the '
'regions are genes, this would be the distance '
'downstream of the transcription end site.')
optional.add_argument("--unscaled5prime",
default=0,
type=int,
help='Number of bases at the 5-prime end of the '
'region to exclude from scaling. By default, '
'each region is scaled to a given length (see the --regionBodyLength option). In some cases it is useful to look at unscaled signals around region boundaries, so this setting specifies the number of unscaled bases on the 5-prime end of each boundary.')
optional.add_argument("--unscaled3prime",
default=0,
type=int,
help='Like --unscaled5prime, but for the 3-prime '
'end.')

elif case == 'reference-point':
optional.add_argument('--referencePoint',
@@ -191,6 +202,8 @@
# set region body length to zero for reference point mode
optional.add_argument('--regionBodyLength', help=argparse.SUPPRESS,
default=0, type=int)
optional.add_argument('--unscaled5prime', default=0, type=int, help=argparse.SUPPRESS)
optional.add_argument('--unscaled3prime', default=0, type=int, help=argparse.SUPPRESS)
optional.add_argument('--beforeRegionStartLength', '-b', '--upstream',
default=500,
type=int,
@@ -279,6 +292,11 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]):
'(e.g. micro satellites) that may bias the average '
'values.')

optional.add_argument('--blackListFileName', '-bl',
help="A BED file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered.",
metavar="BED file",
required=False)

# in contrast to other tools,
# computeMatrix by default outputs
# messages and the --quiet flag suppresses them
@@ -360,13 +378,15 @@ def main(args=None):
'nan after end': args.nanAfterEnd,
'proc number': args.numberOfProcessors,
'sort regions': args.sortRegions,
'sort using': args.sortUsing
'sort using': args.sortUsing,
'unscaled 5 prime': args.unscaled5prime,
'unscaled 3 prime': args.unscaled3prime
}

hm = heatmapper.heatmapper()

scores_file_list = args.scoreFileName
hm.computeMatrix(scores_file_list, bed_file, parameters, verbose=args.verbose)
hm.computeMatrix(scores_file_list, bed_file, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose)
if args.sortRegions != 'no':
hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions)

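A minimal sketch, not part of this commit, of driving computeMatrix with the new unscaled-region and blacklist options; computeMatrix.main() takes the same argument list as the command-line tool, and the file names are placeholders.

    from deeptools import computeMatrix

    computeMatrix.main([
        "scale-regions",
        "--scoreFileName", "signal.bw",       # bigWig with the signal (placeholder)
        "--regionsFileName", "regions.bed",   # BED file with the regions (placeholder)
        "--outFileName", "matrix.mat.gz",
        "--regionBodyLength", "2000",
        "--unscaled5prime", "200",            # new in 2.2.0: 200 bp at the 5' end left unscaled
        "--unscaled3prime", "200",            # new in 2.2.0: 200 bp at the 3' end left unscaled
        "--blackListFileName", "blacklist.bed",
    ])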
2 changes: 1 addition & 1 deletion deeptools/correctGCBias.py
@@ -21,7 +21,7 @@


def parse_arguments(args=None):
parentParser = parserCommon.getParentArgParse(binSize=True)
parentParser = parserCommon.getParentArgParse(binSize=True, blackList=False)
requiredArgs = getRequiredArgs()
parser = argparse.ArgumentParser(
parents=[requiredArgs, parentParser],