diff --git a/NEWS b/NEWS index 8c15d0e..33dbf81 100644 --- a/NEWS +++ b/NEWS @@ -1,25 +1,34 @@ *********************************** -CHANGES IN VERSION 2.7.7 +CHANGES IN VERSION 2.7.8 NEW FEATURES o Religation are not discarding and considered as invalid pairs. Religation are defined as FR read pairs involving contiguous restriction fragments +SIGNIFICANT USER-VISIBLE CHANGES + + o Clean hicpro2juicebox.sh utils + +*********************************** +CHANGES IN VERSION 2.7.7 + +NEW FEATURES + o New utility : sparseToDense.py - Convert a sparse symmetric matrix file in dense format for further analysis o Pull request from J. Brayet for installation process SIGNIFICANT USER-VISIBLE CHANGES - o Update of the hicpro2juicebox.sh utils. -g option now requires the chromosome size files from HiC-Pro. This version now works for any organism. + o Update of the hicpro2juicebox.sh utils. -g option now requires the chromosome size files from HiC-Pro. This version now works for any organism o Update of the manual o Check python module version during installation - o Pull request NV - udpate iced version. Note that this version of iced rescales automatically the normalized matrix such that the total number of counts is identical to the original matrix. + o Pull request NV - udpate iced version. Note that this version of iced rescales automatically the normalized matrix such that the total number of counts is identical to the original matrix - o Add -c option in hicpro2juicebox.sh utils to write the "chr" prefix before the chromosome name. The use of "chr" depends on the reference genome of juicebox. + o Add -c option in hicpro2juicebox.sh utils to write the "chr" prefix before the chromosome name. The use of "chr" depends on the reference genome of juicebox *********************************** CHANGES IN VERSION 2.7.6 diff --git a/bin/HiC-Pro b/bin/HiC-Pro index cc77075..d0c8b1c 100755 --- a/bin/HiC-Pro +++ b/bin/HiC-Pro @@ -8,7 +8,7 @@ ## Public License, either Version 2, June 1991 or Version 3, June 2007. SOFT="HiC-Pro" -VERSION="2.7.7" +VERSION="2.7.8" function usage { echo -e "usage : $SOFT -i INPUT -o OUTPUT -c CONFIG [-s ANALYSIS_STEP] [-p] [-h] [-v]" diff --git a/bin/utils/hicpro2juicebox.sh b/bin/utils/hicpro2juicebox.sh index a93fcb5..0aaa40a 100755 --- a/bin/utils/hicpro2juicebox.sh +++ b/bin/utils/hicpro2juicebox.sh @@ -132,6 +132,6 @@ else fi ## Clean -/bin/rm -f ${TEMP}/$$_resfrag.juicebox ${TEMP}/$$_allValidPairs.pre_juicebox_sorted +#/bin/rm -f ${TEMP}/$$_resfrag.juicebox ${TEMP}/$$_allValidPairs.pre_juicebox_sorted echo "done !" diff --git a/scripts/mapped_2hic_fragments.py b/scripts/mapped_2hic_fragments.py index c7c0b7e..1f2b41b 100755 --- a/scripts/mapped_2hic_fragments.py +++ b/scripts/mapped_2hic_fragments.py @@ -247,11 +247,40 @@ def get_overlapping_restriction_fragment(resFrag, chrom, read): return None +def are_contiguous_fragments(frag1, frag2, chr1, chr2): + ''' + Compare fragment positions to check if they are contiguous + ''' + ret = False + if chr1 == chr2: + if int(frag1.start) < int(frag2.start): + d = int(frag2.start) - int(frag1.end) + else: + d = int(frag1.start) - int(frag2.end) + + if d == 0: + ret = True + + return ret + +def is_religation(read1, read2, frag1, frag2): + """ + Reads are expected to map adjacent fragments + Check the orientation of reads -><- + + """ + ret=False + if are_contiguous_fragments(frag1, frag2, read1.tid, read2.tid): + if get_read_strand(r1) == "+" and get_read_strand(r2) == "-": + ret=True + return ret + + def is_self_circle(read1, read2): """ Both reads are expected to be on the same restriction fragments - Check the orientation of reads <--> + read1 : [AlignedRead] read2 : [AlignedRead] """ @@ -284,7 +313,6 @@ def is_dangling_end(read1, read2): def get_valid_orientation(read1, read2): """ Both reads are expected to be on the different restriction fragments - Check the orientation of reads ->-> / <-<- / -><- / <--> read1 : [AlignedRead] @@ -318,6 +346,7 @@ def get_PE_fragment_size(read1, read2, resFrag1, resFrag2, interactionType): interactionType : Type of interaction from get_interaction_type() [str] """ + fragmentsize = None # Get oriented reads @@ -330,11 +359,11 @@ def get_PE_fragment_size(read1, read2, resFrag1, resFrag2, interactionType): rfrag1 = resFrag1 rfrag2 = resFrag2 - ## In this case used the read 3' end ! + ## In this case use the read 3' end ! r1pos = get_read_start(r1) r2pos = get_read_start(r2) - if interactionType == "DE": + if interactionType == "DE" or interactionType == "RE": fragmentsize = r2pos - r1pos elif interactionType == "SC": fragmentsize = (r1pos - rfrag1.start) + (rfrag2.end - r2pos) @@ -363,6 +392,7 @@ def get_interaction_type(read1, read1_chrom, resfrag1, read2, - Interaction - Self circle - Dangling end + - Religation - Unknown ## @@ -375,6 +405,7 @@ def get_interaction_type(read1, read1_chrom, resfrag1, read2, verbose = verbose mode [logical] """ + # If returned InteractionType=None -> Same restriction fragment # and same strand = Dump interactionType = None @@ -388,6 +419,8 @@ def get_interaction_type(read1, read1_chrom, resfrag1, read2, # Dangling_end -> <- elif is_dangling_end(read1, read2): interactionType = "DE" + elif is_religation(read1, read2, resfrag1, resfrag2): + interactionType = "RE" else: interactionType = "VI" elif r1.is_unmapped or r2.is_unmapped: @@ -468,6 +501,7 @@ def get_read_tag(read, tag): # Initialize variables reads_counter = 0 de_counter = 0 + re_counter = 0 sc_counter = 0 valid_counter = 0 valid_counter_FF = 0 @@ -497,6 +531,7 @@ def get_read_tag(read, tag): if allOutput: handle_de = open(outputDir + '/' + baseReadsFile + '.DEPairs', 'w') + handle_re = open(outputDir + '/' + baseReadsFile + '.REPairs', 'w') handle_sc = open(outputDir + '/' + baseReadsFile + '.SCPairs', 'w') handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w') handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs', 'w') @@ -545,9 +580,7 @@ def get_read_tag(read, tag): r2_resfrag = None r2_chrom = None - if r1_resfrag is not None or r2_resfrag is not None: - interactionType = get_interaction_type(r1, r1_chrom, r1_resfrag, r2, r2_chrom, r2_resfrag, verbose) dist = get_PE_fragment_size(r1, r2, r1_resfrag, r2_resfrag, interactionType) cdist = get_cis_dist(r1, r2) @@ -608,6 +641,10 @@ def get_read_tag(read, tag): de_counter += 1 cur_handler = handle_de if allOutput else None + elif interactionType == "RE": + re_counter += 1 + cur_handler = handle_re if allOutput else None + elif interactionType == "SC": sc_counter += 1 cur_handler = handle_sc if allOutput else None @@ -692,13 +729,13 @@ def get_read_tag(read, tag): or2_fragname + "\t" + str(or1.mapping_quality) + "\t" + str(or2.mapping_quality) + "\n") + ## Keep initial order if samOut: r1.tags = r1.tags + [('CT', str(interactionType))] r2.tags = r2.tags + [('CT', str(interactionType))] handle_sam.write(r1) handle_sam.write(r2) - if (reads_counter % 100000 == 0 and verbose): print "##", reads_counter @@ -707,6 +744,7 @@ def get_read_tag(read, tag): handle_valid.close() if allOutput: handle_de.close() + handle_re.close() handle_sc.close() handle_dump.close() handle_single.close() @@ -724,6 +762,7 @@ def get_read_tag(read, tag): handle_stat.write( "Valid_interaction_pairs_FR\t" + str(valid_counter_FR) + "\n") handle_stat.write("Dangling_end_pairs\t" + str(de_counter) + "\n") + handle_stat.write("Religation_pairs\t" + str(re_counter) + "\n") handle_stat.write("Self_Cycle_pairs\t" + str(sc_counter) + "\n") handle_stat.write("Single-end_pairs\t" + str(single_counter) + "\n") handle_stat.write("Dumped_pairs\t" + str(dump_counter) + "\n") diff --git a/scripts/plot_hic_fragment.R b/scripts/plot_hic_fragment.R index 8558859..495c742 100644 --- a/scripts/plot_hic_fragment.R +++ b/scripts/plot_hic_fragment.R @@ -25,7 +25,7 @@ if (la > 0){ getHiCMat <- function(x){ require(RColorBrewer) - invalid.lab <- intersect(names(x), c("Self_Cycle_pairs", "Dangling_end_pairs", "Single-end_pairs", "Dumped_pairs")) + invalid.lab <- intersect(names(x), c("Self_Cycle_pairs", "Dangling_end_pairs", "Religation_pairs", "Single-end_pairs", "Dumped_pairs")) valid.lab <- intersect(names(x), c("Valid_interaction_pairs_FF", "Valid_interaction_pairs_RR", "Valid_interaction_pairs_RF", "Valid_interaction_pairs_FR")) x <- x[c("Valid_interaction_pairs", valid.lab, invalid.lab)]