diff --git a/chip.wdl b/chip.wdl index daf7c8a..90ddfe9 100644 --- a/chip.wdl +++ b/chip.wdl @@ -2096,6 +2096,7 @@ workflow chip { ctl_paired_ends = ctl_paired_end_, pipeline_type = pipeline_type, aligner = aligner_, + no_dup_removal = no_dup_removal, peak_caller = peak_caller_, cap_num_peak = cap_num_peak_, idr_thresh = idr_thresh, @@ -3046,6 +3047,7 @@ task qc_report { Array[Boolean] ctl_paired_ends String pipeline_type String aligner + Boolean no_dup_removal String peak_caller Int cap_num_peak Float idr_thresh @@ -3105,6 +3107,7 @@ task qc_report { command { set -e python3 $(which encode_task_qc_report.py) \ + --pipeline-prefix chip \ ${'--pipeline-ver ' + pipeline_ver} \ ${"--title '" + sub(title,"'","_") + "'"} \ ${"--desc '" + sub(description,"'","_") + "'"} \ @@ -3114,6 +3117,7 @@ task qc_report { --ctl-paired-ends ${sep=' ' ctl_paired_ends} \ --pipeline-type ${pipeline_type} \ --aligner ${aligner} \ + ${if (no_dup_removal) then '--no-dup-removal ' else ''} \ --peak-caller ${peak_caller} \ ${'--cap-num-peak ' + cap_num_peak} \ --idr-thresh ${idr_thresh} \ diff --git a/src/encode_task_qc_report.py b/src/encode_task_qc_report.py index b09c4c9..d637b9f 100755 --- a/src/encode_task_qc_report.py +++ b/src/encode_task_qc_report.py @@ -34,6 +34,8 @@ def parse_arguments(): help='Description for sample.') parser.add_argument('--genome', type=str, help='Reference genome.') + parser.add_argument('--pipeline-prefix', type=str, required=True, + help='Pipeline. e.g. 
atac, chip.') parser.add_argument('--pipeline-ver', type=str, help='Pipeline version.') parser.add_argument('--multimapping', default=0, type=int, @@ -50,6 +52,8 @@ def parse_arguments(): help='Pipeline type.') parser.add_argument('--aligner', type=str, required=True, help='Aligner.') + parser.add_argument('--no-dup-removal', action='store_true', + help='No duplicate removal.') parser.add_argument('--peak-caller', type=str, required=True, help='Peak caller.') parser.add_argument('--cap-num-peak', default=0, type=int, @@ -302,7 +306,7 @@ def make_cat_align(args, cat_root): html_head='
Filtered and duplicates removed
Filtered {dup_removal_detail}.
+ Subsampling with {pipeline_prefix}.{subsample_param_name} is not done in alignment steps.
+ Nodup BAM is converted into a BED type (TAGALIGN) later and then TAGALIGN is subsampled
+ with that parameter in the peak-calling step.
+
NRF (non redundant fraction)
- PBC1 (PCR Bottleneck coefficient 1)
- PBC2 (PCR Bottleneck coefficient 2)
+
Fragment: read for a single-ended dataset, pair of reads for a paired-ended dataset
+ NRF: non redundant fraction
+ PBC1: PCR Bottleneck coefficient 1
+ PBC2: PCR Bottleneck coefficient 2
PBC1 is the primary measure. Provisionally
Performed on subsampled ({xcor_subsample_reads}) reads mapped from FASTQs that are trimmed to {xcor_trim_bp}. - Such FASTQ trimming and subsampling reads are for cross-corrleation analysis only. + Such FASTQ trimming and subsampling are for the cross-correlation analysis only, and only R1 reads are taken. Untrimmed FASTQs are used for all the other analyses.
NOTE1: For SE datasets, reads from replicates are randomly subsampled to {xcor_subsample_reads}.
@@ -670,6 +683,7 @@ def make_cat_align_enrich(args, cat_root):
xcor_subsample_reads=args.xcor_subsample_reads
)
html_foot_xcor += """