Skip to content

Commit

Permalink
Wrokarounf plink2 concatetation failure
Browse files Browse the repository at this point in the history
We have seen plink2 failing to concatenate multiple chinks coming after liftover operation. \n We found he issue to be caused efter lifting over in splits within chromosomes. \n We found some positions hg19->hg39 SNP positions to fall very far from the neighbour SNPs. This causes some variants to break the incresing position ordering per chromosome, in which case, some variants should belong to other splits. \n To fix this we are performing the merge relying on plink1.9 merge-list function. \n To see more about the issue -> chrchang/plink-ng#232
  • Loading branch information
AMCalejandro committed Aug 3, 2023
1 parent b537008 commit c5db930
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 20 deletions.
29 changes: 19 additions & 10 deletions modules/geneticqc/merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ process MERGER_SPLITS {
file "*" //from p1_run_processed_ch_chunk.collect()
output:
tuple file("${vSimple}.psam"), file("${vSimple}.pgen"), file("${vSimple}.pvar"), file("${vSimple}.log"), emit: snpchunks_qc_merged //into p1_merge_chunk_processed_ch
//tuple file("${vSimple}.fam"), file("${vSimple}.bed"), file("${vSimple}.bim"), file("${vSimple}.log"), emit: snpchunks_qc_merged //into p1_merge_chunk_processed_ch
//file "*.log"
script:
vSimple = mergelist.getSimpleName()
Expand All @@ -17,13 +18,21 @@ process MERGER_SPLITS {
# run merge command on tmp list
set +x
plink2 --pmerge-list ${mergelist} \
--make-pgen \
--sort-vars \
--out ${vSimple}
plink2 --make-bed \
--pfile ${vSimple} \
--out ${vSimple}
plink --merge-list ${mergelist} \
--out ${vSimple}
plink2 --bfile ${vSimple} \
--make-pgen \
--sort-vars \
--out ${vSimple}
#plink2 --pmerge-list ${mergelist} \
#--make-pgen \
#--sort-vars \
#--out ${vSimple}
#plink2 --make-bed \
# --pfile ${vSimple} \
# --out ${vSimple}
"""
} else {
"""
Expand All @@ -43,15 +52,15 @@ process MERGER_CHRS {
storeDir "${STORE_DIR}/${params.dataset}/p2_merged_cache"
//publishDir "${OUTPUT_DIR}/${params.out}_${params.datetime}/logs", mode: 'copy', overwrite: true, pattern: "*.log"
publishDir "${OUTPUT_DIR}/${params.dataset}/LOGS/MERGER_CHRS_LOGS_${params.datetime}/logs", mode: 'copy', overwrite: true, pattern: "*.log"

label 'large_mem'

input:
file mergelist
file "*" //from input_p2_merge_list_files.collect()
path "*" //from input_p2_merge_list_files.collect()
output:
//file (" ${plink_prefix}.{bed,fam,bim,pgen,pvar,psam}") //into input_p2_merged_plink
file ("allchr_${params.dataset}_p2in.{bed,fam,bim,pgen,pvar,psam,log}") //into input_p2_merged_plink
path ("allchr_${params.dataset}_p2in.{bed,fam,bim,pgen,pvar,psam,log}") //into input_p2_merged_plink
//file "*.log" into p2_mergelist_log
script:
"""
Expand Down
7 changes: 6 additions & 1 deletion modules/geneticqc/qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ process GENETICQC {
input:
tuple val(vSimple), path(fOrig), path(fSplit) //from input_p1_run_ch
output:
tuple file("${output}.pgen"), file("${output}.pvar"), file("${output}.psam"), emit: snpchunks_qc //into p1_run_processed_ch
//tuple file("${output}.pgen"), file("${output}.pvar"), file("${output}.psam"), emit: snpchunks_qc
tuple file("${output}.bed"), file("${output}.bim"), file("${output}.fam"), emit: snpchunks_merge //into p1_run_processed_ch
tuple val(vSimple), val("${output}"), emit: snpchunks_names //into p1_run_chunklist_ch

script:
Expand Down Expand Up @@ -75,5 +76,9 @@ process GENETICQC {
${params.assembly} \
${chrnum} \
${prefix}
plink2 --pfile ${output} \
--make-bed \
--out ${output}
"""
}
12 changes: 3 additions & 9 deletions subworkflows/fullqc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -87,21 +87,15 @@ workflow DOQC {
// RUN THE GENOTYPE DATA QC
GENETICQC( input_p1_run_ch )

// Generate some channels we will need downstream
GENETICQC.out.snpchunks_qc
.groupTuple(by: 0)
.flatten()
.collate(5)
.set { gwasinput_genetic }

GENETICQC.out.snpchunks_names
.collectFile( newLine: true )
{ vSimple, prefix -> ["${vSimple}.mergelist.txt", prefix] }
.set{ chunknames }


// DO ALL THE MERGING AFTER SPLITTING TO HANDLE BIG DATA MEMORY HUNTING
MERGER_SPLITS(chunknames, GENETICQC.out.snpchunks_qc.collect())
//MERGER_SPLITS(chunknames, GENETICQC.out.snpchunks_qc.collect())
MERGER_SPLITS(chunknames, GENETICQC.out.snpchunks_merge.collect())

MERGER_SPLITS.out
.collect()
.flatten()
Expand Down

0 comments on commit c5db930

Please sign in to comment.