Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hap1/hap2 scaffolding #51

Merged
merged 16 commits into from
Sep 6, 2024
207 changes: 188 additions & 19 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ process {
]
}

withName: '.*GENOME_STATISTICS_RAW:BUSCO' {
withName: '.*GENOME_STATISTICS_RAW:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/${meta.id}.p_ctg.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -132,17 +132,17 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI_HIC' {
ext.prefix = { "${meta.id}.asm.hic.p_ctg" }
withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_HAP1_HIC' {
ext.prefix = { "${meta.id}.asm.hic.hap1" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT_HIC' {
ext.prefix = { "${meta.id}.asm.hic.a_ctg" }
withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_HAP2_HIC' {
ext.prefix = { "${meta.id}.asm.hic.hap2" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
Expand All @@ -158,7 +158,7 @@ process {
]
}
withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_PRI' {
ext.prefix = { "${meta.id}.asm.hic.p_ctg" }
ext.prefix = { "${meta.id}.asm.hic.hap1" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
Expand All @@ -167,16 +167,30 @@ process {
}

withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_HAP' {
ext.prefix = { "${meta.id}.asm.hic.a_ctg" }
ext.prefix = { "${meta.id}.asm.hic.hap2" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
pattern: '*assembly_summary'
]
}
withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO' {

withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.p_ctg.${meta.lineage}.busco" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap1.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.endsWith('busco.log') ? filename :
filename.endsWith('full_table.tsv') ? filename :
filename.endsWith('missing_busco_list.tsv') ? filename :
filename.startsWith('short_summary') ? filename :
filename.endsWith('busco.batch_summary.txt') ? filename :
null }
]
}

withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO_HAP' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap2.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.endsWith('busco.log') ? filename :
filename.endsWith('full_table.tsv') ? filename :
Expand All @@ -189,7 +203,7 @@ process {

withName: '.*GENOME_STATISTICS_RAW_HIC:MERQURYFK_MERQURYFK' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.p_ctg.ccs.merquryk" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap1.ccs.merquryk" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand Down Expand Up @@ -332,7 +346,7 @@ process {
]
}

withName: '.*GENOME_STATISTICS_PURGED:BUSCO' {
withName: '.*GENOME_STATISTICS_PURGED:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/${meta.id}.purged.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -499,7 +513,7 @@ process {
]
}

withName: '.*HIC_MAPPING:SAMTOOLS_MERGE_HIC_MAPPING' {
withName: '.*HIC_MAPPING.*:SAMTOOLS_MERGE_HIC_MAPPING' {
ext.prefix = { "${meta.id}_merged" }
}

Expand Down Expand Up @@ -530,7 +544,7 @@ process {
}


withName: '.*HIC_MAPPING:CONVERT_STATS:SAMTOOLS_VIEW' {
withName: '.*HIC_MAPPING.*:CONVERT_STATS:SAMTOOLS_VIEW' {
ext.args = "--output-fmt cram"
}

Expand Down Expand Up @@ -559,7 +573,7 @@ process {
}

// Set up of the scaffolding pipeline
withName: 'YAHS' {
withName: '.*SCAFFOLDING:YAHS' {
ext.prefix = 'out'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" },
Expand All @@ -568,7 +582,7 @@ process {
]
}

withName: 'COOLER_CLOAD' {
withName: '.*SCAFFOLDING:COOLER_CLOAD' {
// Positions in the input file are zero-based;
// chrom1 field number (one-based) is 2;
// pos1 field number (one-based) is 3;
Expand All @@ -582,7 +596,7 @@ process {
]
}

withName: 'PRETEXTSNAPSHOT' {
withName: '.*SCAFFOLDING:PRETEXTSNAPSHOT' {
// Make one plot containing all sequences
ext.args = '--sequences \"=full\"'
publishDir = [
Expand All @@ -592,7 +606,7 @@ process {
]
}

withName: 'JUICER_TOOLS_PRE' {
withName: '.*SCAFFOLDING:JUICER_TOOLS_PRE' {
ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar'
ext.juicer_jvm_params = '-Xms1g -Xmx6g'
publishDir = [
Expand All @@ -602,7 +616,7 @@ process {
]
}

withName: 'JUICER_PRE' {
withName: '.*SCAFFOLDING:JUICER_PRE' {
ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'"
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" },
Expand All @@ -620,7 +634,7 @@ process {
]
}

withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' {
withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
Expand All @@ -640,6 +654,161 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Scaffolding hap1/hap2
if (params.hifiasm_hic_on) {

// Settings for SAMTOOLS_MARKDUP_HIC_MAPPING when it runs inside a workflow whose
// name matches HIC_MAPPING_HAP* (the hap1/hap2 Hi-C mapping branch).
withName: '.*HIC_MAPPING_HAP.*:SAMTOOLS_MARKDUP_HIC_MAPPING' {
// Output file prefix: "<meta.id>_mkdup".
ext.prefix = { "${meta.id}_mkdup" }
publishDir = [
// One output directory per haplotype, keyed on meta.hap_id.
// NOTE(review): assumes the workflow adds hap_id to meta (e.g. hap1/hap2) — confirm.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml (returning null skips the file).
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Publishing rules for BAMTOBED_SORT in the hap1/hap2 Hi-C mapping branch.
withName: '.*HIC_MAPPING_HAP.*:BAMTOBED_SORT' {
publishDir = [
// Per-haplotype directory keyed on meta.hap_id.
// NOTE(review): assumes meta.hap_id is set upstream — confirm.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


// Publishing rules for CONVERT_STATS:SAMTOOLS_STATS in the hap1/hap2 Hi-C mapping branch.
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_STATS' {
publishDir = [
// Per-haplotype directory keyed on meta.hap_id.
// NOTE(review): assumes meta.hap_id is set upstream — confirm.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Publishing rules for CONVERT_STATS:SAMTOOLS_FLAGSTAT in the hap1/hap2 Hi-C mapping branch.
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_FLAGSTAT' {
publishDir = [
// Per-haplotype directory keyed on meta.hap_id.
// NOTE(review): assumes meta.hap_id is set upstream — confirm.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Publishing rules for CONVERT_STATS:SAMTOOLS_IDXSTATS in the hap1/hap2 Hi-C mapping branch.
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_IDXSTATS' {
publishDir = [
// Per-haplotype directory keyed on meta.hap_id.
// NOTE(review): assumes meta.hap_id is set upstream — confirm.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for YAHS when it runs inside a workflow whose name matches SCAFFOLDING_HAP*.
withName: '.*SCAFFOLDING_HAP.*:YAHS' {
// The output prefix is the haplotype id (the primary-assembly YAHS selector uses 'out').
// NOTE(review): the publish directory below is still named "out.break.yahs" and the
// scaffold BUSCO selectors publish to "out_scaffolds_final.*" — confirm that the
// prefix/directory naming mismatch is intended.
ext.prefix = { "${meta.hap_id}" }
publishDir = [
// Per-haplotype YAHS output directory.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

}

// Settings for COOLER_CLOAD in the hap1/hap2 scaffolding branch.
// The arguments are the same as in the '.*SCAFFOLDING:COOLER_CLOAD' selector;
// only the publish path differs.
withName: '.*SCAFFOLDING_HAP.*:COOLER_CLOAD' {
// Positions in the input file are zero-based;
// chrom1 field number (one-based) is 2;
// pos1 field number (one-based) is 3;
// chrom2 field number (one-based) is 6;
// pos2 field number (one-based) is 7
ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7'
publishDir = [
// Published alongside the per-haplotype YAHS outputs.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for PRETEXTSNAPSHOT in the hap1/hap2 scaffolding branch.
withName: '.*SCAFFOLDING_HAP.*:PRETEXTSNAPSHOT' {
// Make one plot containing all sequences
// (the escaped quotes put literal double quotes around =full in the argument string).
ext.args = '--sequences \"=full\"'
publishDir = [
// Published alongside the per-haplotype YAHS outputs.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for JUICER_TOOLS_PRE in the hap1/hap2 scaffolding branch.
withName: '.*SCAFFOLDING_HAP.*:JUICER_TOOLS_PRE' {
// Custom ext keys: the jar file name and the JVM heap options (1g initial, 6g maximum).
// NOTE(review): presumably read by the module script via task.ext — confirm.
ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar'
ext.juicer_jvm_params = '-Xms1g -Xmx6g'
publishDir = [
// Published alongside the per-haplotype YAHS outputs.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for JUICER_PRE in the hap1/hap2 scaffolding branch.
withName: '.*SCAFFOLDING_HAP.*:JUICER_PRE' {
// Secondary command string: sort on columns 2 and 6, then keep rows whose
// columns 3 and 7 are non-negative. The \$ escapes stop Groovy from
// interpolating, so awk receives literal $3 and $7.
// NOTE(review): -S50G hard-codes a 50G sort buffer — confirm it fits the
// memory allocated to this process.
ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'"
publishDir = [
// Published alongside the per-haplotype YAHS outputs.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}

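// End of hap1/hap2 Hi-C mapping and scaffolding selectors (guarded by params.hifiasm_hic_on);
// the hap1/hap2 scaffold statistics selectors follow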

// Settings for GFASTATS_PRI in the GENOME_STATISTICS_SCAFFOLDS_HAPS workflow.
// Results are published under the scaffolding_hap1 tree.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_PRI' {
// Output prefix: "<meta.id>_scaffolds_final". GFASTATS_HAP uses the same prefix,
// so the two are distinguished only by publish directory.
ext.prefix = { "${meta.id}_scaffolds_final" }
publishDir = [
// Hard-coded hap1 directory.
// NOTE(review): assumes this matches scaffolding_${meta.hap_id} for hap1 — confirm.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
// Only files matching this glob are published.
pattern: '*assembly_summary'
]
}

// Settings for GFASTATS_HAP in the GENOME_STATISTICS_SCAFFOLDS_HAPS workflow.
// Results are published under the scaffolding_hap2 tree.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_HAP' {
// Output prefix: "<meta.id>_scaffolds_final". GFASTATS_PRI uses the same prefix,
// so the two are distinguished only by publish directory.
ext.prefix = { "${meta.id}_scaffolds_final" }
publishDir = [
// Hard-coded hap2 directory.
// NOTE(review): assumes this matches scaffolding_${meta.hap_id} for hap2 — confirm.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
// Only files matching this glob are published.
pattern: '*assembly_summary'
]
}

// Publishing rules for BUSCO_PRI in the GENOME_STATISTICS_SCAFFOLDS_HAPS workflow.
// Results go to a lineage-specific folder under the scaffolding_hap1 tree.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:BUSCO_PRI' {
    publishDir = [
        path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" },
        mode: params.publish_dir_mode,
        saveAs: { filename ->
            // Keep only the log, the result tables and the summaries;
            // returning null leaves every other file unpublished.
            def wanted = filename.endsWith('busco.log') ||
                filename.endsWith('full_table.tsv') ||
                filename.endsWith('missing_busco_list.tsv') ||
                filename.startsWith('short_summary') ||
                filename.endsWith('busco.batch_summary.txt')
            wanted ? filename : null
        }
    ]
}

// Publishing rules for BUSCO_HAP in the GENOME_STATISTICS_SCAFFOLDS_HAPS workflow.
// Results go to a lineage-specific folder under the scaffolding_hap2 tree.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:BUSCO_HAP' {
    publishDir = [
        path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" },
        mode: params.publish_dir_mode,
        saveAs: { filename ->
            // Keep only the log, the result tables and the summaries;
            // returning null leaves every other file unpublished.
            def wanted = filename.endsWith('busco.log') ||
                filename.endsWith('full_table.tsv') ||
                filename.endsWith('missing_busco_list.tsv') ||
                filename.startsWith('short_summary') ||
                filename.endsWith('busco.batch_summary.txt')
            wanted ? filename : null
        }
    ]
}

// Publishing rules for MERQURYFK_MERQURYFK in the GENOME_STATISTICS_SCAFFOLDS_HAPS workflow.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:MERQURYFK_MERQURYFK' {
publishDir = [
// NOTE(review): a single selector publishes under the scaffolding_hap1 tree and
// there is no hap2 counterpart — confirm that this process evaluates both
// haplotypes in one run and that the hap1 location is intended.
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk" },
mode: params.publish_dir_mode,
// Publish every output except versions.yml.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// End of Scaffolding hap1/hap2
// End of Set up of the scaffolding pipeline

//Set up of assembly stats subworkflow
Expand Down
8 changes: 8 additions & 0 deletions modules/nf-core/hifiasm/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 17 additions & 4 deletions modules/nf-core/hifiasm/hifiasm.diff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading