Skip to content

Commit

Permalink
[LINT]
Browse files Browse the repository at this point in the history
  • Loading branch information
nservant committed Jan 26, 2024
1 parent 38f1ee9 commit 105da4e
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 105 deletions.
8 changes: 4 additions & 4 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
"python.linting.flake8Path": "/opt/conda/bin/flake8",
"python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
"python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
"python.linting.pylintPath": "/opt/conda/bin/pylint"
"python.linting.pylintPath": "/opt/conda/bin/pylint",
},

// Add the IDs of extensions you want installed when the container is created.
"extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
}
}
"extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"],
},
},
}
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`

- New subworkflow based on `pairtools` to detect valid pairs. The user
can now choose between `--processing hicpro` (default) or `--processing pairtools`
can now choose between `--processing hicpro` (default) or `--processing pairtools`

- Default mapping options with `HiC-Pro` have been updated to give closer results in comparison
with `BWA-mem/pairtools`
with `BWA-mem/pairtools`

### `Removed`

Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ On release, automated continuous integration tests run the pipeline on a full-si
2. Detection of valid interaction products
3. Duplicates removal
4. Generate raw and normalized contact maps ([`iced`](https://github.com/hiclib/iced))
5. Generate `pairs` files for downstream analysis
2. [`Pairtools`](https://github.com/open2c/pairtools)
2. Generate `pairs` files for downstream analysis
3. [`Pairtools`](https://github.com/open2c/pairtools)
1. Mapping using [`BWA-mem`](https://github.com/lh3/bwa)
2. Detection of valid interaction products with [`pairtools`](https://github.com/open2c/pairtools)
3. Duplicates removal
4. Generate `pairs` files for downstream analysis
4. Detection of valid interaction products with [`pairtools`](https://github.com/open2c/pairtools)
5. Duplicates removal
6. Generate `pairs` files for downstream analysis
3. Create genome-wide contact maps at various resolutions ([`cooler`](https://github.com/open2c/cooler))
4. Contact maps normalization using balancing algorithm ([`cooler`](https://github.com/open2c/cooler))
5. Export to various contact maps formats ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`cooler`](https://github.com/open2c/cooler))
Expand Down
15 changes: 7 additions & 8 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ process {
// PAIRTOOLS

withName: 'BWA_MEM' {
publishDir = [
publishDir = [
path: { "${params.outdir}/bwa" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
mode: 'copy',
Expand Down Expand Up @@ -241,19 +241,19 @@ process {
]
ext.args = { params.save_interaction_bam ? "--output-sam ${meta.id}_pairtools.bam" : '' }
}

withName: 'SAMTOOLS_SORT' {
publishDir = [
publishDir = [
path: { "${params.outdir}/pairtools" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
mode: 'copy',
enabled: params.save_pairs_intermediates
]
ext.prefix = { "${meta.id}_pairtools_sorted" }
ext.prefix = { "${meta.id}_pairtools_sorted" }
}

withName: 'SAMTOOLS_INDEX' {
publishDir = [
publishDir = [
path: { "${params.outdir}/pairtools" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
mode: 'copy',
Expand All @@ -268,7 +268,7 @@ process {
pattern: "*.pairs.stat"
]
ext.args = { "--mark-dups" }
ext.prefix = { "${meta.id}_dedup" }
ext.prefix = { "${meta.id}_dedup" }
ext.when = !params.keep_dups
}

Expand All @@ -282,8 +282,7 @@ process {
ext.args = { [
params.min_mapq > 0 ? "(mapq1>${params.min_mapq} and mapq2>${params.min_mapq})" : '',
params.min_cis_dist > 0 ? " and ((chrom1==chrom2 and abs(pos1-pos2) > ${params.min_cis_dist}) or chrom1!=chrom2)" : '',
params.keep_multi ? " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU') or (pair_type.upper()=='MM') or (pair_type.upper()=='MU'))" :
" and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU'))",
params.keep_multi ? " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU') or (pair_type.upper()=='MM') or (pair_type.upper()=='MU'))" : " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU'))",
params.dnase ? '' : " and ((chrom1==chrom2 and abs(int(rfrag1) - int(rfrag2)) > 1) or chrom1!=chrom2)",
//params.min_insert_size > 0 ? " and ( (rfrag_end1 - r1pos) + (rfrag_end2 - r2pos)) > ${params.min_insert_size}" : '',
//params.max_insert_size > 0 ? " and ( (rfrag_end1 - r1pos) + (rfrag_end2 - r2pos)) < ${params.max_insert_size}" : '',
Expand Down
12 changes: 3 additions & 9 deletions docs/benchmark.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ nextflow run nf-core-hic/main.nf \
--outdir '/tmp/results_test_pairtools/'
```

The idea here was just to have a look at the final list of selected (and unselected)
The idea here was just to have a look at the final list of selected (and unselected)
read pairs classified as valid interactions (or spurious interaction products)

Here are some quick summary statistics:
Expand All @@ -48,7 +48,7 @@ Here is a quick summary statistics ;

Overall, we can see that **70%** of valid interactions are called by both `HiC-Pro` and `Pairtools`.
Regarding the 30% of read pairs which are different between the two tools, we can see that a large
majority (>75%) are due to differences in the read mapping (`bowtie2` versus `bwa-mem`).
majority (>75%) are due to differences in the read mapping (`bowtie2` versus `bwa-mem`).

The few other differences can be at least partly explained by differences in the read pair selection, such as how a read is assigned
to a restriction fragment, etc.
Expand All @@ -68,7 +68,7 @@ nextflow run nf-core-hic/main.nf \
--res_compartments '500000,250000' \
--res_tads '40000,20000' \
--outdir './results_SRX2636669_hicpro/' \
-profile singularity
-profile singularity
```

or `bwa-mem/pairtools` ;
Expand Down Expand Up @@ -111,16 +111,10 @@ Almost **80%** of valid interactions are called in common by `HiC-Pro` and `pair
As previously observed, most of the differences observed between the two tools are
explained by distinct mapping procedures.


Finally, we generated the contact maps around a specific region on the X chromosome
using the `cool` files and the TADs calling generated with both tools.
**No difference is observed at the contact map level.**

![X Inactivation Center - HiC-Pro processing](./images/SRX2636669_hicpro_pygentracks.png)

![X Inactivation Center - Bwa-mem / pairtools](./images/SRX2636669_pairtools_pygentracks.png)





82 changes: 21 additions & 61 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,142 +8,102 @@
"bowtie2/align": {
"branch": "master",
"git_sha": "fe54581f8bed20e4c4a51c616c93fd3379d89820",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"bowtie2/build": {
"branch": "master",
"git_sha": "6a24fbe314bb2e6fe6306c29a63076ea87e8eb3c",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"bwa/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"bwa/mem": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"cooler/balance": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"cooler/cload": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"cooler/dump": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"cooler/makebins": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"cooler/zoomify": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"custom/getchromsizes": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"pairix": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"pairtools/dedup": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"pairtools/parse": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"pairtools/restrict": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"pairtools/select": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"pairtools/sort": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
},
"samtools/sort": {
"branch": "master",
"git_sha": "a0f7be95788366c1923171e358da7d049eb440f9",
"installed_by": [
"modules"
]
"installed_by": ["modules"]
}
}
},
Expand All @@ -152,4 +112,4 @@
}
}
}
}
}
8 changes: 4 additions & 4 deletions modules/local/pairtools/pairtools_merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ process PAIRTOOLS_MERGE {
def prefix = task.ext.prefix ?: "${meta.id}_merged"
"""
pairtools merge \
${args} \
--nproc ${task.cpus} \
-o ${prefix}.pairs.gz \
${allpairs}
${args} \
--nproc ${task.cpus} \
-o ${prefix}.pairs.gz \
${allpairs}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
8 changes: 4 additions & 4 deletions modules/local/pairtools/pairtools_split.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ process PAIRTOOLS_SPLIT {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
pairtools split \
--nproc-in ${task.cpus} --nproc-out ${task.cpus} \
--output-pairs ${prefix}.split.pairs.gz \
${args} \
${pairs}
--nproc-in ${task.cpus} --nproc-out ${task.cpus} \
--output-pairs ${prefix}.split.pairs.gz \
${args} \
${pairs}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
8 changes: 4 additions & 4 deletions modules/local/pairtools/pairtools_stats.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ process PAIRTOOLS_STATS {
def prefix = task.ext.prefix ?: "${meta.id}_stats"
"""
pairtools stats \
${args} \
--nproc-in ${task.cpus} --nproc-out ${task.cpus} \
-o ${prefix}.txt \
${pairs}
${args} \
--nproc-in ${task.cpus} --nproc-out ${task.cpus} \
-o ${prefix}.txt \
${pairs}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
8 changes: 4 additions & 4 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@
"type": "string",
"description": "Full path to directory containing Bwa-mem index",
"fa_icon": "far fa-file-alt"
}
}
}
},
"hic_processing":{
"hic_processing": {
"title": "Hi-C processing",
"type": "object",
"description": "Define the Hi-C processing subworkflow to use to extract the valid pairs from raw data",
Expand All @@ -100,7 +100,7 @@
"type": "string",
"description": "Hi-C processing method",
"default": "hicpro",
"enum": ["hicpro", "pairtools"]
"enum": ["hicpro", "pairtools"]
}
}
},
Expand Down Expand Up @@ -240,7 +240,7 @@
"pairtools_parse_opts": {
"type": "string",
"description": "Update 'pairtools parse' options",
"default": "--walks-policy 5unique"
"default": "--walks-policy 5unique"
},
"save_interaction_bam": {
"type": "boolean",
Expand Down

0 comments on commit 105da4e

Please sign in to comment.