diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 482e87bf..bd99230c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,24 +1,24 @@ -# nf-core/bamtofastq: Contributing Guidelines +# qbic-pipelines/bamtofastq: Contributing Guidelines -Hi there! Many thanks for taking an interest in improving nf-core/bamtofastq. +Hi there! Many thanks for taking an interest in improving qbic-pipelines/bamtofastq. -We try to manage the required tasks for nf-core/bamtofastq using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. +We try to manage the required tasks for qbic-pipelines/bamtofastq using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/bamtofastq then the best place to ask is on the pipeline channel on [Slack](https://nf-co.re/join/slack/). +> If you need help using or modifying qbic-pipelines/bamtofastq then the best place to ask is here. ## Contribution workflow -If you'd like to write some code for nf-core/bamtofastq, the standard workflow +If you'd like to write some code for qbic-pipelines/bamtofastq, the standard workflow is as follows: 1. Check that there isn't already an issue about your idea in the - [nf-core/bamtofastq issues](https://github.com/nf-core/bamtofastq/issues) to avoid + [qbic-pipelines/bamtofastq issues](https://github.com/qbic-pipelines/bamtofastq/issues) to avoid duplicating work. * If there isn't one already, please create one so that others know you're working on this -2. Fork the [nf-core/bamtofastq repository](https://github.com/nf-core/bamtofastq) to your GitHub account +2. 
Fork the [qbic-pipelines/bamtofastq repository](https://github.com/qbic-pipelines/bamtofastq) to your GitHub account 3. Make the necessary changes / additions within your forked repository 4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged. @@ -32,9 +32,11 @@ Typically, pull-requests are only fully reviewed when these tests are passing, t There are typically two types of tests that run: ### Lint Tests -The nf-core has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +The nf-core has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. +*This pipeline* was created using [nf-core/tools](https://github.com/nf-core/tools) and in the future will keep using it to continuously adhere to their best practices. However, you may encounter more warnings and failures, as the linting requires the `nf-core/` tag, whereas here we generally need to use `qbic-pipelines/`. + If any failures or warnings are encountered, please follow the listed URL for more documentation. ### Pipeline Tests @@ -44,4 +46,4 @@ If there are any failures then the automated tests fail. These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. ## Getting help -For further information/help, please consult the [nf-core/bamtofastq documentation](https://github.com/nf-core/bamtofastq#documentation) and don't hesitate to get in touch on the [nf-core/bamtofastq pipeline channel](https://nfcore.slack.com/channels/nf-core/bamtofastq) on [Slack](https://nf-co.re/join/slack/). 
+For further information/help, please consult the [qbic-pipelines/bamtofastq documentation](https://github.com/qbic-pipelines/bamtofastq#documentation) and don't hesitate to get in touch. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4c439ee4..a6f34695 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,15 +1,15 @@ -Many thanks to contributing to nf-core/bamtofastq! +Many thanks to contributing to qbic-pipelines/bamtofastq! Please fill in the appropriate checklist below (delete whatever is not relevant). These are the most common things requested on pull requests (PRs). ## PR checklist - [ ] This comment contains a description of changes (with reason) - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If necessary, also make a PR on the [nf-core/bamtofastq branch on the nf-core/test-datasets repo]( https://github.com/nf-core/test-datasets/pull/new/nf-core/bamtofastq) + - [ ] If necessary, add test data to `testdata/` - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - - [ ] Make sure your code lints (`nf-core lint .`). + - [ ] Make sure your code lints (`nf-core lint .`). 
- [ ] Documentation in `docs` is updated - [ ] `CHANGELOG.md` is updated - [ ] `README.md` is updated -**Learn more about contributing:** https://github.com/nf-core/bamtofastq/tree/master/.github/CONTRIBUTING.md +**Learn more about contributing:** https://github.com/qbic-pipelines/bamtofastq/tree/master/.github/CONTRIBUTING.md diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 00000000..c66e4f2c --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,32 @@ +name: qbic-pipelines branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'qbic-pipelines/bamtofastq' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name}} == qbic-pipelines/bamtofastq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + Hi @${{ github.event.pull_request.user.login }}, + It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` branch. + The `master` branch on qbic-pipelines repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the ${{github.event.pull_request.head.repo.full_name}} `dev` branch. 
+ You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..93c83e0b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,55 @@ +name: qbic-pipelines CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + test: + name: Run workflow tests + # Only run on push if this is the qbic-pipelines dev branch (merged PRs) + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + nxf_ver: ['20.04.1', ''] + config: ['test_chr'] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v1 + with: + PREFIX_FILTER: | + Dockerfile + environment.yml + - name: Build new docker image + if: env.GIT_DIFF + run: docker build --no-cache . 
-t qbicpipelines/bamtofastq:1.0.0 + + - name: Pull docker image + if: ${{ !env.GIT_DIFF }} + run: | + docker pull qbicpipelines/bamtofastq:dev + docker tag qbicpipelines/bamtofastq:dev qbicpipelines/bamtofastq:1.0.0 + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Run pipeline with test data + # TODO nf-core: You can customise CI pipeline run tests as required + # For example: adding multiple test runs with different parameters + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker + - name: Run pipeline with test data, only obtain reads mapping to chrX and chrY + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.config }},docker \ No newline at end of file diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 00000000..2f5bce3e --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,51 @@ +name: qbic-pipelines linting +# This workflow is triggered on pushes and PRs to the repository. 
+# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +on: + push: + pull_request: + release: + types: [published] + +jobs: + Markdown: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '10' + - name: Install markdownlint + run: npm install -g markdownlint-cli + - name: Run Markdownlint + run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml + YAML: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-node@v1 + with: + node-version: '10' + - name: Install yaml-lint + run: npm install -g yaml-lint + - name: Run yaml-lint + run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") +#Comment this for now, as it won't pass until the template update is done, which will come in the next release +# nf-core: +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v2 +# - name: Install Nextflow +# run: | +# wget -qO- get.nextflow.io | bash +# sudo mv nextflow /usr/local/bin/ +# - uses: actions/setup-python@v1 +# with: +# python-version: '3.6' +# architecture: 'x64' +# - name: Install dependencies +# run: | +# python -m pip install --upgrade pip +# pip install nf-core +# - name: Run nf-core lint +# run: nf-core lint ${GITHUB_WORKSPACE} \ No newline at end of file diff --git a/.github/workflows/push_dockerhub.yml b/.github/workflows/push_dockerhub.yml new file mode 100644 index 00000000..8fec7f51 --- /dev/null +++ b/.github/workflows/push_dockerhub.yml @@ -0,0 +1,40 @@ +name: qbic-pipelines Docker push +# This builds the docker image and pushes it to DockerHub +# Runs on qbic-pipelines repo releases and push event to 'dev' branch (PR merges) +on: + push: + branches: + - dev + release: + types: [published] + +jobs: + push_dockerhub: + name: Push new Docker image to Docker Hub + runs-on: ubuntu-latest + # Only run for the qbic-pipelines repo, for releases and merged PRs + if: 
${{ github.repository == 'qbic-pipelines/bamtofastq' }} + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Build new docker image + run: docker build --no-cache . -t qbicpipelines/bamtofastq:latest + + - name: Push Docker image to DockerHub (dev) + if: ${{ github.event_name == 'push' }} + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker tag qbicpipelines/bamtofastq:latest qbicpipelines/bamtofastq:dev + docker push qbicpipelines/bamtofastq:dev + + - name: Push Docker image to DockerHub (release) + if: ${{ github.event_name == 'release' }} + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker push qbicpipelines/bamtofastq:latest + docker tag qbicpipelines/bamtofastq:latest qbicpipelines/bamtofastq:${{ github.event.release.tag_name }} + docker push qbicpipelines/bamtofastq:${{ github.event.release.tag_name }} \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ca06d704..00000000 --- a/.travis.yml +++ /dev/null @@ -1,42 +0,0 @@ -sudo: required -language: python -jdk: openjdk8 -services: docker -python: '3.6' -cache: pip -matrix: - fast_finish: true - -before_install: - # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && ([ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ] || [ $TRAVIS_PULL_REQUEST_BRANCH = "patch" ]))' - # Pull the docker image first so the test doesn't wait for this - - docker pull nfcore/bamtofastq:dev - # Fake the tag locally so that the pipeline runs properly - # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/bamtofastq:dev nfcore/bamtofastq:dev - -install: - # Install 
Nextflow - - mkdir /tmp/nextflow && cd /tmp/nextflow - - wget -qO- get.nextflow.io | bash - - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow - # Install nf-core/tools - - pip install --upgrade pip - - pip install nf-core - # Reset - - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests - # Install markdownlint-cli - - sudo apt-get install npm && npm install -g markdownlint-cli - -env: - - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check that it works - -script: - # Lint the pipeline code - - nf-core lint ${TRAVIS_BUILD_DIR} - # Lint the documentation - - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml - # Run the pipeline with the test profile - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker diff --git a/CHANGELOG.md b/CHANGELOG.md index f425a3dc..b1db7ef7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ # nf-core/bamtofastq: Changelog -## v1.0dev - [date] -Initial release of nf-core/bamtofastq, created with the [nf-core](http://nf-co.re/) template. +## v1.0.0 - Ada Lovelace +Initial release of qbic-pipelines/bamtofastq, created with the [nf-core](http://nf-co.re/) template. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 1cda7600..90ddb228 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team via email. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. diff --git a/Dockerfile b/Dockerfile index 2bae3218..ed3a3167 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM nfcore/base:1.7 LABEL authors="Friederike Hanssen" \ - description="Docker image containing all requirements for nf-core/bamtofastq pipeline" + description="Docker image containing all requirements for qbic-pipelines/bamtofastq pipeline" COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-bamtofastq-1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/qbic-pipelines-bamtofastq-1.0.0/bin:$PATH diff --git a/README.md b/README.md index fc830c7d..064e1c41 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,21 @@ -# ![nf-core/bamtofastq](docs/images/nf-core-bamtofastq_logo.png) +# ![qbic-pipelines/bamtofastq](docs/images/qbic-pipelines-bamtofastq_logo.png) -**Workflow converts one or multiple bam files back to the fastq format**. +> **An open-source pipeline converting (un)mapped single-end or paired-end bam files to fastq.gz**. 
-[![Build Status](https://travis-ci.com/nf-core/bamtofastq.svg?branch=master)](https://travis-ci.com/nf-core/bamtofastq) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.1-brightgreen.svg)](https://www.nextflow.io/) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/nfcore/bamtofastq.svg)](https://hub.docker.com/r/nfcore/bamtofastq) +[![Docker](https://img.shields.io/docker/automated/fhanssen/nf-core-bamtofastq.svg)](https://hub.docker.com/r/fhanssen/nf-core-bamtofastq) +[![Install with Singularity](https://img.shields.io/badge/use%20with-singularity-purple.svg)](https://www.sylabs.io/docs/) + +[![GitHub Actions CI status](https://github.com/qbic-pipelines/bamtofastq/workflows/qbic-pipelines%20CI/badge.svg)](https://github.com/qbic-pipelines/bamtofastq/actions?query=workflow%3A%22qbic-pipelines+CI%22) +[![GitHub Actions Linting status](https://github.com/qbic-pipelines/bamtofastq/workflows/qbic-pipelines%20linting/badge.svg)](https://github.com/qbic-pipelines/bamtofastq/actions?query=workflow%3A%22qbic-pipelines+linting%22) ## Introduction +This pipeline converts (un)mapped `.bam` files into `fq.gz` files. +Initially, it auto-detects, whether the input file contains single-end or paired-end reads. Following this step, the reads are sorted using `samtools collate` and extracted with `samtools fastq`. Furthermore, for mapped bam files it is possible to only convert reads mapping to a specific region or chromosome. The obtained FastQ files can then be used to further process with other pipelines. + The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. 
It comes with docker containers making installation trivial and results highly reproducible. ## Quick Start @@ -21,47 +27,54 @@ ii. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`s iii. Download the pipeline and test it on a minimal dataset with a single command ```bash -nextflow run nf-core/bamtofastq -profile test, +nextflow run qbic-pipelines/bamtofastq -profile test, ``` iv. Start running your own analysis! - ```bash -nextflow run nf-core/bamtofastq -profile --reads '*_R{1,2}.fastq.gz' --genome GRCh37 +nextflow run qbic-pipelines/bamtofastq -profile --input '*.bam' ``` See [usage docs](docs/usage.md) for all of the available options when running the pipeline. ## Documentation -The nf-core/bamtofastq pipeline comes with documentation about the pipeline, found in the `docs/` directory: +The qbic-pipelines/bamtofastq pipeline comes with documentation about the pipeline, found in the `docs/` directory: 1. [Installation](https://nf-co.re/usage/installation) 2. Pipeline configuration * [Local installation](https://nf-co.re/usage/local_installation) * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](docs/usage.md) 4. [Output and how to interpret the results](docs/output.md) 5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) - - ## Credits -nf-core/bamtofastq was originally written by Friederike Hanssen. +qbic-pipelines/bamtofastq was originally written by [Friederike Hanssen](https://github.com/FriederikeHanssen). + +This pipeline was created using the [nf-core](https://github.com/nf-core) framework and still uses some of its underlying infrastructure. For more information see [nf-co.re](nf-co.re). 
+ +Helpful contributors: + +* [Gisela Gabernet](https://github.com/ggabernet) +* [Matilda Åslin](https://github.com/matrulda) + +### Resources + +The individual steps of this pipeline are based on the following tutorials and resources: + + 1. [Extracting paired FASTQ read data from a BAM mapping file](http://darencard.net/blog/2017-09-07-extract-fastq-bam/) + 2. [Check if BAM is derived from pair-end or single-end reads](https://www.biostars.org/p/178730/) ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). -For further information or help, don't hesitate to get in touch on [Slack](https://nfcore.slack.com/channels/nf-core/bamtofastq) (you can join with [this invite](https://nf-co.re/join/slack)). +For further information or help, don't hesitate to get in touch by opening an issue. ## Citation - -You can cite the `nf-core` pre-print as follows: -Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). diff --git a/assets/email_template.html b/assets/email_template.html index 1ae6e9a1..8658ee33 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -5,21 +5,21 @@ - - nf-core/bamtofastq Pipeline Report + + qbic-pipelines/bamtofastq Pipeline Report
-

nf-core/bamtofastq v${version}

+

qbic-pipelines/bamtofastq v${version}

Run Name: $runName

<% if (!success){ out << """
-

nf-core/bamtofastq execution completed unsuccessfully!

+

qbic-pipelines/bamtofastq execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

${errorReport}
@@ -28,7 +28,7 @@

nf-core/bamtofastq execution completed } else { out << """
- nf-core/bamtofastq execution completed successfully! + qbic-pipelines/bamtofastq execution completed successfully!
""" } @@ -45,8 +45,8 @@

Pipeline Configuration:

-

nf-core/bamtofastq

-

https://github.com/nf-core/bamtofastq

+

qbic-pipelines/bamtofastq

+

https://github.com/qbic-pipelines/bamtofastq

diff --git a/assets/email_template.txt b/assets/email_template.txt index f24e9d67..2c86803c 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,16 +4,16 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/bamtofastq v${version} + qbic-pipelines/bamtofastq v${version} ---------------------------------------------------- Run Name: $runName <% if (success){ - out << "## nf-core/bamtofastq execution completed successfully! ##" + out << "## qbic-pipelines/bamtofastq execution completed successfully! ##" } else { out << """#################################################### -## nf-core/bamtofastq execution completed unsuccessfully! ## +## qbic-pipelines/bamtofastq execution completed unsuccessfully! ## #################################################### The exit status of the task that caused the workflow execution to fail was: $exitStatus. The full error message was: @@ -36,5 +36,5 @@ Pipeline Configuration: <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> -- -nf-core/bamtofastq -https://github.com/nf-core/bamtofastq +qbic-pipelines/bamtofastq +https://github.com/qbic-pipelines/bamtofastq diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index abad4868..d6dc452d 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -1,9 +1,31 @@ +# custom_logo: ../../../docs/images/nf-core_sarek_logo.png +custom_logo_url: https://github.com/qbic-pipelines/bamtofastq/ +custom_logo_title: 'qbic-pipelines/bamtofastq' + report_comment: > - This report has been generated by the nf-core/bamtofastq + This report has been generated by the qbic-pipelines/bamtofastq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: - nf-core/bamtofastq-software-versions: + qbic-pipelines/bamtofastq-software-versions: order: -1000 + qbic-pipelines-bamtofastq-summary: + order: -1100 +top_modules: + - 'fastqc': + name: 'FastQC (Input Bam)' + path_filters_exclude: + - '*singleton_fastqc*' + - '*.1_fastqc*' + - '*.2_fastqc*' + - 'samtools': + name: 'Samtools (Input Bam)' + - 'fastqc': + name: 'FastQC (Output Reads)' + path_filters: + - '*singleton_fastqc*' + - '*.1_fastqc*' + - '*.2_fastqc*' + export_plots: true diff --git a/assets/qbic-pipelines-bamtofastq_logo.png b/assets/qbic-pipelines-bamtofastq_logo.png new file mode 100644 index 00000000..15d18be4 Binary files /dev/null and b/assets/qbic-pipelines-bamtofastq_logo.png differ diff --git a/assets/qbic-pipelines-bamtofastq_logo.svg b/assets/qbic-pipelines-bamtofastq_logo.svg new file mode 100644 index 00000000..64a459e9 --- /dev/null +++ b/assets/qbic-pipelines-bamtofastq_logo.svg @@ -0,0 +1,391 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + bamtofastq + + + qbic-pipelines/ + + + An open-source pipeline converting (un)mapped single-end or paired-end bam files to fastq.gz + + diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index d2bda0a4..6d95e170 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -12,9 +12,9 @@ $email_html Content-Type: image/png;name="nf-core-bamtofastq_logo.png" Content-Transfer-Encoding: base64 Content-ID: -Content-Disposition: inline; filename="nf-core-bamtofastq_logo.png" +Content-Disposition: inline; filename="qbic-pipelines-bamtofastq_logo.png" -<% out << new File("$baseDir/assets/nf-core-bamtofastq_logo.png"). +<% out << new File("$baseDir/assets/qbic-pipelines-bamtofastq_logo.png"). bytes. encodeBase64(). toString(). 
diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index af40520b..76472f3c 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -3,16 +3,17 @@ from collections import OrderedDict import re -# TODO nf-core: Add additional regexes for new tools in process get_software_versions regexes = { - 'nf-core/bamtofastq': ['v_pipeline.txt', r"(\S+)"], + 'qbic-pipelines/bamtofastq': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], + 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], } results = OrderedDict() -results['nf-core/bamtofastq'] = 'N/A' +results['qbic-pipelines/bamtofastq'] = 'N/A' results['Nextflow'] = 'N/A' +results['Samtools'] = 'N/A' results['FastQC'] = 'N/A' results['MultiQC'] = 'N/A' @@ -35,8 +36,8 @@ # Dump to YAML print (''' id: 'software_versions' -section_name: 'nf-core/bamtofastq Software Versions' -section_href: 'https://github.com/nf-core/bamtofastq' +section_name: 'qbic-pipelines/bamtofastq Software Versions' +section_href: 'https://github.com/qbic-pipelines/bamtofastq' plot_type: 'html' description: 'are collected at run time from the software output.' data: | diff --git a/conf/base.config b/conf/base.config index df6d4bc5..d796f6f8 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,7 +11,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 7.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -24,21 +23,20 @@ process { // NOTE - Only one of the labels below are used in the fastqc process in the main script. // If possible, it would be nice to keep the same label naming convention when // adding in your processes. - // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 14.GB * task.attempt, 'memory' ) } + cpus = { check_max( 7 * task.attempt, 'cpus' ) } + memory = { check_max( 15.GB * task.attempt, 'memory' ) } time = { check_max( 6.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 42.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { check_max( 15 * task.attempt, 'cpus' ) } + memory = { check_max( 31.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 84.GB * task.attempt, 'memory' ) } + cpus = { check_max( 15 * task.attempt, 'cpus' ) } + memory = { check_max( 120.GB * task.attempt, 'memory' ) } time = { check_max( 10.h * task.attempt, 'time' ) } } withLabel:process_long { @@ -54,5 +52,4 @@ params { max_memory = 128.GB max_cpus = 16 max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' } diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index 392f2507..00000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,192 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ - -params { - // illumina iGenomes reference file paths - // TODO nf-core: Add new reference types and strip out those that are not needed - genomes { - 'GRCh37' { - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - fasta = 
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/" - } - 'GRCm38' { - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/BWAIndex/" - } - 'TAIR10' { - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/" - } - 'EB2' { - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bowtie2 = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/" - } - 'UMD3.1' { - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/" - - } - 'WBcel235' { - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/" - } - 'CanFam3.1' { - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/" - } - 'GRCz10' { - bed12 = 
"${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/" - } - 'BDGP6' { - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/" - } - 'EquCab2' { - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/" - } - 'EB1' { - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - gtf = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/" - } - 'Galgal4' { - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/" - } - 'Gm01' { - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/" - } - 'Mmul_1' { - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - bwa = 
"${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/" - } - 'IRGSP-1.0' { - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/" - } - 'CHIMP2.1.4' { - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/" - } - 'Rnor_6.0' { - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/" - } - 'R64-1-1' { - bed12 = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/" - } - 'EF2' { - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/" - } - 'Sbi1' { - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/" - } - 'Sscrofa10.2' { - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - gtf = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/" - } - 'AGPv3' { - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/" - } - } -} diff --git a/conf/test.config b/conf/test.config index cf9e7f01..b21dbe46 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,7 +4,7 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/bamtofastq -profile test + * nextflow run qbic-pipelines/bamtofastq -profile test */ params { @@ -16,11 +16,9 @@ params { max_time = 48.h // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - singleEnd = false - readPaths = [ - ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], - ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] - ] + input = ['https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/bam/JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped.bam', + "$baseDir/testdata/First_SmallTest_Paired.bam", + "$baseDir/testdata/Second_SmallTest_Paired.bam", + "$baseDir/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam" + ] } diff --git a/conf/test_chr.config b/conf/test_chr.config new file mode 100644 index 00000000..2601c4a1 --- /dev/null +++ b/conf/test_chr.config @@ -0,0 +1,14 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. 
Use as follows: + * nextflow run qbic-pipelines/bamtofastq -profile test + */ + +includeConfig 'test.config' + +params { + chr = 'chrX chrY X Y' +} \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 7bfe2d69..1c5f1883 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,12 +1,11 @@ -# nf-core/bamtofastq: Documentation +# qbic-pipelines/bamtofastq: Documentation -The nf-core/bamtofastq documentation is split into the following files: +The qbic-pipelines/bamtofastq documentation is split into the following files: 1. [Installation](https://nf-co.re/usage/installation) 2. Pipeline configuration * [Local installation](https://nf-co.re/usage/local_installation) * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](usage.md) 4. [Output and how to interpret the results](output.md) 5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) diff --git a/docs/images/qbic-pipelines-bamtofastq_logo.png b/docs/images/qbic-pipelines-bamtofastq_logo.png new file mode 100644 index 00000000..fcd011a7 Binary files /dev/null and b/docs/images/qbic-pipelines-bamtofastq_logo.png differ diff --git a/docs/output.md b/docs/output.md index 367e2192..ebfd8d23 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,37 +1,31 @@ -# nf-core/bamtofastq: Output +# qbic-pipelines/bamtofastq: Output This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. 
- - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -* [FastQC](#fastqc) - read quality control +* [FastQC](#fastqc) - bam and read quality control +* [Samtools](#samtools) - collate, extract reads and compute bam stats * [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline ## FastQC -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. - -For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. To see how your reads look after trimming, look at the FastQC reports in the `trim_galore` directory. + [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. -**Output directory: `results/fastqc`** - -* `sample_fastqc.html` - * FastQC report, containing quality metrics for your untrimmed raw fastq files -* `zips/sample_fastqc.zip` - * zip file containing the FastQC report, tab-delimited data file and plot images + For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +## Samtools +[Samtools](https://www.htslib.org) is used to extract reads from the bam files and to compute some bam statistics. 
+The extracted reads are written to fastq files in `results/reads`. ## MultiQC [MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. -**Output directory: `results/multiqc`** +**Output directory: `results/MultiQC`** * `Project_multiqc_report.html` * MultiQC report - a standalone HTML file that can be viewed in your web browser @@ -39,3 +33,4 @@ The pipeline has special steps which allow the software versions used to be repo * Directory containing parsed statistics from the different tools used in the pipeline For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info) + diff --git a/docs/usage.md b/docs/usage.md index d4f9930e..098a8411 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,4 +1,4 @@ -# nf-core/bamtofastq: Usage +# qbic-pipelines/bamtofastq: Usage ## Table of contents @@ -11,12 +11,9 @@ * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) * [`-profile`](#-profile) - * [`--reads`](#--reads) - * [`--singleEnd`](#--singleend) -* [Reference genomes](#reference-genomes) - * [`--genome` (using iGenomes)](#--genome-using-igenomes) - * [`--fasta`](#--fasta) - * [`--igenomesIgnore`](#--igenomesignore) + * [`--bam`](#--bam) + * [`--chr`](#--chr) + * [`--no_read_QC`](#--no_read_QC) * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) * [Custom resource requests](#custom-resource-requests) @@ -50,13 +47,11 @@ It is recommended to limit the Nextflow Java virtual machines memory. 
We recomme NXF_OPTS='-Xms1g -Xmx4g' ``` - - ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/bamtofastq --reads '*_R{1,2}.fastq.gz' -profile docker +nextflow run qbic-pipelines/bamtofastq --input '*bam' -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -74,13 +69,13 @@ results # Finished results (configurable, see below) When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash -nextflow pull nf-core/bamtofastq +nextflow pull qbic-pipelines/bamtofastq ``` ### Reproducibility It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/bamtofastq releases page](https://github.com/nf-core/bamtofastq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [qbic-pipelines/bamtofastq releases page](https://github.com/qbic-pipelines/bamtofastq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. 
@@ -107,81 +102,39 @@ If `-profile` is not specified at all the pipeline will be run locally and expec * A profile with a complete configuration for automated testing * Includes links to test data so needs no other parameters - - -### `--reads` -Use this to specify the location of your input FastQ files. For example: +### `--bam` +Use this to specify the location of your input Bam files. For example: ```bash ---reads 'path/to/data/sample_*_{1,2}.fastq' +--bam 'path/to/data/sample_*.bam' ``` Please note the following requirements: 1. The path must be enclosed in quotes 2. The path must have at least one `*` wildcard character -3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs. -If left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz` +### `--chr` (optional) -### `--singleEnd` -By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--singleEnd` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--reads`. For example: +Use to only obtain reads mapping to a specific chromosome or region. +> It is important to specify the chromsome or region name **exactly** as set in the bam file. Otherwise no reads may be extracted! + +For example: ```bash ---singleEnd --reads '*.fastq' +--chr 'X chrX' ``` -It is not possible to run a mixture of single-end and paired-end files in one run. - - -## Reference genomes - -The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. - -### `--genome` (using iGenomes) -There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. 
- -You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are: +This extracts reads mapping to `X` as well as `chrX` -* Human - * `--genome GRCh37` -* Mouse - * `--genome GRCm38` -* _Drosophila_ - * `--genome BDGP6` -* _S. cerevisiae_ - * `--genome 'R64-1-1'` +### `--no_read_QC` (optional) -> There are numerous others - check the config file for more. - -Note that you can use the same configuration setup to save sets of reference files for your own use, even if they are not part of the iGenomes resource. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for instructions on where to save such a file. - -The syntax for this reference configuration is as follows: - - - -```nextflow -params { - genomes { - 'GRCh37' { - fasta = '' // Used if no star index given - } - // Any number of additional genomes, key is used with --genome - } -} -``` - - -### `--fasta` -If you prefer, you can specify the full path to your reference genome when you run the pipeline: +Use to skip `FastQC` on obtained reads. This is useful, when the reads are used as input in another pipeline, which runs `QC` on its input data as well. ```bash ---fasta '[path to Fasta reference]' +--no_read_QC ``` -### `--igenomesIgnore` -Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. - ## Job resources ### Automatic resubmission Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. 
@@ -204,8 +157,6 @@ Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a ## Other command line parameters - - ### `--outdir` The output directory where the results will be saved. diff --git a/environment.yml b/environment.yml index ea77b3b8..793524ed 100644 --- a/environment.yml +++ b/environment.yml @@ -1,13 +1,13 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-bamtofastq-1.0dev +name: qbic-pipelines-bamtofastq-1.0.0 channels: - conda-forge - bioconda - defaults dependencies: - # TODO nf-core: Add required software dependencies here - - bioconda::fastqc=0.11.8 - - bioconda::multiqc=1.7 + - bioconda::multiqc=1.9 + - bioconda::samtools=1.10 + - bioconda::fastqc=0.11.9 - conda-forge::r-markdown=1.1 - conda-forge::r-base=3.6.1 diff --git a/main.nf b/main.nf index ed3bf385..9a781139 100644 --- a/main.nf +++ b/main.nf @@ -1,16 +1,17 @@ #!/usr/bin/env nextflow /* ======================================================================================== - nf-core/bamtofastq + qbic-pipelines/bamtofastq ======================================================================================== - nf-core/bamtofastq Analysis Pipeline. + qbic-pipelines/bamtofastq Analysis Pipeline. 
+ An open-source analysis pipeline to convert mapped or unmapped single-end or paired-end + reads from bam format to fastq format #### Homepage / Documentation - https://github.com/nf-core/bamtofastq + https://github.com/qbic-pipelines/bamtofastq ---------------------------------------------------------------------------------------- */ def helpMessage() { - // TODO nf-core: Add to this help message with new command line parameters log.info nfcoreHeader() log.info""" @@ -18,30 +19,25 @@ def helpMessage() { The typical command for running the pipeline is as follows: - nextflow run nf-core/bamtofastq --reads '*_R{1,2}.fastq.gz' -profile docker + nextflow run qbic-pipelines/bamtofastq --input '*bam' -profile cfc Mandatory arguments: - --reads Path to input data (must be surrounded with quotes) - -profile Configuration profile to use. Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test and more. - - Options: - --genome Name of iGenomes reference - --singleEnd Specifies that the input is single end reads - - References If not specified in the configuration file or you wish to overwrite any of the references. - --fasta Path to Fasta reference - - Other options: - --outdir The output directory where the results will be saved - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --email_on_fail Same as --email, except only send mail if the workflow is not successful - --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. + --input [file] Path to input data, multiple files can be specified by using wildcard characters + -profile [str] Configuration profile to use. 
Can use multiple (comma separated) + Available: conda, docker, singularity, awsbatch, test and more. + + Other options: + --outdir [file] The output directory where the results will be saved + --chr [str] Only use reads mapping to a specific chromosome/region. Has to be specified as in bam: i.e chr1, chr{1..22} (gets all reads mapping to chr1 to 22), 1, "X Y", incorrect naming will lead to a potentially silent error + --no_read_QC [bool] If specified, no quality control will be performed on extracted reads. Useful, if this is done anyways in the subsequent workflow + --email [str] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --email_on_fail [str] Same as --email, except only send mail if the workflow is not successful + --maxMultiqcEmailFileSize [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) + -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. AWSBatch options: - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on + --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion [str] The AWS Region for your AWS Batch job to run on """.stripIndent() } @@ -55,22 +51,6 @@ if (params.help) { * SET UP CONFIGURATION VARIABLES */ -// Check if genome exists in the config file -if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. 
Currently the available genomes are ${params.genomes.keySet().join(", ")}" -} - -// TODO nf-core: Add any reference files that are needed -// Configurable reference genomes -// -// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY -// If you want to use the channel below in a process, define the following: -// input: -// file fasta from ch_fasta -// -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) } - // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name @@ -89,63 +69,66 @@ if ( workflow.profile == 'awsbatch') { } // Stage config files -ch_multiqc_config = file(params.multiqc_config, checkIfExists: true) +ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) /* - * Create a channel for input read files + * Create a channel for input bam files */ -if (params.readPaths) { - if (params.singleEnd) { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true) ] ] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_trimming } - } else { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true), file(row[1][1], checkIfExists: true) ] ] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_trimming } - } -} else { + +if(params.input && !params.chr) { //Checks whether bam file(s) and no chromosome/region was specified, then Step 0 is skipped Channel - .fromFilePairs( params.reads, size: params.singleEnd ? 
1 : 2 ) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line." } - .into { read_files_fastqc; read_files_trimming } + .fromPath(params.input, checkIfExists: true) //checks whether the specified file exists + .map { file -> tuple(file.name.replaceAll(".bam",''), file) } // map bam file name w/o bam to file + .into { bam_files_check; + bam_files_flagstats; + bam_files_idxstats; + bam_files_stats; + bam_files_fastqc } //else send to first process + +} else if(params.input && params.chr){ //Checks whether bam file(s) and chromosome(s)/region(s) was specified + Channel + .fromPath(params.input, checkIfExists: true) //checks whether the specified file exists + .map { file -> tuple(file.name.replaceAll(".bam",''), file) } // map bam file name w/o bam to file + .into { bam_chr; + bam_files_flagstats; + bam_files_idxstats; + bam_files_stats; + bam_files_fastqc} //else send to first process +}else{ + exit 1, "Parameter 'params.input' was not specified!\n" } + // Header log info log.info nfcoreHeader() def summary = [:] -if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName -// TODO nf-core: Report custom parameters here -summary['Reads'] = params.reads -summary['Fasta Ref'] = params.fasta -summary['Data Type'] = params.singleEnd ? 
'Single-End' : 'Paired-End' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -if (workflow.profile == 'awsbatch') { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue -} -summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config URL'] = params.config_profile_url +if (workflow.revision) summary['Pipeline Release'] = workflow.revision +summary['Run Name'] = custom_runName ?: workflow.runName +summary['Input'] = params.input +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" +summary['Output dir'] = params.outdir +if (params.chr) summary['Only reads mapped to chr'] = params.chr +summary['Read QC'] = params.no_read_QC ? 
'No' : 'Yes' +summary['Launch dir'] = workflow.launchDir +summary['Working dir'] = workflow.workDir +summary['Script dir'] = workflow.projectDir +summary['User'] = workflow.userName +if (workflow.profile == 'awsbatch') { + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue +} +summary['Config Profile'] = workflow.profile +if (params.config_profile_description) summary['Config Description'] = params.config_profile_description +if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact +if (params.config_profile_url) summary['Config URL'] = params.config_profile_url if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize + summary['E-mail Address'] = params.email + summary['E-mail on failure'] = params.email_on_fail + summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize } -log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") +log.info summary.collect { k,v -> "${k.padRight(26)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" // Check the hostnames against configured profiles @@ -154,10 +137,10 @@ checkHostname() def create_workflow_summary(summary) { def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') yaml_file.text = """ - id: 'nf-core-bamtofastq-summary' + id: 'qbic-pipelines-bamtofastq-summary' description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/bamtofastq Workflow Summary' - section_href: 'https://github.com/nf-core/bamtofastq' + section_name: 'qbic-pipelines/bamtofastq Workflow Summary' + section_href: 'https://github.com/qbic-pipelines/bamtofastq' plot_type: 'html' data: |
@@ -177,75 +160,378 @@ process get_software_versions { if (filename.indexOf(".csv") > 0) filename else null } + label 'process_low' + output: file 'software_versions_mqc.yaml' into software_versions_yaml file "software_versions.csv" + file "*.txt" script: - // TODO nf-core: Get all tools to print their version number here """ echo $workflow.manifest.version > v_pipeline.txt echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version > v_fastqc.txt + fastqc --version &> v_fastqc.txt + samtools --version > v_samtools.txt multiqc --version > v_multiqc.txt scrape_software_versions.py &> software_versions_mqc.yaml """ } /* - * STEP 1 - FastQC + * STEP 0: Extract reads mapping to specific chromosome(s) */ -process fastqc { - tag "$name" +if (params.chr){ + process extractReadsMappingToChromosome{ + tag "${name}.${chr_list_joined}" label 'process_medium' - publishDir "${params.outdir}/fastqc", mode: 'copy', - saveAs: { filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" } - + input: - set val(name), file(reads) from read_files_fastqc + set val(name), file(bam) from bam_chr output: - file "*_fastqc.{zip,html}" into fastqc_results + set val("${name}.${chr_list_joined}"), file("${name}.${chr_list_joined}.bam") into bam_files_check + script: + //If multiple chr were specified, then join space separated list for naming: chr1 chr2 -> chr1_chr2, also resolve region specification with format chr:start-end + chr_list_joined = params.chr.split(' |-|:').size() > 1 ? 
params.chr.split(' |-|:').join('_') : params.chr """ - fastqc --quiet --threads $task.cpus $reads + samtools index -@$task.cpus $bam + samtools view -hb $bam ${params.chr} -@$task.cpus -o ${name}.${chr_list_joined}.bam """ + } } /* - * STEP 2 - MultiQC + * STEP 1: Check for paired-end or single-end bam */ -process multiqc { - publishDir "${params.outdir}/MultiQC", mode: 'copy' +process checkIfPairedEnd{ + tag "$name" + label 'process_low' + input: + set val(name), file(bam) from bam_files_check + + output: + set val(name), file(bam), file('*paired.txt') optional true into bam_files_paired_map_map, + bam_files_paired_unmap_unmap, + bam_files_paired_unmap_map, + bam_files_paired_map_unmap + set val(name), file(bam), file('*single.txt') optional true into bam_file_single_end // = is not paired end + + //Take samtools header + the first 1000 reads (to save time, otherwise also all can be used) and check whether for + //all, the flag for paired-end is set. Compare: https://www.biostars.org/p/178730/ . 
+ script: + """ + if [ \$({ samtools view -H $bam -@$task.cpus ; samtools view $bam -@$task.cpus | head -n1000; } | samtools view -c -f 1 -@$task.cpus | awk '{print \$1/1000}') = "1" ]; then + echo 1 > ${name}.paired.txt + else + echo 0 > ${name}.single.txt + fi + """ +} + +process computeFlagstatInput{ + tag "$name" + label 'process_medium' + + input: + set val(name), file(bam) from bam_files_flagstats + + output: + file "*.flagstat" into ch_bam_flagstat_mqc + + script: + """ + samtools flagstat -@$task.cpus $bam > ${bam}.flagstat + """ +} + + +process computeIdxstatsInput{ + tag "$name" + label 'process_medium' + + input: + set val(name), file(bam) from bam_files_idxstats + + output: + file "*.idxstats" into ch_bam_idxstat_mqc + + script: + """ + samtools index -@$task.cpus $bam + samtools idxstats -@$task.cpus $bam > ${bam}.idxstats + """ +} + +process computeStatsInput{ + + tag "$name" + label 'process_medium' + + input: + set val(name), file(bam) from bam_files_stats + + output: + file "*.stats" into ch_bam_stats_mqc + + script: + """ + samtools stats -@$task.cpus $bam > ${bam}.stats + """ +} + +process computeFastQCInput{ + tag "$name" + label 'process_medium' + + input: + set val(name), file(bam) from bam_files_fastqc + + output: + file "*.{zip,html}" into ch_fastqc_reports_mqc_input_bam + + script: + """ + fastqc --quiet --threads $task.cpus $bam + """ +} + +/* + * Step 2a: Handle paired-end bams + */ +process pairedEndMapMap{ + tag "$name" + label 'process_low' + input: + set val(name), file(bam), file(txt) from bam_files_paired_map_map + + output: + set val(name), file( '*.map_map.bam') into map_map_bam + + when: + txt.exists() + + script: + """ + samtools view -b -f1 -F12 $bam -@$task.cpus -o ${name}.map_map.bam + """ +} + +process pairedEndUnmapUnmap{ + tag "$name" + label 'process_low' + input: + set val(name), file(bam), file(txt) from bam_files_paired_unmap_unmap + + output: + set val(name), file('*.unmap_unmap.bam') into unmap_unmap_bam + + when: + 
txt.exists() + + script: + """ + samtools view -b -f12 -F256 $bam -@${task.cpus} -o ${name}.unmap_unmap.bam + """ +} + +process pairedEndUnmapMap{ + tag "$name" + label 'process_low' + input: + set val(name), file(bam), file(txt) from bam_files_paired_unmap_map + + output: + set val(name), file( '*.unmap_map.bam') into unmap_map_bam + + when: + txt.exists() + + script: + """ + samtools view -b -f4 -F264 $bam -@${task.cpus} -o ${name}.unmap_map.bam + """ +} + +process pairedEndMapUnmap{ + tag "$name" + label 'process_low' + input: + set val(name), file(bam), file(txt) from bam_files_paired_map_unmap + + output: + set val(name), file( '*.map_unmap.bam') into map_unmap_bam + + when: + txt.exists() + + script: + """ + samtools view -b -f8 -F260 $bam -@${task.cpus} -o ${name}.map_unmap.bam + """ +} + +unmap_unmap_bam.join(map_unmap_bam, remainder: true) + .join(unmap_map_bam, remainder: true) + .set{ all_unmapped_bam } + +process mergeUnmapped{ + tag "$name" + label 'process_low' + input: + set val(name), file(unmap_unmap), file (map_unmap), file(unmap_map) from all_unmapped_bam + + output: + set val(name), file('*.merged_unmapped.bam') into merged_unmapped + + script: + """ + samtools merge ${name}.merged_unmapped.bam $unmap_unmap $map_unmap $unmap_map -@$task.cpus + """ +} + +process sortExtractMapped{ + tag "$name" + label 'process_medium' + + input: + set val(name), file(all_map_bam) from map_map_bam + + output: + set val(name), file('*_mapped.fq.gz') into reads_mapped + + script: + """ + samtools collate -O -@$task.cpus $all_map_bam . \ + | samtools fastq -1 ${name}_R1_mapped.fq.gz -2 ${name}_R2_mapped.fq.gz -s ${name}_mapped_singletons.fq.gz -N -@$task.cpus + """ +} + +process sortExtractUnmapped{ + label 'process_medium' + tag "$name" + + input: + set val(name), file(all_unmapped) from merged_unmapped + + output: + set val(name), file('*_unmapped.fq.gz') into reads_unmapped + + script: + """ + samtools collate -O -@$task.cpus $all_unmapped . 
\ + | samtools fastq -1 ${name}_R1_unmapped.fq.gz -2 ${name}_R2_unmapped.fq.gz -s ${name}_unmapped_singletons.fq.gz -N -@$task.cpus + """ +} + +reads_mapped.join(reads_unmapped, remainder: true) + .map{ + row -> tuple(row[0], row[1][0], row[1][1], row[2][0], row[2][1]) + } + .set{ all_fastq } + +process joinMappedAndUnmappedFastq{ + label 'process_low' + tag "$name" + publishDir "${params.outdir}/reads", mode: 'copy', + saveAs: { filename -> + if (filename.indexOf(".fq.gz") > 0) filename + else null + } + + input: + set val(name), file(mapped_fq1), file(mapped_fq2), file(unmapped_fq1), file(unmapped_fq2) from all_fastq.filter{ it.size()>0 } + + output: + set file('*1.fq.gz'), file('*2.fq.gz') into read_qc + + + script: + """ + cat $mapped_fq1 $unmapped_fq1 > ${name}.1.fq.gz + cat $mapped_fq2 $unmapped_fq2 > ${name}.2.fq.gz + """ +} + +process pairedEndReadsQC{ + label 'process_medium' + tag "$read1" input: - file multiqc_config from ch_multiqc_config - // TODO nf-core: Add in log files from your new processes for MultiQC to find! - file ('fastqc/*') from fastqc_results.collect().ifEmpty([]) - file ('software_versions/*') from software_versions_yaml.collect() - file workflow_summary from create_workflow_summary(summary) + set file(read1), file(read2) from read_qc output: - file "*multiqc_report.html" into multiqc_report - file "*_data" - file "multiqc_plots" + file "*.{zip,html}" into ch_fastqc_reports_mqc_pe + + when: + !params.no_read_QC script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time """ - multiqc -f $rtitle $rfilename --config $multiqc_config . 
+ fastqc --quiet --threads $task.cpus $read1 $read2 """ } + +/* + * STEP 2b: Handle single-end bams + */ +process sortExtractSingleEnd{ + tag "$name" + label 'process_medium' + + publishDir "${params.outdir}/reads", mode: 'copy', + saveAs: { filename -> + if (filename.indexOf(".fq.gz") > 0) filename + else null + } + + input: + set val(name), file(bam), file(txt) from bam_file_single_end + + output: + set val(name), file ('*.singleton.fq.gz') into single_end_reads + + when: + txt.exists() + + script: + """ + samtools collate -O -@$task.cpus $bam . \ + | samtools fastq -0 ${name}.singleton.fq.gz -N -@$task.cpus + """ + } + +process singleEndReadQC{ + tag "$name" + label 'process_medium' + + + input: + set val(name), file(reads) from single_end_reads + + output: + file "*.{zip,html}" into ch_fastqc_reports_mqc_se + + when: + !params.no_read_QC + + script: + """ + fastqc --quiet --threads $task.cpus ${reads} + """ + +} + /* * STEP 3 - Output Description HTML */ process output_documentation { publishDir "${params.outdir}/pipeline_info", mode: 'copy' + label 'process_low' input: file output_docs from ch_output_docs @@ -259,15 +545,48 @@ process output_documentation { """ } +/* + * STEP 4 - MultiQC + */ +process multiqc { + publishDir "${params.outdir}/MultiQC", mode: 'copy' + label 'process_low' + + input: + file multiqc_config from ch_multiqc_config + + file ('software_versions/*') from software_versions_yaml.collect() + file workflow_summary from create_workflow_summary(summary) + file flagstats from ch_bam_flagstat_mqc.collect() + file stats from ch_bam_stats_mqc.collect() + file idxstats from ch_bam_idxstat_mqc.collect() + file fastqc_bam from ch_fastqc_reports_mqc_input_bam.collect().ifEmpty([]) + file fastqc_se from ch_fastqc_reports_mqc_se.collect().ifEmpty([]) + file fastqc_pe from ch_fastqc_reports_mqc_pe.collect().ifEmpty([]) + + output: + file "*multiqc_report.html" + file "*_data" + file "multiqc_plots" + + script: + rtitle = custom_runName ? 
"--title \"$custom_runName\"" : '' + rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + """ + multiqc -f -s $rtitle $rfilename $multiqc_config . + """ + +} + /* * Completion e-mail notification */ workflow.onComplete { // Set up the e-mail variables - def subject = "[nf-core/bamtofastq] Successful: $workflow.runName" + def subject = "[qbic-pipelines/bamtofastq] Successful: $workflow.runName" if (!workflow.success) { - subject = "[nf-core/bamtofastq] FAILED: $workflow.runName" + subject = "[qbic-pipelines/bamtofastq] FAILED: $workflow.runName" } def email_fields = [:] email_fields['version'] = workflow.manifest.version @@ -293,19 +612,18 @@ workflow.onComplete { email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) // On success try attach the multiqc report def mqc_report = null try { if (workflow.success) { mqc_report = multiqc_report.getVal() if (mqc_report.getClass() == ArrayList) { - log.warn "[nf-core/bamtofastq] Found multiple reports from process 'multiqc', will use only one" + log.warn "[qbic-pipelines/bamtofastq] Found multiple reports from process 'multiqc', will use only one" mqc_report = mqc_report[0] } } } catch (all) { - log.warn "[nf-core/bamtofastq] Could not attach MultiQC report to summary email" + log.warn "[qbic-pipelines/bamtofastq] Could not attach MultiQC report to summary email" } // Check if we are only sending emails on failure @@ -337,11 +655,11 @@ workflow.onComplete { if ( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/bamtofastq] Sent summary e-mail to $email_address (sendmail)" + log.info 
"[qbic-pipelines/bamtofastq] Sent summary e-mail to $email_address (sendmail)" } catch (all) { // Catch failures and try with plaintext [ 'mail', '-s', subject, email_address ].execute() << email_txt - log.info "[nf-core/bamtofastq] Sent summary e-mail to $email_address (mail)" + log.info "[qbic-pipelines/bamtofastq] Sent summary e-mail to $email_address (mail)" } } @@ -367,10 +685,10 @@ workflow.onComplete { } if (workflow.success) { - log.info "${c_purple}[nf-core/bamtofastq]${c_green} Pipeline completed successfully${c_reset}" + log.info "${c_purple}[qbic-pipelines/bamtofastq]${c_green} Pipeline completed successfully${c_reset}" } else { checkHostname() - log.info "${c_purple}[nf-core/bamtofastq]${c_red} Pipeline completed with errors${c_reset}" + log.info "${c_purple}[qbic-pipelines/bamtofastq]${c_red} Pipeline completed with errors${c_reset}" } } @@ -394,7 +712,7 @@ def nfcoreHeader(){ ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/bamtofastq v${workflow.manifest.version}${c_reset} + ${c_purple} qbic-pipelines/bamtofastq v${workflow.manifest.version}${c_reset} -${c_dim}--------------------------------------------------${c_reset}- """.stripIndent() } diff --git a/nextflow.config b/nextflow.config index b05612dd..9ee2887a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,10 +9,9 @@ params { // Workflow flags - // TODO nf-core: Specify your pipeline's command line flags - genome = false - reads = "data/*{1,2}.fastq.gz" - singleEnd = false + input = false + chr = false + no_read_QC = false //By default: QC is performed on extracted reads outdir = './results' // Boilerplate options @@ -24,11 +23,9 @@ params { plaintext_email = false monochrome_logs = false help = false - igenomes_base = "./iGenomes" tracedir = "${params.outdir}/pipeline_info" awsqueue = false awsregion = 'eu-west-1' - igenomesIgnore = false 
custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" hostnames = false @@ -39,7 +36,7 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/bamtofastq:dev' +process.container = 'qbicpipelines/bamtofastq:1.0.0' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -58,6 +55,8 @@ profiles { docker { docker.enabled = true } singularity { singularity.enabled = true } test { includeConfig 'conf/test.config' } + test_chr { includeConfig 'conf/test_chr.config' } + } // Avoid this error: @@ -65,11 +64,6 @@ profiles { // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default. docker.runOptions = '-u \$(id -u):\$(id -g)' -// Load igenomes.config if required -if (!params.igenomesIgnore) { - includeConfig 'conf/igenomes.config' -} - // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] @@ -91,13 +85,13 @@ dag { } manifest { - name = 'nf-core/bamtofastq' + name = 'qbic-pipelines/bamtofastq' author = 'Friederike Hanssen' - homePage = 'https://github.com/nf-core/bamtofastq' + homePage = 'https://github.com/qbic-pipelines/bamtofastq' description = 'Workflow converts one or multiple bam files back to the fastq format' mainScript = 'main.nf' - nextflowVersion = '>=0.32.0' - version = '1.0dev' + nextflowVersion = '>=20.04.1' + version = '1.0.0' } // Function to ensure that resource requirements don't go beyond diff --git a/testdata/First_SmallTest_Paired.bam b/testdata/First_SmallTest_Paired.bam new file mode 100644 index 00000000..4d4d3b84 Binary files /dev/null and b/testdata/First_SmallTest_Paired.bam differ diff --git a/testdata/Second_SmallTest_Paired.bam b/testdata/Second_SmallTest_Paired.bam 
new file mode 100644 index 00000000..2b4412f6 Binary files /dev/null and b/testdata/Second_SmallTest_Paired.bam differ diff --git a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam new file mode 100644 index 00000000..1329d402 Binary files /dev/null and b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam differ