Skip to content

Commit

Permalink
PIP-1240 add cpg correlation (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
paul-sud authored Jun 17, 2020
1 parent d63d65c commit 52f334d
Show file tree
Hide file tree
Showing 26 changed files with 303 additions and 92 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
!wgbs_pipeline/*/
!Cargo.*
!conf/*
!requirements*.txt
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ include_trailing_comma = True
force_grid_wrap = 0
use_parentheses = True
line_length = 88
known_third_party = attr,bs4,pytest,setuptools
known_third_party = attr,bs4,pandas,pytest,qc_utils
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,11 @@
- id: flake8
- id: trailing-whitespace
- id: end-of-file-fixer
exclude: tests\/data\/bed_pearson_correlation_qc\.json$
- id: debug-statements
- id: check-json
- id: pretty-format-json
exclude: tests\/data\/sample.+\.json$
exclude: tests\/data\/.+\.json$
args:
- --autofix
- id: check-yaml
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ RUN apt-get update && \
&& wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && rm get-pip.py \
&& rm -rf /var/lib/apt/lists/*

RUN pip3 install --no-cache-dir beautifulsoup4 matplotlib==3.0.2 multiprocess
COPY requirements*.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt -r requirements-gembs.txt && \
rm requirements*.txt

# Install bsmooth.R dependencies
RUN echo "r <- getOption('repos'); r['CRAN'] <- 'https://cloud.r-project.org'; options(repos = r);" > ~/.Rprofile && \
Expand Down
3 changes: 0 additions & 3 deletions pyproject.toml

This file was deleted.

2 changes: 2 additions & 0 deletions requirements-gembs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
matplotlib==3.0.2
multiprocess
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
beautifulsoup4==4.8.2
pandas==1.0.4
qc-utils>=20.6.1
80 changes: 0 additions & 80 deletions setup.py

This file was deleted.

14 changes: 12 additions & 2 deletions tests/caper_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,17 @@ fi

WDL=$1
INPUT=$2
WORKFLOW_OPTIONS="tests/pytest_workflow_options.json"
WORKFLOW_OPTIONS_FLAG="--no-relative-output-paths"

echo "Running caper with WDL ${WDL}, input ${INPUT}, and image ${WGBS_DOCKER_IMAGE_TAG}"
if [ $# -gt 2 ]; then
if [ "$3" != "${WORKFLOW_OPTIONS_FLAG}" ]; then
echo "Third argument must be ${WORKFLOW_OPTIONS_FLAG}"
exit 1
fi
WORKFLOW_OPTIONS="tests/pytest_workflow_no_relative_output_paths.json"
fi

echo "Running caper with WDL ${WDL}, input ${INPUT}, workflow options ${WORKFLOW_OPTIONS}, and image ${WGBS_DOCKER_IMAGE_TAG}"

caper run "${WDL}" -i "${INPUT}" --docker "${WGBS_DOCKER_IMAGE_TAG}" -o ./tests/pytest_workflow_options.json
caper run "${WDL}" -i "${INPUT}" --docker "${WGBS_DOCKER_IMAGE_TAG}" -o "./${WORKFLOW_OPTIONS}"
1 change: 1 addition & 0 deletions tests/data/bed_pearson_correlation_qc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"pearson_correlation": {"pearson_correlation": 0.9446482132888102}}
Binary file added tests/data/encode_cpg_10e5_gt_10x_coverage.bed.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
24 changes: 24 additions & 0 deletions tests/functional/json/test_wgbs_two_reps.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"wgbs.benchmark_mode": true,
"wgbs.extra_reference": "tests/data/conversion_control.fa.gz",
"wgbs.fastqs": [
[
[
"tests/data/sample5_data_1_200000.fastq.gz",
"tests/data/sample5_data_2_200000.fastq.gz"
]
],
[
[
"tests/data/sample5_data_1_200000_s100_100000.fastq.gz",
"tests/data/sample5_data_2_200000_s100_100000.fastq.gz"
]
]
],
"wgbs.include_conf_file": "/software/conf/IHEC_standard.conf",
"wgbs.indexed_contig_sizes": "tests/data/sacCer3.contig.sizes",
"wgbs.indexed_reference": "tests/data/indexes.tar.gz",
"wgbs.reference": "tests/data/sacCer3.fa.gz",
"wgbs.run_bsmooth": false,
"wgbs.underconversion_sequence_name": "NC_001416.1"
}
2 changes: 2 additions & 0 deletions tests/functional/test_wgbs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,5 @@
md5sum: a09ae01f70fa6d2461e37d5814ceb579
- path: test-output/coverage.bw
md5sum: afa224c2037829dccacea4a67b6fa84a
- path: test-output/bed_pearson_correlation_qc.json
should_exist: false
22 changes: 22 additions & 0 deletions tests/functional/test_wgbs_two_reps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import json
import math

import pytest


@pytest.mark.workflow("test_wgbs_two_reps")
def test_wgbs_two_reps_check_pearson_qc(workflow_dir, test_data_dir):
    """Compare the workflow's Pearson correlation QC with the stored reference value.

    The QC value is a float, so it is compared with a relative tolerance rather
    than by exact equality.
    """
    with open(test_data_dir / "bed_pearson_correlation_qc.json") as f:
        expected_qc = json.load(f)

    # One path component is only known at run time (presumably the Cromwell workflow
    # ID -- confirm), so the output file is located with a glob.
    result_pattern = (
        "wgbs/*/call-calculate_bed_pearson_correlation/execution/"
        "bed_pearson_correlation_qc.json"
    )
    output_dir = workflow_dir / "test-output"
    result_path = next(output_dir.glob(result_pattern), None)
    # Fail with a readable message instead of a bare StopIteration when the
    # workflow produced no QC file.
    assert (
        result_path is not None
    ), f"No file matching {result_pattern!r} under {output_dir}"

    with open(result_path) as f:
        result_qc = json.load(f)

    result = result_qc["pearson_correlation"]["pearson_correlation"]
    expected = expected_qc["pearson_correlation"]["pearson_correlation"]
    assert math.isclose(result, expected, rel_tol=1e-5)
12 changes: 12 additions & 0 deletions tests/functional/test_wgbs_two_reps.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
- name: test_wgbs_two_reps
tags:
- functional
# Float output in the QC is subject to rounding errors, so it needs a custom
# comparison. Due to path collisions we cannot use relative output paths for Cromwell
# outputs; the workflow will fail otherwise.
command: >-
tests/caper_run.sh
wgbs-pipeline.wdl
tests/functional/json/test_wgbs_two_reps.json
--no-relative-output-paths
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"test_calculate_bed_pearson_correlation.bed1": "tests/data/encode_cpg_10e5.bed.gz",
"test_calculate_bed_pearson_correlation.bed2": "tests/data/encode_cpg_10e5_gt_10x_coverage.bed.gz"
}
11 changes: 11 additions & 0 deletions tests/integration/test_calculate_bed_pearson_correlation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
- name: test_calculate_bed_pearson_correlation
tags:
- integration
command: >-
tests/caper_run.sh
tests/integration/wdl/test_calculate_bed_pearson_correlation.wdl
tests/integration/json/test_calculate_bed_pearson_correlation.json
files:
- path: test-output/bed_pearson_correlation_qc.json
md5sum: cddfd85f87898abeff6701f1c58c71f7
11 changes: 11 additions & 0 deletions tests/integration/wdl/test_calculate_bed_pearson_correlation.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import "../../../wgbs-pipeline.wdl" as wgbs

# Test workflow: runs only the calculate_bed_pearson_correlation task imported from
# the main pipeline WDL (alias `wgbs`), passing the two input files straight through.
workflow test_calculate_bed_pearson_correlation {
File bed1
File bed2

call wgbs.calculate_bed_pearson_correlation { input:
bed1 = bed1,
bed2 = bed2,
}
}
8 changes: 8 additions & 0 deletions tests/pytest_workflow_no_relative_output_paths.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"default_runtime_attributes": {
"docker_user": "$EUID"
},
"final_workflow_outputs_dir": "test-output",
"maxRetries": 1,
"use_relative_output_paths": false
}
Empty file added tests/python/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions tests/python/test_calculate_bed_pearson_correlation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from contextlib import suppress as does_not_raise
from io import BytesIO

import pandas as pd
import pytest

from wgbs_pipeline.calculate_bed_pearson_correlation import (
DF_COLUMN_NAMES,
calculate_pearson,
get_parser,
load_bedmethyl,
make_pearson_qc,
)


def test_load_bedmethyl():
    """Check that load_bedmethyl yields one row with the expected column values.

    The input is an in-memory file holding a track header line followed by a
    single tab-separated record.
    """
    data = BytesIO(
        (
            b'track name="ENCSR156JXJ" description="ENCSR156JXJ" visibility=2 itemRgb="'
            b'On"\nchr1\t10468\t10469\t"ENCSR156JXJ"\t1\t+\t10468\t10469\t255,0,0\t1\t1'
            b"00\tCG\tCG\t2"
        )
    )
    df = load_bedmethyl(data)
    assert df.shape[0] == 1
    expected_values = ("chr1", 10468, 10469, 1, 100)
    # Keep the loop variable distinct from the tuple being iterated (bugbear B020).
    for key, expected_value in zip(DF_COLUMN_NAMES, expected_values):
        assert df.at[0, key] == expected_value


def test_calculate_pearson():
    """Check that calculate_pearson reports a correlation of 1.0 for these frames."""
    df1 = pd.DataFrame.from_records(
        [("chr1", 1, 2, 14, 50), ("chr1", 3, 4, 12, 100), ("chr1", 5, 6, 3, 100)],
        columns=DF_COLUMN_NAMES,
    )
    df2 = pd.DataFrame.from_records(
        [
            ("chr1", 1, 2, 16, 25),
            ("chr1", 3, 4, 12, 50),
            ("chr1", 5, 6, 3, 100),
            ("chr1", 7, 8, 3, 100),
        ],
        columns=DF_COLUMN_NAMES,
    )
    pearson = calculate_pearson(df1, df2)
    # The coefficient is a computed float; compare with a tolerance so that a
    # last-bit rounding difference does not fail the test.
    assert pearson == pytest.approx(1.0)


def test_make_pearson_qc():
    """Check the nested "pearson_correlation" layout produced by make_pearson_qc."""
    expected_layout = {"pearson_correlation": {"pearson_correlation": 0.33}}
    pearson_qc = make_pearson_qc(0.33)
    actual_layout = dict(pearson_qc.to_ordered_dict())
    assert actual_layout == expected_layout


@pytest.mark.parametrize(
    "condition,args",
    [
        (does_not_raise(), ["--bedmethyls", "foo", "bar", "--outfile", "baz"]),
        (pytest.raises(SystemExit), ["--bedmethyls", "foo", "--outfile", "baz"]),
    ],
)
def test_get_parser(condition, args):
    """Parse each argument list under the context manager paired with it.

    Two --bedmethyls values are expected to parse cleanly; a single value is
    expected to make the parser exit.
    """
    # Build the parser outside the context manager so that only parse_args runs
    # under `condition`.
    argument_parser = get_parser()
    with condition:
        argument_parser.parse_args(args)
15 changes: 12 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
[tox]
envlist = lint,py37,coverage-report
isolated_build = True
skipsdist = True

[base]
deps =
attrs
pytest
pytest-mock
-rrequirements.txt

[testenv]
extras = tests
deps = {[base]deps}
commands = python -m pytest --ignore=tests/functional/ --ignore=tests/integration --ignore=tests/unit --noconftest {posargs}

[testenv:wdl]
Expand All @@ -22,9 +30,10 @@ deps = pre-commit
commands = pre-commit run --all-files

[testenv:coverage-report]
basepython = python3.7
extras = tests
commands = pytest --cov-report term-missing --ignore=tests/functional/ --ignore=tests/integration --ignore=tests/unit --noconftest --cov=wgbs_pipeline
deps =
pytest-cov
{[base]deps}

[flake8]
max_line_length = 88
Expand Down
Loading

0 comments on commit 52f334d

Please sign in to comment.