diff --git a/.gitignore b/.gitignore index 84229f4..f96c6a7 100644 --- a/.gitignore +++ b/.gitignore @@ -100,3 +100,10 @@ ENV/ # mypy .mypy_cache/ + +# Macbook +.DS_Store + +#VScode +.vscode + diff --git a/.travis.yml b/.travis.yml index 5749769..6995e4b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,9 +3,6 @@ language: python python: - 3.7 - - 3.6 - - 3.5 - - 2.7 # Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors install: pip install -U tox-travis @@ -20,10 +17,10 @@ script: tox deploy: provider: pypi distributions: sdist bdist_wheel - user: rhshah + user: msk-access password: - secure: PLEASE_REPLACE_ME + secure: ithmgopowELxskUKR2LAi/cMnet6J+PH4emAOiJ57rBqFkwq8lVXmT/NW8D7k2VoJdTguB9v+RH+Q/91ShJh1VnwbRQ1bX7Ecr7P7O35DIQBp40Eqg5AH6ap8se37dsufnNyVNhj8vNtusd65jC3+6w4kQkcCfysD5eByUDDw04FNYCbhbXIn4S/JJ/EJUFFb8ElNzo5iWpVPnfP9uddYxpXTEgWZBo+TylZsa1gxMYUqio5xa34TMkUzN5N4tooe0C1uZ6H9/zRzh2pLwoof1ITf3wws/FOJkPwsJFZWviC+0K1pNJTaTY77kHEEC5W8/JupvdShrFg+BzldLHWsGuyC+Mfj3bjJFTqyDnbthc0kosQ3x7UdTKzyztL+gekdCcNEsusN10VTdO0DYkUnYTc1YWpsC+ORQxPGANg+RrvjO8lGobvQ4ZlhcWBJ1ynlvntlg+b0iHzuOntznwZGTJRcTqtjsH+zzIEo12FiWnJwjvM78OkEsNp5XYaKgYryhQBIo5Uqy79h7wtF8mAnIRrsK5cyQmYTJtWZ+OIQbuzj+l9o+Ff19hykD7LEB8I9So8240w09+HoNxpZKYMQFndGGZnC1wkoZNdBEOjn9Az9ZouHO7qkFHZVSp3rqSAEqiG8fm3TTl/5VAO9RNEKT5vmZMDhl04F6LXZYGCLKU= on: tags: true - repo: rhshah/merge_fastq + repo: msk-access/merge_fastq python: 3.7 diff --git a/README.rst b/README.rst index a8c1b6b..068e6b3 100644 --- a/README.rst +++ b/README.rst @@ -6,16 +6,16 @@ merge_fastq .. image:: https://img.shields.io/pypi/v/merge_fastq.svg :target: https://pypi.python.org/pypi/merge_fastq -.. image:: https://img.shields.io/travis/rhshah/merge_fastq.svg - :target: https://travis-ci.org/rhshah/merge_fastq +.. image:: https://img.shields.io/travis/msk-access/merge_fastq.svg + :target: https://travis-ci.com/msk-access/merge_fastq .. 
image:: https://readthedocs.org/projects/merge-fastq/badge/?version=latest :target: https://merge-fastq.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status -.. image:: https://pyup.io/repos/github/rhshah/merge_fastq/shield.svg - :target: https://pyup.io/repos/github/rhshah/merge_fastq/ +.. image:: https://pyup.io/repos/github/msk-access/merge_fastq/shield.svg + :target: https://pyup.io/repos/github/msk-access/merge_fastq/ :alt: Updates @@ -30,7 +30,33 @@ Package to merge multiple pair of pair-end fastq data Features -------- -* TODO +* Given multiple pair-end fastq data merge them into single pair-end fastq w.r.t each READ1 and READ2 + +Usage +----- + +> merge_fastq --help + +Usage: merge_fastq [OPTIONS] + + Console script for merge_fastq. + +Options: + -fp1, --fastq1 PATH Full path to gziped READ1 fastq files, can be + specified multiple times for example: --fastq1 + test_part1_R1.fastq.gz --fastq1 + test_part2_R1.fastq.gz [required] + -fp2, --fastq2 PATH Full path to gziped READ2 fastq files, can be + specified multiple times for example: --fastq2 + test_part1_R2.fastq.gz --fastq2 + test_part2_R2.fastq.gz [required] + -op, --output-path PATH Full path to write the output files (default: + Current working directory) + -of1, --out-fastq1 TEXT Name of the merged output READ1 fastq file + (default: merged_fastq_R1.fastq.gz) + -of2, --out-fastq2 TEXT Name of the merged output READ2 fastq file + (default: merged_fastq_R2.fastq.gz) + --help Show this message and exit. 
Credits ------- diff --git a/data/test/test_R1_001.fastq.gz b/data/test/test_R1_001.fastq.gz new file mode 100644 index 0000000..77b94c4 Binary files /dev/null and b/data/test/test_R1_001.fastq.gz differ diff --git a/data/test/test_R1_002.fastq.gz b/data/test/test_R1_002.fastq.gz new file mode 100644 index 0000000..77b94c4 Binary files /dev/null and b/data/test/test_R1_002.fastq.gz differ diff --git a/data/test/test_R1_003.fastq.gz b/data/test/test_R1_003.fastq.gz new file mode 100644 index 0000000..77b94c4 Binary files /dev/null and b/data/test/test_R1_003.fastq.gz differ diff --git a/data/test/test_R2_001.fastq.gz b/data/test/test_R2_001.fastq.gz new file mode 100644 index 0000000..51e06f9 Binary files /dev/null and b/data/test/test_R2_001.fastq.gz differ diff --git a/data/test/test_R2_002.fastq.gz b/data/test/test_R2_002.fastq.gz new file mode 100644 index 0000000..51e06f9 Binary files /dev/null and b/data/test/test_R2_002.fastq.gz differ diff --git a/data/test/test_R2_003.fastq.gz b/data/test/test_R2_003.fastq.gz new file mode 100644 index 0000000..51e06f9 Binary files /dev/null and b/data/test/test_R2_003.fastq.gz differ diff --git a/docs/usage.rst b/docs/usage.rst index 1d1b626..3881d23 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -5,3 +5,58 @@ Usage To use merge_fastq in a project:: import merge_fastq + merge_fastq(fastq1,fastq2) + +To use merge_fastq for command line:: + + > merge_fastq --help + + Usage: merge_fastq [OPTIONS] + + Console script for merge_fastq. 
+ + Options: + -fp1, --fastq1 PATH Full path to gziped READ1 fastq files, can be + specified multiple times for example: --fastq1 + test_part1_R1.fastq.gz --fastq1 + test_part2_R1.fastq.gz [required] + -fp2, --fastq2 PATH Full path to gziped READ2 fastq files, can be + specified multiple times for example: --fastq2 + test_part1_R2.fastq.gz --fastq2 + test_part2_R2.fastq.gz [required] + -op, --output-path PATH Full path to write the output files (default: + Current working directory) + -of1, --out-fastq1 TEXT Name of the merged output READ1 fastq file + (default: merged_fastq_R1.fastq.gz) + -of2, --out-fastq2 TEXT Name of the merged output READ2 fastq file + (default: merged_fastq_R2.fastq.gz) + --help Show this message and exit. + +Example commandline: + +* Using default option for multiple fastq1 and fastq2 files + + .. code-block:: console + + $ merge_fastq \ + --fastq1 test_part1_R1.fastq.gz \ + --fastq1 test_part2_R1.fastq.gz \ + --fastq2 test_part1_R2.fastq.gz \ + --fastq2 test_part2_R2.fastq.gz + + .. code + +* Using custom option for multiple fastq1 and fastq2 files + + .. code-block:: console + + $ merge_fastq \ + --fastq1 test_part1_R1.fastq.gz \ + --fastq1 test_part2_R1.fastq.gz \ + --fastq2 test_part1_R2.fastq.gz \ + --fastq2 test_part2_R2.fastq.gz \ + --output-path /path/to/where/you/want/output \ + --out-fastq1 test_merged_R1.fastq.gz \ + --out-fastq2 test_merged_R2.fastq.gz + + .. code diff --git a/merge_fastq/cli.py b/merge_fastq/cli.py index 8938dd3..92d8a81 100644 --- a/merge_fastq/cli.py +++ b/merge_fastq/cli.py @@ -1,18 +1,126 @@ # -*- coding: utf-8 -*- -"""Console script for merge_fastq.""" +import os import sys -import click +import logging +import time +import pathlib +try: + import click +except ImportError as e: + print( + "cli: click is not installed, please install click as it is one of the requirements. 
\n", e + ) + exit(1) +try: + import click_log +except ImportError as e: + print( + "cli: click-log is not installed, please install click_log as it is one of the requirements.\n", e + ) + exit(1) +try: + import merge_fastq.merge_fastq as mf +except ImportError as e: + print( + "cli: merge_fastq module could not be loaded, please install package correctly to get this running. \n", e + ) + exit(1) + +""" +cli +~~~~~~~~~~~~~~~ +:Description: console script for running merge_fastq +""" +""" +Created on October 10, 2019 +Description: console script for running merge_fastq +@author: Ronak H Shah +""" + +version = None +scriptpath = os.path.realpath(__file__) +p_scriptpath = pathlib.Path(scriptpath) +with open(os.path.join(p_scriptpath.parent, "__init__.py"), "r") as f: + for line in f.readlines(): + line = line.strip() + if line.startswith("__version__"): + version = line.split("=")[-1].strip() +__all__ = [] +__version__ = version +__date__ = "2019-10-21" +__updated__ = "2019-10-21" +# Making logging possible +logger = logging.getLogger("merge_fastq") +click_log.basic_config(logger) +click_log.ColorFormatter.colors["info"] = dict(fg="green") @click.command() -def main(args=None): +@click.option( + "--fastq1", + "-fp1", + required=True, + multiple=True, + type=click.Path(exists=True), + help="Full path to gziped READ1 fastq files, can be specified multiple times for example: --fastq1 test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz", +) +@click.option( + "--fastq2", + "-fp2", + required=True, + multiple=True, + type=click.Path(exists=True), + help="Full path to gziped READ2 fastq files, can be specified multiple times for example: --fastq2 test_part1_R2.fastq.gz --fastq2 test_part2_R2.fastq.gz", +) +@click.option( + "--output-path", + "-op", + required=False, + default=os.getcwd(), + type=click.Path(exists=True), + help="Full path to write the output files (default: Current working directory)", +) +@click.option( + "--out-fastq1", + "-of1", + required=False, + 
default="merged_fastq_R1.fastq.gz", + type=click.STRING, + help="Name of the merged output READ1 fastq file (default: merged_fastq_R1.fastq.gz)", +) +@click.option( + "--out-fastq2", + "-of2", + required=False, + default="merged_fastq_R2.fastq.gz", + type=click.STRING, + help="Name of the merged output READ2 fastq file (default: merged_fastq_R2.fastq.gz)", +) +def main(fastq1, fastq2, output_path, out_fastq1, out_fastq2): """Console script for merge_fastq.""" - click.echo("Replace this message by putting your code into " - "merge_fastq.cli.main") - click.echo("See click documentation at https://click.palletsprojects.com/") + logger_output = os.path.join(output_path, "merge_fastq.log") + fh = logging.FileHandler(logger_output) + formatter = logging.Formatter( + fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + fh.setFormatter(formatter) + logger.addHandler(fh) + logger.info("==================================================") + logger.info(">>> Running merge_fastq for <<<") + logger.info("==================================================") + t1_start = time.perf_counter() + t2_start = time.process_time() + mf.run(fastq1, fastq2, output_path, out_fastq1, out_fastq2) + t1_stop = time.perf_counter() + t2_stop = time.process_time() + logger.info("--------------------------------------------------") + logger.info("Elapsed time: %.1f [min]" % ((t1_stop - t1_start) / 60)) + logger.info("CPU process time: %.1f [min]" % ((t2_stop - t2_start) / 60)) + logger.info("--------------------------------------------------") return 0 if __name__ == "__main__": - sys.exit(main()) # pragma: no cover + sys.exit(main()) diff --git a/merge_fastq/merge_fastq.py b/merge_fastq/merge_fastq.py index 7fbbae4..5e1f092 100644 --- a/merge_fastq/merge_fastq.py +++ b/merge_fastq/merge_fastq.py @@ -1,3 +1,56 @@ # -*- coding: utf-8 -*- -"""Main module.""" +import os +import logging +import shutil + +""" +merge_fastq +~~~~~~~~~~~~~~~ +:Description: 
main module for merge_fastq +""" +""" +Created on October 21, 2019 +Description: main module for merge_fastq +@author: Ronak H Shah +""" + +# Making logging possible +logger = logging.getLogger("merge_fastq") + + +def run(fastq1, fastq2, output_path, out_fastq1, out_fastq2): + out_file1 = os.path.join(output_path, out_fastq1) + out_file2 = os.path.join(output_path, out_fastq2) + if(len(fastq1) == len(fastq2)): + if len(fastq1) == 1: + try: + shutil.copyfile(fastq1[0], out_file1) + except IOError as e: + logger.error( + "Could not copy file %s to %s, please see the execution error. \n %s \n", fastq1[0], out_file1, e) + exit(1) + try: + shutil.copyfile(fastq2[0], out_file2) + except IOError as e: + logger.error( + "Could not copy file %s to %s, please see the execution error. \n %s \n", fastq2[0], out_file2, e) + exit(1) + else: + merge_fastq(fastq1, fastq2, out_file1, out_file2) + logger.info("Done merging fastq file in %s and %s", out_file1, out_file2) + + else: + logger.error("The program expects that the same number of fastq are provided for READ1 and READ2, current they dont match. 
\n\n ### READ1 ### \n %s \n ### READ2 ### \n %s \n", fastq1, fastq2) + exit(1) + + +def merge_fastq(fastq_list_R1, fastq_list_R2, out_file1, out_file2): + with open(out_file1, 'wb') as outfile: + for fastq in fastq_list_R1: + with open(fastq, 'rb') as infile: + shutil.copyfileobj(infile, outfile) + with open(out_file2, 'wb') as outfile: + for fastq in fastq_list_R2: + with open(fastq, 'rb') as infile: + shutil.copyfileobj(infile, outfile) diff --git a/requirements_dev.txt b/requirements.txt similarity index 63% rename from requirements_dev.txt rename to requirements.txt index 283f5d5..011d5f9 100644 --- a/requirements_dev.txt +++ b/requirements.txt @@ -1,12 +1,13 @@ -pip==19.2.3 +pip==19.3.1 bump2version==0.5.11 wheel==0.33.6 watchdog==0.9.0 flake8==3.7.8 tox==3.14.0 coverage==4.5.4 -Sphinx==1.8.5 -twine==1.14.0 +Sphinx==2.2.0 +twine==2.0.0 Click==7.0 -pytest==4.6.5 +click-log==0.3.2 +pytest==5.2.1 pytest-runner==5.1 \ No newline at end of file diff --git a/setup.py b/setup.py index ab95087..2944685 100644 --- a/setup.py +++ b/setup.py @@ -26,11 +26,6 @@ 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', 'Natural Language :: English', - "Programming Language :: Python :: 2", - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', ], description="Package to merge multiple pair of pair-end fastq data", diff --git a/tests/test_merge_fastq.py b/tests/test_merge_fastq.py index 724c637..774a5b0 100644 --- a/tests/test_merge_fastq.py +++ b/tests/test_merge_fastq.py @@ -3,36 +3,69 @@ """Tests for `merge_fastq` package.""" +import os import pytest - +import subprocess from click.testing import CliRunner - from merge_fastq import merge_fastq from merge_fastq import cli -@pytest.fixture -def response(): - """Sample pytest fixture. 
- - See more at: http://doc.pytest.org/en/latest/fixture.html - """ - # import requests - # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') - - -def test_content(response): - """Sample pytest test function with the pytest fixture as an argument.""" - # from bs4 import BeautifulSoup - # assert 'GitHub' in BeautifulSoup(response.content).title.string - - def test_command_line_interface(): """Test the CLI.""" runner = CliRunner() - result = runner.invoke(cli.main) - assert result.exit_code == 0 - assert 'merge_fastq.cli.main' in result.output + #result = runner.invoke(cli.main) + #assert result.exit_code == 0 + #assert 'merge_fastq.cli.main' in result.output help_result = runner.invoke(cli.main, ['--help']) assert help_result.exit_code == 0 - assert '--help Show this message and exit.' in help_result.output + + +def test_multi_fastq(): + cmd = [ + "merge_fastq", + "--fastq1", + "data/test/test_R1_001.fastq.gz", + "--fastq1", + "data/test/test_R1_002.fastq.gz", + "--fastq1", + "data/test/test_R1_003.fastq.gz", + "--fastq2", + "data/test/test_R2_001.fastq.gz", + "--fastq2", + "data/test/test_R2_002.fastq.gz", + "--fastq2", + "data/test/test_R2_003.fastq.gz", + "--out-fastq1", + "test_R1_merged.fastq.gz", + "--out-fastq2", + "test_R2_merged.fastq.gz" + + ] + ret_code = run_cmd(cmd) + assert ret_code == 0 + assert os.path.isfile("test_R1_merged.fastq.gz") is True + assert os.path.isfile("test_R2_merged.fastq.gz") is True + + +def test_single_fastq(): + cmd = [ + "merge_fastq", + "--fastq1", + "data/test/test_R1_001.fastq.gz", + "--fastq2", + "data/test/test_R2_001.fastq.gz", + ] + ret_code = run_cmd(cmd) + assert ret_code == 0 + assert os.path.isfile("merged_fastq_R1.fastq.gz") is True + assert os.path.isfile("merged_fastq_R2.fastq.gz") is True + + +def run_cmd(cmd): + print("Command:", cmd) + process = subprocess.Popen( + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + output, errors = process.communicate() + ret_code = process.wait() + 
return ret_code diff --git a/tox.ini b/tox.ini index c3d5bc9..56115e7 100644 --- a/tox.ini +++ b/tox.ini @@ -1,27 +1,24 @@ [tox] -envlist = py27, py35, py36, py37 flake8 +envlist = py37 flake8 [travis] python = 3.7: py37 - 3.6: py36 - 3.5: py35 - 2.7: py27 [testenv:flake8] basepython = python deps = flake8 commands = flake8 merge_fastq -[testenv] +[testenv:py37] setenv = PYTHONPATH = {toxinidir} deps = - -r{toxinidir}/requirements_dev.txt + -r{toxinidir}/requirements.txt ; If you want to make tox run the tests with the same versions, create a ; requirements.txt with the pinned versions and uncomment the following line: ; -r{toxinidir}/requirements.txt commands = pip install -U pip - pytest --basetemp={envtmpdir} + pytest -s --cache-clear tests