Skip to content

Commit

Permalink
Merge pull request #139 from CAST-genomics/feat/actions
Browse files Browse the repository at this point in the history
fix: file reads and writes in py3.10+
  • Loading branch information
aryarm authored Nov 7, 2022
2 parents 61fa7e2 + fd6cbe6 commit e717e49
Show file tree
Hide file tree
Showing 16 changed files with 437 additions and 109 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ snakemake/out
docs/_build/
# python cache
__pycache__
# pytest cache
# pytest and testing-related caches
.pytest_cache
.coverage
.nox
# poetry
dist/

Expand All @@ -18,4 +20,4 @@ dist/
test.par
test.phen
example_simgenotype.bp
example_simgenotype.vcf
example_simgenotype.vcf
10 changes: 5 additions & 5 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@
html_static_path = []

html_context = {
"display_github": True, # Integrate GitHub
"github_user": "CAST-genomics", # Username
"github_repo": "haptools", # Repo name
"github_version": "main", # Version
"conf_py_path": "/docs/", # Path in the checkout to the docs root
"display_github": True, # Integrate GitHub
"github_user": "CAST-genomics", # Username
"github_repo": "haptools", # Repo name
"github_version": "main", # Version
"conf_py_path": "/docs/", # Path in the checkout to the docs root
}
16 changes: 11 additions & 5 deletions docs/project_info/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,26 +96,32 @@ For example, to add a pypi dependency to our list and install it, just run
-----------
Code Checks
-----------
Before creating your pull request, please do the following.
Before creating your pull request, please run our code checks through ``nox``.

.. code-block:: bash
nox
You can also execute each of our code checks individually.

1. Format the code correctly

.. code-block:: bash
black .
nox --session=lint
2. If you made changes to the docs, check that they appear correctly.

.. code-block:: bash
( cd docs && sphinx-build -M html . _build )
open docs/_build/html/index.html
nox --session=docs
open docs/_build/index.html
3. Run all of the tests

.. code-block:: bash
pytest tests/
nox --session=tests
-----
Style
Expand Down
4 changes: 2 additions & 2 deletions docs/project_info/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Installing ``haptools`` with the "files" extra requirements enables automatic su

.. code-block:: bash
pip install haptools[files]
pip install 'haptools[files]'
.. note::
Installing ``haptools`` with the "files" extra requirement requires ``gcc`` and a few other compiler tools. Please make sure that they are installed first. To install with conda, for example, please execute the following:
Expand All @@ -37,7 +37,7 @@ Installing ``haptools`` with the "files" extra requirements enables automatic su
Using conda
-----------

We also support installing ``haptools`` from bioconda.
We also support installing ``haptools`` from bioconda using ``conda``.

.. code-block:: bash
Expand Down
5 changes: 2 additions & 3 deletions haptools/data/breakpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from collections import namedtuple
from collections.abc import Iterable
from logging import getLogger, Logger
from fileinput import hook_compressed

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -118,7 +117,7 @@ def __iter__(self, samples: set[str] = None) -> Iterable[str, SampleBlocks]:
first as a string, and then followed by its SampleBlocks
"""
# TODO: add a region parameter
bps = hook_compressed(self.fname, mode="rt")
bps = self.hook_compressed(self.fname, mode="r")
bp_text = csv.reader(bps, delimiter="\t")
samp = None
blocks = {}
Expand Down Expand Up @@ -346,7 +345,7 @@ def write(self):
>>> }
>>> breakpoints.write()
"""
with hook_compressed(self.fname, mode="wt") as bkpts:
with self.hook_compressed(self.fname, mode="w") as bkpts:
csv_writer = csv.writer(
bkpts, delimiter="\t", dialect="unix", quoting=csv.QUOTE_NONE
)
Expand Down
6 changes: 0 additions & 6 deletions haptools/data/covariates.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
from __future__ import annotations
from csv import reader
from pathlib import Path
from collections import namedtuple
from logging import getLogger, Logger
from fileinput import hook_compressed

import numpy as np

from .data import Data
from .phenotypes import Phenotypes


Expand Down
32 changes: 31 additions & 1 deletion haptools/data/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations
import os
import gzip
from csv import reader
from pathlib import Path
from typing import Iterator
from typing import Iterator, IO
from collections import namedtuple
from abc import ABC, abstractmethod
from logging import getLogger, Logger
Expand Down Expand Up @@ -78,3 +80,31 @@ def __iter__(self) -> Iterator[namedtuple]:
namedtuple containing each of the class properties
"""
pass

@staticmethod
def hook_compressed(filename: str, mode: str) -> IO:
"""
A utility to help open files regardless of their compression
Based off of python's fileinput.hook_compressed and copied from
https://stackoverflow.com/a/64106815/16815703
Parameters
----------
filename : str
The path to the file
mode : str
Either 'r' for read or 'w' for write
Returns
-------
IO
The resolved file object
"""
if "b" not in mode:
mode += "t"
ext = os.path.splitext(filename)[1]
if ext == ".gz":
return gzip.open(filename, mode)
else:
return open(filename, mode)
7 changes: 3 additions & 4 deletions haptools/data/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pathlib import Path
from typing import Iterator
from logging import getLogger, Logger
from fileinput import hook_compressed
from collections import namedtuple, Counter

import numpy as np
Expand Down Expand Up @@ -737,7 +736,7 @@ def read_samples(self, samples: list[str] = None):
self.log.warning("Sample data has already been loaded. Overriding.")
if samples is not None and not isinstance(samples, set):
samples = set(samples)
with hook_compressed(self.fname.with_suffix(".psam"), mode="rt") as psam:
with self.hook_compressed(self.fname.with_suffix(".psam"), mode="r") as psam:
psamples = reader(psam, delimiter="\t")
# find the line that declares the header
for header in psamples:
Expand Down Expand Up @@ -862,7 +861,7 @@ def _iterate_variants(
region = re.split(":|-", region)
if len(region) > 1:
region[1:] = [int(pos) for pos in region[1:] if pos]
with hook_compressed(self.fname.with_suffix(".pvar"), mode="rt") as pvar:
with self.hook_compressed(self.fname.with_suffix(".pvar"), mode="r") as pvar:
pvariants = reader(pvar, delimiter="\t")
# find the line that declares the header
for header in pvariants:
Expand Down Expand Up @@ -1158,7 +1157,7 @@ def write_samples(self):
This method is called automatically by :py:meth:`~.GenotypesPLINK.write`
"""
with hook_compressed(self.fname.with_suffix(".psam"), mode="wt") as psam:
with self.hook_compressed(self.fname.with_suffix(".psam"), mode="w") as psam:
psam.write("#IID\n")
psam.write("\n".join(self.samples))
psam.write("\n")
Expand Down
6 changes: 3 additions & 3 deletions haptools/data/haplotypes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import annotations
import os
from pathlib import Path
from functools import total_ordering
from logging import getLogger, Logger
from fileinput import hook_compressed
from dataclasses import dataclass, field, fields
from typing import Iterator, get_type_hints, Generator, Callable

Expand Down Expand Up @@ -959,7 +959,7 @@ def __iter__(
else:
# the file is not indexed, so we can't assume it's sorted, either
# use hook_compressed to automatically handle gz files
with hook_compressed(self.fname, mode="rt") as haps:
with self.hook_compressed(self.fname, mode="r") as haps:
self.log.info("Not taking advantage of indexing.")
header_lines = []
for line in haps:
Expand Down Expand Up @@ -1047,7 +1047,7 @@ def write(self):
>>> haplotypes.data = {'H1': Haplotype('chr1', 0, 10, 'H1')}
>>> haplotypes.write()
"""
with hook_compressed(self.fname, mode="wt") as haps:
with self.hook_compressed(self.fname, mode="w") as haps:
for line in self.to_str():
haps.write(line + "\n")

Expand Down
5 changes: 2 additions & 3 deletions haptools/data/phenotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from io import TextIOBase
from collections.abc import Iterable
from logging import getLogger, Logger
from fileinput import hook_compressed
from collections import namedtuple, Counter

import numpy as np
Expand Down Expand Up @@ -145,7 +144,7 @@ def __iter__(self, samples: list[str] = None) -> Iterable[namedtuple]:
Iterable[namedtuple]
See documentation for :py:meth:`~.Phenotypes._iterate`
"""
phens = hook_compressed(self.fname, mode="rt")
phens = self.hook_compressed(self.fname, mode="r")
phen_text = reader(phens, delimiter="\t")
# ignore all of the comment lines
while True:
Expand Down Expand Up @@ -192,7 +191,7 @@ def write(self):
names[idx] = name + suffix
uniq_names[name] += 1
# now we can finally write the file
with hook_compressed(self.fname, mode="wt") as phens:
with self.hook_compressed(self.fname, mode="w") as phens:
phens.write("#IID\t" + "\t".join(names) + "\n")
formatter = {"float_kind": lambda x: "%.2f" % x}
for samp, phen in zip(self.samples, self.data):
Expand Down
3 changes: 1 addition & 2 deletions haptools/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
import tempfile
from pathlib import Path
from fileinput import hook_compressed

from pysam import tabix_index

Expand Down Expand Up @@ -70,7 +69,7 @@ def index_haps(
# copy the file to a tmp location in case the input is /dev/stdin
# or a file that might otherwise be deleted by tabix_index afterward
with tempfile.NamedTemporaryFile(delete=False, mode="wt") as tmp:
with hook_compressed(str(hp.fname), mode="rt") as haps:
with data.Data.hook_compressed(str(hp.fname), mode="r") as haps:
hp.fname = Path(tmp.name)
tmp.write(haps.read())

Expand Down
3 changes: 1 addition & 2 deletions haptools/ld.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations
import logging
from pathlib import Path
from fileinput import hook_compressed
from dataclasses import dataclass, field

import numpy as np
Expand Down Expand Up @@ -198,7 +197,7 @@ def calc_ld(

if from_gts:
log.info("Computing LD between genotypes and the target")
with hook_compressed(output, mode="wt") as ld_file:
with data.Data.hook_compressed(output, mode="w") as ld_file:
log.info("Outputting .ld file with LD values")
ld_file.write("CHR\tBP\tSNP\tR\n")
for idx, variant in enumerate(gt.variants[["chrom", "pos", "id"]]):
Expand Down
74 changes: 74 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Nox sessions."""
import os
import sys
import shutil
from pathlib import Path

import nox
from nox_poetry import Session
from nox_poetry import session


# Name of the project. NOTE(review): no session visible in this noxfile
# references this variable -- confirm whether it is still needed
package = "haptools"
# Python versions handed to the "tests" session via its python= argument
python_versions = ["3.7", "3.8", "3.9", "3.10"]
# Version constraint on nox itself that this noxfile declares
nox.needs_version = ">= 2021.6.6"
# Sessions selected when nox is invoked without an explicit --session
# ("coverage" is not listed here; the "tests" session can queue it via notify)
nox.options.sessions = (
"docs",
"lint",
"tests",
)


# detect whether mamba is installed next to the conda executable
# CONDA_EXE is unset when conda isn't installed or activated; in that case we
# fall back to plain "conda" rather than crashing on Path(None), which would
# otherwise prevent nox from loading this file for *any* session
conda_cmd = "conda"
_conda_exe = os.getenv("CONDA_EXE")
if _conda_exe and (Path(_conda_exe).parent / "mamba").exists():
    conda_cmd = "mamba"
# extra arguments passed to conda/mamba when the "tests" venv is created
conda_args = ["-c", "conda-forge"]


@session(python=False)
def docs(session: Session) -> None:
    """
    Build the documentation.

    Runs sphinx-build with the arguments given on the command line or, when
    there are none, builds "docs" into "docs/_build". Any existing
    "docs/_build" directory is deleted first.
    """
    if session.posargs:
        sphinx_args = session.posargs
    else:
        sphinx_args = ["docs", "docs/_build"]
        # only the default invocation gets colored output forced on
        if "FORCE_COLOR" in os.environ:
            sphinx_args.insert(0, "--color")

    # start from a clean slate so that stale pages don't linger
    out_dir = Path("docs", "_build")
    if out_dir.exists():
        shutil.rmtree(out_dir)

    session.run("sphinx-build", *sphinx_args)


@session(python=False)
def lint(session: Session) -> None:
    """
    Lint our code.

    Runs black in --check mode on the current directory, so files are only
    inspected and never rewritten.
    """
    black_cmd = ("black", "--check", ".")
    session.run(*black_cmd)


@session(venv_backend=conda_cmd, venv_params=conda_args, python=python_versions)
def tests(session: Session) -> None:
    """
    Run the test suite.

    Installs the test dependencies through conda, installs this project into
    the session, and then runs pytest under coverage. Any arguments given on
    the command line are forwarded to pytest.
    """
    conda_deps = ("coverage[toml]", "pytest", "numpy>=1.20.0")
    session.conda_install(*conda_deps, channel="conda-forge")
    # TODO: change this to ".[files]" once plink-ng Alpha 3.8 is released
    # https://github.com/chrchang/plink-ng/releases
    session.install(".")

    try:
        pytest_cmd = ["coverage", "run", "--parallel", "-m", "pytest"]
        pytest_cmd.extend(session.posargs)
        session.run(*pytest_cmd)
    finally:
        # queue up the coverage session even if the tests failed, but only
        # when nox is running interactively
        if session.interactive:
            session.notify("coverage", posargs=[])


@session(python=False)
def coverage(session: Session) -> None:
    """
    Produce the coverage report.

    With no command line arguments, any ".coverage.*" data files in the
    current directory are combined and then "coverage report" is run.
    Otherwise, the given arguments are passed straight to coverage.
    """
    if session.posargs:
        coverage_args = session.posargs
    else:
        coverage_args = ["report"]
        # data files written by "coverage run --parallel" must be merged first
        has_parallel_data = any(Path().glob(".coverage.*"))
        if has_parallel_data:
            session.run("coverage", "combine")

    session.run("coverage", *coverage_args)
Loading

0 comments on commit e717e49

Please sign in to comment.