diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..608def7 --- /dev/null +++ b/.clang-format @@ -0,0 +1,5 @@ +--- +Language: Cpp +BasedOnStyle: LLVM +ColumnLimit: 80 +--- diff --git a/.github/workflows/tests.yaml b/.github/workflows/ci.yaml similarity index 55% rename from .github/workflows/tests.yaml rename to .github/workflows/ci.yaml index 8f5109a..eaee8b4 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/ci.yaml @@ -1,24 +1,30 @@ -name: tests +name: GitHub CI -on: [push] +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + os: [ubuntu-latest, macos-latest] + python-version: ['3.9', '3.10'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }}. - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies. 
run: | - python -m pip install --upgrade pip + python -m pip install .[test] -v pip install numpy pip install pytest pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi @@ -26,5 +32,4 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - make install - make tests + python -m pytest --cov=./ --cov-report=xml diff --git a/.gitignore b/.gitignore index 3951c67..e563265 100644 --- a/.gitignore +++ b/.gitignore @@ -1,16 +1,242 @@ -dist -build -.egg -.tox -.vscode -.coverage -__pycache__ -.mypy_cache -.pytest_cache -.ipynb_checkpoints +# Created by https://www.toptal.com/developers/gitignore/api/c++,python +# Edit at https://www.toptal.com/developers/gitignore?templates=c++,python -*.py[cod] +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo *.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries *.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg *.egg -*.egg-info \ No newline at end of file +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +# End of https://www.toptal.com/developers/gitignore/api/c++,python \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..7129a0a --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,37 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Scikit-build-core sets these values for you, or you can just hard-code the +# 
name and version. +project( + ${SKBUILD_PROJECT_NAME} + VERSION ${SKBUILD_PROJECT_VERSION} + LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG -march=native -ffast-math") + +# Find the module development requirements (requires FindPython from 3.17 or +# scikit-build-core's built-in backport) +set(PYBIND11_NEWPYTHON ON) +find_package(OpenMP) +find_package(pybind11 CONFIG REQUIRED) +pybind11_add_module(numbits src/numbits.cpp) + +target_link_libraries(numbits PRIVATE pybind11::pybind11) + +# Check if OpenMP was found +if(OpenMP_CXX_FOUND) + target_link_libraries(numbits PRIVATE OpenMP::OpenMP_CXX) + target_compile_definitions(numbits PRIVATE USE_OPENMP) +endif() + +# This is passing in the version as a define just as an example +target_compile_definitions(numbits PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +# The install directory is the output (wheel) directory +install(TARGETS numbits DESTINATION .) 
\ No newline at end of file diff --git a/README.md b/README.md index 7a76e32..9174e40 100644 --- a/README.md +++ b/README.md @@ -42,18 +42,26 @@ or you can: python setup.py install ``` -### Test call +### Usage ```python import numpy as np import numbits a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype='uint8') -b = numbits.unpack(a, nbits=2) +b = numbits.unpack(a, nbits=2, bitorder="big", parallel=False) >>> array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 1, 3, 0, 0, 2, 0], dtype=uint8) ``` +### Benchmarks +| | | +| --------- | --------- | +| ![](tests/benchmarks/benchmark_unpack_1bit_little.png) | ![](tests/benchmarks/benchmark_unpack_1bit_big.png) | +| ![](tests/benchmarks/benchmark_pack_1bit_little.png) | ![](tests/benchmarks/benchmark_pack_1bit_big.png) | + + + [tests]: https://github.com/telegraphic/numbits/actions/workflows/tests.yaml/badge.svg [pybind]: https://github.com/pybind/pybind11 [sigpyproc]: https://github.com/FRBs/sigpyproc3 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bb8134d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,47 @@ +[build-system] +requires = ["scikit-build-core", "pybind11"] +build-backend = "scikit_build_core.build" + + +[project] +name = "numbits" +version = "0.0.3" +description="Pack and unpack 1, 2 and 4 bit data to/from 8-bit numpy arrays." 
+readme = "README.md" +license = {text = "MIT"} +authors = [{ name = "Danny Price", email = "dancpr@berkeley.edu" }] +requires-python = ">=3.8" +dependencies = ["numpy"] +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Astronomy", +] + +[project.urls] +Github = "https://github.com/telegraphic/numbits" + + +[project.optional-dependencies] +test = ["pytest"] + + +[tool.scikit-build] +cmake.build-type = "Release" +wheel.expand-macos-universal-tags = true +cmake.verbose = true +logging.level = "INFO" + + +[tool.cibuildwheel] +test-command = "python -m pytest {project}/tests -v" +test-extras = ["test"] +test-skip = ["pp* *-musllinux* *-manylinux_i686", "*universal2:arm64"] +build-verbosity = 1 + + diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 0792c86..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -license_files = LICENSE \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index c7da65d..0000000 --- a/setup.py +++ /dev/null @@ -1,167 +0,0 @@ -# type: ignore - -import os -import sys -import pathlib -import setuptools - -from setuptools.command.build_ext import build_ext -from setuptools import setup, find_packages, Extension - - -__version__ = "0.0.2" - - -class get_pybind_include(object): - - """ - Helper class to determine the pybind11 include path - The purpose of this class is to postpone importing pybind11 - until it is actually installed, so that the ``get_include()`` - method can be invoked. 
- """ - - def __init__(self, user=False): - self.user = user - - def __str__(self): - import pybind11 - - return pybind11.get_include(self.user) - - -ext_modules = [ - Extension( - "numbits", - sorted(["src/numbits.cpp"]), - include_dirs=[ - get_pybind_include(), - get_pybind_include(user=True), - ], - language="c++", - ), -] - - -# cf http://bugs.python.org/issue26689 -def has_flag( - compiler, - flagname, -) -> bool: - - """ - Return a boolean indicating whether a flag name is supported on - the specified compiler. - """ - - import os - import tempfile - - with tempfile.NamedTemporaryFile( - "w", - suffix=".cpp", - delete=False, - ) as f: - f.write("int main (int argc, char **argv) { return 0; }") - fname = f.name - try: - compiler.compile([fname], extra_postargs=[flagname]) - except setuptools.distutils.errors.CompileError: - return False - finally: - try: - os.remove(fname) - except OSError: - pass - return True - - -def cpp_flag(compiler): - - """ - Return the -std=c++[11/14/17] compiler flag. - The newer version is prefered over c++11 (when it is available). - """ - - flags = ["-std=c++17", "-std=c++14", "-std=c++11"] - for flag in flags: - if has_flag(compiler, flag): - return flag - raise RuntimeError("Unsupported compiler -- at least C++11 support is needed!") - - -class BuildExt(build_ext): - - """ - A custom build extension for adding compiler-specific options. 
- """ - - c_opts = { - "msvc": ["/EHsc"], - "unix": ["-O3", "-march=native", "-ffast-math"], - } - l_opts = { - "msvc": [], - "unix": [], - } - - if sys.platform == "darwin": - darwin_opts = ["-stdlib=libc++", "-mmacosx-version-min=10.7"] - c_opts["unix"] += darwin_opts - l_opts["unix"] += darwin_opts - - def build_extensions(self) -> None: - ct = self.compiler.compiler_type - opts = self.c_opts.get(ct, []) - link_opts = self.l_opts.get(ct, []) - if ct == "unix": - opts.append(cpp_flag(self.compiler)) - if has_flag(self.compiler, "-fvisibility=hidden"): - opts.append("-fvisibility=hidden") - - for ext in self.extensions: - ext.define_macros = [ - ("VERSION_INFO", '"{}"'.format(self.distribution.get_version())) - ] - ext.extra_compile_args = opts - ext.extra_link_args = link_opts - build_ext.build_extensions(self) - - -here = pathlib.Path(__file__).parent.resolve() -long_description = (here / "README.md").read_text(encoding="utf-8") -install_requires = [] -setup_requires = ["pybind11>=2.5.0"] - - -setup( - name="numbits", - version=__version__, - description="Pack and unpack 1, 2 and 4 bit data to/from 8-bit numpy arrays.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/telegraphic/numbits", - author="Danny Price", - author_email="dancpr@berkeley.edu", - classifiers=[ - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Topic :: Scientific/Engineering :: Astronomy", - ], - package_dir={"": "src"}, - packages=find_packages(where="src"), - install_package_data=True, - python_requires=">=3.5, <4", - setup_requires=setup_requires, - install_requires=install_requires, - project_urls={ - "Source": "https://github.com/telegraphic/numbits", - "Bug Reports": "https://github.com/telegraphic/numbits/issues", - }, - ext_modules=ext_modules, - 
cmdclass={"build_ext": BuildExt}, - zip_safe=False, -) diff --git a/src/numbits.cpp b/src/numbits.cpp index 06d29cf..81bc9e1 100644 --- a/src/numbits.cpp +++ b/src/numbits.cpp @@ -1,7 +1,15 @@ -#include -#include -#include -#include +#include +#include +#include +#include + +#include +#include +#ifdef USE_OPENMP +#include +#endif + +namespace py = pybind11; #define HI4BITS 240 #define LO4BITS 15 @@ -10,134 +18,392 @@ #define LOMED2BITS 12 #define LO2BITS 3 -#include -#include -namespace py = pybind11; - /*----------------------------------------------------------------------------*/ +// Lookup table for bit unpacking +template struct unpack_lookup_table { + static constexpr size_t Size = 256; + static constexpr size_t Elements = 8 / NBits; + alignas(64) uint8_t data[Size][Elements]{}; // 256 * 8/NBits bytes -/* -Function to unpack 1,2 and 4 bit data -data is unpacked into an empty buffer -NOTE: Only unpacks big endian bit ordering -*/ -py::array_t unpack(py::array_t inarray, int nbits) -{ - // Setup input/output buffers. 
- py::buffer_info inbuf = inarray.request(); - int nbytes = inbuf.size; - - auto outarray = py::array_t(inbuf.size * 8 / nbits); - py::buffer_info outbuf = outarray.request(); - - uint8_t *indata = (uint8_t *)inbuf.ptr; - uint8_t *outdata = (uint8_t *)outbuf.ptr; - - int ii, jj; - switch (nbits) - { - case 1: - for (ii = 0; ii < nbytes; ii++) - { - for (jj = 0; jj < 8; jj++) - { - outdata[(ii * 8) + (7 - jj)] = (indata[ii] >> jj) & 1; + constexpr unpack_lookup_table() { + for (size_t ii = 0; ii < Size; ii++) { + for (size_t jj = 0; jj < Elements; jj++) { + if constexpr (BigEndian) { + data[ii][Elements - 1 - jj] = + (ii >> (jj * NBits)) & ((1 << NBits) - 1); + } else { + data[ii][jj] = (ii >> (jj * NBits)) & ((1 << NBits) - 1); + } } } - break; - case 2: - for (ii = 0; ii < nbytes; ii++) - { - outdata[(ii * 4) + 3] = indata[ii] & LO2BITS; - outdata[(ii * 4) + 2] = (indata[ii] & LOMED2BITS) >> 2; - outdata[(ii * 4) + 1] = (indata[ii] & UPMED2BITS) >> 4; - outdata[(ii * 4) + 0] = (indata[ii] & HI2BITS) >> 6; + } +}; + +// Compile-time lookup table initialization +constexpr unpack_lookup_table<1, false> unpack_lookup_table_1bit_little{}; +constexpr unpack_lookup_table<1, true> unpack_lookup_table_1bit_big{}; +constexpr unpack_lookup_table<2, false> unpack_lookup_table_2bit_little{}; +constexpr unpack_lookup_table<2, true> unpack_lookup_table_2bit_big{}; +constexpr unpack_lookup_table<4, false> unpack_lookup_table_4bit_little{}; +constexpr unpack_lookup_table<4, true> unpack_lookup_table_4bit_big{}; + +template +void unpack_1bit_lookup(const uint8_t *inbuffer, uint8_t *outbuffer, + size_t nbytes) { +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes; ii++) { + if constexpr (BigEndian) { + std::copy(&unpack_lookup_table_1bit_big.data[inbuffer[ii]][0], + &unpack_lookup_table_1bit_big.data[inbuffer[ii]][8], + &outbuffer[ii * 8]); + } else { + std::copy(&unpack_lookup_table_1bit_little.data[inbuffer[ii]][0], + 
&unpack_lookup_table_1bit_little.data[inbuffer[ii]][8], + &outbuffer[ii * 8]); } - break; - case 4: - for (ii = 0; ii < nbytes; ii++) - { - outdata[(ii * 2) + 1] = indata[ii] & LO4BITS; - outdata[(ii * 2) + 0] = (indata[ii] & HI4BITS) >> 4; + } +} + +template +void unpack_2bit_lookup(const uint8_t *inbuffer, uint8_t *outbuffer, + size_t nbytes) { +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes; ii++) { + if constexpr (BigEndian) { + std::copy(&unpack_lookup_table_2bit_big.data[inbuffer[ii]][0], + &unpack_lookup_table_2bit_big.data[inbuffer[ii]][4], + &outbuffer[ii * 4]); + } else { + std::copy(&unpack_lookup_table_2bit_little.data[inbuffer[ii]][0], + &unpack_lookup_table_2bit_little.data[inbuffer[ii]][4], + &outbuffer[ii * 4]); } - break; } - return outarray; } -/* -Function to pack bit data into an empty buffer -*/ -py::array_t pack(py::array_t inarray, int nbits) -{ - // Setup input/output buffers. - py::buffer_info inbuf = inarray.request(); - int nbytes = inbuf.size; - - auto outarray = py::array_t(inbuf.size * nbits / 8); - py::buffer_info outbuf = outarray.request(); - - uint8_t *indata = (uint8_t *)inbuf.ptr; - uint8_t *outdata = (uint8_t *)outbuf.ptr; - - int ii, pos; - int bitfact = 8 / nbits; - unsigned char val; - - switch (nbits) - { - case 1: - for (ii = 0; ii < nbytes / bitfact; ii++) - { - pos = ii * 8; - val = (indata[pos + 0] << 7) | - (indata[pos + 1] << 6) | - (indata[pos + 2] << 5) | - (indata[pos + 3] << 4) | - (indata[pos + 4] << 3) | - (indata[pos + 5] << 2) | - (indata[pos + 6] << 1) | - indata[pos + 7]; - outdata[ii] = val; +template +void unpack_4bit_lookup(const uint8_t *inbuffer, uint8_t *outbuffer, + size_t nbytes) { +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes; ii++) { + if constexpr (BigEndian) { + std::copy(&unpack_lookup_table_4bit_big.data[inbuffer[ii]][0], + &unpack_lookup_table_4bit_big.data[inbuffer[ii]][2], + 
&outbuffer[ii * 2]); + } else { + std::copy(&unpack_lookup_table_4bit_little.data[inbuffer[ii]][0], + &unpack_lookup_table_4bit_little.data[inbuffer[ii]][2], + &outbuffer[ii * 2]); } - break; - case 2: - for (ii = 0; ii < nbytes / bitfact; ii++) - { - pos = ii * 4; - val = (indata[pos] << 6) | - (indata[pos + 1] << 4) | - (indata[pos + 2] << 2) | - indata[pos + 3]; - outdata[ii] = val; + } +} + +template +void unpack_1bit(const uint8_t *inbuffer, uint8_t *outbuffer, size_t nbytes) { +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes; ii++) { + for (size_t jj = 0; jj < 8; jj++) { + if constexpr (bigEndian) { + outbuffer[(ii << 3) + (7 - jj)] = (inbuffer[ii] >> jj) & 1; + } else { + outbuffer[(ii << 3) + jj] = (inbuffer[ii] >> jj) & 1; + } + } + } +} + +template +void unpack_2bit(const uint8_t *inbuffer, uint8_t *outbuffer, size_t nbytes) { +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes; ii++) { + if constexpr (bigEndian) { + outbuffer[(ii << 2) + 3] = inbuffer[ii] & LO2BITS; + outbuffer[(ii << 2) + 2] = (inbuffer[ii] & LOMED2BITS) >> 2; + outbuffer[(ii << 2) + 1] = (inbuffer[ii] & UPMED2BITS) >> 4; + outbuffer[(ii << 2) + 0] = (inbuffer[ii] & HI2BITS) >> 6; + } else { + outbuffer[(ii << 2) + 0] = inbuffer[ii] & LO2BITS; + outbuffer[(ii << 2) + 1] = (inbuffer[ii] & LOMED2BITS) >> 2; + outbuffer[(ii << 2) + 2] = (inbuffer[ii] & UPMED2BITS) >> 4; + outbuffer[(ii << 2) + 3] = (inbuffer[ii] & HI2BITS) >> 6; } - break; - case 4: - for (ii = 0; ii < nbytes / bitfact; ii++) - { - pos = ii * 2; - val = (indata[pos] << 4) | indata[pos + 1]; - outdata[ii] = val; + } +} + +template +void unpack_4bit(const uint8_t *inbuffer, uint8_t *outbuffer, size_t nbytes) { +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes; ii++) { + if constexpr (bigEndian) { + outbuffer[(ii << 1) + 1] = inbuffer[ii] & LO4BITS; + outbuffer[(ii << 1) + 0] 
= (inbuffer[ii] & HI4BITS) >> 4; + } else { + outbuffer[(ii << 1) + 0] = inbuffer[ii] & LO4BITS; + outbuffer[(ii << 1) + 1] = (inbuffer[ii] & HI4BITS) >> 4; + } + } +} + +template +void pack_1bit(const uint8_t *inbuffer, uint8_t *outbuffer, size_t nbytes) { + size_t pos; +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes / 8; ii++) { + pos = ii * 8; + if constexpr (bigEndian) { + outbuffer[ii] = (inbuffer[pos + 0] << 7) | (inbuffer[pos + 1] << 6) | + (inbuffer[pos + 2] << 5) | (inbuffer[pos + 3] << 4) | + (inbuffer[pos + 4] << 3) | (inbuffer[pos + 5] << 2) | + (inbuffer[pos + 6] << 1) | inbuffer[pos + 7]; + } else { + outbuffer[ii] = inbuffer[pos + 0] | (inbuffer[pos + 1] << 1) | + (inbuffer[pos + 2] << 2) | (inbuffer[pos + 3] << 3) | + (inbuffer[pos + 4] << 4) | (inbuffer[pos + 5] << 5) | + (inbuffer[pos + 6] << 6) | (inbuffer[pos + 7] << 7); + } + } +} + +template +void pack_2bit(const uint8_t *inbuffer, uint8_t *outbuffer, size_t nbytes) { + size_t pos; +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes / 4; ii++) { + pos = ii * 4; + if constexpr (bigEndian) { + outbuffer[ii] = (inbuffer[pos + 0] << 6) | (inbuffer[pos + 1] << 4) | + (inbuffer[pos + 2] << 2) | inbuffer[pos + 3]; + } else { + outbuffer[ii] = inbuffer[pos + 0] | (inbuffer[pos + 1] << 2) | + (inbuffer[pos + 2] << 4) | (inbuffer[pos + 3] << 6); + } + } +} + +template +void pack_4bit(const uint8_t *inbuffer, uint8_t *outbuffer, size_t nbytes) { + size_t pos; +#ifdef USE_OPENMP +#pragma omp parallel for if (parallel) +#endif + for (size_t ii = 0; ii < nbytes / 2; ii++) { + pos = ii * 2; + if constexpr (bigEndian) { + outbuffer[ii] = (inbuffer[pos] << 4) | inbuffer[pos + 1]; + } else { + outbuffer[ii] = inbuffer[pos] | (inbuffer[pos + 1] << 4); } - break; } +} + +using PackUnpackFunc = void (*)(const uint8_t *, uint8_t *, size_t); + +constexpr std::array, 2>, 3> + unpackLookupDispatcher = {{{{ + 
{unpack_1bit_lookup, + unpack_1bit_lookup}, // little + {unpack_1bit_lookup, + unpack_1bit_lookup} // big + }}, + {{ + {unpack_2bit_lookup, + unpack_2bit_lookup}, // little + {unpack_2bit_lookup, + unpack_2bit_lookup} // big + }}, + {{ + {unpack_4bit_lookup, + unpack_4bit_lookup}, // little + {unpack_4bit_lookup, + unpack_4bit_lookup} // big + }}}}; + +constexpr std::array, 2>, 3> + unpackDispatcher = { + {{{ + {unpack_1bit, unpack_1bit}, // little + {unpack_1bit, unpack_1bit} // big + }}, + {{ + {unpack_2bit, unpack_2bit}, // little + {unpack_2bit, unpack_2bit} // big + }}, + {{ + {unpack_4bit, unpack_4bit}, // little + {unpack_4bit, unpack_4bit} // big + }}}}; + +constexpr std::array, 2>, 3> + packDispatcher = { + {{{ + {pack_1bit, pack_1bit}, // little + {pack_1bit, pack_1bit} // big + }}, + {{ + {pack_2bit, pack_2bit}, // little + {pack_2bit, pack_2bit} // big + }}, + {{ + {pack_4bit, pack_4bit}, // little + {pack_4bit, pack_4bit} // big + }}}}; + +size_t get_bitorder_index(const std::string &bitorder) { + if (bitorder.empty() || (bitorder[0] != 'l' && bitorder[0] != 'b')) { + throw std::invalid_argument( + "Invalid bitorder. Must begin with 'l' or 'b'."); + } + return (bitorder[0] == 'b') ? 1 : 0; +} + +/* +Function to unpack 1, 2 and 4 bit data into an 8-bit array. +*/ +py::array_t +unpack_lookup(const py::array_t &inarray, + size_t nbits, const std::string &bitorder, + bool parallel = false) { + if (nbits != 1 && nbits != 2 && nbits != 4) { + throw std::invalid_argument( + "Invalid number of bits. Supported values are 1, 2, and 4."); + } + size_t bitorder_idx = get_bitorder_index(bitorder); + size_t nbits_idx = nbits >> 1; + size_t nbytes = inarray.size(); + auto outarray = py::array_t(nbytes * 8 / nbits); + + PackUnpackFunc unpackFunc = + unpackLookupDispatcher[nbits_idx][bitorder_idx][parallel ? 1 : 0]; + unpackFunc(inarray.data(), outarray.mutable_data(), nbytes); + return outarray; } -PYBIND11_MODULE(numbits, m) -{ - // Optional module docstring. 
- m.doc() = "Pack and unpack 1, 2 and 4 bit data"; - - m.def("unpack", - &unpack, - py::arg("array"), - py::arg("nbits"), - "Unpack 1, 2 and 4 bit data into an 8-bit numpy array."); - - m.def("pack", - &pack, - py::arg("array"), - py::arg("nbits"), - "Pack 1, 2 and 4 bit data into an 8-bit numpy array."); +py::array_t +unpack(const py::array_t &inarray, size_t nbits, + const std::string &bitorder, bool parallel = false) { + if (nbits != 1 && nbits != 2 && nbits != 4) { + throw std::invalid_argument( + "Invalid number of bits. Supported values are 1, 2, and 4."); + } + size_t bitorder_idx = get_bitorder_index(bitorder); + size_t nbits_idx = nbits >> 1; + size_t nbytes = inarray.size(); + auto outarray = py::array_t(nbytes * 8 / nbits); + + PackUnpackFunc unpackFunc = + unpackDispatcher[nbits_idx][bitorder_idx][parallel ? 1 : 0]; + unpackFunc(inarray.data(), outarray.mutable_data(), nbytes); + + return outarray; +} + +void unpack_buffered(const py::array_t &inarray, + py::array_t &outarray, + size_t nbits, const std::string &bitorder, + bool parallel = false) { + if (nbits != 1 && nbits != 2 && nbits != 4) { + throw std::invalid_argument( + "Invalid number of bits. Supported values are 1, 2, and 4."); + } + size_t bitorder_idx = get_bitorder_index(bitorder); + size_t nbits_idx = nbits >> 1; + size_t nbytes = inarray.size(); + size_t outsize = outarray.size(); + if (outsize != nbytes * 8 / nbits) { + throw std::invalid_argument("Output buffer size is not correct."); + } + + PackUnpackFunc unpackFunc = + unpackDispatcher[nbits_idx][bitorder_idx][parallel ? 1 : 0]; + unpackFunc(inarray.data(), outarray.mutable_data(), nbytes); +} + +/* +Function to pack 1, 2 and 4 bit data into an 8-bit array. +*/ +py::array_t +pack(const py::array_t &inarray, size_t nbits, + const std::string &bitorder, bool parallel = false) { + if (nbits != 1 && nbits != 2 && nbits != 4) { + throw std::invalid_argument( + "Invalid number of bits. 
Supported values are 1, 2, and 4."); + } + size_t bitorder_idx = get_bitorder_index(bitorder); + size_t nbits_idx = nbits >> 1; + size_t nbytes = inarray.size(); + auto outarray = py::array_t(nbytes * nbits / 8); + + PackUnpackFunc packFunc = + packDispatcher[nbits_idx][bitorder_idx][parallel ? 1 : 0]; + packFunc(inarray.data(), outarray.mutable_data(), nbytes); + + return outarray; +} + +void pack_buffered(const py::array_t &inarray, + py::array_t &outarray, + size_t nbits, const std::string &bitorder, + bool parallel = false) { + if (nbits != 1 && nbits != 2 && nbits != 4) { + throw std::invalid_argument( + "Invalid number of bits. Supported values are 1, 2, and 4."); + } + size_t bitorder_idx = get_bitorder_index(bitorder); + size_t nbits_idx = nbits >> 1; + size_t nbytes = inarray.size(); + size_t outsize = outarray.size(); + if (outsize != nbytes * nbits / 8) { + throw std::invalid_argument("Output buffer size is not correct."); + } + + PackUnpackFunc packFunc = + packDispatcher[nbits_idx][bitorder_idx][parallel ? 
1 : 0]; + packFunc(inarray.data(), outarray.mutable_data(), nbytes); +} + +PYBIND11_MODULE(numbits, m) { + m.doc() = "Pack and unpack 1, 2 and 4 bit data into/from an 8-bit array."; + + m.def( + "unpack_lookup", &unpack_lookup, + "Unpack 1, 2 and 4-bit data from an 8-bit numpy array using lookup table", + py::arg("inarray"), py::arg("nbits"), py::arg("bitorder") = "big", + py::arg("parallel") = false); + + m.def("unpack", &unpack, + "Unpack 1, 2 and 4-bit data from an 8-bit numpy array", + py::arg("inarray"), py::arg("nbits"), py::arg("bitorder") = "big", + py::arg("parallel") = false); + + m.def("unpack_buffered", &unpack_buffered, + "Unpack 1, 2 and 4-bit data from an 8-bit numpy array into a " + "pre-allocated buffer", + py::arg("inarray"), py::arg("outarray"), py::arg("nbits"), + py::arg("bitorder") = "big", py::arg("parallel") = false); + + m.def("pack", &pack, "Pack 1, 2 and 4-bit data into an 8-bit numpy array", + py::arg("inarray"), py::arg("nbits"), py::arg("bitorder") = "big", + py::arg("parallel") = false); + + m.def("pack_buffered", &pack_buffered, + "Pack 1, 2 and 4-bit data into an pre-allocated 8-bit numpy array", + py::arg("inarray"), py::arg("outarray"), py::arg("nbits"), + py::arg("bitorder") = "big", py::arg("parallel") = false); } \ No newline at end of file diff --git a/tests/benchmark.py b/tests/benchmark.py new file mode 100644 index 0000000..c5ed59f --- /dev/null +++ b/tests/benchmark.py @@ -0,0 +1,113 @@ +import numpy as np +import perfplot +import click + +import numbits + + +@click.command() +@click.option("--test", default="unpack", help="Choose between 'unpack' and 'pack'") +@click.option("--bitorder", default="big", help="Choose between 'big' and 'little'") +@click.option( + "--nbits", + default=1, + type=click.IntRange(min=1, max=4), + help="Number of bits to pack/unpack", +) +def main(test="unpack", bitorder="big", nbits=1): + if test == "unpack": + kernels = [ + lambda arr, out: numbits.unpack( + arr, nbits, parallel=False, 
bitorder=bitorder + ), + lambda arr, out: numbits.unpack( + arr, nbits, parallel=True, bitorder=bitorder + ), + lambda arr, out: numbits.unpack_lookup( + arr, nbits, parallel=False, bitorder=bitorder + ), + lambda arr, out: numbits.unpack_lookup( + arr, nbits, parallel=True, bitorder=bitorder + ), + lambda arr, out: numbits.unpack_buffered( + arr, out, nbits, parallel=False, bitorder=bitorder + ), + lambda arr, out: numbits.unpack_buffered( + arr, out, nbits, parallel=True, bitorder=bitorder + ), + ] + labels = [ + "numbits", + "numbits_parallel", + "numbits_lookup", + "numbits_lookup_parallel", + "numbits_buffered", + "numbits_buffered_parallel", + ] + if nbits == 1: + kernels.insert(0, lambda arr, out: np.unpackbits(arr, bitorder=bitorder)) + labels.insert(0, "numpy") + bench_stat = perfplot.bench( + setup=lambda n: ( + np.random.randint(256, size=n, dtype="uint8"), + np.zeros(n * 8 // nbits, dtype="uint8"), + ), + n_range=[2**k for k in range(0, 24)], + kernels=kernels, + labels=labels, + xlabel="n", + title=f"Unpack {nbits} bit ({bitorder} endian)", + target_time_per_measurement=1, + equality_check=None, + ) + bench_stat.save( + f"benchmark_unpack_{nbits}bit_{bitorder}.png", + transparent=False, + bbox_inches="tight", + ) + else: + kernels = [ + lambda arr, out: numbits.pack( + arr, nbits, parallel=False, bitorder=bitorder + ), + lambda arr, out: numbits.pack(arr, nbits, parallel=True, bitorder=bitorder), + lambda arr, out: numbits.pack_buffered( + arr, out, nbits, parallel=False, bitorder=bitorder + ), + lambda arr, out: numbits.pack_buffered( + arr, out, nbits, parallel=True, bitorder=bitorder + ), + ] + labels = [ + "numbits", + "numbits_parallel", + "numbits_buffered", + "numbits_buffered_parallel", + ] + if nbits == 1: + kernels.insert(0, lambda arr, out: np.packbits(arr, bitorder=bitorder)) + labels.insert(0, "numpy") + + bench_stat = perfplot.bench( + setup=lambda n: ( + np.random.randint((1 << nbits) - 1, size=n, dtype="uint8"), + np.zeros(n * nbits // 
8, dtype="uint8"), + ), + n_range=[2**k for k in range(3, 24)], + kernels=kernels, + labels=labels, + xlabel="n", + title=f"Pack {nbits} bit ({bitorder} endian)", + target_time_per_measurement=1, + equality_check=None, + ) + bench_stat.save( + f"benchmark_pack_{nbits}bit_{bitorder}.png", + transparent=False, + bbox_inches="tight", + ) + bench_stat.show() + + +if __name__ == "__main__": + main() diff --git a/tests/benchmarks/benchmark_pack_1bit_big.png b/tests/benchmarks/benchmark_pack_1bit_big.png new file mode 100644 index 0000000..94ac062 Binary files /dev/null and b/tests/benchmarks/benchmark_pack_1bit_big.png differ diff --git a/tests/benchmarks/benchmark_pack_1bit_little.png b/tests/benchmarks/benchmark_pack_1bit_little.png new file mode 100644 index 0000000..4aeb27a Binary files /dev/null and b/tests/benchmarks/benchmark_pack_1bit_little.png differ diff --git a/tests/benchmarks/benchmark_pack_2bit_big.png b/tests/benchmarks/benchmark_pack_2bit_big.png new file mode 100644 index 0000000..b744c2a Binary files /dev/null and b/tests/benchmarks/benchmark_pack_2bit_big.png differ diff --git a/tests/benchmarks/benchmark_pack_4bit_big.png b/tests/benchmarks/benchmark_pack_4bit_big.png new file mode 100644 index 0000000..c0cfac6 Binary files /dev/null and b/tests/benchmarks/benchmark_pack_4bit_big.png differ diff --git a/tests/benchmarks/benchmark_unpack_1bit_big.png b/tests/benchmarks/benchmark_unpack_1bit_big.png new file mode 100644 index 0000000..d9557e1 Binary files /dev/null and b/tests/benchmarks/benchmark_unpack_1bit_big.png differ diff --git a/tests/benchmarks/benchmark_unpack_1bit_little.png b/tests/benchmarks/benchmark_unpack_1bit_little.png new file mode 100644 index 0000000..948418f Binary files /dev/null and b/tests/benchmarks/benchmark_unpack_1bit_little.png differ diff --git a/tests/benchmarks/benchmark_unpack_2bit_big.png b/tests/benchmarks/benchmark_unpack_2bit_big.png new file mode 100644 index 0000000..459a63a Binary files /dev/null and 
def _field_shifts(nbits: int, bitorder: str) -> np.ndarray:
    """Return the right-shift that isolates each ``nbits``-wide field of a byte.

    The shifts are listed in output order: most-significant field first for
    ``bitorder == "big"``, least-significant field first for any other value.

    The array is deliberately ``uint8``.  With the default ``int64`` dtype,
    shifting a ``uint8`` array by one of its elements yields ``int64`` under
    NumPy >= 2 promotion rules, which cannot be OR-ed in place into a
    ``uint8`` buffer.
    """
    shifts = np.arange(0, 8, nbits, dtype=np.uint8)
    if bitorder == "big":
        shifts = shifts[::-1]
    return shifts


def unpack_bits(arr: np.ndarray, nbits: int, bitorder: str = "big") -> np.ndarray:
    """Reference unpacker: split every byte of ``arr`` into ``nbits``-wide fields.

    Parameters
    ----------
    arr
        ``uint8`` array of packed bytes.
    nbits
        Width of each field in bits; must be 1, 2 or 4.
    bitorder
        ``"big"`` emits the most-significant field of each byte first; any
        other value emits the least-significant field first.

    Returns
    -------
    np.ndarray
        Flat ``uint8`` array holding ``arr.size * 8 // nbits`` field values.

    Raises
    ------
    AssertionError
        If ``arr`` is not ``uint8`` or ``nbits`` is not one of 1, 2, 4.
    """
    assert arr.dtype == np.uint8
    assert nbits in {1, 2, 4}

    mask = (1 << nbits) - 1
    shifts = _field_shifts(nbits, bitorder)
    # Broadcast every byte against all shifts: shape (..., 8 // nbits).
    unpacked = (arr[..., np.newaxis] >> shifts) & mask
    return unpacked.reshape(-1).astype(np.uint8, copy=False)


def pack_bits(arr: np.ndarray, nbits: int, bitorder: str = "big") -> np.ndarray:
    """Reference packer: combine ``nbits``-wide fields of ``arr`` into bytes.

    Parameters
    ----------
    arr
        ``uint8`` array of field values; consecutive groups of ``8 // nbits``
        elements form one output byte.
    nbits
        Width of each field in bits; must be 1, 2 or 4.
    bitorder
        ``"big"`` places the first field of each group in the most-significant
        position; any other value places it in the least-significant position.

    Returns
    -------
    np.ndarray
        ``uint8`` array of ``arr.size * nbits // 8`` packed bytes.

    Raises
    ------
    AssertionError
        If ``arr`` is not ``uint8`` or ``nbits`` is not one of 1, 2, 4.
    """
    assert arr.dtype == np.uint8
    assert nbits in {1, 2, 4}

    fields_per_byte = 8 // nbits
    packed = np.zeros(arr.size * nbits // 8, dtype=np.uint8)
    for position, shift in enumerate(_field_shifts(nbits, bitorder)):
        # Every ``fields_per_byte``-th element, starting at ``position``,
        # lands at the same bit offset of its output byte.  ``shift`` is a
        # uint8 scalar, so the shifted values stay uint8.
        packed |= arr[position::fields_per_byte] << shift

    return packed
class Testnumbits:
    """Check the ``numbits`` pack/unpack routines against NumPy references.

    Every test is parametrized over the bit order and the ``parallel`` flag,
    and (except the invalid-width tests) over the field width ``nbits``.
    Expected values come from the module-level ``unpack_bits`` / ``pack_bits``
    reference implementations.
    """

    # Fixed seed so a failing parametrization can be reproduced exactly.
    SEED = 20240101
    # Number of random input elements generated per test.
    SIZE = 2**10

    def _random_uint8(self, high):
        """Return ``SIZE`` uint8 values drawn uniformly from ``[0, high)``.

        ``Generator.integers`` treats ``high`` as exclusive, so callers pass
        256 for full bytes and ``1 << nbits`` for ``nbits``-wide fields.  The
        first two elements are pinned to 0 and ``high - 1`` so both extremes
        are exercised regardless of the random draw.
        """
        rng = np.random.default_rng(self.SEED)
        values = rng.integers(high, size=self.SIZE, dtype=np.uint8)
        values[0] = 0
        values[1] = high - 1
        return values

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    @pytest.mark.parametrize("funcn", ["unpack", "unpack_lookup"])
    def test_unpack(self, nbits, bitorder, parallel, funcn):
        """``unpack`` / ``unpack_lookup`` match the reference on random bytes."""
        arr = self._random_uint8(256)
        expected = unpack_bits(arr, nbits, bitorder)
        output = getattr(numbits, funcn)(
            arr, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        np.testing.assert_array_equal(output, expected, strict=True)

    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    @pytest.mark.parametrize("funcn", ["unpack", "unpack_lookup"])
    def test_unpack_invalid(self, bitorder, parallel, funcn):
        """An unsupported width (``nbits=3``) is expected to raise ValueError."""
        arr = np.arange(255, dtype=np.uint8)
        with pytest.raises(ValueError):
            getattr(numbits, funcn)(arr, nbits=3, bitorder=bitorder, parallel=parallel)

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    @pytest.mark.parametrize("funcn", ["unpack", "unpack_lookup"])
    def test_unpack_empty(self, nbits, bitorder, parallel, funcn):
        """Unpacking an empty array yields an empty uint8 array."""
        arr = np.empty((0,), dtype=np.uint8)
        output = getattr(numbits, funcn)(
            arr, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        np.testing.assert_array_equal(output, arr, strict=True)

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_unpack_buffered(self, nbits, bitorder, parallel):
        """``unpack_buffered`` fills a preallocated buffer with the reference result."""
        arr = self._random_uint8(256)
        expected = unpack_bits(arr, nbits, bitorder)
        output = np.zeros(arr.size * 8 // nbits, dtype=np.uint8)
        numbits.unpack_buffered(
            arr, output, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        np.testing.assert_array_equal(output, expected, strict=True)

    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_unpack_buffered_invalid(self, bitorder, parallel):
        """``unpack_buffered`` with ``nbits=3`` is expected to raise ValueError."""
        arr = np.arange(255, dtype=np.uint8)
        output = np.zeros(arr.size * 8 // 3, dtype=np.uint8)
        with pytest.raises(ValueError):
            numbits.unpack_buffered(
                arr, output, nbits=3, bitorder=bitorder, parallel=parallel
            )

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_unpack_buffered_empty(self, nbits, bitorder, parallel):
        """``unpack_buffered`` accepts empty input and output buffers."""
        arr = np.empty((0,), dtype=np.uint8)
        output = np.empty((0,), dtype=np.uint8)
        numbits.unpack_buffered(
            arr, output, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        np.testing.assert_array_equal(output, arr, strict=True)

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack(self, nbits, bitorder, parallel):
        """``pack`` matches the reference on fields spanning ``[0, 2**nbits)``."""
        arr = self._random_uint8(1 << nbits)
        expected = pack_bits(arr, nbits, bitorder)
        output = numbits.pack(arr, nbits=nbits, bitorder=bitorder, parallel=parallel)
        np.testing.assert_array_equal(output, expected, strict=True)

    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack_invalid(self, bitorder, parallel):
        """``pack`` with ``nbits=3`` is expected to raise ValueError."""
        arr = np.arange((1 << 3) - 1, dtype=np.uint8)
        with pytest.raises(ValueError):
            numbits.pack(arr, nbits=3, bitorder=bitorder, parallel=parallel)

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack_empty(self, nbits, bitorder, parallel):
        """Packing an empty array yields an empty uint8 array."""
        arr = np.empty((0,), dtype=np.uint8)
        output = numbits.pack(arr, nbits=nbits, bitorder=bitorder, parallel=parallel)
        np.testing.assert_array_equal(output, arr, strict=True)

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack_buffered(self, nbits, bitorder, parallel):
        """``pack_buffered`` fills a preallocated buffer with the reference result."""
        arr = self._random_uint8(1 << nbits)
        expected = pack_bits(arr, nbits, bitorder)
        output = np.zeros(arr.size * nbits // 8, dtype=np.uint8)
        numbits.pack_buffered(
            arr, output, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        np.testing.assert_array_equal(output, expected, strict=True)

    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack_buffered_invalid(self, bitorder, parallel):
        """``pack_buffered`` with ``nbits=3`` is expected to raise ValueError."""
        arr = np.arange((1 << 3) - 1, dtype=np.uint8)
        output = np.zeros(arr.size * 3 // 8, dtype=np.uint8)
        with pytest.raises(ValueError):
            numbits.pack_buffered(
                arr, output, nbits=3, bitorder=bitorder, parallel=parallel
            )

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack_buffered_empty(self, nbits, bitorder, parallel):
        """``pack_buffered`` accepts empty input and output buffers."""
        arr = np.empty((0,), dtype=np.uint8)
        output = np.empty((0,), dtype=np.uint8)
        numbits.pack_buffered(
            arr, output, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        np.testing.assert_array_equal(output, arr, strict=True)

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack_unpack(self, nbits, bitorder, parallel):
        """``pack(unpack(x))`` reproduces ``x`` for bytes spanning 0..255."""
        arr = self._random_uint8(256)
        output = numbits.pack(
            numbits.unpack(arr, nbits=nbits, bitorder=bitorder, parallel=parallel),
            nbits=nbits,
            bitorder=bitorder,
            parallel=parallel,
        )
        np.testing.assert_array_equal(output, arr, strict=True)

    @pytest.mark.parametrize("nbits", [1, 2, 4])
    @pytest.mark.parametrize("bitorder", ["big", "little"])
    @pytest.mark.parametrize("parallel", [False, True])
    def test_pack_unpack_buffered(self, nbits, bitorder, parallel):
        """Buffered unpack followed by buffered pack reproduces the input."""
        arr = self._random_uint8(256)
        tmp = np.zeros(arr.size * 8 // nbits, dtype=np.uint8)
        numbits.unpack_buffered(
            arr, tmp, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        output = np.zeros_like(arr)
        numbits.pack_buffered(
            tmp, output, nbits=nbits, bitorder=bitorder, parallel=parallel
        )
        np.testing.assert_array_equal(output, arr, strict=True)