Skip to content

Commit

Permalink
Merge pull request #94 from paulsengroup/deps/pin-pyarrow
Browse files Browse the repository at this point in the history
Pin pyarrow version and detect ABI incompatibilities at runtime
  • Loading branch information
robomics authored Oct 18, 2024
2 parents 636336c + 4a38f3c commit 28dfed0
Show file tree
Hide file tree
Showing 10 changed files with 83 additions and 190 deletions.
5 changes: 1 addition & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,10 @@ set(HICTK_BUILD_TOOLS OFF)
set(HICTK_ENABLE_FUZZY_TESTING OFF)
set(HICTK_ENABLE_TESTING OFF)
set(HICTK_INSTALL OFF)
set(HICTK_WITH_ARROW_SHARED ON)
set(HICTK_WITH_EIGEN ON)
set(HICTK_ENABLE_GIT_VERSION_TRACKING OFF)

# This must be called before finding hictk, such that hictk will link to the arrow lib shipped with the Python wheel
# Furthermore, we are finding hictk here to avoid name collisions with the ENABLE_* variables defined in the coming lines
find_package(Arrow REQUIRED)
# We are finding hictk here to avoid name collisions with the ENABLE_* variables defined in the coming lines
find_package(hictk REQUIRED)

get_property(BUILDING_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
Expand Down
41 changes: 0 additions & 41 deletions cmake/modules/FindArrow.cmake

This file was deleted.

93 changes: 8 additions & 85 deletions cmake/modules/FindPyarrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@
FindPyarrow
-----------
Finds pyarrow and the Arrow library that is shipped as part of the pyarrow wheels.
Finds the pyarrow library that is shipped as part of the pyarrow wheels.
Imported Targets
^^^^^^^^^^^^^^^^
This module provides the following imported targets, if found:
``Arrow::arrow_shared``
The Arrow shared library
``Arrow::python``
The pyarrow library
Expand All @@ -25,11 +23,11 @@ Result Variables
This will define the following variables:
``Pyarrow_FOUND``
True if the system has the Arrow library.
True if the system has the Pyarrow library.
``Pyarrow_VERSION``
The version of the Arrow library which was found.
The version of the Pyarrow library which was found.
``Pyarrow_INCLUDE_DIRS``
Include directories needed to use Arrow.
Include directories needed to use Pyarrow.
Cache Variables
^^^^^^^^^^^^^^^
Expand All @@ -39,26 +37,8 @@ The following cache variables may also be set:
``Pyarrow_LIBRARY``
The path to the Arrow::python library.
``Arrow_LIBRARY``
The path to the Arrow library.
#]=======================================================================]

# Run utils/devel/symlink_pyarrow_libs.py with the given Python interpreter and abort the
# configure step if the script exits with a non-zero status.
#
# Arguments:
#   Python_EXECUTABLE - Python interpreter used to execute the helper script.
#
# NOTE(review): judging by the error message below, the script presumably creates symlinks to the
# pyarrow libraries inside the Python site-packages folder; the script is not visible here - confirm.
function(symlink_pyarrow_libs Python_EXECUTABLE)
# Resolve the helper script relative to the directory of the file that defines this function,
# so the path stays valid regardless of where the function is called from.
file(REAL_PATH "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../utils/devel/symlink_pyarrow_libs.py" SCRIPT)
# The script runs at configure time; its exit code is captured in STATUS.
execute_process(
COMMAND
"${Python_EXECUTABLE}" "${SCRIPT}"
RESULT_VARIABLE STATUS
)
# Any non-zero exit code is treated as a hard configuration error.
if(NOT STATUS EQUAL 0)
message(
FATAL_ERROR
"Unable to create symlink to pyarrow libraries. Please make sure that: pyarrow is installed, and that you have write permissions for the Python site-package folder"
)
endif()
endfunction()

find_package(
Python
3.9
Expand All @@ -68,14 +48,11 @@ find_package(
REQUIRED
)

if(TARGET Arrow::arrow_shared AND TARGET Arrow::python)
message(DEBUG "Arrow::arrow_shared and Arrow::python have already been defined")
symlink_pyarrow_libs("${Python_EXECUTABLE}")
if(TARGET Arrow::python)
message(DEBUG "Arrow::python has already been defined")
return()
endif()

set(Arrow_FOUND FALSE)

# Try to import pyarrow
execute_process(
COMMAND
Expand All @@ -102,17 +79,15 @@ if(NOT STATUS EQUAL 0)
return()
endif()

if(Pyarrow_VERSION VERSION_LESS 14.0.0)
message(WARNING "pyarrow version ${Pyarrow_VERSION} is too old. Minimum version required: 14.0.0")
if(Pyarrow_VERSION VERSION_LESS 15.0.0)
message(WARNING "pyarrow version ${Pyarrow_VERSION} is too old. Minimum version required: 15.0.0")
set(Pyarrow_FOUND FALSE)
unset(Pyarrow_VERSION)
return()
endif()

set(Pyarrow_VERSION_STRING "${Pyarrow_VERSION}")

symlink_pyarrow_libs("${Python_EXECUTABLE}")

# Get include dirs
execute_process(
COMMAND
Expand Down Expand Up @@ -154,25 +129,8 @@ find_package_handle_standard_args(
VERSION_VAR Pyarrow_VERSION
)

set(Arrow_FOUND ${Pyarrow_FOUND})
set(Arrow_INCLUDE_DIRS ${Pyarrow_INCLUDE_DIRS})
set(Arrow_LIBRARY_DIRS ${Pyarrow_LIBRARY_DIRS})
set(Arrow_VERSION ${Pyarrow_VERSION})

find_package_handle_standard_args(
Arrow
FOUND_VAR Arrow_FOUND
REQUIRED_VARS
Arrow_INCLUDE_DIRS
Arrow_LIBRARY_DIRS
VERSION_VAR Arrow_VERSION
NAME_MISMATCHED
)

find_library(Pyarrow_LIBRARY arrow_python REQUIRED PATHS ${Pyarrow_LIBRARY_DIRS} NO_DEFAULT_PATH)

find_library(Arrow_LIBRARY arrow REQUIRED PATHS ${Arrow_LIBRARY_DIRS} NO_DEFAULT_PATH)

if(Pyarrow_FOUND AND NOT TARGET Arrow::python)
if(WIN32)
add_library(Arrow::python UNKNOWN IMPORTED)
Expand All @@ -194,43 +152,8 @@ if(Pyarrow_FOUND AND NOT TARGET Arrow::python)
${Python_NumPy_INCLUDE_DIR}
)
target_link_directories(Arrow::python INTERFACE ${Pyarrow_LIBRARY_DIRS})

file(REAL_PATH "${CMAKE_CURRENT_LIST_DIR}/../../utils/devel/symlink_pyarrow_libs.py" SCRIPT)
add_custom_target(
update_arrow_lib_symlinks
BYPRODUCTS
"${Arrow_LIBRARY}"
COMMAND
"${Python_EXECUTABLE}" "${SCRIPT}"
)
unset(SCRIPT)

add_dependencies(Arrow::python update_arrow_lib_symlinks)
endif()

# Define the Arrow::arrow_shared imported target, unless Arrow was not found or the target
# already exists (e.g. because this module was already processed).
if(Arrow_FOUND AND NOT TARGET Arrow::arrow_shared)
# NOTE(review): UNKNOWN is presumably used on Windows because a SHARED imported library would
# also require an import library (IMPORTED_IMPLIB) - confirm.
if(WIN32)
add_library(Arrow::arrow_shared UNKNOWN IMPORTED)
else()
add_library(Arrow::arrow_shared SHARED IMPORTED)
endif()
# Point the target at the library located by find_library() and register it for the Release
# configuration only.
set_target_properties(
Arrow::arrow_shared
PROPERTIES
IMPORTED_LOCATION
"${Arrow_LIBRARY}"
IMPORTED_CONFIGURATION
Release
)
# Usage requirements propagated to every target that links Arrow::arrow_shared.
target_include_directories(Arrow::arrow_shared INTERFACE ${Arrow_INCLUDE_DIRS})
target_link_directories(Arrow::arrow_shared INTERFACE ${Arrow_LIBRARY_DIRS})
endif()

# Hide the Arrow_* result variables from the default view of the CMake GUIs.
# NOTE(review): mark_as_advanced() operates on cache entries; these names look like they are
# assigned with plain set() in this module, so this call may have no visible effect - confirm.
mark_as_advanced(
Arrow_VERSION
Arrow_INCLUDE_DIRS
Arrow_LIBRARY_DIRS
)
mark_as_advanced(
Pyarrow_VERSION
Pyarrow_INCLUDE_DIRS
Expand Down
50 changes: 50 additions & 0 deletions conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def _min_cppstd(self):
return 17

def requirements(self):
self.requires("arrow/17.0.0#81be2aa6c49800df8cc163adf4b99e9f")
self.requires("boost/1.86.0#cd839a2082585255010f9e82eea94c7f", force=True)
self.requires("bshoshany-thread-pool/4.1.0#be1802a8768416a6c9b1393cf0ce5e9c")
self.requires("concurrentqueue/1.0.4#1e48e1c712bcfd892087c9c622a51502")
self.requires("eigen/3.4.0#2e192482a8acff96fe34766adca2b24c")
Expand All @@ -57,6 +59,54 @@ def configure(self):
if self.settings.compiler in ["clang", "gcc"]:
self.settings.compiler.libcxx = "libstdc++11"

self.options["arrow"].compute = True
self.options["arrow"].parquet = False
self.options["arrow"].with_boost = True
self.options["arrow"].with_re2 = True
self.options["arrow"].with_thrift = False
self.options["boost"].system_no_deprecated = True
self.options["boost"].asio_no_deprecated = True
self.options["boost"].filesystem_no_deprecated = True
self.options["boost"].filesystem_version = 4
self.options["boost"].zlib = False
self.options["boost"].bzip2 = False
self.options["boost"].lzma = False
self.options["boost"].zstd = False
self.options["boost"].without_atomic = False
self.options["boost"].without_charconv = True
self.options["boost"].without_chrono = True
self.options["boost"].without_cobalt = True
self.options["boost"].without_container = True
self.options["boost"].without_context = True
self.options["boost"].without_contract = True
self.options["boost"].without_coroutine = True
self.options["boost"].without_date_time = True
self.options["boost"].without_exception = True
self.options["boost"].without_fiber = True
self.options["boost"].without_filesystem = False
self.options["boost"].without_graph = True
self.options["boost"].without_graph_parallel = True
self.options["boost"].without_iostreams = True
self.options["boost"].without_json = True
self.options["boost"].without_locale = True
self.options["boost"].without_log = True
self.options["boost"].without_math = True
self.options["boost"].without_mpi = True
self.options["boost"].without_nowide = True
self.options["boost"].without_process = False
self.options["boost"].without_program_options = True
self.options["boost"].without_python = True
self.options["boost"].without_random = True
self.options["boost"].without_regex = True
self.options["boost"].without_serialization = True
self.options["boost"].without_stacktrace = True
self.options["boost"].without_system = False
self.options["boost"].without_test = True
self.options["boost"].without_thread = True
self.options["boost"].without_timer = True
self.options["boost"].without_type_erasure = True
self.options["boost"].without_url = True
self.options["boost"].without_wave = True
self.options["fmt"].header_only = True
self.options["hdf5"].enable_cxx = False
self.options["hdf5"].hl = False
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ requires = [
"nanobind>=2", # This is required in order to run stubgen
"numpy",
"pandas>=2.1,!=2.2.0",
"pyarrow>=14",
"pyarrow==17.0",
"scikit-build-core>=0.10",
"scipy",
"typing_extensions",
Expand All @@ -33,7 +33,7 @@ classifiers = [

dependencies = [
"numpy",
"pyarrow>=14",
"pyarrow==17.0",
]

optional-dependencies.pandas = [
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: MIT

find_package(Arrow REQUIRED)
find_package(FMT REQUIRED)
find_package(nanobind REQUIRED)
find_package(Pyarrow REQUIRED)
Expand Down
5 changes: 4 additions & 1 deletion src/hictkpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
//
// SPDX-License-Identifier: MIT

#include <arrow/python/api.h>
#include <arrow/config.h>
#include <arrow/python/pyarrow.h>
#include <spdlog/spdlog.h>

#include <cstdint>
Expand Down Expand Up @@ -37,6 +38,8 @@ NB_MODULE(_hictkpy, m) {
throw std::runtime_error("failed to initialize pyarrow runtime");
}

m.attr("__hictkpy_arrow_version__") =
std::make_tuple(ARROW_VERSION_MAJOR, ARROW_VERSION_MINOR, ARROW_VERSION_PATCH);
m.attr("__hictk_version__") = hictk::config::version::str();

m.doc() = "Blazing fast toolkit to work with .hic and .cool files.";
Expand Down
19 changes: 16 additions & 3 deletions src/hictkpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,24 @@
# SPDX-License-Identifier: MIT


def _load_arrow_python_lib():
import pyarrow
def _load_pyarrow_and_check_abi_compat():
import pyarrow as pa

from ._hictkpy import __hictkpy_arrow_version__

_load_arrow_python_lib()
import re

# Version of the Arrow C++ library that the extension module was compiled against.
major, minor, patch = __hictkpy_arrow_version__

# Compare the numeric major and minor components instead of testing a string prefix:
# a prefix such as "1.1" would also accept "1.10.0" or "1.11.2", letting an
# ABI-incompatible pyarrow slip through the check.
version_match = re.match(r"(\d+)\.(\d+)", pa.__version__)
if version_match is None:
    # Unparseable version string: treat it as incompatible rather than guessing.
    installed_version = None
else:
    installed_version = (int(version_match.group(1)), int(version_match.group(2)))

if installed_version != (int(major), int(minor)):
    raise ImportError(
        "Detected Arrow ABI version mismatch!\n"
        f"hictkpy was compiled with Arrow v{major}.{minor}.{patch}, which is not ABI compatible with the currently "
        f"installed version of pyarrow (v{pa.__version__}).\n"
        'Please install a compatible version of pyarrow with e.g. "pip install '
        f'pyarrow=={major}.{minor}".'
    )


_load_pyarrow_and_check_abi_compat()


from importlib.metadata import version
Expand Down
2 changes: 1 addition & 1 deletion src/pixel_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <winsock2.h>
#endif

#include <arrow/python/api.h>
#include <arrow/python/pyarrow.h>
#include <arrow/table.h>
#include <fmt/format.h>

Expand Down
Loading

0 comments on commit 28dfed0

Please sign in to comment.