Use numpy headers rather than pybind11's alternative numpy interface
pybind11 effectively includes its own version of the numpy headers,
with its own implementation for accessing NumPy APIs through NumPy's
custom dynamic linking mechanism.  It expects the NumPy version to be
>= 1.7.

That avoids the need to have the numpy headers available at build
time.  However, pybind11 only provides access to a small subset of
numpy APIs that it actually uses itself, and only as an implementation
detail.

This commit changes tensorstore to use the normal numpy headers, based
on the approach used by TensorFlow.

When using the NumPy headers, the NumPy version at run time must be >=
the version used at build time.  Consequently, when building the
binary packages, we arrange to build against the oldest NumPy version
available for a given Python version, by using the
`oldest-supported-numpy` package.

PiperOrigin-RevId: 364097273
Change-Id: I24cce9d39b56dbbfc1e7823c3013b984a603c7be
jbms authored and copybara-github committed Mar 20, 2021
1 parent 440f746 commit 83b3107
Showing 15 changed files with 358 additions and 177 deletions.
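Among the changed files are the new python/tensorstore/numpy.h and python/tensorstore/numpy.cc referenced by the :numpy build target below; their diffs are not shown here. For orientation, a TensorFlow-style NumPy wrapper usually looks roughly like the following sketch. The macro and function names are placeholders, not the actual tensorstore ones:

// numpy.h (sketch): must be the first include in every translation unit
// that touches the NumPy C API, so that they all share one API table.
#pragma once

#include <Python.h>

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
// Every translation unit must agree on this symbol name.
#define PY_ARRAY_UNIQUE_SYMBOL my_project_numpy_api
// Only the one .cc file that calls import_array() defines the API table;
// every other translation unit sees it as an extern declaration.
#ifndef MY_PROJECT_NUMPY_IMPORT_ARRAY
#define NO_IMPORT_ARRAY
#endif
#include "numpy/arrayobject.h"

// numpy.cc (sketch): the single translation unit that owns the API table
// and fills it in when the extension module is imported.
#define MY_PROJECT_NUMPY_IMPORT_ARRAY
#include "numpy.h"

bool InitializeNumpy() {
  // _import_array() also rejects a runtime NumPy older than the headers the
  // module was built against, hence the oldest-supported-numpy build pin.
  import_array1(false);  // Reports an ImportError and returns false on failure.
  return true;
}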
1 change: 1 addition & 0 deletions pyproject.toml
@@ -3,4 +3,5 @@ requires = [
"setuptools>=30.3.0",
"wheel",
"setuptools_scm",
"oldest-supported-numpy",
]
16 changes: 16 additions & 0 deletions python/tensorstore/BUILD
@@ -19,6 +19,7 @@ pybind11_py_extension(
":downsample",
":future",
":index_space",
":numpy",
":spec",
":tensorstore_class",
":transaction",
@@ -28,6 +29,16 @@ pybind11_py_extension(
],
)

pybind11_cc_library(
name = "numpy",
srcs = ["numpy.cc"],
textual_hdrs = ["numpy.h"],
deps = [
"@local_config_python//:python_headers", # buildcleaner: keep
"@pypa_numpy//:headers",
],
)

py_library(
name = "tensorstore",
srcs = ["__init__.py"],
@@ -193,6 +204,7 @@ pybind11_cc_library(
":dim_expression",
":index",
":json_type_caster",
":numpy",
":numpy_indexing_spec",
":result_type_caster",
":status",
@@ -239,6 +251,7 @@ pybind11_cc_library(
deps = [
":data_type",
":json_type_caster",
":numpy",
"//tensorstore:array",
"//tensorstore:container_kind",
"//tensorstore:contiguous_layout",
@@ -288,12 +301,14 @@ pybind11_cc_library(
hdrs = ["data_type.h"],
deps = [
":json_type_caster",
":numpy",
"//tensorstore:data_type",
"//tensorstore/util:quote_string",
"//tensorstore/util:str_cat",
"@com_github_nlohmann_json//:nlohmann_json",
"@com_github_pybind_pybind11//:pybind11",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/strings",
],
)

@@ -384,6 +399,7 @@ pybind11_cc_library(
":array_type_caster",
":data_type",
":index",
":numpy",
":result_type_caster",
":status",
":subscript_method",
67 changes: 35 additions & 32 deletions python/tensorstore/array_type_caster.cc
@@ -12,13 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python/tensorstore/array_type_caster.h"
#include "python/tensorstore/numpy.h"

// numpy.h must be included first

#include <algorithm>
#include <memory>
#include <string>
#include <utility>

#include "python/tensorstore/array_type_caster.h"
#include "python/tensorstore/data_type.h"
#include "python/tensorstore/json_type_caster.h"
#include "pybind11/numpy.h"
@@ -140,18 +143,17 @@ constexpr const internal::ElementwiseFunction<2, Status*>*
};

pybind11::object GetNumpyObjectArrayImpl(SharedArrayView<const void> source) {
auto& api = npy_api::get();
ssize_t target_shape_ssize_t[kMaxNumpyRank];
ssize_t target_shape_ssize_t[NPY_MAXDIMS];
std::copy(source.shape().begin(), source.shape().end(), target_shape_ssize_t);
auto array_obj = py::reinterpret_steal<py::array>(api.PyArray_NewFromDescr_(
api.PyArray_Type_, api.PyArray_DescrFromType_(NPY_OBJECT_),
auto array_obj = py::reinterpret_steal<py::array>(PyArray_NewFromDescr(
&PyArray_Type, PyArray_DescrFromType(NPY_OBJECT),
static_cast<int>(source.rank()), target_shape_ssize_t,
/*strides=*/nullptr,
/*data=*/nullptr,
/*flags=*/NPY_ARRAY_C_CONTIGUOUS_ | NPY_ARRAY_WRITEABLE_,
/*flags=*/NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_WRITEABLE,
/*obj=*/nullptr));
if (!array_obj) throw py::error_already_set();
Index target_strides[kMaxNumpyRank];
Index target_strides[NPY_MAXDIMS];
std::copy_n(array_obj.strides(), source.rank(), target_strides);
auto iterate_result = internal::IterateOverStridedLayouts<2>(
/*closure=*/{kConvertDataTypeToNumpyObjectArray[static_cast<size_t>(
@@ -171,7 +173,7 @@ pybind11::object GetNumpyObjectArrayImpl(SharedArrayView<const void> source) {
/// Creates an array by copying from a NumPy object array.
///
/// \param array_obj The NumPy object array, must have a data type number of
/// `NPY_OBJECT_`.
/// `NPY_OBJECT`.
/// \param dtype The TensorStore data type of the returned array. If the
/// invalid `DataType()` is specified, uses json.
/// \returns The new array.
@@ -182,7 +184,7 @@ SharedArray<void, dynamic_rank> ArrayFromNumpyObjectArray(
dtype = dtype_v<tensorstore::json_t>;
}
const DimensionIndex rank = array_obj.ndim();
StridedLayout<dynamic_rank(kMaxNumpyRank)> array_obj_layout;
StridedLayout<dynamic_rank(NPY_MAXDIMS)> array_obj_layout;
array_obj_layout.set_rank(rank);
AssignArrayLayout(array_obj, rank, array_obj_layout.shape().data(),
array_obj_layout.byte_strides().data());
@@ -212,7 +214,7 @@ void AssignArrayLayout(pybind11::array array_obj, DimensionIndex rank,
[[maybe_unused]] const int flags =
py::detail::array_proxy(array_obj.ptr())->flags;
assert(array_obj.ndim() == rank);
assert(flags & NPY_ARRAY_ALIGNED_);
assert(flags & NPY_ARRAY_ALIGNED);
std::copy_n(array_obj.shape(), rank, shape);
for (DimensionIndex i = 0; i < rank; ++i) {
if (shape[i] < 0 || shape[i] > kMaxFiniteIndex) {
@@ -225,36 +227,38 @@

pybind11::object GetNumpyArrayImpl(SharedArrayView<const void> value,
bool is_const) {
if (value.rank() > kMaxNumpyRank) {
if (value.rank() > NPY_MAXDIMS) {
throw std::out_of_range(StrCat("Array of rank ", value.rank(),
" (which is greater than ", kMaxNumpyRank,
" (which is greater than ", NPY_MAXDIMS,
") cannot be converted to NumPy array"));
}
if (const DataTypeId id = value.dtype().id();
id != DataTypeId::custom &&
kConvertDataTypeToNumpyObjectArray[static_cast<size_t>(id)]) {
return GetNumpyObjectArrayImpl(value);
}
auto& api = npy_api::get();
ssize_t shape[kMaxNumpyRank];
ssize_t strides[kMaxNumpyRank];
ssize_t shape[NPY_MAXDIMS];
ssize_t strides[NPY_MAXDIMS];
std::copy_n(value.shape().data(), value.rank(), shape);
std::copy_n(value.byte_strides().data(), value.rank(), strides);
int flags = 0;
if (!is_const) {
flags |= NPY_ARRAY_WRITEABLE_;
flags |= NPY_ARRAY_WRITEABLE;
}
auto obj = py::reinterpret_steal<py::array>(api.PyArray_NewFromDescr_(
api.PyArray_Type_, GetNumpyDtypeOrThrow(value.dtype()).release().ptr(),
auto obj = py::reinterpret_steal<py::array>(PyArray_NewFromDescr(
&PyArray_Type,
reinterpret_cast<PyArray_Descr*>(
GetNumpyDtypeOrThrow(value.dtype()).release().ptr()),
static_cast<int>(value.rank()), shape, strides,
const_cast<void*>(value.data()), flags, nullptr));
if (!obj) throw py::error_already_set();
using Pointer = std::shared_ptr<const void>;
api.PyArray_SetBaseObject_(
obj.ptr(), py::capsule(new Pointer(std::move(value.pointer())),
[](void* p) { delete static_cast<Pointer*>(p); })
.release()
.ptr());
PyArray_SetBaseObject(
reinterpret_cast<PyArrayObject*>(obj.ptr()),
py::capsule(new Pointer(std::move(value.pointer())),
[](void* p) { delete static_cast<Pointer*>(p); })
.release()
.ptr());
return std::move(obj);
}

@@ -294,16 +298,15 @@ bool ConvertToArrayImpl(pybind11::handle src,
data_type_constraint.valid()
? GetNumpyDtypeOrThrow(data_type_constraint)
: pybind11::reinterpret_steal<pybind11::dtype>(pybind11::handle());
int flags = NPY_ARRAY_ALIGNED_;
int flags = NPY_ARRAY_ALIGNED;
if (writable) {
flags |= NPY_ARRAY_WRITEABLE_;
flags |= NPY_ARRAY_WRITEABLE;
}
// Convert `src` to a NumPy array.
auto obj = pybind11::reinterpret_steal<pybind11::array>(
npy_api::get().PyArray_FromAny_(src.ptr(), dtype_handle.release().ptr(),
min_rank == dynamic_rank ? 0 : min_rank,
max_rank == dynamic_rank ? 0 : max_rank,
flags, nullptr));
auto obj = pybind11::reinterpret_steal<pybind11::array>(PyArray_FromAny(
src.ptr(), reinterpret_cast<PyArray_Descr*>(dtype_handle.release().ptr()),
min_rank == dynamic_rank ? 0 : min_rank,
max_rank == dynamic_rank ? 0 : max_rank, flags, nullptr));
const auto try_convert = [&] {
if (!obj) {
if (no_throw) {
@@ -314,7 +317,7 @@ bool ConvertToArrayImpl(pybind11::handle src,
}
const int type_num =
pybind11::detail::array_descriptor_proxy(obj.dtype().ptr())->type_num;
if (!allow_copy && (obj.ptr() != src.ptr() || type_num == NPY_OBJECT_)) {
if (!allow_copy && (obj.ptr() != src.ptr() || type_num == NPY_OBJECT)) {
// The caller has specified that copying is not allowed, and either
// PyArray_FromAny created a copy, or the caller passed in a NumPy
// "object" array which always requires a copy.
@@ -338,7 +341,7 @@ bool ConvertToArrayImpl(pybind11::handle src,
*out = MakeScalarArray(PyObjectToJson(src));
return true;
}
if (type_num == NPY_OBJECT_) {
if (type_num == NPY_OBJECT) {
// Arrays of Python objects require copying.
*out = ArrayFromNumpyObjectArray(std::move(obj), data_type_constraint);
return true;
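The replacement code above calls the public NumPy C API directly: PyArray_NewFromDescr creates the array over TensorStore-owned memory, and PyArray_SetBaseObject attaches a capsule holding a std::shared_ptr so the buffer outlives the array. A condensed standalone sketch of that ownership pattern, using the raw CPython capsule API and illustrative names:

#include "python/tensorstore/numpy.h"
// numpy.h must be included first.

#include <memory>
#include <utility>

namespace {

// Destructor for the capsule created below: frees the heap-allocated
// shared_ptr once the NumPy array (and thus its base object) is gone.
void DeleteSharedPtr(PyObject* capsule) {
  delete static_cast<std::shared_ptr<double>*>(
      PyCapsule_GetPointer(capsule, nullptr));
}

// Exposes caller-owned memory as a writable 1-d NumPy array without copying.
PyObject* WrapSharedBuffer(std::shared_ptr<double> data, npy_intp size) {
  npy_intp dims[1] = {size};
  PyObject* array = PyArray_NewFromDescr(
      &PyArray_Type, PyArray_DescrFromType(NPY_DOUBLE),
      /*nd=*/1, dims, /*strides=*/nullptr, /*data=*/data.get(),
      /*flags=*/NPY_ARRAY_WRITEABLE, /*obj=*/nullptr);
  if (!array) return nullptr;
  PyObject* capsule = PyCapsule_New(
      new std::shared_ptr<double>(std::move(data)), nullptr, &DeleteSharedPtr);
  if (!capsule) {
    Py_DECREF(array);
    return nullptr;
  }
  // PyArray_SetBaseObject steals the reference to the capsule.
  if (PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(array),
                            capsule) != 0) {
    Py_DECREF(array);
    return nullptr;
  }
  return array;
}

}  // namespace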
8 changes: 0 additions & 8 deletions python/tensorstore/array_type_caster.h
@@ -38,14 +38,6 @@
namespace tensorstore {
namespace internal_python {

constexpr DimensionIndex kMaxNumpyRank = 32;

using pybind11::detail::npy_api;
constexpr auto NPY_ARRAY_ALIGNED_ = npy_api::NPY_ARRAY_ALIGNED_;
constexpr auto NPY_ARRAY_WRITEABLE_ = npy_api::NPY_ARRAY_WRITEABLE_;
constexpr auto NPY_ARRAY_C_CONTIGUOUS_ = npy_api::NPY_ARRAY_C_CONTIGUOUS_;
constexpr auto NPY_ARRAY_F_CONTIGUOUS_ = npy_api::NPY_ARRAY_F_CONTIGUOUS_;

/// Copies the shape and byte_strides from `array_obj` to `shape` and
/// `byte_strides`.
///
20 changes: 11 additions & 9 deletions python/tensorstore/data_type.cc
@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python/tensorstore/data_type.h"
#include "python/tensorstore/numpy.h"

// numpy.h must be included first.

#include <array>
#include <new>
@@ -22,6 +24,7 @@

#include "absl/hash/hash.h"
#include <nlohmann/json.hpp>
#include "python/tensorstore/data_type.h"
#include "python/tensorstore/json_type_caster.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
@@ -35,10 +38,8 @@ namespace internal_python {
namespace py = ::pybind11;

pybind11::dtype GetNumpyDtype(int type_num) {
using py::detail::npy_api;
auto& api = npy_api::get();
if (auto* obj = api.PyArray_DescrFromType_(type_num)) {
return py::reinterpret_borrow<py::dtype>(obj);
if (auto* obj = PyArray_DescrFromType(type_num)) {
return py::reinterpret_borrow<py::dtype>(reinterpret_cast<PyObject*>(obj));
}
throw py::error_already_set();
}
@@ -70,7 +71,7 @@ py::dtype GetNumpyDtypeOrThrow(DataType dtype) {

DataType GetDataType(pybind11::dtype dt) {
const int type_num = py::detail::array_descriptor_proxy(dt.ptr())->type_num;
if (type_num < 0 || type_num > NPY_NTYPES_) return DataType();
if (type_num < 0 || type_num > NPY_NTYPES) return DataType();
const DataTypeId id = kDataTypeIdForNumpyTypeNum[type_num];
if (id == DataTypeId::custom) return DataType();
return kDataTypes[static_cast<size_t>(id)];
@@ -178,16 +179,17 @@ bool type_caster<tensorstore::internal_python::DataTypeLike>::load(
value.value = dtype_v<tensorstore::string_t>;
return true;
}
PyObject* ptr = nullptr;
if (!pybind11::detail::npy_api::get().PyArray_DescrConverter_(
PyArray_Descr* ptr = nullptr;
if (!PyArray_DescrConverter(
pybind11::reinterpret_borrow<pybind11::object>(src).release().ptr(),
&ptr) ||
!ptr) {
PyErr_Clear();
return false;
}
value.value = tensorstore::internal_python::GetDataTypeOrThrow(
pybind11::reinterpret_steal<pybind11::dtype>(ptr));
pybind11::reinterpret_steal<pybind11::dtype>(
reinterpret_cast<PyObject*>(ptr)));
return true;
}

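The public PyArray_DescrConverter used above, in place of pybind11's private PyArray_DescrConverter_ shim, accepts anything NumPy itself accepts as a dtype: strings such as "int32", Python type objects, and np.dtype instances. A minimal usage sketch with an illustrative function name:

#include "python/tensorstore/numpy.h"
// numpy.h must be included first.

// Converts an arbitrary dtype-like Python object to a NumPy descriptor.
PyArray_Descr* DescrFromDtypeLike(PyObject* dtype_like) {
  PyArray_Descr* descr = nullptr;
  if (!PyArray_DescrConverter(dtype_like, &descr)) {
    // NPY_FAIL: a Python exception has been set.  The type caster above
    // clears it and simply reports that the object is not convertible.
    return nullptr;
  }
  return descr;  // New reference; the caller must Py_DECREF it when done.
}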
(Diffs for the remaining 10 changed files are not shown.)
