Skip to content

Commit

Permalink
Refactor tests to be able to run test_array.py with and without numpy
Browse files Browse the repository at this point in the history
  • Loading branch information
raulcd committed Jun 6, 2024
1 parent d1e417c commit 3eda3d2
Show file tree
Hide file tree
Showing 14 changed files with 162 additions and 39 deletions.
6 changes: 5 additions & 1 deletion ci/scripts/python_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,8 @@ export PYARROW_TEST_PARQUET_ENCRYPTION
export PYARROW_TEST_S3

# Testing PyArrow
pytest -r s ${PYTEST_ARGS} --pyargs pyarrow
# Run the caller-supplied TEST_COMMAND when it is non-empty; otherwise fall
# back to the default pytest invocation over the whole pyarrow package.
if [ -n "${TEST_COMMAND}" ]; then
  # Unquoted, so the string is word-split into a command and its arguments.
  ${TEST_COMMAND}
else
  pytest -r s ${PYTEST_ARGS} --pyargs pyarrow
fi
6 changes: 4 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,9 @@ services:
environment:
<<: [*common, *ccache, *sccache]
PARQUET_REQUIRE_ENCRYPTION: # inherit
PYTEST_ARGS: # inherit
# Temporary: without NumPy only pyarrow.tests.test_array is run for now.
# The full test suite should be run here once it works without NumPy.
TEST_COMMAND: "pytest -r s --pyargs pyarrow.tests.test_array"
HYPOTHESIS_PROFILE: # inherit
PYARROW_TEST_HYPOTHESIS: # inherit
volumes: *conda-volumes
Expand All @@ -1254,7 +1256,7 @@ services:
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip uninstall numpy -y &&
python -c 'import pyarrow as pa; arr = pa.array([1,2,3]); assert len(arr.buffers()) == 2'"]
/arrow/ci/scripts/python_test.sh /arrow"]

conda-python-docs:
# Usage:
Expand Down
7 changes: 5 additions & 2 deletions python/pyarrow/_compute.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ from pyarrow.util import _DEPR_MSG
from libcpp cimport bool as c_bool

import inspect
import numpy as np
try:
import numpy as np
except ImportError:
pass
import warnings


Expand Down Expand Up @@ -473,7 +476,7 @@ cdef class MetaFunction(Function):

cdef _pack_compute_args(object values, vector[CDatum]* out):
for val in values:
if isinstance(val, (list, np.ndarray)):
if "numpy" in sys.modules and isinstance(val, (list, np.ndarray)):
val = lib.asarray(val)

if isinstance(val, Array):
Expand Down
13 changes: 12 additions & 1 deletion python/pyarrow/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
from pyarrow import Codec
from pyarrow import fs

import numpy as np
try:
import numpy as np
except ImportError:
pass

groups = [
'acero',
Expand All @@ -40,6 +43,7 @@
'lz4',
'memory_leak',
'nopandas',
'numpy',
'orc',
'pandas',
'parquet',
Expand Down Expand Up @@ -72,6 +76,7 @@
'lz4': Codec.is_available('lz4'),
'memory_leak': False,
'nopandas': False,
'numpy': False,
'orc': False,
'pandas': False,
'parquet': False,
Expand Down Expand Up @@ -126,6 +131,12 @@
except ImportError:
defaults['nopandas'] = True

# Enable the 'numpy' test group only when NumPy can actually be imported.
# The try body holds just the import; the flag is flipped on the success path.
try:
    import numpy  # noqa
except ImportError:
    pass
else:
    defaults['numpy'] = True

try:
import pyarrow.parquet # noqa
defaults['parquet'] = True
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/includes/libarrow_python.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:


cdef extern from "arrow/python/init.h":
int arrow_init_numpy() except -1
int arrow_init_numpy(c_bool import_numpy) except -1


cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
Expand Down
4 changes: 3 additions & 1 deletion python/pyarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ cimport cpython as cp

def initialize_numpy():
if "numpy" in sys.modules:
arrow_init_numpy()
arrow_init_numpy(True)
else:
arrow_init_numpy(False)


initialize_numpy()
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/src/arrow/python/inference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -395,11 +395,11 @@ class TypeInferrer {
*keep_going = make_unions_;
} else if (arrow::py::is_scalar(obj)) {
RETURN_NOT_OK(VisitArrowScalar(obj, keep_going));
} else if (PyArray_CheckAnyScalarExact(obj)) {
} else if (get_numpy_imported() && PyArray_CheckAnyScalarExact(obj)) {
RETURN_NOT_OK(VisitDType(PyArray_DescrFromScalar(obj), keep_going));
} else if (PySet_Check(obj) || (Py_TYPE(obj) == &PyDictValues_Type)) {
RETURN_NOT_OK(VisitSet(obj, keep_going));
} else if (PyArray_Check(obj)) {
} else if (get_numpy_imported() && PyArray_Check(obj)) {
RETURN_NOT_OK(VisitNdarray(obj, keep_going));
} else if (PyDict_Check(obj)) {
RETURN_NOT_OK(VisitDict(obj));
Expand Down
15 changes: 14 additions & 1 deletion python/pyarrow/src/arrow/python/init.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,17 @@
#include "arrow/python/init.h"
#include "arrow/python/numpy_interop.h"

int arrow_init_numpy() { return arrow::py::import_numpy(); }
bool numpy_imported = false;

// Optionally initialise the NumPy C API for pyarrow.
//
// import_numpy: when false, NumPy is left untouched and 0 is returned.
//               When true, arrow::py::import_numpy() is invoked and its
//               status is returned unchanged to the caller.
//
// The numpy_imported flag is only raised after the import has been attempted
// and did not report the -1 error status, so that get_numpy_imported() never
// claims NumPy is usable when its initialisation failed.
int arrow_init_numpy(bool import_numpy) {
  if (!import_numpy) {
    return 0;
  }
  const int status = arrow::py::import_numpy();
  // -1 is the error sentinel this function exposes to its callers.
  numpy_imported = (status != -1);
  return status;
}

// Returns the current value of the file-level numpy_imported flag that is
// maintained by arrow_init_numpy().
bool get_numpy_imported() {
  const bool numpy_available = numpy_imported;
  return numpy_available;
}
3 changes: 2 additions & 1 deletion python/pyarrow/src/arrow/python/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@

extern "C" {
ARROW_PYTHON_EXPORT
int arrow_init_numpy();
int arrow_init_numpy(bool import_numpy);
bool get_numpy_imported();
}
7 changes: 3 additions & 4 deletions python/pyarrow/src/arrow/python/iterators.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include "arrow/array/array_primitive.h"

#include "arrow/python/init.h"
#include "arrow/python/common.h"
#include "arrow/python/numpy_internal.h"

Expand All @@ -44,8 +45,7 @@ inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&&
// VisitorFunc may set to false to terminate iteration
bool keep_going = true;

#ifdef NUMPY_IMPORT_ARRAY
if (PyArray_Check(obj)) {
if (get_numpy_imported() && PyArray_Check(obj)) {
PyArrayObject* arr_obj = reinterpret_cast<PyArrayObject*>(obj);
if (PyArray_NDIM(arr_obj) != 1) {
return Status::Invalid("Only 1D arrays accepted");
Expand All @@ -65,7 +65,6 @@ inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&&
// This code path is inefficient: callers should implement dedicated
// logic for non-object arrays.
}
#endif

if (PySequence_Check(obj)) {
if (PyList_Check(obj) || PyTuple_Check(obj)) {
Expand Down Expand Up @@ -104,7 +103,7 @@ inline Status VisitSequence(PyObject* obj, int64_t offset, VisitorFunc&& func) {
template <class VisitorFunc>
inline Status VisitSequenceMasked(PyObject* obj, PyObject* mo, int64_t offset,
VisitorFunc&& func) {
if (PyArray_Check(mo)) {
if (get_numpy_imported() && PyArray_Check(mo)) {
PyArrayObject* mask = reinterpret_cast<PyArrayObject*>(mo);
if (PyArray_NDIM(mask) != 1) {
return Status::Invalid("Mask must be 1D array");
Expand Down
19 changes: 16 additions & 3 deletions python/pyarrow/src/arrow/python/numpy_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#pragma once

#include "arrow/python/init.h"
#include "arrow/python/numpy_interop.h"

#include "arrow/status.h"
Expand Down Expand Up @@ -155,15 +156,27 @@ inline Status VisitNumpyArrayInline(PyArrayObject* arr, VISITOR* visitor) {
namespace internal {

inline bool PyFloatScalar_Check(PyObject* obj) {
return PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating);
if(get_numpy_imported()) {
return PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating);
} else {
return PyFloat_Check(obj);
}
}

inline bool PyIntScalar_Check(PyObject* obj) {
return PyLong_Check(obj) || PyArray_IsScalar(obj, Integer);
if(get_numpy_imported()) {
return PyLong_Check(obj) || PyArray_IsScalar(obj, Integer);
} else {
return PyLong_Check(obj);
}
}

inline bool PyBoolScalar_Check(PyObject* obj) {
return PyBool_Check(obj) || PyArray_IsScalar(obj, Bool);
if(get_numpy_imported()) {
return PyBool_Check(obj) || PyArray_IsScalar(obj, Bool);
} else {
return PyBool_Check(obj);
}
}

static inline PyArray_Descr* GetSafeNumPyDtype(int type) {
Expand Down
7 changes: 3 additions & 4 deletions python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ class PyValue {
return true;
} else if (obj == Py_False) {
return false;
} else if (PyArray_IsScalar(obj, Bool)) {
} else if (get_numpy_imported() && PyArray_IsScalar(obj, Bool)) {
return reinterpret_cast<PyBoolScalarObject*>(obj)->obval == NPY_TRUE;
} else {
return internal::InvalidValue(obj, "tried to convert to boolean");
Expand Down Expand Up @@ -663,7 +663,7 @@ class PyPrimitiveConverter<
ARROW_ASSIGN_OR_RAISE(
auto converted, PyValue::Convert(this->primitive_type_, this->options_, value));
// Numpy NaT sentinels can be checked after the conversion
if (PyArray_CheckAnyScalarExact(value) &&
if (get_numpy_imported() && PyArray_CheckAnyScalarExact(value) &&
PyValue::IsNaT(this->primitive_type_, converted)) {
this->primitive_builder_->UnsafeAppendNull();
} else {
Expand Down Expand Up @@ -803,8 +803,7 @@ class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {
if (PyValue::IsNull(this->options_, value)) {
return this->list_builder_->AppendNull();
}

if (PyArray_Check(value)) {
if (get_numpy_imported() && PyArray_Check(value)) {
RETURN_NOT_OK(AppendNdarray(value));
} else if (PySequence_Check(value)) {
RETURN_NOT_OK(AppendSequence(value));
Expand Down
10 changes: 8 additions & 2 deletions python/pyarrow/tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import pytest
import hypothesis as h
import hypothesis.strategies as st
import hypothesis.extra.numpy as npst
try:
import hypothesis.extra.numpy as npst
except ImportError:
pass
try:
import hypothesis.extra.pytz as tzst
except ImportError:
Expand All @@ -35,7 +38,10 @@
import tzdata # noqa:F401
except ImportError:
zoneinfo = None
import numpy as np
try:
import numpy as np
except ImportError:
pass

import pyarrow as pa

Expand Down
Loading

0 comments on commit 3eda3d2

Please sign in to comment.