Skip to content

Commit

Permalink
Fix a bunch more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
raulcd committed Jun 6, 2024
1 parent 1232842 commit bccf733
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 80 deletions.
4 changes: 2 additions & 2 deletions python/pyarrow/builder.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ cdef class StringBuilder(_Weakrefable):
value : string/bytes or np.nan/None
The value to append to the string array builder.
"""
if value is None or value is np.nan:
if value is None or ('numpy' in sys.modules and value is np.nan):
self.builder.get().AppendNull()
elif isinstance(value, (bytes, str)):
self.builder.get().Append(tobytes(value))
Expand Down Expand Up @@ -108,7 +108,7 @@ cdef class StringViewBuilder(_Weakrefable):
value : string/bytes or np.nan/None
The value to append to the string array builder.
"""
if value is None or value is np.nan:
if value is None or ('numpy' in sys.modules and value is np.nan):
self.builder.get().AppendNull()
elif isinstance(value, (bytes, str)):
self.builder.get().Append(tobytes(value))
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ class PyValue {
default:
return Status::UnknownError("Invalid time unit");
}
} else if (PyArray_CheckAnyScalarExact(obj)) {
} else if (get_numpy_imported() && PyArray_CheckAnyScalarExact(obj)) {
// validate that the numpy scalar has np.datetime64 dtype
ARROW_ASSIGN_OR_RAISE(auto numpy_type, NumPyScalarToArrowDataType(obj));
if (!numpy_type->Equals(*type)) {
Expand Down
30 changes: 30 additions & 0 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@

import pytest
import hypothesis as h
try:
import numpy as np
except ImportError:
pass

from ..conftest import groups, defaults

import pyarrow as pa
from pyarrow import set_timezone_db_path
from pyarrow.util import find_free_port

Expand Down Expand Up @@ -116,6 +122,30 @@ def pytest_runtest_setup(item):
item.config.pyarrow.apply_mark(mark)


@pytest.fixture
def all_array_types():
    """Return ``(type, values)`` pairs used to build arrays of many types.

    Each entry pairs an Arrow type (a type-name string or a ``DataType``)
    with five sample values. The numeric entries use numpy ranges, so the
    fixture needs numpy.

    Returns
    -------
    list of tuple
        ``(type, values)`` pairs.
    """
    # The module-level numpy import is guarded and leaves ``np`` unbound
    # when numpy is missing. Resolve it here so that a test requesting this
    # fixture is skipped instead of failing with a NameError.
    np = pytest.importorskip("numpy")
    return [
        ('bool', [True, False, False, True, True]),
        ('uint8', np.arange(5)),
        ('int8', np.arange(5)),
        ('uint16', np.arange(5)),
        ('int16', np.arange(5)),
        ('uint32', np.arange(5)),
        ('int32', np.arange(5)),
        ('uint64', np.arange(5, 10)),
        ('int64', np.arange(5, 10)),
        ('float', np.arange(0, 0.5, 0.1)),
        ('double', np.arange(0, 0.5, 0.1)),
        ('string', ['a', 'b', None, 'ddd', 'ee']),
        ('binary', [b'a', b'b', b'c', b'ddd', b'ee']),
        (pa.binary(3), [b'abc', b'bcd', b'cde', b'def', b'efg']),
        (pa.list_(pa.int8()), [[1, 2], [3, 4], [5, 6], None, [9, 16]]),
        (pa.large_list(pa.int16()), [[1], [2, 3, 4], [5, 6], None, [9, 16]]),
        (pa.struct([('a', pa.int8()), ('b', pa.int8())]), [
            {'a': 1, 'b': 2}, None, {'a': 3, 'b': 4}, None, {'a': 5, 'b': 6}]),
    ]


@pytest.fixture
def tempdir(tmpdir):
# convert pytest's LocalPath to pathlib.Path
Expand Down
6 changes: 5 additions & 1 deletion python/pyarrow/tests/test_adhoc_memory_leak.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

import pytest

import numpy as np
try:
import numpy as np
except ImportError:
pass
import pyarrow as pa

import pyarrow.tests.util as test_util
Expand All @@ -28,6 +31,7 @@
pass


@pytest.mark.numpy
@pytest.mark.memory_leak
@pytest.mark.pandas
def test_deserialize_pandas_arrow_7956():
Expand Down
6 changes: 5 additions & 1 deletion python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,6 @@ def test_array_from_dictionary_scalar():
assert result.equals(expected)


@pytest.mark.numpy
def test_array_getitem():
arr = pa.array(range(10, 15))
lst = arr.to_pylist()
Expand All @@ -440,6 +439,11 @@ def test_array_getitem():
with pytest.raises(IndexError):
arr[idx]


@pytest.mark.numpy
def test_array_getitem_numpy_scalars():
arr = pa.array(range(10, 15))
lst = arr.to_pylist()
# check that numpy scalars are supported
for idx in range(-len(arr), len(arr)):
assert arr[np.int32(idx)].as_py() == lst[idx]
Expand Down
72 changes: 67 additions & 5 deletions python/pyarrow/tests/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@

import weakref

import numpy as np
import pytest

try:
import numpy as np
except ImportError:
pass

import pyarrow as pa
from pyarrow.lib import StringBuilder, StringViewBuilder
Expand All @@ -31,7 +36,8 @@ def test_weakref():
assert wr() is None


def test_string_builder_append():
@pytest.mark.numpy
def test_string_builder_append_with_nan():
sbuilder = StringBuilder()
sbuilder.append(b"a byte string")
sbuilder.append("a string")
Expand All @@ -48,7 +54,24 @@ def test_string_builder_append():
assert arr.to_pylist() == expected


def test_string_builder_append_values():
def test_string_builder_append():
    """Append bytes, str and None one by one and check the built array."""
    builder = StringBuilder()
    for item in (b"a byte string", "a string", None):
        builder.append(item)

    # Three slots were appended, one of them null.
    assert len(builder) == 3
    assert builder.null_count == 1

    result = builder.finish()

    # The builder is expected to report zero length once finished.
    assert len(builder) == 0
    assert isinstance(result, pa.Array)
    assert result.null_count == 1
    assert result.type == 'str'
    assert result.to_pylist() == ["a byte string", "a string", None]


@pytest.mark.numpy
def test_string_builder_append_values_with_nan():
sbuilder = StringBuilder()
sbuilder.append_values([np.nan, None, "text", None, "other text"])
assert sbuilder.null_count == 3
Expand All @@ -58,7 +81,18 @@ def test_string_builder_append_values():
assert arr.to_pylist() == expected


def test_string_builder_append_after_finish():
def test_string_builder_append_values():
    """Bulk-append strings and None values, then check the null counts."""
    builder = StringBuilder()
    builder.append_values([None, "text", None, "other text"])
    assert builder.null_count == 2

    result = builder.finish()
    assert result.null_count == 2
    assert result.to_pylist() == [None, "text", None, "other text"]


@pytest.mark.numpy
def test_string_builder_append_after_finish_with_nan():
sbuilder = StringBuilder()
sbuilder.append_values([np.nan, None, "text", None, "other text"])
arr = sbuilder.finish()
Expand All @@ -67,7 +101,17 @@ def test_string_builder_append_after_finish():
assert arr.to_pylist() == expected


def test_string_view_builder():
def test_string_builder_append_after_finish():
    """An append made after finish() must not alter the finished array."""
    builder = StringBuilder()
    builder.append_values([None, "text", None, "other text"])
    finished = builder.finish()

    # This append happens after the array was produced.
    builder.append("No effect")

    assert finished.to_pylist() == [None, "text", None, "other text"]


@pytest.mark.numpy
def test_string_view_builder_with_nan():
builder = StringViewBuilder()
builder.append(b"a byte string")
builder.append("a string")
Expand All @@ -84,3 +128,21 @@ def test_string_view_builder():
"a byte string", "a string", "a longer not-inlined string", None, None, "text"
]
assert arr.to_pylist() == expected


def test_string_view_builder():
    """Mix single appends and a bulk append on a StringViewBuilder."""
    view_builder = StringViewBuilder()
    for item in (b"a byte string", "a string", "a longer not-inlined string"):
        view_builder.append(item)
    view_builder.append_values([None, "text"])

    # Three single appends plus two bulk values, one of which is null.
    assert len(view_builder) == 5
    assert view_builder.null_count == 1

    result = view_builder.finish()
    assert isinstance(result, pa.Array)
    assert result.null_count == 1
    assert result.type == 'string_view'
    assert result.to_pylist() == [
        "a byte string", "a string", "a longer not-inlined string", None, "text"
    ]
123 changes: 54 additions & 69 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
import sys
import textwrap

import numpy as np
try:
import numpy as np
except ImportError:
pass

try:
import pandas as pd
Expand All @@ -45,27 +48,6 @@
except ImportError:
pas = None

all_array_types = [
('bool', [True, False, False, True, True]),
('uint8', np.arange(5)),
('int8', np.arange(5)),
('uint16', np.arange(5)),
('int16', np.arange(5)),
('uint32', np.arange(5)),
('int32', np.arange(5)),
('uint64', np.arange(5, 10)),
('int64', np.arange(5, 10)),
('float', np.arange(0, 0.5, 0.1)),
('double', np.arange(0, 0.5, 0.1)),
('string', ['a', 'b', None, 'ddd', 'ee']),
('binary', [b'a', b'b', b'c', b'ddd', b'ee']),
(pa.binary(3), [b'abc', b'bcd', b'cde', b'def', b'efg']),
(pa.list_(pa.int8()), [[1, 2], [3, 4], [5, 6], None, [9, 16]]),
(pa.large_list(pa.int16()), [[1], [2, 3, 4], [5, 6], None, [9, 16]]),
(pa.struct([('a', pa.int8()), ('b', pa.int8())]), [
{'a': 1, 'b': 2}, None, {'a': 3, 'b': 4}, None, {'a': 5, 'b': 6}]),
]

exported_functions = [
func for (name, func) in sorted(pc.__dict__.items())
if hasattr(func, '__arrow_compute_function__')]
Expand Down Expand Up @@ -1111,30 +1093,31 @@ def test_binary_join_element_wise():
'a', 'b', null, options=replace).as_py() is None


@pytest.mark.parametrize(('ty', 'values'), all_array_types)
def test_take(ty, values):
arr = pa.array(values, type=ty)
for indices_type in [pa.int8(), pa.int64()]:
indices = pa.array([0, 4, 2, None], type=indices_type)
result = arr.take(indices)
result.validate()
expected = pa.array([values[0], values[4], values[2], None], type=ty)
assert result.equals(expected)
@pytest.mark.numpy
def test_take(all_array_types):
for ty, values in all_array_types:
arr = pa.array(values, type=ty)
for indices_type in [pa.int8(), pa.int64()]:
indices = pa.array([0, 4, 2, None], type=indices_type)
result = arr.take(indices)
result.validate()
expected = pa.array([values[0], values[4], values[2], None], type=ty)
assert result.equals(expected)

# empty indices
indices = pa.array([], type=indices_type)
result = arr.take(indices)
result.validate()
expected = pa.array([], type=ty)
assert result.equals(expected)
# empty indices
indices = pa.array([], type=indices_type)
result = arr.take(indices)
result.validate()
expected = pa.array([], type=ty)
assert result.equals(expected)

indices = pa.array([2, 5])
with pytest.raises(IndexError):
arr.take(indices)
indices = pa.array([2, 5])
with pytest.raises(IndexError):
arr.take(indices)

indices = pa.array([2, -1])
with pytest.raises(IndexError):
arr.take(indices)
indices = pa.array([2, -1])
with pytest.raises(IndexError):
arr.take(indices)


def test_take_indices_types():
Expand Down Expand Up @@ -1217,14 +1200,15 @@ def test_take_null_type():
assert len(table.take(indices).column(0)) == 4


@pytest.mark.parametrize(('ty', 'values'), all_array_types)
def test_drop_null(ty, values):
arr = pa.array(values, type=ty)
result = arr.drop_null()
result.validate(full=True)
indices = [i for i in range(len(arr)) if arr[i].is_valid]
expected = arr.take(pa.array(indices))
assert result.equals(expected)
@pytest.mark.numpy
def test_drop_null(all_array_types):
for ty, values in all_array_types:
arr = pa.array(values, type=ty)
result = arr.drop_null()
result.validate(full=True)
indices = [i for i in range(len(arr)) if arr[i].is_valid]
expected = arr.take(pa.array(indices))
assert result.equals(expected)


def test_drop_null_chunked_array():
Expand Down Expand Up @@ -1293,27 +1277,28 @@ def test_drop_null_null_type():
assert len(table.drop_null().column(0)) == 0


# The marker must be spelled ``numpy``: a misspelled marker is not
# recognised, so the test would not be skipped on numpy-less runs.
@pytest.mark.numpy
def test_filter(all_array_types):
    """Exercise ``Array.filter`` for every ``(type, values)`` fixture pair.

    For each pair, an array is built and filtered with a boolean mask that
    contains a null, under both ``null_selection_behavior`` modes. A
    non-boolean mask and a mask of the wrong length must raise.
    """
    for ty, values in all_array_types:
        arr = pa.array(values, type=ty)

        mask = pa.array([True, False, False, True, None])
        result = arr.filter(mask, null_selection_behavior='drop')
        result.validate()
        assert result.equals(pa.array([values[0], values[3]], type=ty))
        result = arr.filter(mask, null_selection_behavior='emit_null')
        result.validate()
        assert result.equals(pa.array([values[0], values[3], None], type=ty))

        # non-boolean dtype
        mask = pa.array([0, 1, 0, 1, 0])
        with pytest.raises(NotImplementedError):
            arr.filter(mask)

        # wrong length
        mask = pa.array([True, False, True])
        with pytest.raises(ValueError, match="must all be the same length"):
            arr.filter(mask)


def test_filter_chunked_array():
Expand Down
5 changes: 4 additions & 1 deletion python/pyarrow/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
import contextlib
import decimal
import gc
import numpy as np
try:
import numpy as np
except ImportError:
pass
import os
import random
import re
Expand Down

0 comments on commit bccf733

Please sign in to comment.