Skip to content

Commit

Permalink
Allow pyarrow 3.x. Adjust tests. (#250)
Browse files Browse the repository at this point in the history
* Add pyarrow 3 compatibility to tests.

* Allow pyarrow 1.x to 3.x.
  • Loading branch information
Gal Topper authored Jun 24, 2021
1 parent 1bca512 commit 5938394
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
8 changes: 6 additions & 2 deletions integration/test_filesystems_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,16 @@ def test_write_to_parquet_to_v3io(setup_teardown_test):
for i in range(10):
controller.emit([i, f'this is {i}'])
expected.append([i, f'this is {i}'])
-expected = pd.DataFrame(expected, columns=columns, dtype='int32')
+expected_in_pyarrow1 = pd.DataFrame(expected, columns=columns)
+expected_in_pyarrow3 = expected_in_pyarrow1.copy()
+expected_in_pyarrow1['my_int'] = expected_in_pyarrow1['my_int'].astype('int32')
+expected_in_pyarrow3['my_int'] = expected_in_pyarrow3['my_int'].astype('category')
+
controller.terminate()
controller.await_termination()

read_back_df = pd.read_parquet(out_dir, columns=columns)
-assert read_back_df.equals(expected), f"{read_back_df}\n!=\n{expected}"
+assert read_back_df.equals(expected_in_pyarrow1) or read_back_df.equals(expected_in_pyarrow3)


def test_write_to_parquet_to_v3io_single_file_on_termination(setup_teardown_test):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ aiohttp~=3.6
v3io~=0.5.8
pandas~=1.0
numpy>=1.16.5, <1.20.0
-pyarrow~=1.0
+pyarrow>=1,<4
grpcio-tools~=1.30.0
grpcio~=1.30.0
v3io-frames~=0.8
Expand Down
7 changes: 5 additions & 2 deletions tests/test_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1601,12 +1601,15 @@ def test_write_to_parquet(tmpdir):
for i in range(10):
controller.emit([i, f'this is {i}'])
expected.append([i, f'this is {i}'])
-expected = pd.DataFrame(expected, columns=columns, dtype='int32')
+expected_in_pyarrow1 = pd.DataFrame(expected, columns=columns)
+expected_in_pyarrow3 = expected_in_pyarrow1.copy()
+expected_in_pyarrow1['my_int'] = expected_in_pyarrow1['my_int'].astype('int32')
+expected_in_pyarrow3['my_int'] = expected_in_pyarrow3['my_int'].astype('category')
controller.terminate()
controller.await_termination()

read_back_df = pd.read_parquet(out_dir, columns=columns)
-assert read_back_df.equals(expected), f"{read_back_df}\n!=\n{expected}"
+assert read_back_df.equals(expected_in_pyarrow1) or read_back_df.equals(expected_in_pyarrow3)


def test_write_sparse_data_to_parquet(tmpdir):
Expand Down

0 comments on commit 5938394

Please sign in to comment.