From 59383947238b077d090a2146dc88e90696f2107a Mon Sep 17 00:00:00 2001
From: Gal Topper
Date: Thu, 24 Jun 2021 12:02:44 +0300
Subject: [PATCH] Allow pyarrow 3.x. Adjust tests. (#250)

* Add pyarrow 3 compatibility to tests.

* Allow pyarrow 1.x to 3.x.
---
 integration/test_filesystems_integration.py | 8 ++++++--
 requirements.txt                            | 2 +-
 tests/test_flow.py                          | 7 +++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/integration/test_filesystems_integration.py b/integration/test_filesystems_integration.py
index 35d009d8..eb347c28 100644
--- a/integration/test_filesystems_integration.py
+++ b/integration/test_filesystems_integration.py
@@ -205,12 +205,16 @@ def test_write_to_parquet_to_v3io(setup_teardown_test):
     for i in range(10):
         controller.emit([i, f'this is {i}'])
         expected.append([i, f'this is {i}'])
-    expected = pd.DataFrame(expected, columns=columns, dtype='int32')
+    expected_in_pyarrow1 = pd.DataFrame(expected, columns=columns)
+    expected_in_pyarrow3 = expected_in_pyarrow1.copy()
+    expected_in_pyarrow1['my_int'] = expected_in_pyarrow1['my_int'].astype('int32')
+    expected_in_pyarrow3['my_int'] = expected_in_pyarrow3['my_int'].astype('category')
+
     controller.terminate()
     controller.await_termination()
 
     read_back_df = pd.read_parquet(out_dir, columns=columns)
-    assert read_back_df.equals(expected), f"{read_back_df}\n!=\n{expected}"
+    assert read_back_df.equals(expected_in_pyarrow1) or read_back_df.equals(expected_in_pyarrow3)
 
 
 def test_write_to_parquet_to_v3io_single_file_on_termination(setup_teardown_test):
diff --git a/requirements.txt b/requirements.txt
index 32646384..062d458f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ aiohttp~=3.6
 v3io~=0.5.8
 pandas~=1.0
 numpy>=1.16.5, <1.20.0
-pyarrow~=1.0
+pyarrow>=1,<4
 grpcio-tools~=1.30.0
 grpcio~=1.30.0
 v3io-frames~=0.8
diff --git a/tests/test_flow.py b/tests/test_flow.py
index e1e7c77d..ef3873a5 100644
--- a/tests/test_flow.py
+++ b/tests/test_flow.py
@@ -1601,12 +1601,15 @@ def test_write_to_parquet(tmpdir):
     for i in range(10):
         controller.emit([i, f'this is {i}'])
         expected.append([i, f'this is {i}'])
-    expected = pd.DataFrame(expected, columns=columns, dtype='int32')
+    expected_in_pyarrow1 = pd.DataFrame(expected, columns=columns)
+    expected_in_pyarrow3 = expected_in_pyarrow1.copy()
+    expected_in_pyarrow1['my_int'] = expected_in_pyarrow1['my_int'].astype('int32')
+    expected_in_pyarrow3['my_int'] = expected_in_pyarrow3['my_int'].astype('category')
     controller.terminate()
     controller.await_termination()
 
     read_back_df = pd.read_parquet(out_dir, columns=columns)
-    assert read_back_df.equals(expected), f"{read_back_df}\n!=\n{expected}"
+    assert read_back_df.equals(expected_in_pyarrow1) or read_back_df.equals(expected_in_pyarrow3)
 
 
 def test_write_sparse_data_to_parquet(tmpdir):