From 856c985e6acc28d3e0550e33698c4fc3882dc008 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Tue, 9 Jan 2024 14:15:28 -0800 Subject: [PATCH 01/23] Split parquet test into multiple files (#14663) Compiling the parquet C++ tests takes a long time. This work attempts to address this by splitting the (mostly) monolithic `parquet_test.cpp` into multiple compilation units. On my workstation (10 core i7) I was able to reduce the compile time for PARQUET_TEST from 48s down to 23s. This PR also splits the testing `base_fixture.hpp` file into three parts to cut down on unnecessary rmm includes. This change resulted in a time savings of around 20% when compiling the entire test suite locally. Authors: - Ed Seidl (https://github.com/etseidl) - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14663 --- cpp/include/cudf_test/base_fixture.hpp | 308 +- cpp/include/cudf_test/random.hpp | 173 + cpp/include/cudf_test/testing_main.hpp | 178 + cpp/tests/CMakeLists.txt | 10 +- cpp/tests/ast/transform_tests.cpp | 4 +- cpp/tests/binaryop/binop-compiled-test.cpp | 3 +- cpp/tests/binaryop/binop-fixture.hpp | 3 +- cpp/tests/bitmask/bitmask_tests.cpp | 4 +- cpp/tests/column/column_test.cpp | 3 +- cpp/tests/copying/concatenate_tests.cpp | 3 +- cpp/tests/copying/gather_tests.cpp | 3 +- cpp/tests/copying/utility_tests.cpp | 3 +- cpp/tests/datetime/datetime_ops_test.cpp | 3 +- .../device_atomics/device_atomics_test.cu | 4 +- cpp/tests/dictionary/add_keys_test.cpp | 3 +- cpp/tests/encode/encode_tests.cpp | 3 +- cpp/tests/error/error_handling_test.cu | 3 +- cpp/tests/filling/fill_tests.cpp | 3 +- cpp/tests/filling/repeat_tests.cpp | 3 +- cpp/tests/fixed_point/fixed_point_tests.cpp | 3 +- cpp/tests/groupby/argmax_tests.cpp | 3 +- cpp/tests/hash_map/map_test.cu | 3 +- cpp/tests/hashing/murmurhash3_x86_32_test.cpp | 3 +- cpp/tests/interop/to_arrow_test.cpp 
| 3 +- cpp/tests/io/arrow_io_source_test.cpp | 3 +- cpp/tests/io/comp/decomp_test.cpp | 1 + cpp/tests/io/csv_test.cpp | 4 +- cpp/tests/io/file_io_test.cpp | 3 +- cpp/tests/io/fst/fst_test.cu | 3 +- cpp/tests/io/fst/logical_stack_test.cu | 3 +- cpp/tests/io/fst/quote_normalization_test.cu | 1 + cpp/tests/io/json_test.cpp | 4 +- cpp/tests/io/json_tree.cpp | 4 +- cpp/tests/io/json_type_cast_test.cu | 3 +- cpp/tests/io/json_writer.cpp | 3 +- cpp/tests/io/nested_json_test.cpp | 4 +- cpp/tests/io/orc_test.cpp | 2 + cpp/tests/io/parquet_chunked_writer_test.cpp | 855 ++ cpp/tests/io/parquet_common.cpp | 798 ++ cpp/tests/io/parquet_common.hpp | 264 + cpp/tests/io/parquet_misc_test.cpp | 235 + cpp/tests/io/parquet_reader_test.cpp | 2340 ++++++ cpp/tests/io/parquet_test.cpp | 7292 +---------------- cpp/tests/io/parquet_v2_test.cpp | 1528 ++++ cpp/tests/io/parquet_writer_test.cpp | 1720 ++++ cpp/tests/io/row_selection_test.cpp | 3 +- cpp/tests/io/text/data_chunk_source_test.cpp | 3 +- cpp/tests/io/text/multibyte_split_test.cpp | 3 +- cpp/tests/io/type_inference_test.cu | 3 +- .../optional_iterator_test_numeric.cu | 4 +- .../iterator/pair_iterator_test_numeric.cu | 4 +- cpp/tests/iterator/scalar_iterator_test.cu | 4 +- cpp/tests/iterator/value_iterator.cpp | 3 +- .../iterator/value_iterator_test_transform.cu | 4 +- cpp/tests/jit/parse_ptx_function.cpp | 3 +- cpp/tests/join/join_tests.cpp | 3 +- cpp/tests/labeling/label_bins_tests.cpp | 3 +- cpp/tests/lists/extract_tests.cpp | 3 +- cpp/tests/merge/merge_test.cpp | 3 +- .../partitioning/hash_partition_test.cpp | 3 +- cpp/tests/quantiles/quantile_test.cpp | 3 +- cpp/tests/reductions/reduction_tests.cpp | 3 +- cpp/tests/replace/clamp_test.cpp | 3 +- cpp/tests/replace/normalize_replace_tests.cpp | 3 +- cpp/tests/replace/replace_nans_tests.cpp | 3 +- cpp/tests/replace/replace_nulls_tests.cpp | 3 +- cpp/tests/replace/replace_tests.cpp | 3 +- .../reshape/interleave_columns_tests.cpp | 3 +- cpp/tests/rolling/rolling_test.cpp | 4 +- 
cpp/tests/round/round_tests.cpp | 3 +- cpp/tests/scalar/factories_test.cpp | 3 +- cpp/tests/scalar/scalar_test.cpp | 3 +- cpp/tests/search/search_test.cpp | 3 +- cpp/tests/sort/is_sorted_tests.cpp | 3 +- cpp/tests/sort/sort_test.cpp | 3 +- .../apply_boolean_mask_tests.cpp | 4 +- cpp/tests/strings/array_tests.cpp | 3 +- cpp/tests/structs/structs_column_tests.cpp | 3 +- cpp/tests/table/table_tests.cpp | 3 +- cpp/tests/text/ngrams_tests.cpp | 3 +- cpp/tests/transform/bools_to_mask_test.cpp | 3 +- .../integration/unary_transform_test.cpp | 3 +- cpp/tests/transform/row_conversion.cpp | 3 +- cpp/tests/transpose/transpose_test.cpp | 3 +- cpp/tests/types/traits_test.cpp | 3 +- cpp/tests/types/type_dispatcher_test.cu | 3 +- cpp/tests/unary/unary_ops_test.cpp | 3 +- .../column_utilities_tests.cpp | 4 +- .../utilities_tests/column_wrapper_tests.cpp | 3 +- cpp/tests/utilities_tests/span_tests.cu | 3 +- cpp/tests/wrappers/timestamps_test.cu | 3 +- 91 files changed, 8282 insertions(+), 7665 deletions(-) create mode 100644 cpp/include/cudf_test/random.hpp create mode 100644 cpp/include/cudf_test/testing_main.hpp create mode 100644 cpp/tests/io/parquet_chunked_writer_test.cpp create mode 100644 cpp/tests/io/parquet_common.cpp create mode 100644 cpp/tests/io/parquet_common.hpp create mode 100644 cpp/tests/io/parquet_misc_test.cpp create mode 100644 cpp/tests/io/parquet_reader_test.cpp create mode 100644 cpp/tests/io/parquet_v2_test.cpp create mode 100644 cpp/tests/io/parquet_writer_test.cpp diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 06aabbe4e9c..14b94e061ae 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,29 +16,17 @@ #pragma once -#include - -#include -#include -#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include #include -#include namespace cudf { namespace test { + /** * @brief Base test fixture class from which all libcudf tests should inherit. * @@ -80,152 +68,6 @@ class BaseFixtureWithParam : public ::testing::TestWithParam { rmm::mr::device_memory_resource* mr() const { return _mr; } }; -template -struct uniform_distribution_impl {}; -template -struct uniform_distribution_impl>> { - using type = std::uniform_int_distribution; -}; - -template <> -struct uniform_distribution_impl { - using type = std::bernoulli_distribution; -}; - -template -struct uniform_distribution_impl>> { - using type = std::uniform_real_distribution; -}; - -template -struct uniform_distribution_impl< - T, - std::enable_if_t() or cudf::is_fixed_point()>> { - using type = std::uniform_int_distribution; -}; - -template -using uniform_distribution_t = typename uniform_distribution_impl::type; - -namespace detail { - -/** - * @brief Returns an incrementing seed value for use with UniformRandomGenerator. - * - * The intent behind this is to handle the following case: - * - * auto lhs = make_random_wrapped_column(10000); - * auto rhs = make_random_wrapped_column(10000); - * - * Previously, the binops test framework had a persistent UniformRandomGenerator - * that would produce unique values across two calls to make_random_wrapped_column() - * like this. However that code has been changed and each call to make_random_wrapped_column() - * now uses a local UniformRandomGenerator object. If we didn't generate an incrementing seed - * for each one, every call to make_random_wrapped_column() would return the same values. This - * fixes that case and also leaves results across multiple test runs deterministic. 
- */ -uint64_t random_generator_incrementing_seed(); - -} // namespace detail - -/** - * @brief Provides uniform random number generation. - * - * It is often useful in testing to have a convenient source of random numbers. - * This class is intended to serve as a base class for test fixtures to provide - * random number generation. `UniformRandomGenerator::generate()` will generate - * the next random number in the sequence. - * - * Example: - * ```c++ - * UniformRandomGenerator g(0,100); - * g.generate(); // Returns a random number in the range [0,100] - * ``` - * - * @tparam T The type of values that will be generated. - */ -template -class UniformRandomGenerator { - public: - using uniform_distribution = uniform_distribution_t; ///< The uniform distribution type for T. - - UniformRandomGenerator() : rng{std::mt19937_64{detail::random_generator_incrementing_seed()}()} {} - - /** - * @brief Construct a new Uniform Random Generator to generate uniformly - * random numbers in the range `[upper,lower]` - * - * @param lower Lower bound of the range - * @param upper Upper bound of the desired range - * @param seed seed to initialize generator with - */ - template () && !cudf::is_boolean()>* = nullptr> - UniformRandomGenerator(T lower, - T upper, - uint64_t seed = detail::random_generator_incrementing_seed()) - : dist{lower, upper}, rng{std::mt19937_64{seed}()} - { - } - - /** - * @brief Construct a new Uniform Random Generator to generate uniformly random booleans - * - * @param lower ignored - * @param upper ignored - * @param seed seed to initialize generator with - */ - template ()>* = nullptr> - UniformRandomGenerator(T lower, - T upper, - uint64_t seed = detail::random_generator_incrementing_seed()) - : dist{0.5}, rng{std::mt19937_64{seed}()} - { - } - - /** - * @brief Construct a new Uniform Random Generator to generate uniformly - * random numbers in the range `[upper,lower]` - * - * @param lower Lower bound of the range - * @param upper Upper bound of the 
desired range - * @param seed seed to initialize generator with - */ - template () or cudf::is_fixed_point()>* = nullptr> - UniformRandomGenerator(typename TL::rep lower, - typename TL::rep upper, - uint64_t seed = detail::random_generator_incrementing_seed()) - : dist{lower, upper}, rng{std::mt19937_64{seed}()} - { - } - - /** - * @brief Returns the next random number. - * - * @return generated random number - */ - template ()>* = nullptr> - T generate() - { - return T{dist(rng)}; - } - - /** - * @brief Returns the next random number. - * @return generated random number - */ - template ()>* = nullptr> - T generate() - { - return T{typename T::duration{dist(rng)}}; - } - - private: - uniform_distribution dist{}; ///< Distribution - Engine rng; ///< Random generator -}; - /** * @brief Provides temporary directory for temporary test files. * @@ -255,145 +97,5 @@ class TempDirTestEnvironment : public ::testing::Environment { std::string get_temp_filepath(std::string filename) { return tmpdir.path() + filename; } }; -/// MR factory functions -inline auto make_cuda() { return std::make_shared(); } - -inline auto make_async() { return std::make_shared(); } - -inline auto make_managed() { return std::make_shared(); } - -inline auto make_pool() -{ - auto const [free, total] = rmm::detail::available_device_memory(); - auto min_alloc = - rmm::detail::align_down(std::min(free, total / 10), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - return rmm::mr::make_owning_wrapper(make_cuda(), min_alloc); -} - -inline auto make_arena() -{ - return rmm::mr::make_owning_wrapper(make_cuda()); -} - -inline auto make_binning() -{ - auto pool = make_pool(); - // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB - // Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); - return mr; -} - -/** - * @brief Creates a memory resource for the unit test environment - * given the name of the allocation mode. 
- * - * The returned resource instance must be kept alive for the duration of - * the tests. Attaching the resource to a TestEnvironment causes - * issues since the environment objects are not destroyed until - * after the runtime is shutdown. - * - * @throw cudf::logic_error if the `allocation_mode` is unsupported. - * - * @param allocation_mode String identifies which resource type. - * Accepted types are "pool", "cuda", and "managed" only. - * @return Memory resource instance - */ -inline std::shared_ptr create_memory_resource( - std::string const& allocation_mode) -{ - if (allocation_mode == "binning") return make_binning(); - if (allocation_mode == "cuda") return make_cuda(); - if (allocation_mode == "async") return make_async(); - if (allocation_mode == "pool") return make_pool(); - if (allocation_mode == "arena") return make_arena(); - if (allocation_mode == "managed") return make_managed(); - CUDF_FAIL("Invalid RMM allocation mode: " + allocation_mode); -} - } // namespace test } // namespace cudf - -/** - * @brief Parses the cuDF test command line options. - * - * Currently only supports 'rmm_mode' string parameter, which set the rmm - * allocation mode. The default value of the parameter is 'pool'. - * Environment variable 'CUDF_TEST_RMM_MODE' can also be used to set the rmm - * allocation mode. If both are set, the value of 'rmm_mode' string parameter - * takes precedence. - * - * @return Parsing results in the form of unordered map - */ -inline auto parse_cudf_test_opts(int argc, char** argv) -{ - try { - cxxopts::Options options(argv[0], " - cuDF tests command line options"); - char const* env_rmm_mode = std::getenv("GTEST_CUDF_RMM_MODE"); // Overridden by CLI options - char const* env_stream_mode = - std::getenv("GTEST_CUDF_STREAM_MODE"); // Overridden by CLI options - char const* env_stream_error_mode = - std::getenv("GTEST_CUDF_STREAM_ERROR_MODE"); // Overridden by CLI options - auto default_rmm_mode = env_rmm_mode ? 
env_rmm_mode : "pool"; - auto default_stream_mode = env_stream_mode ? env_stream_mode : "default"; - auto default_stream_error_mode = env_stream_error_mode ? env_stream_error_mode : "error"; - options.allow_unrecognised_options().add_options()( - "rmm_mode", - "RMM allocation mode", - cxxopts::value()->default_value(default_rmm_mode)); - // `new_cudf_default` means that cudf::get_default_stream has been patched, - // so we raise errors anywhere that a CUDA default stream is observed - // instead of cudf::get_default_stream(). This corresponds to compiling - // identify_stream_usage with STREAM_MODE_TESTING=OFF (must do both at the - // same time). - // `new_testing_default` means that cudf::test::get_default_stream has been - // patched, so we raise errors anywhere that _any_ other stream is - // observed. This corresponds to compiling identify_stream_usage with - // STREAM_MODE_TESTING=ON (must do both at the same time). - options.allow_unrecognised_options().add_options()( - "stream_mode", - "Whether to use a non-default stream", - cxxopts::value()->default_value(default_stream_mode)); - options.allow_unrecognised_options().add_options()( - "stream_error_mode", - "Whether to error or print to stdout when a non-default stream is observed and stream_mode " - "is not \"default\"", - cxxopts::value()->default_value(default_stream_error_mode)); - return options.parse(argc, argv); - } catch (cxxopts::OptionException const& e) { - CUDF_FAIL("Error parsing command line options"); - } -} - -/** - * @brief Macro that defines main function for gtest programs that use rmm - * - * Should be included in every test program that uses rmm allocators since - * it maintains the lifespan of the rmm default memory resource. - * This `main` function is a wrapper around the google test generated `main`, - * maintaining the original functionality. 
In addition, this custom `main` - * function parses the command line to customize test behavior, like the - * allocation mode used for creating the default memory resource. - */ -#define CUDF_TEST_PROGRAM_MAIN() \ - int main(int argc, char** argv) \ - { \ - ::testing::InitGoogleTest(&argc, argv); \ - auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ - auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ - auto resource = cudf::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource(resource.get()); \ - \ - auto const stream_mode = cmd_opts["stream_mode"].as(); \ - if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { \ - auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); \ - auto const error_on_invalid_stream = (stream_error_mode == "error"); \ - auto const check_default_stream = (stream_mode == "new_cudf_default"); \ - auto adaptor = make_stream_checking_resource_adaptor( \ - resource.get(), error_on_invalid_stream, check_default_stream); \ - rmm::mr::set_current_device_resource(&adaptor); \ - return RUN_ALL_TESTS(); \ - } \ - \ - return RUN_ALL_TESTS(); \ - } diff --git a/cpp/include/cudf_test/random.hpp b/cpp/include/cudf_test/random.hpp new file mode 100644 index 00000000000..498bacc81c9 --- /dev/null +++ b/cpp/include/cudf_test/random.hpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +namespace cudf { +namespace test { + +template +struct uniform_distribution_impl {}; +template +struct uniform_distribution_impl>> { + using type = std::uniform_int_distribution; +}; + +template <> +struct uniform_distribution_impl { + using type = std::bernoulli_distribution; +}; + +template +struct uniform_distribution_impl>> { + using type = std::uniform_real_distribution; +}; + +template +struct uniform_distribution_impl< + T, + std::enable_if_t() or cudf::is_fixed_point()>> { + using type = std::uniform_int_distribution; +}; + +template +using uniform_distribution_t = typename uniform_distribution_impl::type; + +namespace detail { + +/** + * @brief Returns an incrementing seed value for use with UniformRandomGenerator. + * + * The intent behind this is to handle the following case: + * + * auto lhs = make_random_wrapped_column(10000); + * auto rhs = make_random_wrapped_column(10000); + * + * Previously, the binops test framework had a persistent UniformRandomGenerator + * that would produce unique values across two calls to make_random_wrapped_column() + * like this. However that code has been changed and each call to make_random_wrapped_column() + * now uses a local UniformRandomGenerator object. If we didn't generate an incrementing seed + * for each one, every call to make_random_wrapped_column() would return the same values. This + * fixes that case and also leaves results across multiple test runs deterministic. + */ +uint64_t random_generator_incrementing_seed(); + +} // namespace detail + +/** + * @brief Provides uniform random number generation. + * + * It is often useful in testing to have a convenient source of random numbers. + * This class is intended to serve as a base class for test fixtures to provide + * random number generation. `UniformRandomGenerator::generate()` will generate + * the next random number in the sequence. 
+ * + * Example: + * ```c++ + * UniformRandomGenerator g(0,100); + * g.generate(); // Returns a random number in the range [0,100] + * ``` + * + * @tparam T The type of values that will be generated. + */ +template +class UniformRandomGenerator { + public: + using uniform_distribution = uniform_distribution_t; ///< The uniform distribution type for T. + + UniformRandomGenerator() : rng{std::mt19937_64{detail::random_generator_incrementing_seed()}()} {} + + /** + * @brief Construct a new Uniform Random Generator to generate uniformly + * random numbers in the range `[upper,lower]` + * + * @param lower Lower bound of the range + * @param upper Upper bound of the desired range + * @param seed seed to initialize generator with + */ + template () && !cudf::is_boolean()>* = nullptr> + UniformRandomGenerator(T lower, + T upper, + uint64_t seed = detail::random_generator_incrementing_seed()) + : dist{lower, upper}, rng{std::mt19937_64{seed}()} + { + } + + /** + * @brief Construct a new Uniform Random Generator to generate uniformly random booleans + * + * @param lower ignored + * @param upper ignored + * @param seed seed to initialize generator with + */ + template ()>* = nullptr> + UniformRandomGenerator(T lower, + T upper, + uint64_t seed = detail::random_generator_incrementing_seed()) + : dist{0.5}, rng{std::mt19937_64{seed}()} + { + } + + /** + * @brief Construct a new Uniform Random Generator to generate uniformly + * random numbers in the range `[upper,lower]` + * + * @param lower Lower bound of the range + * @param upper Upper bound of the desired range + * @param seed seed to initialize generator with + */ + template () or cudf::is_fixed_point()>* = nullptr> + UniformRandomGenerator(typename TL::rep lower, + typename TL::rep upper, + uint64_t seed = detail::random_generator_incrementing_seed()) + : dist{lower, upper}, rng{std::mt19937_64{seed}()} + { + } + + /** + * @brief Returns the next random number. 
+ * + * @return generated random number + */ + template ()>* = nullptr> + T generate() + { + return T{dist(rng)}; + } + + /** + * @brief Returns the next random number. + * @return generated random number + */ + template ()>* = nullptr> + T generate() + { + return T{typename T::duration{dist(rng)}}; + } + + private: + uniform_distribution dist{}; ///< Distribution + Engine rng; ///< Random generator +}; + +} // namespace test +} // namespace cudf diff --git a/cpp/include/cudf_test/testing_main.hpp b/cpp/include/cudf_test/testing_main.hpp new file mode 100644 index 00000000000..12dbb4c7851 --- /dev/null +++ b/cpp/include/cudf_test/testing_main.hpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf { +namespace test { + +/// MR factory functions +inline auto make_cuda() { return std::make_shared(); } + +inline auto make_async() { return std::make_shared(); } + +inline auto make_managed() { return std::make_shared(); } + +inline auto make_pool() +{ + auto const [free, total] = rmm::detail::available_device_memory(); + auto min_alloc = + rmm::detail::align_down(std::min(free, total / 10), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::mr::make_owning_wrapper(make_cuda(), min_alloc); +} + +inline auto make_arena() +{ + return rmm::mr::make_owning_wrapper(make_cuda()); +} + +inline auto make_binning() +{ + auto pool = make_pool(); + // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB + // Larger allocations will use the pool resource + auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); + return mr; +} + +/** + * @brief Creates a memory resource for the unit test environment + * given the name of the allocation mode. + * + * The returned resource instance must be kept alive for the duration of + * the tests. Attaching the resource to a TestEnvironment causes + * issues since the environment objects are not destroyed until + * after the runtime is shutdown. + * + * @throw cudf::logic_error if the `allocation_mode` is unsupported. + * + * @param allocation_mode String identifies which resource type. + * Accepted types are "pool", "cuda", and "managed" only. 
+ * @return Memory resource instance + */ +inline std::shared_ptr create_memory_resource( + std::string const& allocation_mode) +{ + if (allocation_mode == "binning") return make_binning(); + if (allocation_mode == "cuda") return make_cuda(); + if (allocation_mode == "async") return make_async(); + if (allocation_mode == "pool") return make_pool(); + if (allocation_mode == "arena") return make_arena(); + if (allocation_mode == "managed") return make_managed(); + CUDF_FAIL("Invalid RMM allocation mode: " + allocation_mode); +} + +} // namespace test +} // namespace cudf + +/** + * @brief Parses the cuDF test command line options. + * + * Currently only supports 'rmm_mode' string parameter, which set the rmm + * allocation mode. The default value of the parameter is 'pool'. + * Environment variable 'CUDF_TEST_RMM_MODE' can also be used to set the rmm + * allocation mode. If both are set, the value of 'rmm_mode' string parameter + * takes precedence. + * + * @return Parsing results in the form of unordered map + */ +inline auto parse_cudf_test_opts(int argc, char** argv) +{ + try { + cxxopts::Options options(argv[0], " - cuDF tests command line options"); + char const* env_rmm_mode = std::getenv("GTEST_CUDF_RMM_MODE"); // Overridden by CLI options + char const* env_stream_mode = + std::getenv("GTEST_CUDF_STREAM_MODE"); // Overridden by CLI options + char const* env_stream_error_mode = + std::getenv("GTEST_CUDF_STREAM_ERROR_MODE"); // Overridden by CLI options + auto default_rmm_mode = env_rmm_mode ? env_rmm_mode : "pool"; + auto default_stream_mode = env_stream_mode ? env_stream_mode : "default"; + auto default_stream_error_mode = env_stream_error_mode ? 
env_stream_error_mode : "error"; + options.allow_unrecognised_options().add_options()( + "rmm_mode", + "RMM allocation mode", + cxxopts::value()->default_value(default_rmm_mode)); + // `new_cudf_default` means that cudf::get_default_stream has been patched, + // so we raise errors anywhere that a CUDA default stream is observed + // instead of cudf::get_default_stream(). This corresponds to compiling + // identify_stream_usage with STREAM_MODE_TESTING=OFF (must do both at the + // same time). + // `new_testing_default` means that cudf::test::get_default_stream has been + // patched, so we raise errors anywhere that _any_ other stream is + // observed. This corresponds to compiling identify_stream_usage with + // STREAM_MODE_TESTING=ON (must do both at the same time). + options.allow_unrecognised_options().add_options()( + "stream_mode", + "Whether to use a non-default stream", + cxxopts::value()->default_value(default_stream_mode)); + options.allow_unrecognised_options().add_options()( + "stream_error_mode", + "Whether to error or print to stdout when a non-default stream is observed and stream_mode " + "is not \"default\"", + cxxopts::value()->default_value(default_stream_error_mode)); + return options.parse(argc, argv); + } catch (cxxopts::OptionException const& e) { + CUDF_FAIL("Error parsing command line options"); + } +} + +/** + * @brief Macro that defines main function for gtest programs that use rmm + * + * Should be included in every test program that uses rmm allocators since + * it maintains the lifespan of the rmm default memory resource. + * This `main` function is a wrapper around the google test generated `main`, + * maintaining the original functionality. In addition, this custom `main` + * function parses the command line to customize test behavior, like the + * allocation mode used for creating the default memory resource. 
+ */ +#define CUDF_TEST_PROGRAM_MAIN() \ + int main(int argc, char** argv) \ + { \ + ::testing::InitGoogleTest(&argc, argv); \ + auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ + auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ + auto resource = cudf::test::create_memory_resource(rmm_mode); \ + rmm::mr::set_current_device_resource(resource.get()); \ + \ + auto const stream_mode = cmd_opts["stream_mode"].as(); \ + if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { \ + auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); \ + auto const error_on_invalid_stream = (stream_error_mode == "error"); \ + auto const check_default_stream = (stream_mode == "new_cudf_default"); \ + auto adaptor = make_stream_checking_resource_adaptor( \ + resource.get(), error_on_invalid_stream, check_default_stream); \ + rmm::mr::set_current_device_resource(&adaptor); \ + return RUN_ALL_TESTS(); \ + } \ + \ + return RUN_ALL_TESTS(); \ + } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d0abcc225d1..48bc4ac6fc1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -286,7 +286,15 @@ ConfigureTest( PERCENT 30 ) ConfigureTest( - PARQUET_TEST io/parquet_test.cpp io/parquet_chunked_reader_test.cpp + PARQUET_TEST + io/parquet_test.cpp + io/parquet_chunked_reader_test.cpp + io/parquet_chunked_writer_test.cpp + io/parquet_common.cpp + io/parquet_misc_test.cpp + io/parquet_reader_test.cpp + io/parquet_writer_test.cpp + io/parquet_v2_test.cpp GPUS 1 PERCENT 30 ) diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index 0476cb17693..01842969268 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -38,6 +39,7 @@ #include #include +#include #include #include #include diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 3c5adafc894..27865bd062f 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/binaryop/binop-fixture.hpp b/cpp/tests/binaryop/binop-fixture.hpp index bc3820da822..68e8b0f6fc6 100644 --- a/cpp/tests/binaryop/binop-fixture.hpp +++ b/cpp/tests/binaryop/binop-fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -21,6 +21,7 @@ #include #include +#include #include diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index ab008b51b51..1dd39c1c7ae 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp index b278e4928e5..1ba9b14dc1f 100644 --- a/cpp/tests/column/column_test.cpp +++ b/cpp/tests/column/column_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp index b8faa0bd081..06fb687ac2d 100644 --- a/cpp/tests/copying/concatenate_tests.cpp +++ b/cpp/tests/copying/concatenate_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/copying/gather_tests.cpp b/cpp/tests/copying/gather_tests.cpp index 3120b737427..284b6c4c50c 100644 --- a/cpp/tests/copying/gather_tests.cpp +++ b/cpp/tests/copying/gather_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/copying/utility_tests.cpp b/cpp/tests/copying/utility_tests.cpp index dadb8ea4eb8..f69bea2834f 100644 --- a/cpp/tests/copying/utility_tests.cpp +++ b/cpp/tests/copying/utility_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 6bae20efa8c..b58cd0e0cb9 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index 24195362d92..f0c69ea6bfb 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ #include #include +#include +#include #include #include diff --git a/cpp/tests/dictionary/add_keys_test.cpp b/cpp/tests/dictionary/add_keys_test.cpp index adbcf41a66c..32a6885df09 100644 --- a/cpp/tests/dictionary/add_keys_test.cpp +++ b/cpp/tests/dictionary/add_keys_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include #include +#include #include diff --git a/cpp/tests/encode/encode_tests.cpp b/cpp/tests/encode/encode_tests.cpp index 3638706ba7b..87818e16bb9 100644 --- a/cpp/tests/encode/encode_tests.cpp +++ b/cpp/tests/encode/encode_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include template diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index 5b842322681..6bb1afda2a8 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/filling/fill_tests.cpp b/cpp/tests/filling/fill_tests.cpp index 564f5547009..95a27defa4e 100644 --- a/cpp/tests/filling/fill_tests.cpp +++ b/cpp/tests/filling/fill_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/filling/repeat_tests.cpp b/cpp/tests/filling/repeat_tests.cpp index 4f74523ec7c..6326765c68b 100644 --- a/cpp/tests/filling/repeat_tests.cpp +++ b/cpp/tests/filling/repeat_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp index 227a75d1bd5..1c1680fcd6e 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/groupby/argmax_tests.cpp b/cpp/tests/groupby/argmax_tests.cpp index e0c5f37d1b8..f9d034ad0c7 100644 --- a/cpp/tests/groupby/argmax_tests.cpp +++ b/cpp/tests/groupby/argmax_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu index 8d71c512c79..0232696a123 100644 --- a/cpp/tests/hash_map/map_test.cu +++ b/cpp/tests/hash_map/map_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #include #include +#include #include diff --git a/cpp/tests/hashing/murmurhash3_x86_32_test.cpp b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp index a4b16550398..c3cc20c28b7 100644 --- a/cpp/tests/hashing/murmurhash3_x86_32_test.cpp +++ b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include #include +#include #include constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index d6762e70d80..82c4ad7d2f1 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/io/arrow_io_source_test.cpp b/cpp/tests/io/arrow_io_source_test.cpp index 979f8e4fb05..ffdf2c7e00f 100644 --- a/cpp/tests/io/arrow_io_source_test.cpp +++ b/cpp/tests/io/arrow_io_source_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 7bff730afe6..cf5a4f1fda5 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 8922658ac97..c6e9114605b 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/cpp/tests/io/file_io_test.cpp b/cpp/tests/io/file_io_test.cpp index c9a17185bc7..3c41f21b0a4 100644 --- a/cpp/tests/io/file_io_test.cpp +++ b/cpp/tests/io/file_io_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include diff --git a/cpp/tests/io/fst/fst_test.cu b/cpp/tests/io/fst/fst_test.cu index 74ccde6c364..4064204c56d 100644 --- a/cpp/tests/io/fst/fst_test.cu +++ b/cpp/tests/io/fst/fst_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/io/fst/logical_stack_test.cu b/cpp/tests/io/fst/logical_stack_test.cu index 20b8674a717..f434736d7f5 100644 --- a/cpp/tests/io/fst/logical_stack_test.cu +++ b/cpp/tests/io/fst/logical_stack_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include +#include #include #include diff --git a/cpp/tests/io/fst/quote_normalization_test.cu b/cpp/tests/io/fst/quote_normalization_test.cu index e2636ab029f..d0794b8f17e 100644 --- a/cpp/tests/io/fst/quote_normalization_test.cu +++ b/cpp/tests/io/fst/quote_normalization_test.cu @@ -19,6 +19,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 09c9179de82..7fce31461ef 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp index 56e2404b683..f5d03293d30 100644 --- a/cpp/tests/io/json_tree.cpp +++ b/cpp/tests/io/json_tree.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,10 +26,12 @@ #include #include #include +#include #include #include +#include #include #include diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json_type_cast_test.cu index 9eb5e8f5230..036b9170250 100644 --- a/cpp/tests/io/json_type_cast_test.cu +++ b/cpp/tests/io/json_type_cast_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/io/json_writer.cpp b/cpp/tests/io/json_writer.cpp index a85a696565b..946b939f456 100644 --- a/cpp/tests/io/json_writer.cpp +++ b/cpp/tests/io/json_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp index 93ad05a29fe..070ac5ce870 100644 --- a/cpp/tests/io/nested_json_test.cpp +++ b/cpp/tests/io/nested_json_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,7 +30,9 @@ #include #include #include +#include #include +#include #include diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 5124ac579fd..2ae6edc6c7d 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/cpp/tests/io/parquet_chunked_writer_test.cpp b/cpp/tests/io/parquet_chunked_writer_test.cpp new file mode 100644 index 00000000000..a0c9641097b --- /dev/null +++ b/cpp/tests/io/parquet_chunked_writer_test.cpp @@ -0,0 +1,855 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "parquet_common.hpp" + +#include +#include +#include + +#include + +// Base test fixture for chunked writer tests +struct ParquetChunkedWriterTest : public cudf::test::BaseFixture {}; + +// Typed test fixture for numeric type tests +template +struct ParquetChunkedWriterNumericTypeTest : public ParquetChunkedWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +TEST_F(ParquetChunkedWriterTest, SingleTable) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedSingle.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); +} + +TEST_F(ParquetChunkedWriterTest, SimpleTable) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, true); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, LargeTables) +{ + srand(31337); + auto table1 = create_random_fixed_table(512, 4096, true); + auto table2 = create_random_fixed_table(512, 8192, true); + + 
auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + auto md = cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); + ASSERT_EQ(md, nullptr); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ManyTables) +{ + srand(31337); + std::vector> tables; + std::vector table_views; + constexpr int num_tables = 96; + for (int idx = 0; idx < num_tables; idx++) { + auto tbl = create_random_fixed_table(16, 64, true); + table_views.push_back(*tbl); + tables.push_back(std::move(tbl)); + } + + auto expected = cudf::concatenate(table_views); + + auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { + writer.write(tbl); + }); + auto md = writer.close({"dummy/path"}); + ASSERT_NE(md, nullptr); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetChunkedWriterTest, Strings) +{ + std::vector> cols; + + bool mask1[] = {true, true, false, true, true, true, true}; + std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; + cudf::test::strings_column_wrapper strings1(h_strings1.begin(), h_strings1.end(), 
mask1); + cols.push_back(strings1.release()); + cudf::table tbl1(std::move(cols)); + + bool mask2[] = {false, true, true, true, true, true, true}; + std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; + cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), mask2); + cols.push_back(strings2.release()); + cudf::table tbl2(std::move(cols)); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetChunkedWriterTest, ListColumn) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // COL0 (Same nullability) ==================== + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + lcw col0_tbl0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; + + // [7, 8, 9] + // [] + // [NULL, 11] + // NULL + lcw col0_tbl1{{{7, 8, 9}, {}, {{10, 11}, valids}, {}}, valids2}; + + // COL1 (Nullability different in different chunks, test of merging nullability in writer) + // [NULL, 2, NULL] + // [] + // [4, 5] + // [] + lcw col1_tbl0{{{1, 2, 3}, valids}, {}, {4, 5}, {}}; + + // [7, 8, 9] + // [] + // [10, 11] + // NULL + lcw col1_tbl1{{{7, 8, 9}, {}, {10, 11}, {}}, valids2}; + + // COL2 (non-nested columns to test proper schema construction) + size_t num_rows_tbl0 = static_cast(col0_tbl0).size(); + 
size_t num_rows_tbl1 = static_cast(col0_tbl1).size(); + auto seq_col0 = random_values(num_rows_tbl0); + auto seq_col1 = random_values(num_rows_tbl1); + + column_wrapper col2_tbl0{seq_col0.begin(), seq_col0.end(), valids}; + column_wrapper col2_tbl1{seq_col1.begin(), seq_col1.end(), valids2}; + + auto tbl0 = table_view({col0_tbl0, col1_tbl0, col2_tbl0}); + auto tbl1 = table_view({col0_tbl1, col1_tbl1, col2_tbl1}); + + auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); + + auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl0).write(tbl1); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetChunkedWriterTest, ListOfStruct) +{ + // Table 1 + auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + + auto list_offsets_column_1 = + cudf::test::fixed_width_column_wrapper{0, 2, 3, 3}.release(); + auto num_list_rows_1 = list_offsets_column_1->size() - 1; + + auto list_col_1 = cudf::make_lists_column( + num_list_rows_1, std::move(list_offsets_column_1), struct_2_1.release(), 0, {}); + + auto table_1 = table_view({*list_col_1}); + + // Table 2 + auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; + auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; + auto struct_1_2 = 
cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; + auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}, {1, 1, 0}}; + auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; + + auto list_offsets_column_2 = + cudf::test::fixed_width_column_wrapper{0, 1, 2, 3}.release(); + auto num_list_rows_2 = list_offsets_column_2->size() - 1; + + auto list_col_2 = cudf::make_lists_column( + num_list_rows_2, std::move(list_offsets_column_2), struct_2_2.release(), 0, {}); + + auto table_2 = table_view({*list_col_2}); + + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); + + cudf::io::table_input_metadata expected_metadata(table_1); + expected_metadata.column_metadata[0].set_name("family"); + expected_metadata.column_metadata[0].child(1).set_nullability(false); + expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + args.set_metadata(expected_metadata); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto 
valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // Table 1 =========================== + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + lcw land_1{{{}, {{1, 2, 3}, valids}, {4, 5}, {}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + lcw flats_1{lcw{}, {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}, {}}, lcw{lcw{}}}; + + auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3, 1.1}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5, 31}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1, land_1, flats_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + + auto list_offsets_column_1 = + cudf::test::fixed_width_column_wrapper{0, 2, 3, 4}.release(); + auto num_list_rows_1 = list_offsets_column_1->size() - 1; + + auto list_col_1 = cudf::make_lists_column( + num_list_rows_1, std::move(list_offsets_column_1), struct_2_1.release(), 0, {}); + + auto table_1 = table_view({*list_col_1}); + + // Table 2 =========================== + + // [] + // [7, 8, 9] + lcw land_2{{}, {7, 8, 9}}; + + // [[]] + // [[], [], []] + lcw flats_2{lcw{lcw{}}, lcw{lcw{}, lcw{}, lcw{}}}; + + auto weight_2 = cudf::test::fixed_width_column_wrapper{{-1.0, -1.0}}; + auto ages_2 = cudf::test::fixed_width_column_wrapper{{351, 351}, {1, 0}}; + auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2, land_2, flats_2}, {0, 1}}; + auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false}, {1, 0}}; + auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; + + auto list_offsets_column_2 = + cudf::test::fixed_width_column_wrapper{0, 1, 2}.release(); + auto num_list_rows_2 = list_offsets_column_2->size() - 1; + + auto list_col_2 = 
cudf::make_lists_column( + num_list_rows_2, std::move(list_offsets_column_2), struct_2_2.release(), 0, {}); + + auto table_2 = table_view({*list_col_2}); + + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); + + cudf::io::table_input_metadata expected_metadata(table_1); + expected_metadata.column_metadata[0].set_name("family"); + expected_metadata.column_metadata[0].child(1).set_nullability(false); + expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); + expected_metadata.column_metadata[0].child(1).child(1).child(2).set_name("land_unit"); + expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats"); + + auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + args.set_metadata(expected_metadata); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + + // We specifically mentioned in input schema that struct_2 is non-nullable across chunked calls. 
+ auto result_parent_list = result.tbl->get_column(0); + auto result_struct_2 = result_parent_list.child(cudf::lists_column_view::child_column_index); + EXPECT_EQ(result_struct_2.nullable(), false); +} + +TEST_F(ParquetChunkedWriterTest, MismatchedTypes) +{ + srand(31337); + auto table1 = create_random_fixed_table(4, 4, true); + auto table2 = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table1); + EXPECT_THROW(writer.write(*table2), cudf::logic_error); + writer.close(); +} + +TEST_F(ParquetChunkedWriterTest, ChunkedWriteAfterClosing) +{ + srand(31337); + auto table = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedWriteAfterClosing.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table).close(); + EXPECT_THROW(writer.write(*table), cudf::logic_error); +} + +TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) +{ + srand(31337); + auto table = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); +} + +TEST_F(ParquetChunkedWriterTest, MismatchedStructure) +{ + srand(31337); + auto table1 = create_random_fixed_table(4, 4, true); + 
auto table2 = create_random_fixed_table(3, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table1); + EXPECT_THROW(writer.write(*table2), cudf::logic_error); + writer.close(); +} + +TEST_F(ParquetChunkedWriterTest, MismatchedStructureList) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // COL0 (mismatched depth) ==================== + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + lcw col00{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; + + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col01{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // COL2 (non-nested columns to test proper schema construction) + size_t num_rows = static_cast(col00).size(); + auto seq_col0 = random_values(num_rows); + auto seq_col1 = random_values(num_rows); + + column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; + column_wrapper col11{seq_col1.begin(), seq_col1.end(), valids2}; + + auto tbl0 = table_view({col00, col10}); + auto tbl1 = table_view({col01, col11}); + + auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(tbl0); + EXPECT_THROW(writer.write(tbl1), cudf::logic_error); +} + +TEST_F(ParquetChunkedWriterTest, DifferentNullability) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, 
false); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) +{ + // Struct, + // age:int + // > (nullable) + // > (non-nullable) + + // Table 1: is_human and struct_1 are non-nullable but should be nullable when read back. + auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + auto table_1 = cudf::table_view({struct_2_1}); + + // Table 2: struct_1 and is_human are nullable now so if we hadn't assumed worst case (nullable) + // when writing table_1, we would have wrong pages for it. 
+ auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; + auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; + auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; + auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}, {1, 1, 0}}; + auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; + auto table_2 = cudf::table_view({struct_2_2}); + + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); + + cudf::io::table_input_metadata expected_metadata(table_1); + expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + args.set_metadata(expected_metadata); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetChunkedWriterTest, ForcedNullability) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, false); + auto table2 = create_random_fixed_table(5, 5, false); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet"); + + cudf::io::table_input_metadata metadata(*table1); 
+ + // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst + // and considers all columns nullable. However cudf::concatenate will not force nulls in case no + // columns are nullable. To get the expected result, we tell the writer the nullability of all + // columns in advance. + for (auto& col_meta : metadata.column_metadata) { + col_meta.set_nullability(false); + } + + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) + .metadata(std::move(metadata)); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) +{ + srand(31337); + + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // COL0 ==================== + // [1, 2, 3] + // [] + // [4, 5] + // NULL + lcw col00{{{1, 2, 3}, {}, {4, 5}, {}}, valids2}; + + // [7] + // [] + // [8, 9, 10, 11] + // NULL + lcw col01{{{7}, {}, {8, 9, 10, 11}, {}}, valids2}; + + // COL1 (non-nested columns to test proper schema construction) + size_t num_rows = static_cast(col00).size(); + auto seq_col0 = random_values(num_rows); + auto seq_col1 = random_values(num_rows); + + column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; + column_wrapper col11{seq_col1.begin(), seq_col1.end(), valids2}; + + auto table1 = table_view({col00, col10}); + auto table2 = table_view({col01, col11}); + + auto full_table = cudf::concatenate(std::vector({table1, table2})); + + cudf::io::table_input_metadata 
metadata(table1); + metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level + metadata.column_metadata[0].child(1).set_nullability( + false); // non-nullable at second (leaf) level + metadata.column_metadata[1].set_nullability(true); + + auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet"); + + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) + .metadata(std::move(metadata)); + cudf::io::parquet_chunked_writer(args).write(table1).write(table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) +{ + // Struct, + // age:int + // > (nullable) + // > (non-nullable) + + // Table 1: is_human and struct_2 are non-nullable and should stay that way when read back. 
+ auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + auto table_1 = cudf::table_view({struct_2_1}); + + auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; + auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; + auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; + auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}}; + auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; + auto table_2 = cudf::table_view({struct_2_2}); + + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); + + cudf::io::table_input_metadata expected_metadata(table_1); + expected_metadata.column_metadata[0].set_name("being").set_nullability(false); + expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false); + expected_metadata.column_metadata[0].child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + args.set_metadata(expected_metadata); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); + 
cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetChunkedWriterTest, ReadRowGroups) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, true); + + auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + { + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + } + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .row_groups({{1, 0, 1}}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedRowGroupsError.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).row_groups({{0, 1}}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + read_opts.set_row_groups({{-1}}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + read_opts.set_row_groups({{0}, {0}}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); +} + +TEST_F(ParquetChunkedWriterTest, RowGroupPageSizeMatch) +{ + std::vector out_buffer; + + auto options = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .row_group_size_bytes(128 * 1024) + .max_page_size_bytes(512 * 1024) 
+ .row_group_size_rows(10000) + .max_page_size_rows(20000) + .build(); + EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); + EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); +} + +TEST_F(ParquetChunkedWriterTest, CompStats) +{ + auto table = create_random_fixed_table(1, 100000, true); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::chunked_parquet_writer_options opts = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}) + .compression_statistics(stats); + cudf::io::parquet_chunked_writer(opts).write(*table); + + EXPECT_NE(stats->num_compressed_bytes(), 0); + EXPECT_EQ(stats->num_failed_bytes(), 0); + EXPECT_EQ(stats->num_skipped_bytes(), 0); + EXPECT_FALSE(std::isnan(stats->compression_ratio())); + + auto const single_table_comp_stats = *stats; + cudf::io::parquet_chunked_writer(opts).write(*table); + + EXPECT_EQ(stats->compression_ratio(), single_table_comp_stats.compression_ratio()); + EXPECT_EQ(stats->num_compressed_bytes(), 2 * single_table_comp_stats.num_compressed_bytes()); + + EXPECT_EQ(stats->num_failed_bytes(), 0); + EXPECT_EQ(stats->num_skipped_bytes(), 0); +} + +TEST_F(ParquetChunkedWriterTest, CompStatsEmptyTable) +{ + auto table_no_rows = create_random_fixed_table(20, 0, false); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::chunked_parquet_writer_options opts = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}) + .compression_statistics(stats); + cudf::io::parquet_chunked_writer(opts).write(*table_no_rows); + + expect_compression_stats_empty(stats); +} + +TYPED_TEST_SUITE(ParquetChunkedWriterNumericTypeTest, SupportedTypes); + +TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) +{ + // write out two 31 row tables and make sure they get + // read back with all their validity bits in the right place + + using T = TypeParam; + + 
int num_els = 31; + std::vector> cols; + + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + + true, true, true, true, true, true, true, true, true}; + T c1a[num_els]; + std::fill(c1a, c1a + num_els, static_cast(5)); + T c1b[num_els]; + std::fill(c1b, c1b + num_els, static_cast(6)); + column_wrapper c1a_w(c1a, c1a + num_els, mask); + column_wrapper c1b_w(c1b, c1b + num_els, mask); + cols.push_back(c1a_w.release()); + cols.push_back(c1b_w.release()); + cudf::table tbl1(std::move(cols)); + + T c2a[num_els]; + std::fill(c2a, c2a + num_els, static_cast(8)); + T c2b[num_els]; + std::fill(c2b, c2b + num_els, static_cast(9)); + column_wrapper c2a_w(c2a, c2a + num_els, mask); + column_wrapper c2b_w(c2b, c2b + num_els, mask); + cols.push_back(c2a_w.release()); + cols.push_back(c2b_w.release()); + cudf::table tbl2(std::move(cols)); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) +{ + // write out two 33 row tables and make sure they get + // read back with all their validity bits in the right place + + using T = TypeParam; + + int num_els = 33; + std::vector> cols; + + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, 
true}; + + T c1a[num_els]; + std::fill(c1a, c1a + num_els, static_cast(5)); + T c1b[num_els]; + std::fill(c1b, c1b + num_els, static_cast(6)); + column_wrapper c1a_w(c1a, c1a + num_els, mask); + column_wrapper c1b_w(c1b, c1b + num_els, mask); + cols.push_back(c1a_w.release()); + cols.push_back(c1b_w.release()); + cudf::table tbl1(std::move(cols)); + + T c2a[num_els]; + std::fill(c2a, c2a + num_els, static_cast(8)); + T c2b[num_els]; + std::fill(c2b, c2b + num_els, static_cast(9)); + column_wrapper c2a_w(c2a, c2a + num_els, mask); + column_wrapper c2b_w(c2b, c2b + num_els, mask); + cols.push_back(c2a_w.release()); + cols.push_back(c2b_w.release()); + cudf::table tbl2(std::move(cols)); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp new file mode 100644 index 00000000000..b64cd230bc6 --- /dev/null +++ b/cpp/tests/io/parquet_common.cpp @@ -0,0 +1,798 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "parquet_common.hpp" + +#include + +// Global environment for temporary files +cudf::test::TempDirTestEnvironment* const temp_env = + static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + +template +std::unique_ptr create_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + bool include_validity, + Elements elements) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + std::vector> src_cols(num_columns); + for (int idx = 0; idx < num_columns; idx++) { + if (include_validity) { + src_cols[idx] = + cudf::test::fixed_width_column_wrapper(elements, elements + num_rows, valids); + } else { + src_cols[idx] = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); + } + } + std::vector> columns(num_columns); + std::transform(src_cols.begin(), + src_cols.end(), + columns.begin(), + [](cudf::test::fixed_width_column_wrapper& in) { + auto ret = in.release(); + // pre-cache the null count + [[maybe_unused]] auto const nulls = ret->has_nulls(); + return ret; + }); + return std::make_unique(std::move(columns)); +} + +template +std::unique_ptr create_random_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + bool include_validity) +{ + auto rand_elements = + cudf::detail::make_counting_transform_iterator(0, [](T i) { return rand(); }); + return create_fixed_table(num_columns, num_rows, include_validity, rand_elements); +} + +template +std::unique_ptr create_compressible_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + cudf::size_type period, + bool include_validity) +{ + auto compressible_elements = + cudf::detail::make_counting_transform_iterator(0, [period](T i) { return i / period; }); + return create_fixed_table(num_columns, num_rows, include_validity, compressible_elements); +} + 
+template std::unique_ptr create_random_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + bool include_validity); +template std::unique_ptr create_random_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + bool include_validity); + +template std::unique_ptr create_compressible_fixed_table( + cudf::size_type num_columns, + cudf::size_type num_rows, + cudf::size_type period, + bool include_validity); + +template std::unique_ptr create_compressible_fixed_table( + cudf::size_type num_columns, + cudf::size_type num_rows, + cudf::size_type period, + bool include_validity); + +// this function replicates the "list_gen" function in +// python/cudf/cudf/tests/test_parquet.py +template +std::unique_ptr make_parquet_list_list_col( + int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0 ? 1 : 0; }); + + // root list + std::vector row_offsets(num_rows + 1); + int row_offset_count = 0; + { + int offset = 0; + for (int idx = 0; idx < (num_rows) + 1; idx++) { + row_offsets[row_offset_count] = offset; + if (!include_validity || valids[idx]) { offset += lists_per_row; } + row_offset_count++; + } + } + cudf::test::fixed_width_column_wrapper offsets(row_offsets.begin(), + row_offsets.begin() + row_offset_count); + + // child list + std::vector child_row_offsets((num_rows * lists_per_row) + 1); + int child_row_offset_count = 0; + { + int offset = 0; + for (int idx = 0; idx < (num_rows * lists_per_row); idx++) { + int row_index = idx / lists_per_row; + if (include_validity && !valids[row_index]) { continue; } + + child_row_offsets[child_row_offset_count] = offset; + offset += list_size; + child_row_offset_count++; + } + child_row_offsets[child_row_offset_count++] = offset; + } + cudf::test::fixed_width_column_wrapper child_offsets( + child_row_offsets.begin(), child_row_offsets.begin() + child_row_offset_count); + + 
// child values + std::vector child_values(num_rows * lists_per_row * list_size); + T first_child_value_index = skip_rows * lists_per_row * list_size; + int child_value_count = 0; + { + for (int idx = 0; idx < (num_rows * lists_per_row * list_size); idx++) { + int row_index = idx / (lists_per_row * list_size); + + int val = first_child_value_index; + first_child_value_index++; + + if (include_validity && !valids[row_index]) { continue; } + + child_values[child_value_count] = val; + child_value_count++; + } + } + // validity by value instead of index + auto valids2 = cudf::detail::make_counting_transform_iterator( + 0, [list_size](auto i) { return (i % list_size) % 2 == 0 ? 1 : 0; }); + auto child_data = include_validity + ? cudf::test::fixed_width_column_wrapper( + child_values.begin(), child_values.begin() + child_value_count, valids2) + : cudf::test::fixed_width_column_wrapper( + child_values.begin(), child_values.begin() + child_value_count); + + int child_offsets_size = static_cast(child_offsets).size() - 1; + auto child = cudf::make_lists_column( + child_offsets_size, child_offsets.release(), child_data.release(), 0, rmm::device_buffer{}); + + int offsets_size = static_cast(offsets).size() - 1; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + offsets_size); + return include_validity + ? 
cudf::make_lists_column( + offsets_size, offsets.release(), std::move(child), null_count, std::move(null_mask)) + : cudf::make_lists_column( + offsets_size, offsets.release(), std::move(child), 0, rmm::device_buffer{}); +} + +template std::unique_ptr make_parquet_list_list_col( + int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity); + +template +std::vector random_values(size_t size) +{ + std::vector values(size); + + using T1 = T; + using uniform_distribution = + typename std::conditional_t, + std::bernoulli_distribution, + std::conditional_t, + std::uniform_real_distribution, + std::uniform_int_distribution>>; + + static constexpr auto seed = 0xf00d; + static std::mt19937 engine{seed}; + static uniform_distribution dist{}; + std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); + + return values; +} + +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); +template std::vector random_values(size_t size); + +// given a datasource pointing to a parquet file, read the footer +// of the file to populate the FileMetaData pointed to by file_meta_data. +// throws cudf::logic_error if the file or metadata is invalid. 
+void read_footer(std::unique_ptr const& source, + cudf::io::parquet::detail::FileMetaData* file_meta_data) +{ + constexpr auto header_len = sizeof(cudf::io::parquet::detail::file_header_s); + constexpr auto ender_len = sizeof(cudf::io::parquet::detail::file_ender_s); + + auto const len = source->size(); + auto const header_buffer = source->host_read(0, header_len); + auto const header = + reinterpret_cast(header_buffer->data()); + auto const ender_buffer = source->host_read(len - ender_len, ender_len); + auto const ender = + reinterpret_cast(ender_buffer->data()); + + // checks for valid header, footer, and file length + ASSERT_GT(len, header_len + ender_len); + ASSERT_TRUE(header->magic == cudf::io::parquet::detail::parquet_magic && + ender->magic == cudf::io::parquet::detail::parquet_magic); + ASSERT_TRUE(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len)); + + // parquet files end with 4-byte footer_length and 4-byte magic == "PAR1" + // seek backwards from the end of the file (footer_length + 8 bytes of ender) + auto const footer_buffer = + source->host_read(len - ender->footer_len - ender_len, ender->footer_len); + cudf::io::parquet::detail::CompactProtocolReader cp(footer_buffer->data(), ender->footer_len); + + cp.read(file_meta_data); +} + +// returns the number of bits used for dictionary encoding data at the given page location. +// this assumes the data is uncompressed. +// throws cudf::logic_error if the page_loc data is invalid. 
+int read_dict_bits(std::unique_ptr const& source, + cudf::io::parquet::detail::PageLocation const& page_loc) +{ + CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); + CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); + + cudf::io::parquet::detail::PageHeader page_hdr; + auto const page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); + cudf::io::parquet::detail::CompactProtocolReader cp(page_buf->data(), page_buf->size()); + cp.read(&page_hdr); + + // cp should be pointing at the start of page data now. the first byte + // should be the encoding bit size + return cp.getb(); +} + +// read column index from datasource at location indicated by chunk, +// parse and return as a ColumnIndex struct. +// throws cudf::logic_error if the chunk data is invalid. +cudf::io::parquet::detail::ColumnIndex read_column_index( + std::unique_ptr const& source, + cudf::io::parquet::detail::ColumnChunk const& chunk) +{ + CUDF_EXPECTS(chunk.column_index_offset > 0, "Cannot find column index"); + CUDF_EXPECTS(chunk.column_index_length > 0, "Invalid column index length"); + + cudf::io::parquet::detail::ColumnIndex colidx; + auto const ci_buf = source->host_read(chunk.column_index_offset, chunk.column_index_length); + cudf::io::parquet::detail::CompactProtocolReader cp(ci_buf->data(), ci_buf->size()); + cp.read(&colidx); + return colidx; +} + +// read offset index from datasource at location indicated by chunk, +// parse and return as an OffsetIndex struct. +// throws cudf::logic_error if the chunk data is invalid. 
+cudf::io::parquet::detail::OffsetIndex read_offset_index( + std::unique_ptr const& source, + cudf::io::parquet::detail::ColumnChunk const& chunk) +{ + CUDF_EXPECTS(chunk.offset_index_offset > 0, "Cannot find offset index"); + CUDF_EXPECTS(chunk.offset_index_length > 0, "Invalid offset index length"); + + cudf::io::parquet::detail::OffsetIndex offidx; + auto const oi_buf = source->host_read(chunk.offset_index_offset, chunk.offset_index_length); + cudf::io::parquet::detail::CompactProtocolReader cp(oi_buf->data(), oi_buf->size()); + cp.read(&offidx); + return offidx; +} + +// Return as a Statistics from the column chunk +cudf::io::parquet::detail::Statistics const& get_statistics( + cudf::io::parquet::detail::ColumnChunk const& chunk) +{ + return chunk.meta_data.statistics; +} + +// read page header from datasource at location indicated by page_loc, +// parse and return as a PageHeader struct. +// throws cudf::logic_error if the page_loc data is invalid. +cudf::io::parquet::detail::PageHeader read_page_header( + std::unique_ptr const& source, + cudf::io::parquet::detail::PageLocation const& page_loc) +{ + CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); + CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); + + cudf::io::parquet::detail::PageHeader page_hdr; + auto const page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); + cudf::io::parquet::detail::CompactProtocolReader cp(page_buf->data(), page_buf->size()); + cp.read(&page_hdr); + return page_hdr; +} + +// ============================================================================= +// ---- test data for stats sort order tests + +namespace testdata { +// ----- most numerics. scale by 100 so all values fit in a single byte + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +ascending() +{ + int start = std::is_signed_v ? 
-num_ordered_rows / 2 : 0; + auto elements = + cudf::detail::make_counting_transform_iterator(start, [](auto i) { return i / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +descending() +{ + if (std::is_signed_v) { + auto elements = cudf::detail::make_counting_transform_iterator(-num_ordered_rows / 2, + [](auto i) { return -i / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } else { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (num_ordered_rows - i) / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } +} + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +unordered() +{ + if (std::is_signed_v) { + auto elements = cudf::detail::make_counting_transform_iterator( + -num_ordered_rows / 2, [](auto i) { return (i % 2 ? i : -i) / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } else { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (i % 2 ? 
i : num_ordered_rows - i) / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } +} + +// ----- bool + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i >= num_ordered_rows / 2; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> descending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i < num_ordered_rows / 2; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + switch (i / page_size_for_ordered_tests) { + case 0: return true; + case 1: return false; + case 2: return true; + default: return false; + } + }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +// ----- fixed point types + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + -num_ordered_rows / 2, [](auto i) { return T(i, numeric::scale_type{0}); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + -num_ordered_rows / 2, [](auto i) { return T(-i, numeric::scale_type{0}); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 
-num_ordered_rows / 2, [](auto i) { return T(i % 2 ? i : -i, numeric::scale_type{0}); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +// ----- chrono types +// ----- timstamp + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(typename T::duration(i)); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(typename T::duration(num_ordered_rows - i)); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(typename T::duration(i % 2 ? i : num_ordered_rows - i)); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +// ----- duration + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return T(i); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(num_ordered_rows - i); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(i % 2 ? 
i : num_ordered_rows - i); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +// ----- string_view + +template +std::enable_if_t, cudf::test::strings_column_wrapper> +ascending() +{ + char buf[10]; + auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { + sprintf(buf, "%09d", i); + return std::string(buf); + }); + return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t, cudf::test::strings_column_wrapper> +descending() +{ + char buf[10]; + auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { + sprintf(buf, "%09d", num_ordered_rows - i); + return std::string(buf); + }); + return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t, cudf::test::strings_column_wrapper> +unordered() +{ + char buf[10]; + auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { + sprintf(buf, "%09d", (i % 2 == 0) ? 
i : (num_ordered_rows - i)); + return std::string(buf); + }); + return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); +} + +#define FIXED_WIDTH_ORDERED_DATA(type) \ + template cudf::test::fixed_width_column_wrapper ascending(); \ + template cudf::test::fixed_width_column_wrapper descending(); \ + template cudf::test::fixed_width_column_wrapper unordered() + +FIXED_WIDTH_ORDERED_DATA(bool); +FIXED_WIDTH_ORDERED_DATA(int8_t); +FIXED_WIDTH_ORDERED_DATA(int16_t); +FIXED_WIDTH_ORDERED_DATA(int32_t); +FIXED_WIDTH_ORDERED_DATA(int64_t); +FIXED_WIDTH_ORDERED_DATA(uint8_t); +FIXED_WIDTH_ORDERED_DATA(uint16_t); +FIXED_WIDTH_ORDERED_DATA(uint32_t); +FIXED_WIDTH_ORDERED_DATA(uint64_t); +FIXED_WIDTH_ORDERED_DATA(float); +FIXED_WIDTH_ORDERED_DATA(double); +FIXED_WIDTH_ORDERED_DATA(cudf::duration_D); +FIXED_WIDTH_ORDERED_DATA(cudf::duration_s); +FIXED_WIDTH_ORDERED_DATA(cudf::duration_ms); +FIXED_WIDTH_ORDERED_DATA(cudf::duration_us); +FIXED_WIDTH_ORDERED_DATA(cudf::duration_ns); +FIXED_WIDTH_ORDERED_DATA(cudf::timestamp_D); +FIXED_WIDTH_ORDERED_DATA(cudf::timestamp_s); +FIXED_WIDTH_ORDERED_DATA(cudf::timestamp_ms); +FIXED_WIDTH_ORDERED_DATA(cudf::timestamp_us); +FIXED_WIDTH_ORDERED_DATA(cudf::timestamp_ns); +FIXED_WIDTH_ORDERED_DATA(numeric::decimal32); +FIXED_WIDTH_ORDERED_DATA(numeric::decimal64); +FIXED_WIDTH_ORDERED_DATA(numeric::decimal128); + +template cudf::test::strings_column_wrapper ascending(); +template cudf::test::strings_column_wrapper descending(); +template cudf::test::strings_column_wrapper unordered(); + +} // namespace testdata + +template +std::unique_ptr make_parquet_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + bool include_validity) +{ + std::vector row_sizes(num_rows); + + auto const min_values_per_row = include_validity ? 
0 : 1; + std::uniform_int_distribution dist{min_values_per_row, max_vals_per_row}; + std::generate_n(row_sizes.begin(), num_rows, [&]() { return cudf::size_type{dist(engine)}; }); + + std::vector offsets(num_rows + 1); + std::exclusive_scan(row_sizes.begin(), row_sizes.end(), offsets.begin(), 0); + offsets[num_rows] = offsets[num_rows - 1] + row_sizes.back(); + + std::vector values = random_values(offsets[num_rows]); + cudf::test::fixed_width_column_wrapper offsets_col(offsets.begin(), + offsets.end()); + + if (include_validity) { + auto valids = random_validity(engine); + auto values_col = + cudf::test::fixed_width_column_wrapper(values.begin(), values.end(), valids); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); + + auto col = cudf::make_lists_column( + num_rows, offsets_col.release(), values_col.release(), null_count, std::move(null_mask)); + return cudf::purge_nonempty_nulls(*col); + } else { + auto values_col = cudf::test::fixed_width_column_wrapper(values.begin(), values.end()); + return cudf::make_lists_column(num_rows, + offsets_col.release(), + values_col.release(), + 0, + cudf::create_null_mask(num_rows, cudf::mask_state::ALL_VALID)); + } +} + +template std::unique_ptr make_parquet_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + bool include_validity); +template std::unique_ptr make_parquet_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + bool include_validity); +template std::unique_ptr make_parquet_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + bool include_validity); +template std::unique_ptr make_parquet_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + bool include_validity); + +std::vector string_values(std::mt19937& engine, int num_rows, int max_string_len) +{ + static std::uniform_int_distribution char_dist{'a', 'z'}; + static std::uniform_int_distribution strlen_dist{1, max_string_len}; + + 
std::vector values(num_rows); + std::generate_n(values.begin(), values.size(), [&]() { + int str_len = strlen_dist(engine); + std::string res = ""; + for (int i = 0; i < str_len; i++) { + res += char_dist(engine); + } + return res; + }); + + return values; +} + +// make a random list column, with random string lengths of 0..max_string_len, +// and up to max_vals_per_row strings in each list. +std::unique_ptr make_parquet_string_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + int max_string_len, + bool include_validity) +{ + auto const range_min = include_validity ? 0 : 1; + + std::uniform_int_distribution dist{range_min, max_vals_per_row}; + + std::vector row_sizes(num_rows); + std::generate_n(row_sizes.begin(), num_rows, [&]() { return cudf::size_type{dist(engine)}; }); + + std::vector offsets(num_rows + 1); + std::exclusive_scan(row_sizes.begin(), row_sizes.end(), offsets.begin(), 0); + offsets[num_rows] = offsets[num_rows - 1] + row_sizes.back(); + + std::uniform_int_distribution strlen_dist{range_min, max_string_len}; + auto const values = string_values(engine, offsets[num_rows], max_string_len); + + cudf::test::fixed_width_column_wrapper offsets_col(offsets.begin(), + offsets.end()); + + if (include_validity) { + auto valids = random_validity(engine); + auto values_col = cudf::test::strings_column_wrapper(values.begin(), values.end(), valids); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); + + auto col = cudf::make_lists_column( + num_rows, offsets_col.release(), values_col.release(), null_count, std::move(null_mask)); + return cudf::purge_nonempty_nulls(*col); + } else { + auto values_col = cudf::test::strings_column_wrapper(values.begin(), values.end()); + return cudf::make_lists_column(num_rows, + offsets_col.release(), + values_col.release(), + 0, + cudf::create_null_mask(num_rows, cudf::mask_state::ALL_VALID)); + } +} + +template +std::pair 
create_parquet_typed_with_stats(std::string const& filename) +{ + auto col0 = testdata::ascending(); + auto col1 = testdata::descending(); + auto col2 = testdata::unordered(); + + auto const written_table = table_view{{col0, col1, col2}}; + auto const filepath = temp_env->get_temp_filepath("FilterTyped.parquet"); + { + cudf::io::table_input_metadata expected_metadata(written_table); + expected_metadata.column_metadata[0].set_name("col0"); + expected_metadata.column_metadata[1].set_name("col1"); + expected_metadata.column_metadata[2].set_name("col2"); + + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) + .metadata(std::move(expected_metadata)) + .row_group_size_rows(8000); + cudf::io::write_parquet(out_opts); + } + + std::vector> columns; + columns.push_back(col0.release()); + columns.push_back(col1.release()); + columns.push_back(col2.release()); + + return std::pair{cudf::table{std::move(columns)}, filepath}; +} + +#define TYPED_WITH_STATS(type) \ + template std::pair create_parquet_typed_with_stats( \ + std::string const& filename) + +TYPED_WITH_STATS(cudf::string_view); +TYPED_WITH_STATS(bool); +TYPED_WITH_STATS(int8_t); +TYPED_WITH_STATS(int16_t); +TYPED_WITH_STATS(int32_t); +TYPED_WITH_STATS(int64_t); +TYPED_WITH_STATS(uint8_t); +TYPED_WITH_STATS(uint16_t); +TYPED_WITH_STATS(uint32_t); +TYPED_WITH_STATS(uint64_t); +TYPED_WITH_STATS(float); +TYPED_WITH_STATS(double); +// TYPED_WITH_STATS(cudf::duration_D); +// TYPED_WITH_STATS(cudf::duration_s); +TYPED_WITH_STATS(cudf::duration_ms); +TYPED_WITH_STATS(cudf::duration_us); +TYPED_WITH_STATS(cudf::duration_ns); +TYPED_WITH_STATS(cudf::timestamp_D); +// TYPED_WITH_STATS(cudf::timestamp_s); +TYPED_WITH_STATS(cudf::timestamp_ms); +TYPED_WITH_STATS(cudf::timestamp_us); +TYPED_WITH_STATS(cudf::timestamp_ns); +TYPED_WITH_STATS(numeric::decimal32); +TYPED_WITH_STATS(numeric::decimal64); +TYPED_WITH_STATS(numeric::decimal128); + +// 
utility functions for column index tests + +// compare two values. return -1 if v1 < v2, +// 0 if v1 == v2, and 1 if v1 > v2. +template +int32_t compare(T& v1, T& v2) +{ + return (v1 > v2) - (v1 < v2); +} + +// compare two binary statistics blobs based on their physical +// and converted types. returns -1 if v1 < v2, 0 if v1 == v2, and +// 1 if v1 > v2. +int32_t compare_binary(std::vector const& v1, + std::vector const& v2, + cudf::io::parquet::detail::Type ptype, + thrust::optional const& ctype) +{ + auto ctype_val = ctype.value_or(cudf::io::parquet::detail::UNKNOWN); + switch (ptype) { + case cudf::io::parquet::detail::INT32: + switch (ctype_val) { + case cudf::io::parquet::detail::UINT_8: + case cudf::io::parquet::detail::UINT_16: + case cudf::io::parquet::detail::UINT_32: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + default: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + } + + case cudf::io::parquet::detail::INT64: + if (ctype_val == cudf::io::parquet::detail::UINT_64) { + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + } + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + + case cudf::io::parquet::detail::FLOAT: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + + case cudf::io::parquet::detail::DOUBLE: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + + case cudf::io::parquet::detail::BYTE_ARRAY: { + int32_t v1sz = v1.size(); + int32_t v2sz = v2.size(); + int32_t ret = memcmp(v1.data(), v2.data(), std::min(v1sz, v2sz)); + if (ret != 0 or v1sz == v2sz) { return ret; } + return v1sz - v2sz; + } + + default: CUDF_FAIL("Invalid type in compare_binary"); + } + + return 0; +} + +void expect_compression_stats_empty(std::shared_ptr stats) +{ + EXPECT_EQ(stats->num_compressed_bytes(), 0); + EXPECT_EQ(stats->num_failed_bytes(), 0); + 
EXPECT_EQ(stats->num_skipped_bytes(), 0); + EXPECT_TRUE(std::isnan(stats->compression_ratio())); +} diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp new file mode 100644 index 00000000000..59ee85444f2 --- /dev/null +++ b/cpp/tests/io/parquet_common.hpp @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +template +using column_wrapper = + typename std::conditional, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>::type; +using column = cudf::column; +using table = cudf::table; +using table_view = cudf::table_view; + +// Global environment for temporary files +extern cudf::test::TempDirTestEnvironment* const temp_env; + +// TODO: Replace with `NumericTypes` when unsigned support is added. Issue #5352 +using SupportedTypes = cudf::test::Types; + +using ComparableAndFixedTypes = + cudf::test::Concat; + +using SupportedTimestampTypes = + cudf::test::Types; + +using ByteLikeTypes = cudf::test::Types; + +// These chrono types are not supported because parquet writer does not have a type to represent +// them. +using UnsupportedChronoTypes = + cudf::test::Types; +// Also fixed point types unsupported, because AST does not support them yet. 
+using SupportedTestTypes = cudf::test::RemoveIf, + cudf::test::ComparableTypes>; + +// removing duration_D, duration_s, and timestamp_s as they don't appear to be supported properly. +// see definition of UnsupportedChronoTypes above. +using DeltaDecimalTypes = cudf::test::Types; +using DeltaBinaryTypes = + cudf::test::Concat; +using SupportedDeltaTestTypes = + cudf::test::RemoveIf, DeltaBinaryTypes>; + +////////////////////////////////////////////////////////////////////// +// Test fixtures + +// Base test fixture for tests +struct ParquetWriterTest : public cudf::test::BaseFixture {}; + +// Base test fixture for tests +struct ParquetReaderTest : public cudf::test::BaseFixture {}; + +//////////////////////////////////////////////////////////////////// + +// Generates a vector of uniform random values of type T +template +std::vector random_values(size_t size); + +template +std::unique_ptr create_random_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + bool include_validity); + +template +std::unique_ptr create_compressible_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + cudf::size_type period, + bool include_validity); + +// this function replicates the "list_gen" function in +// python/cudf/cudf/tests/test_parquet.py +template +std::unique_ptr make_parquet_list_list_col( + int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity); + +// given a datasource pointing to a parquet file, read the footer +// of the file to populate the FileMetaData pointed to by file_meta_data. +// throws cudf::logic_error if the file or metadata is invalid. +void read_footer(std::unique_ptr const& source, + cudf::io::parquet::detail::FileMetaData* file_meta_data); + +// returns the number of bits used for dictionary encoding data at the given page location. +// this assumes the data is uncompressed. +// throws cudf::logic_error if the page_loc data is invalid. 
+int read_dict_bits(std::unique_ptr const& source, + cudf::io::parquet::detail::PageLocation const& page_loc); + +// read column index from datasource at location indicated by chunk, +// parse and return as a ColumnIndex struct. +// throws cudf::logic_error if the chunk data is invalid. +cudf::io::parquet::detail::ColumnIndex read_column_index( + std::unique_ptr const& source, + cudf::io::parquet::detail::ColumnChunk const& chunk); + +// read offset index from datasource at location indicated by chunk, +// parse and return as an OffsetIndex struct. +// throws cudf::logic_error if the chunk data is invalid. +cudf::io::parquet::detail::OffsetIndex read_offset_index( + std::unique_ptr const& source, + cudf::io::parquet::detail::ColumnChunk const& chunk); + +// Return as a Statistics from the column chunk +cudf::io::parquet::detail::Statistics const& get_statistics( + cudf::io::parquet::detail::ColumnChunk const& chunk); + +// read page header from datasource at location indicated by page_loc, +// parse and return as a PageHeader struct. +// throws cudf::logic_error if the page_loc data is invalid. +cudf::io::parquet::detail::PageHeader read_page_header( + std::unique_ptr const& source, + cudf::io::parquet::detail::PageLocation const& page_loc); + +// make a random validity iterator +inline auto random_validity(std::mt19937& engine) +{ + static std::bernoulli_distribution bn(0.7f); + return cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(engine); }); +} + +// make a random list column +template +std::unique_ptr make_parquet_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + bool include_validity); + +// return vector of random strings +std::vector string_values(std::mt19937& engine, int num_rows, int max_string_len); + +// make a random list column, with random string lengths of 0..max_string_len, +// and up to max_vals_per_row strings in each list. 
+std::unique_ptr make_parquet_string_list_col(std::mt19937& engine, + int num_rows, + int max_vals_per_row, + int max_string_len, + bool include_validity); + +template +std::pair create_parquet_typed_with_stats(std::string const& filename); + +int32_t compare_binary(std::vector const& v1, + std::vector const& v2, + cudf::io::parquet::detail::Type ptype, + thrust::optional const& ctype); + +void expect_compression_stats_empty(std::shared_ptr stats); + +// ============================================================================= +// ---- test data for stats sort order tests +// need at least 3 pages, and min page count is 5000, so need at least 15000 values. +// use 20000 to be safe. +static constexpr int num_ordered_rows = 20000; +static constexpr int page_size_for_ordered_tests = 5000; + +namespace testdata { + +// ----- most numerics +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +ascending(); + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +descending(); + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +unordered(); + +// ----- bool + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> ascending(); + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> +descending(); + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> unordered(); + +// ----- fixed point types + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending(); + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending(); + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered(); + +// ----- chrono types +// ----- timstamp + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending(); + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending(); + +template +std::enable_if_t(), 
cudf::test::fixed_width_column_wrapper> unordered(); + +// ----- duration + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending(); + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending(); + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered(); + +// ----- string_view + +template +std::enable_if_t, cudf::test::strings_column_wrapper> +ascending(); + +template +std::enable_if_t, cudf::test::strings_column_wrapper> +descending(); + +template +std::enable_if_t, cudf::test::strings_column_wrapper> +unordered(); + +} // namespace testdata diff --git a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp new file mode 100644 index 00000000000..49b6b8fd259 --- /dev/null +++ b/cpp/tests/io/parquet_misc_test.cpp @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "parquet_common.hpp" + +#include +#include + +#include +#include +#include + +//////////////////////////////// +// delta encoding writer tests + +// Test fixture for delta encoding tests +template +struct ParquetWriterDeltaTest : public ParquetWriterTest {}; + +TYPED_TEST_SUITE(ParquetWriterDeltaTest, SupportedDeltaTestTypes); + +TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaTestTypes) +{ + using T = TypeParam; + auto col0 = testdata::ascending(); + auto col1 = testdata::unordered(); + + auto const expected = table_view{{col0, col1}}; + + auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPacked.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(true) + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaTestTypesSliced) +{ + using T = TypeParam; + constexpr int num_rows = 4'000; + auto col0 = testdata::ascending(); + auto col1 = testdata::unordered(); + + auto const expected = table_view{{col0, col1}}; + auto expected_slice = cudf::slice(expected, {num_rows, 2 * num_rows}); + ASSERT_EQ(expected_slice[0].num_rows(), num_rows); + + auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPackedSliced.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) + .write_v2_headers(true) + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto 
result = cudf::io::read_parquet(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaListSliced) +{ + using T = TypeParam; + + constexpr int num_slice = 4'000; + constexpr int num_rows = 32 * 1024; + + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + // list + constexpr int vals_per_row = 4; + auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [vals_per_row](cudf::size_type idx) { return idx * vals_per_row; }); + cudf::test::fixed_width_column_wrapper c1_offsets(c1_offset_iter, + c1_offset_iter + num_rows + 1); + cudf::test::fixed_width_column_wrapper c1_vals( + values, values + (num_rows * vals_per_row), valids); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); + + auto _c1 = cudf::make_lists_column( + num_rows, c1_offsets.release(), c1_vals.release(), null_count, std::move(null_mask)); + auto c1 = cudf::purge_nonempty_nulls(*_c1); + + auto const expected = table_view{{*c1}}; + auto expected_slice = cudf::slice(expected, {num_slice, 2 * num_slice}); + ASSERT_EQ(expected_slice[0].num_rows(), num_slice); + + auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPackedListSliced.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) + .write_v2_headers(true) + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); +} + +//////////////////////// +// sized tests + +// Base 
test fixture for size-parameterized tests +class ParquetSizedTest : public ::cudf::test::BaseFixtureWithParam {}; + +// test the allowed bit widths for dictionary encoding +INSTANTIATE_TEST_SUITE_P(ParquetDictionaryTest, + ParquetSizedTest, + testing::Range(1, 25), + testing::PrintToStringParamName()); + +TEST_P(ParquetSizedTest, DictionaryTest) +{ + unsigned int const cardinality = (1 << (GetParam() - 1)) + 1; + unsigned int const nrows = std::max(cardinality * 3 / 2, 3'000'000U); + + auto elements = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { + return "a unique string value suffixed with " + std::to_string(i % cardinality); + }); + auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("DictionaryTest.parquet"); + // set row group size so that there will be only one row group + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::NONE) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .dictionary_policy(cudf::io::dictionary_policy::ALWAYS) + .row_group_size_rows(nrows) + .row_group_size_bytes(512 * 1024 * 1024); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + + // make sure dictionary was used + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd]() { + for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { + if (enc == 
cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_TRUE(used_dict()); + + // and check that the correct number of bits was used + auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); + auto const nbits = read_dict_bits(source, oi.page_locations[0]); + EXPECT_EQ(nbits, GetParam()); +} + +/////////////////////// +// comparable tests + +// Typed test fixture for comparable type tests +template +struct ParquetWriterComparableTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +TYPED_TEST_SUITE(ParquetWriterComparableTypeTest, ComparableAndFixedTypes); + +TYPED_TEST(ParquetWriterComparableTypeTest, ThreeColumnSorted) +{ + using T = TypeParam; + + auto col0 = testdata::ascending(); + auto col1 = testdata::descending(); + auto col2 = testdata::unordered(); + + auto const expected = table_view{{col0, col1, col2}}; + + auto const filepath = temp_env->get_temp_filepath("ThreeColumnSorted.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_size_rows(page_size_for_ordered_tests) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + ASSERT_GT(fmd.row_groups.size(), 0); + + auto const& columns = fmd.row_groups[0].columns; + ASSERT_EQ(columns.size(), static_cast(expected.num_columns())); + + // now check that the boundary order for chunk 1 is ascending, + // chunk 2 is descending, and chunk 3 is unordered + cudf::io::parquet::detail::BoundaryOrder expected_orders[] = { + cudf::io::parquet::detail::BoundaryOrder::ASCENDING, + cudf::io::parquet::detail::BoundaryOrder::DESCENDING, + 
cudf::io::parquet::detail::BoundaryOrder::UNORDERED}; + + for (std::size_t i = 0; i < columns.size(); i++) { + auto const ci = read_column_index(source, columns[i]); + EXPECT_EQ(ci.boundary_order, expected_orders[i]); + } +} diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp new file mode 100644 index 00000000000..5cb05ac7011 --- /dev/null +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -0,0 +1,2340 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "parquet_common.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +TEST_F(ParquetReaderTest, UserBounds) +{ + // trying to read more rows than there are should result in + // receiving the properly capped # of rows + { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("TooManyRows.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(16); + auto result = cudf::io::read_parquet(read_opts); + + // we should only get back 4 rows + EXPECT_EQ(result.tbl->view().column(0).size(), 4); + } + + // trying to read past the end of the # of actual rows should result + // in empty columns. 
+ { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("PastBounds.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).skip_rows(4); + auto result = cudf::io::read_parquet(read_opts); + + // we should get empty columns back + EXPECT_EQ(result.tbl->view().num_columns(), 4); + EXPECT_EQ(result.tbl->view().column(0).size(), 0); + } + + // trying to read 0 rows should result in empty columns + { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("ZeroRows.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(0); + auto result = cudf::io::read_parquet(read_opts); + + EXPECT_EQ(result.tbl->view().num_columns(), 4); + EXPECT_EQ(result.tbl->view().column(0).size(), 0); + } + + // trying to read 0 rows past the end of the # of actual rows should result + // in empty columns. 
+ { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("ZeroRowsPastBounds.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .skip_rows(4) + .num_rows(0); + auto result = cudf::io::read_parquet(read_opts); + + // we should get empty columns back + EXPECT_EQ(result.tbl->view().num_columns(), 4); + EXPECT_EQ(result.tbl->view().column(0).size(), 0); + } +} + +TEST_F(ParquetReaderTest, UserBoundsWithNulls) +{ + // clang-format off + cudf::test::fixed_width_column_wrapper col{{1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6, 7,7,7,7,7,7,7,7, 8,8,8,8,8,8,8,8} + ,{1,1,1,0,0,0,1,1, 1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0, 1,1,1,1,1,1,0,0, 1,0,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,0}}; + // clang-format on + cudf::table_view tbl({col}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNulls.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // skip_rows / num_rows + // clang-format off + std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, + {31, -1}, {32, -1}, {33, -1}, + {31, 5}, {32, 5}, {33, 5}, + {-1, 7}, {-1, 31}, {-1, 32}, {-1, 33}, + {62, -1}, {63, -1}, + {62, 2}, {63, 1}}; + // clang-format on + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = 
cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? static_cast(col).size() - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(col, slice_indices); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); + } +} + +TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) +{ + constexpr int num_rows = 32 * 1024; + + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + // int64 + cudf::test::fixed_width_column_wrapper c0(values, values + num_rows, valids); + + // list + constexpr int floats_per_row = 4; + auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; }); + cudf::test::fixed_width_column_wrapper c1_offsets(c1_offset_iter, + c1_offset_iter + num_rows + 1); + cudf::test::fixed_width_column_wrapper c1_floats( + values, values + (num_rows * floats_per_row), valids); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); + + auto _c1 = cudf::make_lists_column( + num_rows, c1_offsets.release(), c1_floats.release(), null_count, std::move(null_mask)); + auto c1 = cudf::purge_nonempty_nulls(*_c1); + + // list> + auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); + + // struct, int, float> + std::vector strings{ + "abc", "x", "bananas", "gpu", "minty", "backspace", "", "cayenne", "turbine", "soft"}; + std::uniform_int_distribution uni(0, strings.size() - 1); + auto string_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](cudf::size_type idx) { return strings[uni(gen)]; }); + constexpr int string_per_row = 3; + constexpr int num_string_rows = num_rows * string_per_row; + cudf::test::strings_column_wrapper 
string_col{string_iter, string_iter + num_string_rows}; + auto offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); + cudf::test::fixed_width_column_wrapper offsets(offset_iter, + offset_iter + num_rows + 1); + + auto _c3_valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 200; }); + std::vector c3_valids(num_rows); + std::copy(_c3_valids, _c3_valids + num_rows, c3_valids.begin()); + std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask(valids, valids + num_rows); + auto _c3_list = cudf::make_lists_column( + num_rows, offsets.release(), string_col.release(), null_count, std::move(null_mask)); + auto c3_list = cudf::purge_nonempty_nulls(*_c3_list); + cudf::test::fixed_width_column_wrapper c3_ints(values, values + num_rows, valids); + cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); + std::vector> c3_children; + c3_children.push_back(std::move(c3_list)); + c3_children.push_back(c3_ints.release()); + c3_children.push_back(c3_floats.release()); + cudf::test::structs_column_wrapper _c3(std::move(c3_children), c3_valids); + auto c3 = cudf::purge_nonempty_nulls(_c3); + + // write it out + cudf::table_view tbl({c0, *c1, *c2, *c3}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // read it back + std::vector> params{ + {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}}; + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = 
cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? num_rows - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(tbl, slice_indices); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected[0]); + } +} + +TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) +{ + constexpr int num_rows = 30 * 1000000; + + std::mt19937 gen(6747); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); + + // this file will have row groups of 1,000,000 each + cudf::table_view tbl({col}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsLarge.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // skip_rows / num_rows + // clang-format off + std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {1613470, -1}, {1999999, -1}, + {31, 1}, {32, 1}, {33, 1}, + // deliberately span some row group boundaries + {999000, 1001}, {999000, 2000}, {2999999, 2}, {13999997, -1}, + {16785678, 3}, {22996176, 31}, + {24001231, 17}, {29000001, 989999}, {29999999, 1} }; + // clang-format on + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(col, slice_indices); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); + } +} + +TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) +{ + constexpr int num_rows = 5 * 1000000; + auto colp = make_parquet_list_list_col(0, num_rows, 5, 8, true); + cudf::column_view col = *colp; + + // this file will have row groups of 1,000,000 each + cudf::table_view tbl({col}); + auto filepath = temp_env->get_temp_filepath("ListUserBoundsWithNullsLarge.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // skip_rows / num_rows + // clang-format off + std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {161470, -1}, {4499997, -1}, + {31, 1}, {32, 1}, {33, 1}, + // deliberately span some row group boundaries + {999000, 1001}, {999000, 2000}, {2999999, 2}, + {1678567, 3}, {4299676, 31}, + {4001231, 17}, {1900000, 989999}, {4999999, 1} }; + // clang-format on + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(col, slice_indices); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); + } +} + +TEST_F(ParquetReaderTest, ReorderedColumns) +{ + { + auto a = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; + auto b = cudf::test::fixed_width_column_wrapper{1, 2, 3}; + + cudf::table_view tbl{{a, b}}; + auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet"); + cudf::io::table_input_metadata md(tbl); + md.column_metadata[0].set_name("a"); + md.column_metadata[1].set_name("b"); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(md); + cudf::io::write_parquet(opts); + + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); + } + + { + auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3}; + auto b = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; + + cudf::table_view tbl{{a, b}}; + auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet"); + cudf::io::table_input_metadata md(tbl); + md.column_metadata[0].set_name("a"); + md.column_metadata[1].set_name("b"); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(md); + cudf::io::write_parquet(opts); + + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); + + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); + } + + auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3, 10, 20, 30}; + auto b = cudf::test::strings_column_wrapper{{"a", "", "c", "cats", "dogs", "owls"}, + {true, false, true, true, false, true}}; + auto c = cudf::test::fixed_width_column_wrapper{{15, 16, 17, 25, 26, 32}, + {false, true, true, true, true, false}}; + auto d = cudf::test::strings_column_wrapper{"ducks", "sheep", "cows", "fish", "birds", "ants"}; + + cudf::table_view tbl{{a, b, c, d}}; + auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet"); + cudf::io::table_input_metadata md(tbl); + md.column_metadata[0].set_name("a"); + md.column_metadata[1].set_name("b"); + md.column_metadata[2].set_name("c"); + md.column_metadata[3].set_name("d"); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl) + .metadata(std::move(md)); + cudf::io::write_parquet(opts); + + { + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"d", "a", "b", "c"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), c); + } + + { + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"c", "d", "a", "b"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), c); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), d); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), a); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), b); + } + + { + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"d", "c", "b", "a"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), c); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), a); + } +} + +TEST_F(ParquetReaderTest, SelectNestedColumn) +{ + // Struct>, + // flats:List> + // > + // > + + auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto input = table_view({*struct_2}); + + cudf::io::table_input_metadata input_metadata(input); + input_metadata.column_metadata[0].set_name("being"); + input_metadata.column_metadata[0].child(0).set_name("human?"); + input_metadata.column_metadata[0].child(1).set_name("particulars"); + input_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + input_metadata.column_metadata[0].child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("SelectNestedColumn.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) + .metadata(std::move(input_metadata)); + 
cudf::io::write_parquet(args); + + { // Test selecting a single leaf from the table + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) + .columns({"being.particulars.age"}); + auto const result = cudf::io::read_parquet(read_args); + + auto expect_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + auto expect_s_1 = cudf::test::structs_column_wrapper{{expect_ages_col}, {1, 1, 1, 1, 0, 1}}; + auto expect_s_2 = + cudf::test::structs_column_wrapper{{expect_s_1}, {0, 1, 1, 1, 1, 1}}.release(); + auto expected = table_view({*expect_s_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("particulars"); + expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } + + { // Test selecting a non-leaf and expecting all hierarchy from that node onwards + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) + .columns({"being.particulars"}); + auto const result = cudf::io::read_parquet(read_args); + + auto expected_weights_col = + cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto expected_s_1 = cudf::test::structs_column_wrapper{ + {expected_weights_col, expected_ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto expect_s_2 = + cudf::test::structs_column_wrapper{{expected_s_1}, {0, 1, 1, 1, 1, 1}}.release(); + auto expected = table_view({*expect_s_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); 
+ expected_metadata.column_metadata[0].child(0).set_name("particulars"); + expected_metadata.column_metadata[0].child(0).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(0).child(1).set_name("age"); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } + + { // Test selecting struct children out of order + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) + .columns({"being.particulars.age", "being.particulars.weight", "being.human?"}); + auto const result = cudf::io::read_parquet(read_args); + + auto expected_weights_col = + cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto expected_is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto expect_s_1 = cudf::test::structs_column_wrapper{{expected_ages_col, expected_weights_col}, + {1, 1, 1, 1, 0, 1}}; + + auto expect_s_2 = + cudf::test::structs_column_wrapper{{expect_s_1, expected_is_human_col}, {0, 1, 1, 1, 1, 1}} + .release(); + + auto expected = table_view({*expect_s_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("particulars"); + expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); + expected_metadata.column_metadata[0].child(0).child(1).set_name("weight"); + expected_metadata.column_metadata[0].child(1).set_name("human?"); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } +} + +TEST_F(ParquetReaderTest, DecimalRead) +{ + { + /* We could add a dataset to include this 
file, but we don't want tests in cudf to have data. + This test is a temporary test until python gains the ability to write decimal, so we're + embedding + a parquet file directly into the code here to prevent issues with finding the file */ + unsigned char const decimals_parquet[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xb0, 0x03, 0x15, 0xb8, 0x03, 0x2c, 0x15, 0x6a, + 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, + 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, 0x00, 0x00, 0x00, 0xd8, 0x01, 0xf0, 0xd7, 0x04, 0x00, + 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x68, 0x12, 0xdc, 0xff, 0xbd, 0x18, 0xfd, 0xff, 0x64, + 0x13, 0x80, 0x00, 0xb3, 0x5d, 0x62, 0x00, 0x90, 0x35, 0xa9, 0xff, 0xa2, 0xde, 0xe3, 0xff, + 0xe9, 0xbf, 0x96, 0xff, 0x1f, 0x8a, 0x98, 0xff, 0xb1, 0x50, 0x34, 0x00, 0x88, 0x24, 0x59, + 0x00, 0x2a, 0x33, 0xbe, 0xff, 0xd5, 0x16, 0xbc, 0xff, 0x13, 0x50, 0x8d, 0xff, 0xcb, 0x63, + 0x2d, 0x00, 0x80, 0x8f, 0xbe, 0xff, 0x82, 0x40, 0x10, 0x00, 0x84, 0x68, 0x70, 0xff, 0x9b, + 0x69, 0x78, 0x00, 0x14, 0x6c, 0x10, 0x00, 0x50, 0xd9, 0xe1, 0xff, 0xaa, 0xcd, 0x6a, 0x00, + 0xcf, 0xb1, 0x28, 0x00, 0x77, 0x57, 0x8d, 0x00, 0xee, 0x05, 0x79, 0x00, 0xf0, 0x15, 0xeb, + 0xff, 0x02, 0xe2, 0x06, 0x00, 0x87, 0x43, 0x86, 0x00, 0xf8, 0x2d, 0x2e, 0x00, 0xee, 0x2e, + 0x98, 0xff, 0x39, 0xcb, 0x4d, 0x00, 0x1e, 0x6b, 0xea, 0xff, 0x80, 0x8e, 0x6c, 0xff, 0x97, + 0x25, 0x26, 0x00, 0x4d, 0x0d, 0x0a, 0x00, 0xca, 0x64, 0x7f, 0x00, 0xf4, 0xbe, 0xa1, 0xff, + 0xe2, 0x12, 0x6c, 0xff, 0xbd, 0x77, 0xae, 0xff, 0xf9, 0x4b, 0x36, 0x00, 0xb0, 0xe3, 0x79, + 0xff, 0xa2, 0x2a, 0x29, 0x00, 0xcd, 0x06, 0xbc, 0xff, 0x2d, 0xa3, 0x7e, 0x00, 0xa9, 0x08, + 0xa1, 0xff, 0xbf, 0x81, 0xd0, 0xff, 0x4f, 0x03, 0x73, 0x00, 0xb0, 0x99, 0x0c, 0x00, 0xbd, + 0x6f, 0xf8, 0xff, 0x6b, 0x02, 0x05, 0x00, 0xc1, 0xe1, 0xba, 0xff, 0x81, 0x69, 0x67, 0xff, + 0x7f, 0x96, 0x98, 0x00, 0x15, 0x00, 0x15, 0xd0, 0x06, 0x15, 0xda, 0x06, 0x2c, 0x15, 0x6a, + 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 
0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, + 0xf3, 0x5a, 0x00, 0x00, 0x18, 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, + 0x00, 0x00, 0xa8, 0x03, 0xf4, 0xa7, 0x01, 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, + 0x55, 0x6f, 0xc5, 0xe4, 0x9f, 0x1a, 0x00, 0x00, 0x47, 0x89, 0x0a, 0xe8, 0x58, 0xf0, 0xff, + 0xff, 0x63, 0xee, 0x21, 0xdd, 0xdd, 0xca, 0xff, 0xff, 0xbe, 0x6f, 0x3b, 0xaa, 0xe9, 0x3d, + 0x00, 0x00, 0xd6, 0x91, 0x2a, 0xb7, 0x08, 0x02, 0x00, 0x00, 0x75, 0x45, 0x2c, 0xd7, 0x76, + 0x0c, 0x00, 0x00, 0x54, 0x49, 0x92, 0x44, 0x9c, 0xbf, 0xff, 0xff, 0x41, 0xa9, 0x6d, 0xec, + 0x7a, 0xd0, 0xff, 0xff, 0x27, 0xa0, 0x23, 0x41, 0x44, 0xc1, 0xff, 0xff, 0x18, 0xd4, 0xe1, + 0x30, 0xd3, 0xe0, 0xff, 0xff, 0x59, 0xac, 0x14, 0xf4, 0xec, 0x58, 0x00, 0x00, 0x2c, 0x17, + 0x29, 0x57, 0x44, 0x13, 0x00, 0x00, 0xa2, 0x0d, 0x4a, 0xcc, 0x63, 0xff, 0xff, 0xff, 0x81, + 0x33, 0xbc, 0xda, 0xd5, 0xda, 0xff, 0xff, 0x4c, 0x05, 0xf4, 0x78, 0x19, 0xea, 0xff, 0xff, + 0x06, 0x71, 0x25, 0xde, 0x5a, 0xaf, 0xff, 0xff, 0x95, 0x32, 0x5f, 0x76, 0x98, 0xb3, 0xff, + 0xff, 0xf1, 0x34, 0x3c, 0xbf, 0xa8, 0xbe, 0xff, 0xff, 0x27, 0x73, 0x40, 0x0c, 0x7d, 0xcd, + 0xff, 0xff, 0x68, 0xa9, 0xc2, 0xe9, 0x2c, 0x03, 0x00, 0x00, 0x3f, 0x79, 0xd9, 0x04, 0x8c, + 0xe5, 0xff, 0xff, 0x91, 0xb4, 0x9b, 0xe3, 0x8f, 0x21, 0x00, 0x00, 0xb8, 0x20, 0xc8, 0xc2, + 0x4d, 0xa6, 0xff, 0xff, 0x47, 0xfa, 0xde, 0x36, 0x4a, 0xf3, 0xff, 0xff, 0x72, 0x80, 0x94, + 0x59, 0xdd, 0x4e, 0x00, 0x00, 0x29, 0xe4, 0xd6, 0x43, 0xb0, 0xf0, 0xff, 0xff, 0x68, 0x36, + 0xbc, 0x2d, 0xd1, 0xa9, 0xff, 0xff, 0xbc, 0xe4, 0xbe, 0xd7, 0xed, 0x1b, 0x00, 0x00, 0x02, + 0x8b, 0xcb, 0xd7, 0xed, 0x47, 0x00, 0x00, 0x3c, 0x06, 0xe4, 0xda, 0xc7, 0x47, 0x00, 0x00, + 0xf3, 0x39, 0x55, 0x28, 0x97, 0xba, 0xff, 0xff, 0x07, 0x79, 0x38, 0x4e, 0xe0, 0x21, 0x00, + 0x00, 0xde, 0xed, 0x1c, 0x23, 0x09, 0x49, 0x00, 0x00, 0x49, 0x46, 0x49, 0x5d, 0x8f, 0x34, + 0x00, 0x00, 0x38, 0x18, 0x50, 0xf6, 0xa1, 0x11, 0x00, 0x00, 0xdf, 0xb8, 0x19, 0x14, 0xd1, + 0xe1, 0xff, 0xff, 0x2c, 0x56, 
0x72, 0x93, 0x64, 0x3f, 0x00, 0x00, 0x1c, 0xe0, 0xbe, 0x87, + 0x7d, 0xf9, 0xff, 0xff, 0x73, 0x0e, 0x3c, 0x01, 0x91, 0xf9, 0xff, 0xff, 0xb2, 0x37, 0x85, + 0x81, 0x5f, 0x54, 0x00, 0x00, 0x58, 0x44, 0xb0, 0x1a, 0xac, 0xbb, 0xff, 0xff, 0x36, 0xbf, + 0xbe, 0x5e, 0x22, 0xff, 0xff, 0xff, 0x06, 0x20, 0xa0, 0x23, 0x0d, 0x3b, 0x00, 0x00, 0x19, + 0xc6, 0x49, 0x0a, 0x00, 0xcf, 0xff, 0xff, 0x4f, 0xcd, 0xc6, 0x95, 0x4b, 0xf1, 0xff, 0xff, + 0xa3, 0x59, 0xaf, 0x65, 0xec, 0xe9, 0xff, 0xff, 0x58, 0xef, 0x05, 0x50, 0x63, 0xe4, 0xff, + 0xff, 0xc7, 0x6a, 0x9e, 0xf1, 0x69, 0x20, 0x00, 0x00, 0xd1, 0xb3, 0xc9, 0x14, 0xb2, 0x29, + 0x00, 0x00, 0x1d, 0x48, 0x16, 0x70, 0xf0, 0x40, 0x00, 0x00, 0x01, 0xc0, 0x85, 0xef, 0x0c, + 0xa5, 0xff, 0xff, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x15, 0x00, 0x15, 0x90, + 0x0d, 0x15, 0x9a, 0x0d, 0x2c, 0x15, 0x6a, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, + 0x02, 0x28, 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, + 0xff, 0xff, 0xff, 0xff, 0x18, 0x10, 0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, + 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xc8, 0x06, 0xf4, 0x47, 0x03, + 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x05, 0x49, 0xf7, 0xfc, 0x89, 0x3d, 0x3e, + 0x20, 0x07, 0x72, 0x3e, 0xa1, 0x66, 0x81, 0x67, 0x80, 0x23, 0x78, 0x06, 0x68, 0x0e, 0x78, + 0xf5, 0x08, 0xed, 0x20, 0xcd, 0x0e, 0x7f, 0x9c, 0x70, 0xa0, 0xb9, 0x16, 0x44, 0xb2, 0x41, + 0x62, 0xba, 0x82, 0xad, 0xe1, 0x12, 0x9b, 0xa6, 0x53, 0x8d, 0x20, 0x27, 0xd5, 0x84, 0x63, + 0xb8, 0x07, 0x4b, 0x5b, 0xa4, 0x1c, 0xa4, 0x1c, 0x17, 0xbf, 0x4b, 0x00, 0x24, 0x04, 0x56, + 0xa8, 0x52, 0xaf, 0x33, 0xf7, 0xad, 0x7c, 0xc8, 0x83, 0x25, 0x13, 0xaf, 0x80, 0x25, 0x6f, + 0xbd, 0xd1, 0x15, 0x69, 0x64, 0x20, 0x7b, 0xd7, 0x33, 0xba, 0x66, 0x29, 0x8a, 0x00, 0xda, + 0x42, 0x07, 0x2c, 0x6c, 0x39, 0x76, 0x9f, 0xdc, 0x17, 0xad, 0xb6, 0x58, 0xdf, 0x5f, 0x00, + 0x18, 0x3a, 0xae, 0x1c, 0xd6, 0x5f, 0x9d, 0x78, 0x8d, 0x73, 0xdd, 0x3e, 0xd6, 0x18, 0x33, + 0x40, 
0xe4, 0x36, 0xde, 0xb0, 0xb7, 0x33, 0x2a, 0x6b, 0x08, 0x03, 0x6c, 0x6d, 0x8f, 0x13, + 0x93, 0xd0, 0xd7, 0x87, 0x62, 0x63, 0x53, 0xfb, 0xd8, 0xbb, 0xc9, 0x54, 0x90, 0xd6, 0xa9, + 0x8f, 0xc8, 0x60, 0xbd, 0xec, 0x75, 0x23, 0x9a, 0x21, 0xec, 0xe4, 0x86, 0x43, 0xd7, 0xc1, + 0x88, 0xdc, 0x82, 0x00, 0x32, 0x79, 0xc9, 0x2b, 0x70, 0x85, 0xb7, 0x25, 0xa1, 0xcc, 0x7d, + 0x0b, 0x29, 0x03, 0xea, 0x80, 0xff, 0x9b, 0xf3, 0x24, 0x7f, 0xd1, 0xff, 0xf0, 0x22, 0x65, + 0x85, 0x99, 0x17, 0x63, 0xc2, 0xc0, 0xb7, 0x62, 0x05, 0xda, 0x7a, 0xa0, 0xc3, 0x2a, 0x6f, + 0x1f, 0xee, 0x1f, 0x31, 0xa8, 0x42, 0x80, 0xe4, 0xb7, 0x6c, 0xf6, 0xac, 0x47, 0xb0, 0x17, + 0x69, 0xcb, 0xff, 0x66, 0x8a, 0xd6, 0x25, 0x00, 0xf3, 0xcf, 0x0a, 0xaf, 0xf8, 0x92, 0x8a, + 0xa0, 0xdf, 0x71, 0x13, 0x8d, 0x9d, 0xff, 0x7e, 0xe0, 0x0a, 0x52, 0xf1, 0x97, 0x01, 0xa9, + 0x73, 0x27, 0xfd, 0x63, 0x58, 0x00, 0x32, 0xa6, 0xf6, 0x78, 0xb8, 0xe4, 0xfd, 0x20, 0x7c, + 0x90, 0xee, 0xad, 0x8c, 0xc9, 0x71, 0x35, 0x66, 0x71, 0x3c, 0xe0, 0xe4, 0x0b, 0xbb, 0xa0, + 0x50, 0xe9, 0xf2, 0x81, 0x1d, 0x3a, 0x95, 0x94, 0x00, 0xd5, 0x49, 0x00, 0x07, 0xdf, 0x21, + 0x53, 0x36, 0x8d, 0x9e, 0xd9, 0xa5, 0x52, 0x4d, 0x0d, 0x29, 0x74, 0xf0, 0x40, 0xbd, 0xda, + 0x63, 0x4e, 0xdd, 0x91, 0x8e, 0xa6, 0xa7, 0xf6, 0x78, 0x58, 0x3b, 0x0a, 0x5c, 0x60, 0x3c, + 0x15, 0x34, 0xf8, 0x2c, 0x21, 0xe3, 0x56, 0x1b, 0x9e, 0xd9, 0x56, 0xd3, 0x13, 0x2e, 0x80, + 0x2c, 0x36, 0xda, 0x1d, 0xc8, 0xfb, 0x52, 0xee, 0x17, 0xb3, 0x2b, 0xf3, 0xd2, 0xeb, 0x29, + 0xa0, 0x37, 0xa0, 0x12, 0xce, 0x1c, 0x50, 0x6a, 0xf4, 0x11, 0xcd, 0x96, 0x88, 0x3f, 0x43, + 0x78, 0xc0, 0x2c, 0x53, 0x6c, 0xa6, 0xdf, 0xb9, 0x9e, 0x93, 0xd4, 0x1e, 0xa9, 0x7f, 0x67, + 0xa6, 0xc1, 0x80, 0x46, 0x0f, 0x63, 0x7d, 0x15, 0xf2, 0x4c, 0xc5, 0xda, 0x11, 0x9a, 0x20, + 0x67, 0x27, 0xe8, 0x00, 0xec, 0x03, 0x1d, 0x15, 0xa7, 0x92, 0xb3, 0x1f, 0xda, 0x20, 0x92, + 0xd8, 0x00, 0xfb, 0x06, 0x80, 0xeb, 0x4b, 0x0c, 0xc1, 0x1f, 0x49, 0x40, 0x06, 0x8d, 0x8a, + 0xf8, 0x34, 0xb1, 0x0c, 0x1d, 0x20, 0xd0, 0x47, 0xe5, 0xb1, 0x7e, 0xf7, 
0xe4, 0xb4, 0x7e, + 0x9c, 0x84, 0x18, 0x61, 0x32, 0x4f, 0xc0, 0xc2, 0xb2, 0xcc, 0x63, 0xf6, 0xe1, 0x16, 0xd6, + 0xd9, 0x4b, 0x74, 0x13, 0x01, 0xa1, 0xe2, 0x00, 0xb7, 0x9e, 0xc1, 0x3a, 0xc5, 0xaf, 0xe8, + 0x54, 0x07, 0x2a, 0x20, 0xfd, 0x2c, 0x6f, 0xb9, 0x80, 0x18, 0x92, 0x87, 0xa0, 0x81, 0x24, + 0x60, 0x47, 0x17, 0x4f, 0xbc, 0xbe, 0xf5, 0x03, 0x69, 0x80, 0xe3, 0x10, 0x54, 0xd6, 0x68, + 0x7d, 0x75, 0xd3, 0x0a, 0x45, 0x38, 0x9e, 0xa9, 0xfd, 0x05, 0x40, 0xd2, 0x1e, 0x6f, 0x5c, + 0x30, 0x10, 0xfe, 0x9b, 0x9f, 0x6d, 0xc0, 0x9d, 0x6c, 0x17, 0x7d, 0x00, 0x09, 0xb6, 0x8a, + 0x31, 0x8e, 0x1b, 0x6b, 0x84, 0x1e, 0x79, 0xce, 0x10, 0x55, 0x59, 0x6a, 0x40, 0x16, 0xdc, + 0x9a, 0xcf, 0x4d, 0xb0, 0x8f, 0xac, 0xe3, 0x8d, 0xee, 0xd2, 0xef, 0x01, 0x8c, 0xe0, 0x2b, + 0x24, 0xe5, 0xb4, 0xe1, 0x86, 0x72, 0x00, 0x30, 0x07, 0xce, 0x02, 0x23, 0x41, 0x33, 0x40, + 0xf0, 0x9b, 0xc2, 0x2d, 0x30, 0xec, 0x3b, 0x17, 0xb2, 0x8f, 0x64, 0x7d, 0xcd, 0x70, 0x9e, + 0x80, 0x22, 0xb5, 0xdf, 0x6d, 0x2a, 0x43, 0xd4, 0x2b, 0x5a, 0xf6, 0x96, 0xa6, 0xea, 0x91, + 0x62, 0x80, 0x39, 0xf2, 0x5a, 0x8e, 0xc0, 0xb9, 0x29, 0x99, 0x17, 0xe7, 0x35, 0x2c, 0xf6, + 0x4d, 0x18, 0x00, 0x48, 0x10, 0x85, 0xb4, 0x3f, 0x89, 0x60, 0x49, 0x6e, 0xf0, 0xcd, 0x9d, + 0x92, 0xeb, 0x96, 0x80, 0xcf, 0xf9, 0xf1, 0x46, 0x1d, 0xc0, 0x49, 0xb3, 0x36, 0x2e, 0x24, + 0xc8, 0xdb, 0x41, 0x72, 0x20, 0xf5, 0xde, 0x5c, 0xf9, 0x4a, 0x6e, 0xa0, 0x0b, 0x13, 0xfc, + 0x2d, 0x17, 0x07, 0x16, 0x5e, 0x00, 0x3c, 0x54, 0x41, 0x0e, 0xa2, 0x0d, 0xf3, 0x48, 0x12, + 0x2e, 0x7c, 0xab, 0x3c, 0x59, 0x1c, 0x40, 0xca, 0xb0, 0x71, 0xc7, 0x29, 0xf0, 0xbb, 0x9f, + 0xf4, 0x3f, 0x25, 0x49, 0xad, 0xc2, 0x8f, 0x80, 0x04, 0x38, 0x6d, 0x35, 0x02, 0xca, 0xe6, + 0x02, 0x83, 0x89, 0x4e, 0x74, 0xdb, 0x08, 0x5a, 0x80, 0x13, 0x99, 0xd4, 0x26, 0xc1, 0x27, + 0xce, 0xb0, 0x98, 0x99, 0xca, 0xf6, 0x3e, 0x50, 0x49, 0xd0, 0xbf, 0xcb, 0x6f, 0xbe, 0x5b, + 0x92, 0x63, 0xde, 0x94, 0xd3, 0x8f, 0x07, 0x06, 0x0f, 0x2b, 0x80, 0x36, 0xf1, 0x77, 0xf6, + 0x29, 0x33, 0x13, 0xa9, 0x4a, 0x55, 0x3d, 0x6c, 
0xca, 0xdb, 0x4e, 0x40, 0xc4, 0x95, 0x54, + 0xf4, 0xe2, 0x8c, 0x1b, 0xa0, 0xfe, 0x30, 0x50, 0x9d, 0x62, 0xbc, 0x5c, 0x00, 0xb4, 0xc4, + 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x4b, + 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, 0xff, 0xff, + 0x15, 0x02, 0x19, 0x4c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x15, 0x06, 0x00, 0x15, 0x02, 0x25, 0x02, 0x18, 0x06, 0x64, 0x65, 0x63, + 0x37, 0x70, 0x34, 0x25, 0x0a, 0x15, 0x08, 0x15, 0x0e, 0x00, 0x15, 0x04, 0x25, 0x02, 0x18, + 0x07, 0x64, 0x65, 0x63, 0x31, 0x34, 0x70, 0x35, 0x25, 0x0a, 0x15, 0x0a, 0x15, 0x1c, 0x00, + 0x15, 0x0e, 0x15, 0x20, 0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, + 0x38, 0x25, 0x0a, 0x15, 0x24, 0x15, 0x4c, 0x00, 0x16, 0x6a, 0x19, 0x1c, 0x19, 0x3c, 0x26, + 0x08, 0x1c, 0x15, 0x02, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, 0x64, 0x65, 0x63, + 0x37, 0x70, 0x34, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xf6, 0x03, 0x16, 0xfe, 0x03, 0x26, 0x08, + 0x3c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, + 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0x86, 0x04, + 0x1c, 0x15, 0x04, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x31, + 0x34, 0x70, 0x35, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xa6, 0x07, 0x16, 0xb0, 0x07, 0x26, 0x86, + 0x04, 0x3c, 0x36, 0x02, 0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x18, + 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, + 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xb6, 0x0b, 0x1c, 0x15, 0x0e, 0x19, 0x35, 0x06, + 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x15, 0x02, + 0x16, 0x6a, 0x16, 0x86, 0x0e, 0x16, 0x90, 0x0e, 0x26, 0xb6, 0x0b, 0x3c, 0x36, 0x02, 0x28, + 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, + 0xff, 0xff, 0x18, 0x10, 
0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, + 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, + 0x00, 0x00, 0x16, 0xa2, 0x19, 0x16, 0x6a, 0x00, 0x19, 0x2c, 0x18, 0x18, 0x6f, 0x72, 0x67, + 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x2e, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x33, 0x2e, 0x30, 0x2e, 0x31, 0x00, 0x18, + 0x29, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, + 0x72, 0x6b, 0x2e, 0x73, 0x71, 0x6c, 0x2e, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2e, + 0x72, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xf4, 0x01, + 0x7b, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x22, 0x2c, 0x22, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x22, 0x3a, 0x5b, 0x7b, 0x22, 0x6e, + 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x34, 0x22, 0x2c, 0x22, + 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, + 0x37, 0x2c, 0x34, 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, + 0x22, 0x3a, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x22, 0x3a, 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, + 0x22, 0x64, 0x65, 0x63, 0x31, 0x34, 0x70, 0x35, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, + 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x31, 0x34, 0x2c, 0x35, + 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, + 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, + 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, + 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, + 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x33, 0x38, 0x2c, 0x31, 0x38, 0x29, + 
0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, 0x72, + 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, 0x7b, + 0x7d, 0x7d, 0x5d, 0x7d, 0x00, 0x18, 0x4a, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2d, + 0x6d, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x31, 0x30, + 0x2e, 0x31, 0x20, 0x28, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, + 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, + 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, + 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3c, 0x1c, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x1c, + 0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; + unsigned int decimals_parquet_len = 2366; + + cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); + auto result = cudf::io::read_parquet(read_opts); + + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 50; }); + + EXPECT_EQ(result.tbl->view().num_columns(), 3); + + int32_t col0_data[] = { + -2354584, -190275, 8393572, 6446515, -5687920, -1843550, -6897687, -6780385, 3428529, + 5842056, -4312278, -4450603, -7516141, 2974667, -4288640, 1065090, -9410428, 7891355, + 1076244, -1975984, 6999466, 2666959, 9262967, 7931374, -1370640, 451074, 8799111, + 3026424, -6803730, 5098297, -1414370, -9662848, 2499991, 658765, 8348874, -6177036, + -9694494, -5343299, 3558393, -8789072, 2697890, -4454707, 8299309, -6223703, -3112513, + 7537487, 825776, -495683, 328299, -4529727, 0, -9999999, 9999999}; + + EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), + sizeof(col0_data) / sizeof(col0_data[0])); + cudf::test::fixed_point_column_wrapper col0( + std::begin(col0_data), std::end(col0_data), validity, 
numeric::scale_type{-4}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); + + int64_t col1_data[] = {29274040266581, -17210335917753, -58420730139037, + 68073792696254, 2236456014294, 13704555677045, + -70797090469548, -52248605513407, -68976081919961, + -34277313883112, 97774730521689, 21184241014572, + -670882460254, -40862944054399, -24079852370612, + -88670167797498, -84007574359403, -71843004533519, + -55538016554201, 3491435293032, -29085437167297, + 36901882672273, -98622066122568, -13974902998457, + 86712597643378, -16835133643735, -94759096142232, + 30708340810940, 79086853262082, 78923696440892, + -76316597208589, 37247268714759, 80303592631774, + 57790350050889, 19387319851064, -33186875066145, + 69701203023404, -7157433049060, -7073790423437, + 92769171617714, -75127120182184, -951893180618, + 64927618310150, -53875897154023, -16168039035569, + -24273449166429, -30359781249192, 35639397345991, + 45844829680593, 71401416837149, 0, + -99999999999999, 99999999999999}; + + EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), + sizeof(col1_data) / sizeof(col1_data[0])); + cudf::test::fixed_point_column_wrapper col1( + std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); + + cudf::io::parquet_reader_options read_strict_opts = read_opts; + read_strict_opts.set_columns({"dec7p4", "dec14p5"}); + EXPECT_NO_THROW(cudf::io::read_parquet(read_strict_opts)); + } + { + // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4) + // dec12p11: Decimal(precision=12, scale=11) backed by FIXED_LENGTH_BYTE_ARRAY(length = 6) + // dec20p1: Decimal(precision=20, scale=1) backed by FIXED_LENGTH_BYTE_ARRAY(length = 9) + unsigned char const fixed_len_bytes_decimal_parquet[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xA8, 0x01, 0x15, 0xAE, 0x01, 0x2C, 0x15, 0x28, + 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 
0x04, 0x00, 0x97, 0x45, 0x72, + 0x18, 0x04, 0x00, 0x01, 0x81, 0x3B, 0x00, 0x00, 0x00, 0x54, 0xF0, 0x53, 0x04, 0x00, 0x00, + 0x00, 0x26, 0x01, 0x03, 0x00, 0x00, 0x61, 0x10, 0xCF, 0x00, 0x0A, 0xA9, 0x08, 0x00, 0x77, + 0x58, 0x6F, 0x00, 0x6B, 0xEE, 0xA4, 0x00, 0x92, 0xF8, 0x94, 0x00, 0x2E, 0x18, 0xD4, 0x00, + 0x4F, 0x45, 0x33, 0x00, 0x97, 0x45, 0x72, 0x00, 0x0D, 0xC2, 0x75, 0x00, 0x76, 0xAA, 0xAA, + 0x00, 0x30, 0x9F, 0x86, 0x00, 0x4B, 0x9D, 0xB1, 0x00, 0x4E, 0x4B, 0x3B, 0x00, 0x01, 0x81, + 0x3B, 0x00, 0x22, 0xD4, 0x53, 0x00, 0x72, 0xC4, 0xAF, 0x00, 0x43, 0x9B, 0x72, 0x00, 0x1D, + 0x91, 0xC3, 0x00, 0x45, 0x27, 0x48, 0x15, 0x00, 0x15, 0xF4, 0x01, 0x15, 0xFA, 0x01, 0x2C, + 0x15, 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, + 0xD7, 0x31, 0x99, 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x00, 0x00, + 0x7A, 0xF0, 0x79, 0x04, 0x00, 0x00, 0x00, 0x24, 0x01, 0x03, 0x02, 0x00, 0x54, 0x23, 0xCF, + 0x13, 0x0A, 0x00, 0x07, 0x22, 0xB1, 0x21, 0x7E, 0x00, 0x64, 0x19, 0xD6, 0xD2, 0xA5, 0x00, + 0x61, 0x7F, 0xF6, 0xB9, 0xB0, 0x00, 0xD0, 0x7F, 0x9C, 0xA9, 0xE9, 0x00, 0x65, 0x58, 0xF0, + 0xAD, 0xFB, 0x00, 0xBC, 0x61, 0xE2, 0x03, 0xDA, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, + 0x63, 0x4B, 0x4C, 0xFE, 0x45, 0x00, 0x7A, 0xA0, 0xD8, 0xD1, 0xC0, 0x00, 0xC0, 0x63, 0xF7, + 0x9D, 0x0A, 0x00, 0x88, 0x22, 0x0F, 0x1B, 0x25, 0x00, 0x1A, 0x80, 0x56, 0x34, 0xC7, 0x00, + 0x5F, 0x48, 0x61, 0x09, 0x7C, 0x00, 0x61, 0xEF, 0x92, 0x42, 0x2F, 0x00, 0xD5, 0xD7, 0x31, + 0x99, 0xA6, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x71, 0xDD, 0xE2, 0x22, 0x7B, 0x00, + 0x54, 0xBF, 0xAE, 0xE9, 0x3C, 0x15, 0x00, 0x15, 0xD4, 0x02, 0x15, 0xDC, 0x02, 0x2C, 0x15, + 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x04, 0x28, 0x09, 0x00, 0x7D, 0xFE, + 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, + 0xF1, 0xE3, 0x00, 0x00, 0x00, 0xAA, 0x01, 0xF0, 0xA9, 0x04, 0x00, 0x00, 0x00, 0x07, 0xBF, + 0xBF, 0x0F, 0x00, 0x7D, 0xFE, 0x02, 
0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x00, 0x7D, 0x9A, 0xCB, + 0xDA, 0x4B, 0x10, 0x8B, 0xAC, 0x00, 0x20, 0xBA, 0x97, 0x87, 0x2E, 0x3B, 0x4E, 0x04, 0x00, + 0x15, 0xBB, 0xC2, 0xDF, 0x2D, 0x25, 0x08, 0xB6, 0x00, 0x5C, 0x67, 0x0E, 0x36, 0x30, 0xF1, + 0xAC, 0xA4, 0x00, 0x44, 0xF1, 0x8E, 0xFB, 0x17, 0x5E, 0xE1, 0x96, 0x00, 0x64, 0x69, 0xF9, + 0x66, 0x3F, 0x11, 0xED, 0xB9, 0x00, 0x45, 0xB5, 0xDA, 0x14, 0x9C, 0xA3, 0xFA, 0x64, 0x00, + 0x26, 0x5F, 0xDE, 0xD7, 0x67, 0x95, 0xEF, 0xB1, 0x00, 0x35, 0xDB, 0x9B, 0x88, 0x46, 0xD0, + 0xA1, 0x0E, 0x00, 0x45, 0xA9, 0x92, 0x8E, 0x89, 0xD1, 0xAC, 0x4C, 0x00, 0x4C, 0xF1, 0xCB, + 0x27, 0x82, 0x3A, 0x7D, 0xB7, 0x00, 0x64, 0xD3, 0xD2, 0x2F, 0x9C, 0x83, 0x16, 0x75, 0x00, + 0x15, 0xDF, 0xC2, 0xA9, 0x63, 0xB8, 0x33, 0x65, 0x00, 0x27, 0x40, 0x28, 0x97, 0x05, 0x8E, + 0xE3, 0x46, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x22, 0x23, 0xF5, + 0xE8, 0x9D, 0x55, 0xD4, 0x9C, 0x00, 0x25, 0xB9, 0xD8, 0x87, 0x2D, 0xF1, 0xF2, 0x17, 0x15, + 0x02, 0x19, 0x4C, 0x48, 0x0C, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x5F, 0x73, 0x63, 0x68, 0x65, + 0x6D, 0x61, 0x15, 0x06, 0x00, 0x15, 0x0E, 0x15, 0x08, 0x15, 0x02, 0x18, 0x06, 0x64, 0x65, + 0x63, 0x37, 0x70, 0x33, 0x25, 0x0A, 0x15, 0x06, 0x15, 0x0E, 0x00, 0x15, 0x0E, 0x15, 0x0C, + 0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x25, 0x0A, 0x15, + 0x16, 0x15, 0x18, 0x00, 0x15, 0x0E, 0x15, 0x12, 0x15, 0x02, 0x18, 0x07, 0x64, 0x65, 0x63, + 0x32, 0x30, 0x70, 0x31, 0x25, 0x0A, 0x15, 0x02, 0x15, 0x28, 0x00, 0x16, 0x28, 0x19, 0x1C, + 0x19, 0x3C, 0x26, 0x08, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, + 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x15, 0x02, 0x16, 0x28, 0x16, 0xEE, 0x01, 0x16, 0xF4, + 0x01, 0x26, 0x08, 0x3C, 0x36, 0x02, 0x28, 0x04, 0x00, 0x97, 0x45, 0x72, 0x18, 0x04, 0x00, + 0x01, 0x81, 0x3B, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, + 0x26, 0xFC, 0x01, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, + 0x65, 0x63, 
0x31, 0x32, 0x70, 0x31, 0x31, 0x15, 0x02, 0x16, 0x28, 0x16, 0xC2, 0x02, 0x16, + 0xC8, 0x02, 0x26, 0xFC, 0x01, 0x3C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, 0xD7, 0x31, 0x99, + 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, + 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xC4, 0x04, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, + 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x15, 0x02, 0x16, + 0x28, 0x16, 0xAE, 0x03, 0x16, 0xB6, 0x03, 0x26, 0xC4, 0x04, 0x3C, 0x36, 0x04, 0x28, 0x09, + 0x00, 0x7D, 0xFE, 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, + 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, + 0x00, 0x00, 0x16, 0xDE, 0x07, 0x16, 0x28, 0x00, 0x19, 0x2C, 0x18, 0x18, 0x6F, 0x72, 0x67, + 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x2E, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x18, 0x05, 0x33, 0x2E, 0x30, 0x2E, 0x31, 0x00, 0x18, + 0x29, 0x6F, 0x72, 0x67, 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, + 0x72, 0x6B, 0x2E, 0x73, 0x71, 0x6C, 0x2E, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2E, + 0x72, 0x6F, 0x77, 0x2E, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xF4, 0x01, + 0x7B, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x22, 0x2C, 0x22, 0x66, 0x69, 0x65, 0x6C, 0x64, 0x73, 0x22, 0x3A, 0x5B, 0x7B, 0x22, 0x6E, + 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x22, 0x2C, 0x22, + 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, + 0x37, 0x2C, 0x33, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, + 0x22, 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, + 0x22, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x22, 0x2C, 0x22, 0x74, 
0x79, 0x70, + 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, 0x31, 0x32, 0x2C, + 0x31, 0x31, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, + 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, + 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x22, 0x2C, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, + 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, 0x32, 0x30, 0x2C, 0x31, 0x29, + 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, 0x3A, 0x74, 0x72, + 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3A, 0x7B, + 0x7D, 0x7D, 0x5D, 0x7D, 0x00, 0x18, 0x4A, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2D, + 0x6D, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x20, 0x31, 0x2E, 0x31, 0x30, + 0x2E, 0x31, 0x20, 0x28, 0x62, 0x75, 0x69, 0x6C, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, + 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, + 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, + 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3C, 0x1C, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x1C, + 0x00, 0x00, 0x00, 0xC5, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31, + }; + + unsigned int parquet_len = 1226; + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{ + reinterpret_cast(fixed_len_bytes_decimal_parquet), parquet_len}); + auto result = cudf::io::read_parquet(read_opts); + EXPECT_EQ(result.tbl->view().num_columns(), 3); + + auto validity_c0 = cudf::test::iterators::nulls_at({19}); + int32_t col0_data[] = {6361295, 698632, 7821423, 7073444, 9631892, 3021012, 5195059, + 9913714, 901749, 7776938, 3186566, 4955569, 5131067, 98619, + 2282579, 7521455, 4430706, 1937859, 4532040, 0}; + + 
EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), + sizeof(col0_data) / sizeof(col0_data[0])); + cudf::test::fixed_point_column_wrapper col0( + std::begin(col0_data), std::end(col0_data), validity_c0, numeric::scale_type{-3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); + + auto validity_c1 = cudf::test::iterators::nulls_at({18}); + int64_t col1_data[] = {361378026250, + 30646804862, + 429930238629, + 418758703536, + 895494171113, + 435283865083, + 809096053722, + -999999999999, + 426465099333, + 526684574144, + 826310892810, + 584686967589, + 113822282951, + 409236212092, + 420631167535, + 918438386086, + -999999999999, + 489053889147, + 0, + 363993164092}; + + EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), + sizeof(col1_data) / sizeof(col1_data[0])); + cudf::test::fixed_point_column_wrapper col1( + std::begin(col1_data), std::end(col1_data), validity_c1, numeric::scale_type{-11}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); + + auto validity_c2 = cudf::test::iterators::nulls_at({6, 14}); + __int128_t col2_data[] = {9078697037144433659, + 9050770539577117612, + 2358363961733893636, + 1566059559232276662, + 6658306200002735268, + 4967909073046397334, + 0, + 7235588493887532473, + 5023160741463849572, + 2765173712965988273, + 3880866513515749646, + 5019704400576359500, + 5544435986818825655, + 7265381725809874549, + 0, + 1576192427381240677, + 2828305195087094598, + 260308667809395171, + 2460080200895288476, + 2718441925197820439}; + + EXPECT_EQ(static_cast(result.tbl->view().column(2).size()), + sizeof(col2_data) / sizeof(col2_data[0])); + cudf::test::fixed_point_column_wrapper<__int128_t> col2( + std::begin(col2_data), std::end(col2_data), validity_c2, numeric::scale_type{-1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), col2); + } +} + +TEST_F(ParquetReaderTest, EmptyOutput) +{ + cudf::test::fixed_width_column_wrapper c0; + cudf::test::strings_column_wrapper c1; + 
cudf::test::fixed_point_column_wrapper c2({}, numeric::scale_type{2}); + cudf::test::lists_column_wrapper _c3{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}; + auto c3 = cudf::empty_like(_c3); + + cudf::test::fixed_width_column_wrapper sc0; + cudf::test::strings_column_wrapper sc1; + cudf::test::lists_column_wrapper _sc2{{1, 2}}; + std::vector> struct_children; + struct_children.push_back(sc0.release()); + struct_children.push_back(sc1.release()); + struct_children.push_back(cudf::empty_like(_sc2)); + cudf::test::structs_column_wrapper c4(std::move(struct_children)); + + table_view expected({c0, c1, c2, *c3, c4}); + + // set precision on the decimal column + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[2].set_decimal_precision(1); + + auto filepath = temp_env->get_temp_filepath("EmptyOutput.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + out_args.set_metadata(std::move(expected_metadata)); + cudf::io::write_parquet(out_args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_args); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetReaderTest, EmptyColumnsParam) +{ + srand(31337); + auto const expected = create_random_fixed_table(2, 4, false); + + std::vector out_buffer; + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}) + .columns({}); + auto const result = cudf::io::read_parquet(read_opts); + + EXPECT_EQ(result.tbl->num_columns(), 0); + EXPECT_EQ(result.tbl->num_rows(), 0); +} + +TEST_F(ParquetReaderTest, 
BinaryAsStrings) +{ + std::vector strings{ + "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + auto const num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto seq_col3 = random_values(num_rows); + auto validity = cudf::test::iterators::no_nulls(); + + column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper string_col{strings.begin(), strings.end()}; + column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; + cudf::test::lists_column_wrapper list_int_col{ + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}; + + auto output = table_view{{int_col, string_col, float_col, string_col, list_int_col}}; + cudf::io::table_input_metadata output_metadata(output); + output_metadata.column_metadata[0].set_name("col_other"); + output_metadata.column_metadata[1].set_name("col_string"); + output_metadata.column_metadata[2].set_name("col_float"); + output_metadata.column_metadata[3].set_name("col_string2").set_output_as_binary(true); + output_metadata.column_metadata[4].set_name("col_binary").set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("BinaryReadStrings.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output) + .metadata(std::move(output_metadata)); + cudf::io::write_parquet(out_opts); + + auto expected_string = table_view{{int_col, string_col, float_col, string_col, string_col}}; + auto expected_mixed = table_view{{int_col, string_col, float_col, list_int_col, list_int_col}}; + + cudf::io::parquet_reader_options in_opts = + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema({{}, {}, {}, {}, {}}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); + + std::vector md{ + {}, + {}, + {}, + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false)}; + + cudf::io::parquet_reader_options mixed_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + result = cudf::io::read_parquet(mixed_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_mixed, result.tbl->view()); +} + +TEST_F(ParquetReaderTest, NestedByteArray) +{ + constexpr auto num_rows = 8; + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto seq_col3 = random_values(num_rows); + auto const validity = cudf::test::iterators::no_nulls(); + + column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; + cudf::test::lists_column_wrapper list_list_int_col{ + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'F', 'r', 'i', 'd', 'a', 'y'}, + 
{'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}}; + + auto const expected = table_view{{int_col, float_col, list_list_int_col}}; + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0].set_name("col_other"); + output_metadata.column_metadata[1].set_name("col_float"); + output_metadata.column_metadata[2].set_name("col_binary").child(1).set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("NestedByteArray.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(output_metadata)); + cudf::io::write_parquet(out_opts); + + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); + + std::vector md{ + {}, + {}, + cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetReaderTest, StructByteArray) +{ + constexpr auto num_rows = 100; + + auto seq_col0 = random_values(num_rows); + auto const validity = cudf::test::iterators::no_nulls(); + + column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; + cudf::test::lists_column_wrapper list_of_int{{seq_col0.begin(), seq_col0.begin() + 50}, + {seq_col0.begin() + 50, seq_col0.end()}}; + auto struct_col = cudf::test::structs_column_wrapper{{list_of_int}, 
validity}; + + auto const expected = table_view{{struct_col}}; + EXPECT_EQ(1, expected.num_columns()); + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0] + .set_name("struct_binary") + .child(0) + .set_name("a") + .set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("StructByteArray.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(output_metadata)); + cudf::io::write_parquet(out_opts); + + std::vector md{cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetReaderTest, NestingOptimizationTest) +{ + // test nesting levels > cudf::io::parquet::detail::max_cacheable_nesting_decode_info deep. 
+ constexpr cudf::size_type num_nesting_levels = 16; + static_assert(num_nesting_levels > cudf::io::parquet::detail::max_cacheable_nesting_decode_info); + constexpr cudf::size_type rows_per_level = 2; + + constexpr cudf::size_type num_values = (1 << num_nesting_levels) * rows_per_level; + auto value_iter = thrust::make_counting_iterator(0); + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](cudf::size_type i) { return i % 2; }); + cudf::test::fixed_width_column_wrapper values(value_iter, value_iter + num_values, validity); + + // ~256k values with num_nesting_levels = 16 + int total_values_produced = num_values; + auto prev_col = values.release(); + for (int idx = 0; idx < num_nesting_levels; idx++) { + auto const depth = num_nesting_levels - idx; + auto const num_rows = (1 << (num_nesting_levels - idx)); + + auto offsets_iter = cudf::detail::make_counting_transform_iterator( + 0, [depth, rows_per_level](cudf::size_type i) { return i * rows_per_level; }); + total_values_produced += (num_rows + 1); + + cudf::test::fixed_width_column_wrapper offsets(offsets_iter, + offsets_iter + num_rows + 1); + auto c = cudf::make_lists_column(num_rows, offsets.release(), std::move(prev_col), 0, {}); + prev_col = std::move(c); + } + auto const& expect = prev_col; + + auto filepath = temp_env->get_temp_filepath("NestingDecodeCache.parquet"); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table_view{{*expect}}); + cudf::io::write_parquet(opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expect, result.tbl->get_column(0)); +} + +TEST_F(ParquetReaderTest, SingleLevelLists) +{ + unsigned char list_bytes[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0x28, 0x15, 0x28, 0x15, 0xa7, 0xce, 0x91, 0x8c, 0x06, + 0x1c, 0x15, 0x04, 0x15, 
0x00, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, + 0x02, 0x02, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, + 0x02, 0x19, 0x3c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, + 0x61, 0x15, 0x02, 0x00, 0x35, 0x00, 0x18, 0x01, 0x66, 0x15, 0x02, 0x15, 0x06, 0x4c, 0x3c, 0x00, + 0x00, 0x00, 0x15, 0x02, 0x25, 0x04, 0x18, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x00, 0x16, 0x02, + 0x19, 0x1c, 0x19, 0x1c, 0x26, 0x08, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x06, 0x19, 0x28, 0x01, + 0x66, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x15, 0x00, 0x16, 0x04, 0x16, 0x56, 0x16, 0x56, 0x26, + 0x08, 0x3c, 0x18, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, + 0x28, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x1c, 0x15, + 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x16, 0x56, 0x16, 0x02, 0x26, 0x08, 0x16, 0x56, + 0x14, 0x00, 0x00, 0x28, 0x13, 0x52, 0x41, 0x50, 0x49, 0x44, 0x53, 0x20, 0x53, 0x70, 0x61, 0x72, + 0x6b, 0x20, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x19, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x9f, 0x00, + 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; + + // read single level list reproducing parquet file + cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(list_bytes), sizeof(list_bytes)}); + auto table = cudf::io::read_parquet(read_opts); + + auto const c0 = table.tbl->get_column(0); + EXPECT_TRUE(c0.type().id() == cudf::type_id::LIST); + + auto const lc = cudf::lists_column_view(c0); + auto const child = lc.child(); + EXPECT_TRUE(child.type().id() == cudf::type_id::INT32); +} + +TEST_F(ParquetReaderTest, ChunkedSingleLevelLists) +{ + unsigned char list_bytes[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0x28, 0x15, 0x28, 0x15, 0xa7, 0xce, 0x91, 0x8c, 0x06, + 0x1c, 0x15, 0x04, 0x15, 0x00, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, + 0x02, 
0x02, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, + 0x02, 0x19, 0x3c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, + 0x61, 0x15, 0x02, 0x00, 0x35, 0x00, 0x18, 0x01, 0x66, 0x15, 0x02, 0x15, 0x06, 0x4c, 0x3c, 0x00, + 0x00, 0x00, 0x15, 0x02, 0x25, 0x04, 0x18, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x00, 0x16, 0x02, + 0x19, 0x1c, 0x19, 0x1c, 0x26, 0x08, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x06, 0x19, 0x28, 0x01, + 0x66, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x15, 0x00, 0x16, 0x04, 0x16, 0x56, 0x16, 0x56, 0x26, + 0x08, 0x3c, 0x18, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, + 0x28, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x1c, 0x15, + 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x16, 0x56, 0x16, 0x02, 0x26, 0x08, 0x16, 0x56, + 0x14, 0x00, 0x00, 0x28, 0x13, 0x52, 0x41, 0x50, 0x49, 0x44, 0x53, 0x20, 0x53, 0x70, 0x61, 0x72, + 0x6b, 0x20, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x19, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x9f, 0x00, + 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; + + auto reader = cudf::io::chunked_parquet_reader( + 1L << 31, + cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(list_bytes), sizeof(list_bytes)})); + int iterations = 0; + while (reader.has_next() && iterations < 10) { + auto chunk = reader.read_chunk(); + } + EXPECT_TRUE(iterations < 10); +} + +TEST_F(ParquetReaderTest, ReorderedReadMultipleFiles) +{ + constexpr auto num_rows = 50'000; + constexpr auto cardinality = 20'000; + + // table 1 + auto str1 = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return "cat " + std::to_string(i % cardinality); }); + auto cols1 = cudf::test::strings_column_wrapper(str1, str1 + num_rows); + + auto int1 = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % cardinality; }); + auto coli1 = cudf::test::fixed_width_column_wrapper(int1, int1 + num_rows); + + auto const 
expected1 = table_view{{cols1, coli1}}; + auto const swapped1 = table_view{{coli1, cols1}}; + + auto const filepath1 = temp_env->get_temp_filepath("LargeReorderedRead1.parquet"); + auto out_opts1 = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath1}, expected1) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(out_opts1); + + // table 2 + auto str2 = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return "dog " + std::to_string(i % cardinality); }); + auto cols2 = cudf::test::strings_column_wrapper(str2, str2 + num_rows); + + auto int2 = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (i % cardinality) + cardinality; }); + auto coli2 = cudf::test::fixed_width_column_wrapper(int2, int2 + num_rows); + + auto const expected2 = table_view{{cols2, coli2}}; + auto const swapped2 = table_view{{coli2, cols2}}; + + auto const filepath2 = temp_env->get_temp_filepath("LargeReorderedRead2.parquet"); + auto out_opts2 = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath2}, expected2) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(out_opts2); + + // read in both files swapping the columns + auto read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}}) + .columns({"_col1", "_col0"}); + auto result = cudf::io::read_parquet(read_opts); + auto sliced = cudf::slice(result.tbl->view(), {0, num_rows, num_rows, 2 * num_rows}); + CUDF_TEST_EXPECT_TABLES_EQUAL(sliced[0], swapped1); + CUDF_TEST_EXPECT_TABLES_EQUAL(sliced[1], swapped2); +} + +TEST_F(ParquetReaderTest, FilterSimple) +{ + srand(31337); + auto written_table = create_random_fixed_table(9, 9, false); + + auto filepath = temp_env->get_temp_filepath("FilterSimple.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *written_table); + cudf::io::write_parquet(args); + + // 
Filtering AST - table[0] < RAND_MAX/2 + auto literal_value = cudf::numeric_scalar(RAND_MAX / 2); + auto literal = cudf::ast::literal(literal_value); + auto col_ref_0 = cudf::ast::column_reference(0); + auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); + + auto predicate = cudf::compute_column(*written_table, filter_expression); + EXPECT_EQ(predicate->view().type().id(), cudf::type_id::BOOL8) + << "Predicate filter should return a boolean"; + auto expected = cudf::apply_boolean_mask(*written_table, *predicate); + // To make sure AST filters out some elements + EXPECT_LT(expected->num_rows(), written_table->num_rows()); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .filter(filter_expression); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +auto create_parquet_with_stats(std::string const& filename) +{ + auto col0 = testdata::ascending(); + auto col1 = testdata::descending(); + auto col2 = testdata::unordered(); + + auto const expected = table_view{{col0, col1, col2}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_uint32"); + expected_metadata.column_metadata[1].set_name("col_int64"); + expected_metadata.column_metadata[2].set_name("col_double"); + + auto const filepath = temp_env->get_temp_filepath(filename); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)) + .row_group_size_rows(8000) + .stats_level(cudf::io::statistics_freq::STATISTICS_ROWGROUP); + cudf::io::write_parquet(out_opts); + + std::vector> columns; + columns.push_back(col0.release()); + columns.push_back(col1.release()); + columns.push_back(col2.release()); + + return std::pair{cudf::table{std::move(columns)}, 
filepath}; +} + +TEST_F(ParquetReaderTest, FilterIdentity) +{ + auto [src, filepath] = create_parquet_with_stats("FilterIdentity.parquet"); + + // Filtering AST - identity function, always true. + auto literal_value = cudf::numeric_scalar(true); + auto literal = cudf::ast::literal(literal_value); + auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::IDENTITY, literal); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .filter(filter_expression); + auto result = cudf::io::read_parquet(read_opts); + + cudf::io::parquet_reader_options read_opts2 = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result2 = cudf::io::read_parquet(read_opts2); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *result2.tbl); +} + +TEST_F(ParquetReaderTest, FilterReferenceExpression) +{ + auto [src, filepath] = create_parquet_with_stats("FilterReferenceExpression.parquet"); + // Filtering AST - table[0] < 150 + auto literal_value = cudf::numeric_scalar(150); + auto literal = cudf::ast::literal(literal_value); + auto col_ref_0 = cudf::ast::column_reference(0); + auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); + + // Expected result + auto predicate = cudf::compute_column(src, filter_expression); + auto expected = cudf::apply_boolean_mask(src, *predicate); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .filter(filter_expression); + auto result = cudf::io::read_parquet(read_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetReaderTest, FilterNamedExpression) +{ + auto [src, filepath] = create_parquet_with_stats("NamedExpression.parquet"); + // Filtering AST - table["col_uint32"] < 150 + auto literal_value = cudf::numeric_scalar(150); + auto literal = cudf::ast::literal(literal_value); + auto col_name_0 = 
cudf::ast::column_name_reference("col_uint32"); + auto parquet_filter = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_name_0, literal); + auto col_ref_0 = cudf::ast::column_reference(0); + auto table_filter = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); + + // Expected result + auto predicate = cudf::compute_column(src, table_filter); + auto expected = cudf::apply_boolean_mask(src, *predicate); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .filter(parquet_filter); + auto result = cudf::io::read_parquet(read_opts); + + // tests + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetReaderTest, FilterMultiple1) +{ + using T = cudf::string_view; + + auto const [src, filepath] = create_parquet_typed_with_stats("FilterMultiple1.parquet"); + auto const written_table = src.view(); + + // Filtering AST - 10000 < table[0] < 12000 + std::string const low = "000010000"; + std::string const high = "000012000"; + auto lov = cudf::string_scalar(low, true); + auto hiv = cudf::string_scalar(high, true); + auto filter_col = cudf::ast::column_reference(0); + auto lo_lit = cudf::ast::literal(lov); + auto hi_lit = cudf::ast::literal(hiv); + auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col, lo_lit); + auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col, hi_lit); + auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); + + // Expected result + auto predicate = cudf::compute_column(written_table, expr_3); + auto expected = cudf::apply_boolean_mask(written_table, *predicate); + + auto si = cudf::io::source_info(filepath); + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_3); + auto table_with_metadata = cudf::io::read_parquet(builder); + auto result = table_with_metadata.tbl->view(); + + // tests + 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); +} + +TEST_F(ParquetReaderTest, FilterMultiple2) +{ + // multiple conditions on same column. + using T = cudf::string_view; + + auto const [src, filepath] = create_parquet_typed_with_stats("FilterMultiple2.parquet"); + auto const written_table = src.view(); + // 0-8000, 8001-16000, 16001-20000 + + // Filtering AST + // (table[0] >= "000010000" AND table[0] < "000012000") OR + // (table[0] >= "000017000" AND table[0] < "000019000") + std::string const low1 = "000010000"; + std::string const high1 = "000012000"; + auto lov = cudf::string_scalar(low1, true); + auto hiv = cudf::string_scalar(high1, true); + auto filter_col = cudf::ast::column_reference(0); + auto lo_lit = cudf::ast::literal(lov); + auto hi_lit = cudf::ast::literal(hiv); + auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col, lo_lit); + auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col, hi_lit); + auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); + std::string const low2 = "000017000"; + std::string const high2 = "000019000"; + auto lov2 = cudf::string_scalar(low2, true); + auto hiv2 = cudf::string_scalar(high2, true); + auto lo_lit2 = cudf::ast::literal(lov2); + auto hi_lit2 = cudf::ast::literal(hiv2); + auto expr_4 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col, lo_lit2); + auto expr_5 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col, hi_lit2); + auto expr_6 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4, expr_5); + auto expr_7 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6); + + // Expected result + auto predicate = cudf::compute_column(written_table, expr_7); + auto expected = cudf::apply_boolean_mask(written_table, *predicate); + + auto si = cudf::io::source_info(filepath); + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_7); + 
auto table_with_metadata = cudf::io::read_parquet(builder); + auto result = table_with_metadata.tbl->view(); + + // tests + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); +} + +TEST_F(ParquetReaderTest, FilterMultiple3) +{ + // multiple conditions with reference to multiple columns. + // index and name references mixed. + using T = uint32_t; + auto const [src, filepath] = create_parquet_typed_with_stats("FilterMultiple3.parquet"); + auto const written_table = src.view(); + + // Filtering AST - (table[0] >= 70 AND table[0] < 90) OR (table[1] >= 100 AND table[1] < 120) + // row groups min, max: + // table[0] 0-80, 81-160, 161-200. + // table[1] 200-121, 120-41, 40-0. + auto filter_col1 = cudf::ast::column_reference(0); + auto filter_col2 = cudf::ast::column_name_reference("col1"); + T constexpr low1 = 70; + T constexpr high1 = 90; + T constexpr low2 = 100; + T constexpr high2 = 120; + auto lov = cudf::numeric_scalar(low1, true); + auto hiv = cudf::numeric_scalar(high1, true); + auto lo_lit1 = cudf::ast::literal(lov); + auto hi_lit1 = cudf::ast::literal(hiv); + auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col1, lo_lit1); + auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, hi_lit1); + auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); + auto lov2 = cudf::numeric_scalar(low2, true); + auto hiv2 = cudf::numeric_scalar(high2, true); + auto lo_lit2 = cudf::ast::literal(lov2); + auto hi_lit2 = cudf::ast::literal(hiv2); + auto expr_4 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col2, lo_lit2); + auto expr_5 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2, hi_lit2); + auto expr_6 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4, expr_5); + // expression to test + auto expr_7 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6); + + // Expected result + auto filter_col2_ref = 
cudf::ast::column_reference(1); + auto expr_4_ref = + cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col2_ref, lo_lit2); + auto expr_5_ref = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2_ref, hi_lit2); + auto expr_6_ref = + cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4_ref, expr_5_ref); + auto expr_7_ref = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6_ref); + auto predicate = cudf::compute_column(written_table, expr_7_ref); + auto expected = cudf::apply_boolean_mask(written_table, *predicate); + + auto si = cudf::io::source_info(filepath); + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_7); + auto table_with_metadata = cudf::io::read_parquet(builder); + auto result = table_with_metadata.tbl->view(); + + // tests + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); +} + +TEST_F(ParquetReaderTest, FilterSupported) +{ + using T = uint32_t; + auto const [src, filepath] = create_parquet_typed_with_stats("FilterSupported.parquet"); + auto const written_table = src.view(); + + // Filtering AST - ((table[0] > 70 AND table[0] <= 90) OR (table[1] >= 100 AND table[1] < 120)) + // AND (table[1] != 110) + // row groups min, max: + // table[0] 0-80, 81-160, 161-200. + // table[1] 200-121, 120-41, 40-0. 
+ auto filter_col1 = cudf::ast::column_reference(0); + auto filter_col2 = cudf::ast::column_reference(1); + T constexpr low1 = 70; + T constexpr high1 = 90; + T constexpr low2 = 100; + T constexpr high2 = 120; + T constexpr skip_value = 110; + auto lov = cudf::numeric_scalar(low1, true); + auto hiv = cudf::numeric_scalar(high1, true); + auto lo_lit1 = cudf::ast::literal(lov); + auto hi_lit1 = cudf::ast::literal(hiv); + auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER, filter_col1, lo_lit1); + auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, filter_col1, hi_lit1); + auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); + auto lov2 = cudf::numeric_scalar(low2, true); + auto hiv2 = cudf::numeric_scalar(high2, true); + auto lo_lit2 = cudf::ast::literal(lov2); + auto hi_lit2 = cudf::ast::literal(hiv2); + auto expr_4 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col2, lo_lit2); + auto expr_5 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2, hi_lit2); + auto expr_6 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4, expr_5); + auto expr_7 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6); + auto skip_ov = cudf::numeric_scalar(skip_value, true); + auto skip_lit = cudf::ast::literal(skip_ov); + auto expr_8 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col2, skip_lit); + auto expr_9 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_7, expr_8); + + // Expected result + auto predicate = cudf::compute_column(written_table, expr_9); + auto expected = cudf::apply_boolean_mask(written_table, *predicate); + + auto si = cudf::io::source_info(filepath); + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_9); + auto table_with_metadata = cudf::io::read_parquet(builder); + auto result = table_with_metadata.tbl->view(); + + // tests + 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); +} + +TEST_F(ParquetReaderTest, FilterSupported2) +{ + using T = uint32_t; + constexpr auto num_rows = 4000; + auto elements0 = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 2000; }); + auto elements1 = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 1000; }); + auto elements2 = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 500; }); + auto col0 = cudf::test::fixed_width_column_wrapper(elements0, elements0 + num_rows); + auto col1 = cudf::test::fixed_width_column_wrapper(elements1, elements1 + num_rows); + auto col2 = cudf::test::fixed_width_column_wrapper(elements2, elements2 + num_rows); + auto const written_table = table_view{{col0, col1, col2}}; + auto const filepath = temp_env->get_temp_filepath("FilterSupported2.parquet"); + { + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) + .row_group_size_rows(1000); + cudf::io::write_parquet(out_opts); + } + auto si = cudf::io::source_info(filepath); + auto filter_col0 = cudf::ast::column_reference(0); + auto filter_col1 = cudf::ast::column_reference(1); + auto filter_col2 = cudf::ast::column_reference(2); + auto s_value = cudf::numeric_scalar(1, true); + auto lit_value = cudf::ast::literal(s_value); + + auto test_expr = [&](auto& expr) { + // Expected result + auto predicate = cudf::compute_column(written_table, expr); + auto expected = cudf::apply_boolean_mask(written_table, *predicate); + + // tests + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); + auto table_with_metadata = cudf::io::read_parquet(builder); + auto result = table_with_metadata.tbl->view(); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); + }; + + // row groups min, max: + // table[0] 0-0, 0-0, 1-1, 1-1 + // table[1] 0-0, 1-1, 2-2, 3-3 + // table[2] 0-1, 2-3, 4-5, 6-7 + + // 
Filtering AST - table[i] == 1 + { + auto expr0 = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col0, lit_value); + test_expr(expr0); + + auto expr1 = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col1, lit_value); + test_expr(expr1); + + auto expr2 = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col2, lit_value); + test_expr(expr2); + } + // Filtering AST - table[i] != 1 + { + auto expr0 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col0, lit_value); + test_expr(expr0); + + auto expr1 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col1, lit_value); + test_expr(expr1); + + auto expr2 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col2, lit_value); + test_expr(expr2); + } +} + +// Error types - type mismatch, invalid column name, invalid literal type, invalid operator, +// non-bool filter output type. +TEST_F(ParquetReaderTest, FilterErrors) +{ + using T = uint32_t; + auto const [src, filepath] = create_parquet_typed_with_stats("FilterErrors.parquet"); + auto const written_table = src.view(); + auto si = cudf::io::source_info(filepath); + + // Filtering AST - invalid column index + { + auto filter_col1 = cudf::ast::column_reference(3); + T constexpr low = 100; + auto lov = cudf::numeric_scalar(low, true); + auto low_lot = cudf::ast::literal(lov); + auto expr = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); + + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); + EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); + } + + // Filtering AST - invalid column name + { + auto filter_col1 = cudf::ast::column_name_reference("col3"); + T constexpr low = 100; + auto lov = cudf::numeric_scalar(low, true); + auto low_lot = cudf::ast::literal(lov); + auto expr = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); + 
EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); + } + + // Filtering AST - incompatible literal type + { + auto filter_col1 = cudf::ast::column_name_reference("col0"); + auto filter_col2 = cudf::ast::column_reference(1); + int64_t constexpr low = 100; + auto lov = cudf::numeric_scalar(low, true); + auto low_lot = cudf::ast::literal(lov); + auto expr1 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); + auto expr2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2, low_lot); + auto builder1 = cudf::io::parquet_reader_options::builder(si).filter(expr1); + EXPECT_THROW(cudf::io::read_parquet(builder1), cudf::logic_error); + + auto builder2 = cudf::io::parquet_reader_options::builder(si).filter(expr2); + EXPECT_THROW(cudf::io::read_parquet(builder2), cudf::logic_error); + } + + // Filtering AST - "table[0] + 110" is invalid filter expression + { + auto filter_col1 = cudf::ast::column_reference(0); + T constexpr add_value = 110; + auto add_v = cudf::numeric_scalar(add_value, true); + auto add_lit = cudf::ast::literal(add_v); + auto expr_8 = cudf::ast::operation(cudf::ast::ast_operator::ADD, filter_col1, add_lit); + + auto si = cudf::io::source_info(filepath); + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_8); + EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); + + // Expected result throw to show that the filter expression is invalid, + // not a limitation of the parquet predicate pushdown. 
+ auto predicate = cudf::compute_column(written_table, expr_8); + EXPECT_THROW(cudf::apply_boolean_mask(written_table, *predicate), cudf::logic_error); + } + + // Filtering AST - INT64(table[0] < 100) non-bool expression + { + auto filter_col1 = cudf::ast::column_reference(0); + T constexpr low = 100; + auto lov = cudf::numeric_scalar(low, true); + auto low_lot = cudf::ast::literal(lov); + auto bool_expr = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); + auto cast = cudf::ast::operation(cudf::ast::ast_operator::CAST_TO_INT64, bool_expr); + + auto builder = cudf::io::parquet_reader_options::builder(si).filter(cast); + EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); + EXPECT_NO_THROW(cudf::compute_column(written_table, cast)); + auto predicate = cudf::compute_column(written_table, cast); + EXPECT_NE(predicate->view().type().id(), cudf::type_id::BOOL8); + } +} + +// Filter without stats information in file. +TEST_F(ParquetReaderTest, FilterNoStats) +{ + using T = uint32_t; + constexpr auto num_rows = 16000; + auto elements = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 1000; }); + auto col0 = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); + auto const written_table = table_view{{col0}}; + auto const filepath = temp_env->get_temp_filepath("FilterNoStats.parquet"); + { + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) + .row_group_size_rows(8000) + .stats_level(cudf::io::statistics_freq::STATISTICS_NONE); + cudf::io::write_parquet(out_opts); + } + auto si = cudf::io::source_info(filepath); + auto filter_col0 = cudf::ast::column_reference(0); + auto s_value = cudf::numeric_scalar(1, true); + auto lit_value = cudf::ast::literal(s_value); + + // row groups min, max: + // table[0] 0-0, 1-1, 2-2, 3-3 + // Filtering AST - table[0] > 1 + auto expr = 
cudf::ast::operation(cudf::ast::ast_operator::GREATER, filter_col0, lit_value); + + // Expected result + auto predicate = cudf::compute_column(written_table, expr); + auto expected = cudf::apply_boolean_mask(written_table, *predicate); + + // tests + auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); + auto table_with_metadata = cudf::io::read_parquet(builder); + auto result = table_with_metadata.tbl->view(); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); +} + +// Filter for float column with NaN values +TEST_F(ParquetReaderTest, FilterFloatNAN) +{ + constexpr auto num_rows = 24000; + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [num_rows](auto i) { return i > num_rows / 2 ? NAN : i; }); + auto col0 = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); + auto col1 = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); + + auto const written_table = table_view{{col0, col1}}; + auto const filepath = temp_env->get_temp_filepath("FilterFloatNAN.parquet"); + { + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) + .row_group_size_rows(8000); + cudf::io::write_parquet(out_opts); + } + auto si = cudf::io::source_info(filepath); + auto filter_col0 = cudf::ast::column_reference(0); + auto filter_col1 = cudf::ast::column_reference(1); + auto s0_value = cudf::numeric_scalar(NAN, true); + auto lit0_value = cudf::ast::literal(s0_value); + auto s1_value = cudf::numeric_scalar(NAN, true); + auto lit1_value = cudf::ast::literal(s1_value); + + // row groups min, max: + // table[0] 0-0, 1-1, 2-2, 3-3 + // Filtering AST - table[0] == NAN, table[1] != NAN + auto expr_eq = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col0, lit0_value); + auto expr_neq = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col1, lit1_value); + + // Expected result + auto predicate0 = 
cudf::compute_column(written_table, expr_eq); + auto expected0 = cudf::apply_boolean_mask(written_table, *predicate0); + auto predicate1 = cudf::compute_column(written_table, expr_neq); + auto expected1 = cudf::apply_boolean_mask(written_table, *predicate1); + + // tests + auto builder0 = cudf::io::parquet_reader_options::builder(si).filter(expr_eq); + auto table_with_metadata0 = cudf::io::read_parquet(builder0); + auto result0 = table_with_metadata0.tbl->view(); + auto builder1 = cudf::io::parquet_reader_options::builder(si).filter(expr_neq); + auto table_with_metadata1 = cudf::io::read_parquet(builder1); + auto result1 = table_with_metadata1.tbl->view(); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected0->view(), result0); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected1->view(), result1); +} + +TEST_F(ParquetReaderTest, RepeatedNoAnnotations) +{ + constexpr unsigned char repeated_bytes[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x04, 0x15, 0x30, 0x15, 0x30, 0x4c, 0x15, 0x0c, 0x15, 0x00, 0x12, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x15, 0x0a, 0x15, 0x0a, + 0x2c, 0x15, 0x0c, 0x15, 0x10, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x03, 0x03, 0x88, 0xc6, 0x02, + 0x26, 0x80, 0x01, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x10, 0x19, 0x18, 0x02, 0x69, 0x64, 0x15, + 0x00, 0x16, 0x0c, 0x16, 0x78, 0x16, 0x78, 0x26, 0x54, 0x26, 0x08, 0x00, 0x00, 0x15, 0x04, 0x15, + 0x40, 0x15, 0x40, 0x4c, 0x15, 0x08, 0x15, 0x00, 0x12, 0x00, 0x00, 0xe3, 0x0c, 0x23, 0x4b, 0x01, + 0x00, 0x00, 0x00, 0xc7, 0x35, 0x3a, 0x42, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x6b, 0x74, 0x84, 0x00, + 0x00, 0x00, 0x00, 0x55, 0xa1, 0xae, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x15, 0x22, 0x15, + 0x22, 0x2c, 0x15, 0x10, 0x15, 0x10, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x03, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x03, 0x90, 0xaa, 0x02, 0x03, 0x94, 0x03, 0x26, 0xda, 0x02, + 0x1c, 0x15, 0x04, 0x19, 
0x25, 0x00, 0x10, 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, + 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x06, 0x6e, 0x75, 0x6d, + 0x62, 0x65, 0x72, 0x15, 0x00, 0x16, 0x10, 0x16, 0xa0, 0x01, 0x16, 0xa0, 0x01, 0x26, 0x96, 0x02, + 0x26, 0xba, 0x01, 0x00, 0x00, 0x15, 0x04, 0x15, 0x24, 0x15, 0x24, 0x4c, 0x15, 0x04, 0x15, 0x00, + 0x12, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x68, 0x6f, 0x6d, 0x65, 0x06, 0x00, 0x00, 0x00, 0x6d, + 0x6f, 0x62, 0x69, 0x6c, 0x65, 0x15, 0x00, 0x15, 0x20, 0x15, 0x20, 0x2c, 0x15, 0x10, 0x15, 0x10, + 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0xc0, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x90, 0xef, 0x01, 0x03, 0x04, 0x26, 0xcc, 0x04, 0x1c, 0x15, 0x0c, 0x19, 0x25, 0x00, 0x10, + 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, + 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x15, 0x00, 0x16, 0x10, 0x16, 0x82, + 0x01, 0x16, 0x82, 0x01, 0x26, 0x8a, 0x04, 0x26, 0xca, 0x03, 0x00, 0x00, 0x15, 0x02, 0x19, 0x6c, + 0x48, 0x04, 0x75, 0x73, 0x65, 0x72, 0x15, 0x04, 0x00, 0x15, 0x02, 0x25, 0x00, 0x18, 0x02, 0x69, + 0x64, 0x00, 0x35, 0x02, 0x18, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, + 0x72, 0x73, 0x15, 0x02, 0x00, 0x35, 0x04, 0x18, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x15, 0x04, + 0x00, 0x15, 0x04, 0x25, 0x00, 0x18, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x00, 0x15, 0x0c, + 0x25, 0x02, 0x18, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x25, 0x00, 0x00, 0x16, 0x00, 0x19, 0x1c, 0x19, + 0x3c, 0x26, 0x80, 0x01, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x10, 0x19, 0x18, 0x02, 0x69, 0x64, + 0x15, 0x00, 0x16, 0x0c, 0x16, 0x78, 0x16, 0x78, 0x26, 0x54, 0x26, 0x08, 0x00, 0x00, 0x26, 0xda, + 0x02, 0x1c, 0x15, 0x04, 0x19, 0x25, 0x00, 0x10, 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, + 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x06, 0x6e, 0x75, + 0x6d, 0x62, 0x65, 0x72, 0x15, 0x00, 0x16, 0x10, 0x16, 0xa0, 
0x01, 0x16, 0xa0, 0x01, 0x26, 0x96, + 0x02, 0x26, 0xba, 0x01, 0x00, 0x00, 0x26, 0xcc, 0x04, 0x1c, 0x15, 0x0c, 0x19, 0x25, 0x00, 0x10, + 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, + 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x15, 0x00, 0x16, 0x10, 0x16, 0x82, + 0x01, 0x16, 0x82, 0x01, 0x26, 0x8a, 0x04, 0x26, 0xca, 0x03, 0x00, 0x00, 0x16, 0x9a, 0x03, 0x16, + 0x0c, 0x00, 0x28, 0x49, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2d, 0x72, 0x73, 0x20, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x30, 0x2e, 0x33, 0x2e, 0x30, 0x20, 0x28, 0x62, 0x75, + 0x69, 0x6c, 0x64, 0x20, 0x62, 0x34, 0x35, 0x63, 0x65, 0x37, 0x63, 0x62, 0x61, 0x32, 0x31, 0x39, + 0x39, 0x66, 0x32, 0x32, 0x64, 0x39, 0x33, 0x32, 0x36, 0x39, 0x63, 0x31, 0x35, 0x30, 0x64, 0x38, + 0x61, 0x38, 0x33, 0x39, 0x31, 0x36, 0x63, 0x36, 0x39, 0x62, 0x35, 0x65, 0x29, 0x00, 0x32, 0x01, + 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; + + auto read_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(repeated_bytes), sizeof(repeated_bytes)}); + auto result = cudf::io::read_parquet(read_opts); + + EXPECT_EQ(result.tbl->view().column(0).size(), 6); + EXPECT_EQ(result.tbl->view().num_columns(), 2); + + column_wrapper col0{1, 2, 3, 4, 5, 6}; + column_wrapper child0{{5555555555l, 1111111111l, 1111111111l, 2222222222l, 3333333333l}}; + cudf::test::strings_column_wrapper child1{{"-", "home", "home", "-", "mobile"}, {0, 1, 1, 0, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{child0, child1}}; + + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 1, 2, 5}.release(); + auto num_list_rows = list_offsets_column->size() - 1; + + auto mask = cudf::create_null_mask(6, cudf::mask_state::ALL_VALID); + cudf::set_null_mask(static_cast(mask.data()), 0, 2, false); + + auto list_col = cudf::make_lists_column( + num_list_rows, std::move(list_offsets_column), struct_col.release(), 2, 
std::move(mask)); + + std::vector> struct_children; + struct_children.push_back(std::move(list_col)); + + auto outer_struct = + cudf::test::structs_column_wrapper{{std::move(struct_children)}, {0, 0, 1, 1, 1, 1}}; + table_view expected{{col0, outer_struct}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), expected); +} + +TEST_F(ParquetReaderTest, DeltaSkipRowsWithNulls) +{ + constexpr int num_rows = 50'000; + constexpr auto seed = 21337; + + std::mt19937 engine{seed}; + auto int32_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); + auto int32_list = make_parquet_list_col(engine, num_rows, 5, false); + auto int64_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); + auto int64_list = make_parquet_list_col(engine, num_rows, 5, false); + auto int16_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); + auto int16_list = make_parquet_list_col(engine, num_rows, 5, false); + auto int8_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); + auto int8_list = make_parquet_list_col(engine, num_rows, 5, false); + + auto str_list_nulls = make_parquet_string_list_col(engine, num_rows, 5, 32, true); + auto str_list = make_parquet_string_list_col(engine, num_rows, 5, 32, false); + auto big_str_list_nulls = make_parquet_string_list_col(engine, num_rows, 5, 256, true); + auto big_str_list = make_parquet_string_list_col(engine, num_rows, 5, 256, false); + + auto int32_data = random_values(num_rows); + auto int64_data = random_values(num_rows); + auto int16_data = random_values(num_rows); + auto int8_data = random_values(num_rows); + auto str_data = string_values(engine, num_rows, 32); + auto big_str_data = string_values(engine, num_rows, 256); + + auto const validity = random_validity(engine); + auto const no_nulls = cudf::test::iterators::no_nulls(); + column_wrapper int32_nulls_col{int32_data.begin(), int32_data.end(), validity}; + column_wrapper int32_col{int32_data.begin(), int32_data.end(), no_nulls}; + column_wrapper 
int64_nulls_col{int64_data.begin(), int64_data.end(), validity}; + column_wrapper int64_col{int64_data.begin(), int64_data.end(), no_nulls}; + + auto str_col = cudf::test::strings_column_wrapper(str_data.begin(), str_data.end(), no_nulls); + auto str_col_nulls = cudf::purge_nonempty_nulls( + cudf::test::strings_column_wrapper(str_data.begin(), str_data.end(), validity)); + auto big_str_col = + cudf::test::strings_column_wrapper(big_str_data.begin(), big_str_data.end(), no_nulls); + auto big_str_col_nulls = cudf::purge_nonempty_nulls( + cudf::test::strings_column_wrapper(big_str_data.begin(), big_str_data.end(), validity)); + + cudf::table_view tbl({int32_col, int32_nulls_col, *int32_list, *int32_list_nulls, + int64_col, int64_nulls_col, *int64_list, *int64_list_nulls, + *int16_list, *int16_list_nulls, *int8_list, *int8_list_nulls, + str_col, *str_col_nulls, *str_list, *str_list_nulls, + big_str_col, *big_str_col_nulls, *big_str_list, *big_str_list_nulls}); + + auto const filepath = temp_env->get_temp_filepath("DeltaSkipRowsWithNulls.parquet"); + auto const out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .compression(cudf::io::compression_type::NONE) + .dictionary_policy(cudf::io::dictionary_policy::NEVER) + .max_page_size_rows(20'000) + .write_v2_headers(true) + .build(); + cudf::io::write_parquet(out_opts); + + // skip_rows / num_rows + // clang-format off + std::vector> params{ + // skip and then read rest of file + {-1, -1}, {1, -1}, {2, -1}, {32, -1}, {33, -1}, {128, -1}, {1000, -1}, + // no skip but read fewer rows + {0, 1}, {0, 2}, {0, 31}, {0, 32}, {0, 33}, {0, 128}, {0, 129}, {0, 130}, + // skip and truncate + {1, 32}, {1, 33}, {32, 32}, {33, 139}, + // cross page boundaries + {10'000, 20'000} + }; + + // clang-format on + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? num_rows - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + std::vector expected = cudf::slice(tbl, slice_indices); + + CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), expected[0]); + + // test writing the result back out as a further check of the delta writer's correctness + std::vector out_buffer; + cudf::io::parquet_writer_options out_opts2 = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, + result.tbl->view()) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .compression(cudf::io::compression_type::NONE) + .dictionary_policy(cudf::io::dictionary_policy::NEVER) + .max_page_size_rows(20'000) + .write_v2_headers(true); + cudf::io::write_parquet(out_opts2); + + cudf::io::parquet_reader_options default_in_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}); + auto const result2 = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), result2.tbl->view()); + } +} + +/////////////////// +// metadata tests + +// Test fixture for metadata tests +struct ParquetMetadataReaderTest : public cudf::test::BaseFixture { + std::string print(cudf::io::parquet_column_schema schema, int depth = 0) + { + std::string child_str; + for (auto const& child : schema.children()) { + child_str += print(child, depth + 1); + } + return std::string(depth, ' ') + schema.name() + "\n" + child_str; + } +}; + +TEST_F(ParquetMetadataReaderTest, TestBasic) +{ + auto const num_rows = 1200; + + auto ints = random_values(num_rows); + auto floats = random_values(num_rows); + column_wrapper int_col(ints.begin(), 
ints.end()); + column_wrapper float_col(floats.begin(), floats.end()); + + table_view expected({int_col, float_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("int_col"); + expected_metadata.column_metadata[1].set_name("float_col"); + + auto filepath = temp_env->get_temp_filepath("MetadataTest.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_parquet(out_opts); + + auto meta = read_parquet_metadata(cudf::io::source_info{filepath}); + EXPECT_EQ(meta.num_rows(), num_rows); + + std::string expected_schema = R"(schema + int_col + float_col +)"; + EXPECT_EQ(expected_schema, print(meta.schema().root())); + + EXPECT_EQ(meta.schema().root().name(), "schema"); + EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); + ASSERT_EQ(meta.schema().root().num_children(), 2); + + EXPECT_EQ(meta.schema().root().child(0).name(), "int_col"); + EXPECT_EQ(meta.schema().root().child(1).name(), "float_col"); +} + +TEST_F(ParquetMetadataReaderTest, TestNested) +{ + auto const num_rows = 1200; + auto const lists_per_row = 4; + auto const num_child_rows = num_rows * lists_per_row; + + auto keys = random_values(num_child_rows); + auto vals = random_values(num_child_rows); + column_wrapper keys_col(keys.begin(), keys.end()); + column_wrapper vals_col(vals.begin(), vals.end()); + auto s_col = cudf::test::structs_column_wrapper({keys_col, vals_col}).release(); + + std::vector row_offsets(num_rows + 1); + for (int idx = 0; idx < num_rows + 1; ++idx) { + row_offsets[idx] = idx * lists_per_row; + } + column_wrapper offsets(row_offsets.begin(), row_offsets.end()); + + auto list_col = + cudf::make_lists_column(num_rows, offsets.release(), std::move(s_col), 0, rmm::device_buffer{}); + + table_view expected({*list_col, *list_col}); + + 
cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("maps"); + expected_metadata.column_metadata[0].set_list_column_as_map(); + expected_metadata.column_metadata[1].set_name("lists"); + expected_metadata.column_metadata[1].child(1).child(0).set_name("int_field"); + expected_metadata.column_metadata[1].child(1).child(1).set_name("float_field"); + + auto filepath = temp_env->get_temp_filepath("MetadataTest.orc"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_parquet(out_opts); + + auto meta = read_parquet_metadata(cudf::io::source_info{filepath}); + EXPECT_EQ(meta.num_rows(), num_rows); + + std::string expected_schema = R"(schema + maps + key_value + key + value + lists + list + element + int_field + float_field +)"; + EXPECT_EQ(expected_schema, print(meta.schema().root())); + + EXPECT_EQ(meta.schema().root().name(), "schema"); + EXPECT_EQ(meta.schema().root().type_kind(), + cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // struct + ASSERT_EQ(meta.schema().root().num_children(), 2); + + auto const& out_map_col = meta.schema().root().child(0); + EXPECT_EQ(out_map_col.name(), "maps"); + EXPECT_EQ(out_map_col.type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // map + + ASSERT_EQ(out_map_col.num_children(), 1); + EXPECT_EQ(out_map_col.child(0).name(), "key_value"); // key_value (named in parquet writer) + ASSERT_EQ(out_map_col.child(0).num_children(), 2); + EXPECT_EQ(out_map_col.child(0).child(0).name(), "key"); // key (named in parquet writer) + EXPECT_EQ(out_map_col.child(0).child(1).name(), "value"); // value (named in parquet writer) + EXPECT_EQ(out_map_col.child(0).child(0).type_kind(), cudf::io::parquet::TypeKind::INT32); // int + EXPECT_EQ(out_map_col.child(0).child(1).type_kind(), + cudf::io::parquet::TypeKind::FLOAT); // float + + auto const& out_list_col 
= meta.schema().root().child(1); + EXPECT_EQ(out_list_col.name(), "lists"); + EXPECT_EQ(out_list_col.type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // list + // TODO repetition type? + ASSERT_EQ(out_list_col.num_children(), 1); + EXPECT_EQ(out_list_col.child(0).name(), "list"); // list (named in parquet writer) + ASSERT_EQ(out_list_col.child(0).num_children(), 1); + + auto const& out_list_struct_col = out_list_col.child(0).child(0); + EXPECT_EQ(out_list_struct_col.name(), "element"); // elements (named in parquet writer) + EXPECT_EQ(out_list_struct_col.type_kind(), + cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // struct + ASSERT_EQ(out_list_struct_col.num_children(), 2); + + auto const& out_int_col = out_list_struct_col.child(0); + EXPECT_EQ(out_int_col.name(), "int_field"); + EXPECT_EQ(out_int_col.type_kind(), cudf::io::parquet::TypeKind::INT32); + + auto const& out_float_col = out_list_struct_col.child(1); + EXPECT_EQ(out_float_col.name(), "float_field"); + EXPECT_EQ(out_float_col.type_kind(), cudf::io::parquet::TypeKind::FLOAT); +} + +/////////////////////// +// reader source tests + +template +struct ParquetReaderSourceTest : public ParquetReaderTest {}; + +TYPED_TEST_SUITE(ParquetReaderSourceTest, ByteLikeTypes); + +TYPED_TEST(ParquetReaderSourceTest, BufferSourceTypes) +{ + using T = TypeParam; + + srand(31337); + auto table = create_random_fixed_table(5, 5, true); + + std::vector out_buffer; + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), *table); + cudf::io::write_parquet(out_opts); + + { + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info( + cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size()))); + auto const result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*table, result.tbl->view()); + } + + { + cudf::io::parquet_reader_options in_opts = + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info(cudf::host_span( + reinterpret_cast(out_buffer.data()), out_buffer.size()))); + auto const result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*table, result.tbl->view()); + } +} + +TYPED_TEST(ParquetReaderSourceTest, BufferSourceArrayTypes) +{ + using T = TypeParam; + + srand(31337); + auto table = create_random_fixed_table(5, 5, true); + + std::vector out_buffer; + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), *table); + cudf::io::write_parquet(out_opts); + + auto full_table = cudf::concatenate(std::vector({*table, *table})); + + { + auto spans = std::vector>{ + cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size()), + cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size())}; + cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info(cudf::host_span>(spans.data(), spans.size()))); + auto const result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*full_table, result.tbl->view()); + } + + { + auto spans = std::vector>{ + cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size()), + cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size())}; + cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info(cudf::host_span>(spans.data(), spans.size()))); + auto const result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*full_table, result.tbl->view()); + } +} + +////////////////////////////// +// predicate pushdown tests + +// Test for Types - numeric, chrono, string. 
+template +struct ParquetReaderPredicatePushdownTest : public ParquetReaderTest {}; + +TYPED_TEST_SUITE(ParquetReaderPredicatePushdownTest, SupportedTestTypes); + +TYPED_TEST(ParquetReaderPredicatePushdownTest, FilterTyped) +{ + using T = TypeParam; + + auto const [src, filepath] = create_parquet_typed_with_stats("FilterTyped.parquet"); + auto const written_table = src.view(); + + // Filtering AST + auto literal_value = []() { + if constexpr (cudf::is_timestamp()) { + // table[0] < 10000 timestamp days/seconds/milliseconds/microseconds/nanoseconds + return cudf::timestamp_scalar(T(typename T::duration(10000))); // i (0-20,000) + } else if constexpr (cudf::is_duration()) { + // table[0] < 10000 day/seconds/milliseconds/microseconds/nanoseconds + return cudf::duration_scalar(T(10000)); // i (0-20,000) + } else if constexpr (std::is_same_v) { + // table[0] < "000010000" + return cudf::string_scalar("000010000"); // i (0-20,000) + } else { + // table[0] < 0 or 100u + return cudf::numeric_scalar((100 - 100 * std::is_signed_v)); // i/100 (-100-100/ 0-200) + } + }(); + auto literal = cudf::ast::literal(literal_value); + auto col_name_0 = cudf::ast::column_name_reference("col0"); + auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_name_0, literal); + auto col_ref_0 = cudf::ast::column_reference(0); + auto ref_filter = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); + + // Expected result + auto predicate = cudf::compute_column(written_table, ref_filter); + EXPECT_EQ(predicate->view().type().id(), cudf::type_id::BOOL8) + << "Predicate filter should return a boolean"; + auto expected = cudf::apply_boolean_mask(written_table, *predicate); + + // Reading with Predicate Pushdown + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .filter(filter_expression); + auto result = cudf::io::read_parquet(read_opts); + auto result_table = result.tbl->view(); + 
+ // tests + EXPECT_EQ(int(written_table.column(0).type().id()), int(result_table.column(0).type().id())) + << "col0 type mismatch"; + // To make sure AST filters out some elements + EXPECT_LT(expected->num_rows(), written_table.num_rows()); + EXPECT_EQ(result_table.num_rows(), expected->num_rows()); + EXPECT_EQ(result_table.num_columns(), expected->num_columns()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result_table); +} diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 785a398d716..be2ecd56424 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7290 +15,12 @@ */ #include -#include -#include -#include -#include -#include -#include -#include +#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include - -#include -#include -#include - -template -using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; -using column = cudf::column; -using table = cudf::table; -using table_view = cudf::table_view; - -// Global environment for temporary files -auto const temp_env = static_cast( - ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); - -template -std::unique_ptr create_fixed_table(cudf::size_type num_columns, - cudf::size_type num_rows, - bool include_validity, - Elements elements) -{ - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector> src_cols(num_columns); - for (int idx = 0; idx < num_columns; idx++) { - if 
(include_validity) { - src_cols[idx] = - cudf::test::fixed_width_column_wrapper(elements, elements + num_rows, valids); - } else { - src_cols[idx] = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); - } - } - std::vector> columns(num_columns); - std::transform(src_cols.begin(), - src_cols.end(), - columns.begin(), - [](cudf::test::fixed_width_column_wrapper& in) { - auto ret = in.release(); - // pre-cache the null count - [[maybe_unused]] auto const nulls = ret->has_nulls(); - return ret; - }); - return std::make_unique(std::move(columns)); -} - -template -std::unique_ptr create_random_fixed_table(cudf::size_type num_columns, - cudf::size_type num_rows, - bool include_validity) -{ - auto rand_elements = - cudf::detail::make_counting_transform_iterator(0, [](T i) { return rand(); }); - return create_fixed_table(num_columns, num_rows, include_validity, rand_elements); -} - -template -std::unique_ptr create_compressible_fixed_table(cudf::size_type num_columns, - cudf::size_type num_rows, - cudf::size_type period, - bool include_validity) -{ - auto compressible_elements = - cudf::detail::make_counting_transform_iterator(0, [period](T i) { return i / period; }); - return create_fixed_table(num_columns, num_rows, include_validity, compressible_elements); -} - -// this function replicates the "list_gen" function in -// python/cudf/cudf/tests/test_parquet.py -template -std::unique_ptr make_parquet_list_list_col( - int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity) -{ - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0 ? 
1 : 0; }); - - // root list - std::vector row_offsets(num_rows + 1); - int row_offset_count = 0; - { - int offset = 0; - for (int idx = 0; idx < (num_rows) + 1; idx++) { - row_offsets[row_offset_count] = offset; - if (!include_validity || valids[idx]) { offset += lists_per_row; } - row_offset_count++; - } - } - cudf::test::fixed_width_column_wrapper offsets(row_offsets.begin(), - row_offsets.begin() + row_offset_count); - - // child list - std::vector child_row_offsets((num_rows * lists_per_row) + 1); - int child_row_offset_count = 0; - { - int offset = 0; - for (int idx = 0; idx < (num_rows * lists_per_row); idx++) { - int row_index = idx / lists_per_row; - if (include_validity && !valids[row_index]) { continue; } - - child_row_offsets[child_row_offset_count] = offset; - offset += list_size; - child_row_offset_count++; - } - child_row_offsets[child_row_offset_count++] = offset; - } - cudf::test::fixed_width_column_wrapper child_offsets( - child_row_offsets.begin(), child_row_offsets.begin() + child_row_offset_count); - - // child values - std::vector child_values(num_rows * lists_per_row * list_size); - T first_child_value_index = skip_rows * lists_per_row * list_size; - int child_value_count = 0; - { - for (int idx = 0; idx < (num_rows * lists_per_row * list_size); idx++) { - int row_index = idx / (lists_per_row * list_size); - - int val = first_child_value_index; - first_child_value_index++; - - if (include_validity && !valids[row_index]) { continue; } - - child_values[child_value_count] = val; - child_value_count++; - } - } - // validity by value instead of index - auto valids2 = cudf::detail::make_counting_transform_iterator( - 0, [list_size](auto i) { return (i % list_size) % 2 == 0 ? 1 : 0; }); - auto child_data = include_validity - ? 
cudf::test::fixed_width_column_wrapper( - child_values.begin(), child_values.begin() + child_value_count, valids2) - : cudf::test::fixed_width_column_wrapper( - child_values.begin(), child_values.begin() + child_value_count); - - int child_offsets_size = static_cast(child_offsets).size() - 1; - auto child = cudf::make_lists_column( - child_offsets_size, child_offsets.release(), child_data.release(), 0, rmm::device_buffer{}); - - int offsets_size = static_cast(offsets).size() - 1; - auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + offsets_size); - return include_validity - ? cudf::make_lists_column( - offsets_size, offsets.release(), std::move(child), null_count, std::move(null_mask)) - : cudf::make_lists_column( - offsets_size, offsets.release(), std::move(child), 0, rmm::device_buffer{}); -} - -// given a datasource pointing to a parquet file, read the footer -// of the file to populate the FileMetaData pointed to by file_meta_data. -// throws cudf::logic_error if the file or metadata is invalid. 
-void read_footer(std::unique_ptr const& source, - cudf::io::parquet::detail::FileMetaData* file_meta_data) -{ - constexpr auto header_len = sizeof(cudf::io::parquet::detail::file_header_s); - constexpr auto ender_len = sizeof(cudf::io::parquet::detail::file_ender_s); - - auto const len = source->size(); - auto const header_buffer = source->host_read(0, header_len); - auto const header = - reinterpret_cast(header_buffer->data()); - auto const ender_buffer = source->host_read(len - ender_len, ender_len); - auto const ender = - reinterpret_cast(ender_buffer->data()); - - // checks for valid header, footer, and file length - ASSERT_GT(len, header_len + ender_len); - ASSERT_TRUE(header->magic == cudf::io::parquet::detail::parquet_magic && - ender->magic == cudf::io::parquet::detail::parquet_magic); - ASSERT_TRUE(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len)); - - // parquet files end with 4-byte footer_length and 4-byte magic == "PAR1" - // seek backwards from the end of the file (footer_length + 8 bytes of ender) - auto const footer_buffer = - source->host_read(len - ender->footer_len - ender_len, ender->footer_len); - cudf::io::parquet::detail::CompactProtocolReader cp(footer_buffer->data(), ender->footer_len); - - cp.read(file_meta_data); -} - -// returns the number of bits used for dictionary encoding data at the given page location. -// this assumes the data is uncompressed. -// throws cudf::logic_error if the page_loc data is invalid. 
-int read_dict_bits(std::unique_ptr const& source, - cudf::io::parquet::detail::PageLocation const& page_loc) -{ - CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); - CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); - - cudf::io::parquet::detail::PageHeader page_hdr; - auto const page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); - cudf::io::parquet::detail::CompactProtocolReader cp(page_buf->data(), page_buf->size()); - cp.read(&page_hdr); - - // cp should be pointing at the start of page data now. the first byte - // should be the encoding bit size - return cp.getb(); -} - -// read column index from datasource at location indicated by chunk, -// parse and return as a ColumnIndex struct. -// throws cudf::logic_error if the chunk data is invalid. -cudf::io::parquet::detail::ColumnIndex read_column_index( - std::unique_ptr const& source, - cudf::io::parquet::detail::ColumnChunk const& chunk) -{ - CUDF_EXPECTS(chunk.column_index_offset > 0, "Cannot find column index"); - CUDF_EXPECTS(chunk.column_index_length > 0, "Invalid column index length"); - - cudf::io::parquet::detail::ColumnIndex colidx; - auto const ci_buf = source->host_read(chunk.column_index_offset, chunk.column_index_length); - cudf::io::parquet::detail::CompactProtocolReader cp(ci_buf->data(), ci_buf->size()); - cp.read(&colidx); - return colidx; -} - -// read offset index from datasource at location indicated by chunk, -// parse and return as an OffsetIndex struct. -// throws cudf::logic_error if the chunk data is invalid. 
-cudf::io::parquet::detail::OffsetIndex read_offset_index( - std::unique_ptr const& source, - cudf::io::parquet::detail::ColumnChunk const& chunk) -{ - CUDF_EXPECTS(chunk.offset_index_offset > 0, "Cannot find offset index"); - CUDF_EXPECTS(chunk.offset_index_length > 0, "Invalid offset index length"); - - cudf::io::parquet::detail::OffsetIndex offidx; - auto const oi_buf = source->host_read(chunk.offset_index_offset, chunk.offset_index_length); - cudf::io::parquet::detail::CompactProtocolReader cp(oi_buf->data(), oi_buf->size()); - cp.read(&offidx); - return offidx; -} - -// Return as a Statistics from the column chunk -cudf::io::parquet::detail::Statistics const& get_statistics( - cudf::io::parquet::detail::ColumnChunk const& chunk) -{ - return chunk.meta_data.statistics; -} - -// read page header from datasource at location indicated by page_loc, -// parse and return as a PageHeader struct. -// throws cudf::logic_error if the page_loc data is invalid. -cudf::io::parquet::detail::PageHeader read_page_header( - std::unique_ptr const& source, - cudf::io::parquet::detail::PageLocation const& page_loc) -{ - CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); - CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); - - cudf::io::parquet::detail::PageHeader page_hdr; - auto const page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); - cudf::io::parquet::detail::CompactProtocolReader cp(page_buf->data(), page_buf->size()); - cp.read(&page_hdr); - return page_hdr; -} - -// Base test fixture for tests -struct ParquetWriterTest : public cudf::test::BaseFixture {}; - -// Base test fixture for tests -struct ParquetReaderTest : public cudf::test::BaseFixture {}; - -// Base test fixture for "stress" tests -struct ParquetWriterStressTest : public cudf::test::BaseFixture {}; - -// Typed test fixture for numeric type tests -template -struct ParquetWriterNumericTypeTest : public ParquetWriterTest { - auto type() { return 
cudf::data_type{cudf::type_to_id()}; } -}; - -// Typed test fixture for comparable type tests -template -struct ParquetWriterComparableTypeTest : public ParquetWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -// Typed test fixture for timestamp type tests -template -struct ParquetWriterChronoTypeTest : public ParquetWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -// Typed test fixture for timestamp type tests -template -struct ParquetWriterTimestampTypeTest : public ParquetWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -// Typed test fixture for all types -template -struct ParquetWriterSchemaTest : public ParquetWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -template -struct ParquetReaderSourceTest : public ParquetReaderTest {}; - -template -struct ParquetWriterDeltaTest : public ParquetWriterTest {}; - -// Declare typed test cases -// TODO: Replace with `NumericTypes` when unsigned support is added. 
Issue #5352 -using SupportedTypes = cudf::test::Types; -TYPED_TEST_SUITE(ParquetWriterNumericTypeTest, SupportedTypes); -using ComparableAndFixedTypes = - cudf::test::Concat; -TYPED_TEST_SUITE(ParquetWriterComparableTypeTest, ComparableAndFixedTypes); -TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, cudf::test::ChronoTypes); -using SupportedTimestampTypes = - cudf::test::Types; -TYPED_TEST_SUITE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes); -TYPED_TEST_SUITE(ParquetWriterSchemaTest, cudf::test::AllTypes); -using ByteLikeTypes = cudf::test::Types; -TYPED_TEST_SUITE(ParquetReaderSourceTest, ByteLikeTypes); - -// Base test fixture for chunked writer tests -struct ParquetChunkedWriterTest : public cudf::test::BaseFixture {}; - -// Typed test fixture for numeric type tests -template -struct ParquetChunkedWriterNumericTypeTest : public ParquetChunkedWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -// Declare typed test cases -TYPED_TEST_SUITE(ParquetChunkedWriterNumericTypeTest, SupportedTypes); - -// Base test fixture for size-parameterized tests -class ParquetSizedTest : public ::cudf::test::BaseFixtureWithParam {}; - -// test the allowed bit widths for dictionary encoding -INSTANTIATE_TEST_SUITE_P(ParquetDictionaryTest, - ParquetSizedTest, - testing::Range(1, 25), - testing::PrintToStringParamName()); - -// Base test fixture for V2 header tests -class ParquetV2Test : public ::cudf::test::BaseFixtureWithParam {}; -INSTANTIATE_TEST_SUITE_P(ParquetV2ReadWriteTest, - ParquetV2Test, - testing::Bool(), - testing::PrintToStringParamName()); - -namespace { -// Generates a vector of uniform random values of type T -template -inline auto random_values(size_t size) -{ - std::vector values(size); - - using T1 = T; - using uniform_distribution = - typename std::conditional_t, - std::bernoulli_distribution, - std::conditional_t, - std::uniform_real_distribution, - std::uniform_int_distribution>>; - - static constexpr auto seed = 0xf00d; - 
static std::mt19937 engine{seed}; - static uniform_distribution dist{}; - std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); - - return values; -} - -} // namespace - -TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumn) -{ - auto sequence = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i % 400); }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - constexpr auto num_rows = 800; - column_wrapper col(sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto filepath = temp_env->get_temp_filepath("SingleColumn.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls) -{ - auto sequence = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i); }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); - - constexpr auto num_rows = 100; - column_wrapper col(sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto filepath = temp_env->get_temp_filepath("SingleColumnWithNulls.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - 
-template -void test_durations(mask_op_t mask_op) -{ - std::default_random_engine generator; - std::uniform_int_distribution distribution_d(0, 30); - auto sequence_d = cudf::detail::make_counting_transform_iterator( - 0, [&](auto i) { return distribution_d(generator); }); - - std::uniform_int_distribution distribution_s(0, 86400); - auto sequence_s = cudf::detail::make_counting_transform_iterator( - 0, [&](auto i) { return distribution_s(generator); }); - - std::uniform_int_distribution distribution(0, 86400 * 1000); - auto sequence = cudf::detail::make_counting_transform_iterator( - 0, [&](auto i) { return distribution(generator); }); - - auto mask = cudf::detail::make_counting_transform_iterator(0, mask_op); - - constexpr auto num_rows = 100; - // Durations longer than a day are not exactly valid, but cudf should be able to round trip - auto durations_d = cudf::test::fixed_width_column_wrapper( - sequence_d, sequence_d + num_rows, mask); - auto durations_s = cudf::test::fixed_width_column_wrapper( - sequence_s, sequence_s + num_rows, mask); - auto durations_ms = cudf::test::fixed_width_column_wrapper( - sequence, sequence + num_rows, mask); - auto durations_us = cudf::test::fixed_width_column_wrapper( - sequence, sequence + num_rows, mask); - auto durations_ns = cudf::test::fixed_width_column_wrapper( - sequence, sequence + num_rows, mask); - - auto expected = table_view{{durations_d, durations_s, durations_ms, durations_us, durations_ns}}; - - auto filepath = temp_env->get_temp_filepath("Durations.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - auto durations_d_got = - cudf::cast(result.tbl->view().column(0), cudf::data_type{cudf::type_id::DURATION_DAYS}); - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_d, durations_d_got->view()); - - auto durations_s_got = - cudf::cast(result.tbl->view().column(1), cudf::data_type{cudf::type_id::DURATION_SECONDS}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_s, durations_s_got->view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_ms, result.tbl->view().column(2)); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_us, result.tbl->view().column(3)); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_ns, result.tbl->view().column(4)); -} - -TEST_F(ParquetWriterTest, Durations) -{ - test_durations([](auto i) { return true; }); - test_durations([](auto i) { return (i % 2) != 0; }); - test_durations([](auto i) { return (i % 3) != 0; }); - test_durations([](auto i) { return false; }); -} - -TYPED_TEST(ParquetWriterTimestampTypeTest, Timestamps) -{ - auto sequence = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto filepath = temp_env->get_temp_filepath("Timestamps.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .timestamp_type(this->type()); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampsWithNulls) -{ - auto sequence = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 30) && (i < 
60); }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto filepath = temp_env->get_temp_filepath("TimestampsWithNulls.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .timestamp_type(this->type()); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow) -{ - constexpr int64_t max = std::numeric_limits::max(); - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("ParquetTimestampOverflow.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .timestamp_type(this->type()); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_P(ParquetV2Test, MultiColumn) -{ - constexpr auto num_rows = 100000; - auto const is_v2 = GetParam(); - - // auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto 
col5_data = random_values(num_rows); - auto col6_vals = random_values(num_rows); - auto col7_vals = random_values(num_rows); - auto col8_vals = random_values(num_rows); - auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { - return numeric::decimal32{col6_vals[i], numeric::scale_type{5}}; - }); - auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { - return numeric::decimal64{col7_vals[i], numeric::scale_type{-5}}; - }); - auto col8_data = cudf::detail::make_counting_transform_iterator(0, [col8_vals](auto i) { - return numeric::decimal128{col8_vals[i], numeric::scale_type{-6}}; - }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - // column_wrapper col0{ - // col0_data.begin(), col0_data.end(), validity}; - column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; - column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; - column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; - column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; - column_wrapper col5{col5_data.begin(), col5_data.end(), validity}; - column_wrapper col6{col6_data, col6_data + num_rows, validity}; - column_wrapper col7{col7_data, col7_data + num_rows, validity}; - column_wrapper col8{col8_data, col8_data + num_rows, validity}; - - auto expected = table_view{{col1, col2, col3, col4, col5, col6, col7, col8}}; - - cudf::io::table_input_metadata expected_metadata(expected); - // expected_metadata.column_metadata[0].set_name( "bools"); - expected_metadata.column_metadata[0].set_name("int8s"); - expected_metadata.column_metadata[1].set_name("int16s"); - expected_metadata.column_metadata[2].set_name("int32s"); - expected_metadata.column_metadata[3].set_name("floats"); - expected_metadata.column_metadata[4].set_name("doubles"); - expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(10); - 
expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); - expected_metadata.column_metadata[7].set_name("decimal128s").set_decimal_precision(40); - - auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .write_v2_headers(is_v2) - .metadata(expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_P(ParquetV2Test, MultiColumnWithNulls) -{ - constexpr auto num_rows = 100; - auto const is_v2 = GetParam(); - - // auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_vals = random_values(num_rows); - auto col7_vals = random_values(num_rows); - auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { - return numeric::decimal32{col6_vals[i], numeric::scale_type{-2}}; - }); - auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { - return numeric::decimal64{col7_vals[i], numeric::scale_type{-8}}; - }); - // auto col0_mask = cudf::detail::make_counting_transform_iterator( - // 0, [](auto i) { return (i % 2); }); - auto col1_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i < 10); }); - auto col2_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - auto col3_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 
(i == (num_rows - 1)); }); - auto col4_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); - auto col5_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); - auto col6_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 5); }); - auto col7_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i != 55); }); - - // column_wrapper col0{ - // col0_data.begin(), col0_data.end(), col0_mask}; - column_wrapper col1{col1_data.begin(), col1_data.end(), col1_mask}; - column_wrapper col2{col2_data.begin(), col2_data.end(), col2_mask}; - column_wrapper col3{col3_data.begin(), col3_data.end(), col3_mask}; - column_wrapper col4{col4_data.begin(), col4_data.end(), col4_mask}; - column_wrapper col5{col5_data.begin(), col5_data.end(), col5_mask}; - column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; - column_wrapper col7{col7_data, col7_data + num_rows, col7_mask}; - - auto expected = table_view{{/*col0, */ col1, col2, col3, col4, col5, col6, col7}}; - - cudf::io::table_input_metadata expected_metadata(expected); - // expected_metadata.column_names.emplace_back("bools"); - expected_metadata.column_metadata[0].set_name("int8s"); - expected_metadata.column_metadata[1].set_name("int16s"); - expected_metadata.column_metadata[2].set_name("int32s"); - expected_metadata.column_metadata[3].set_name("floats"); - expected_metadata.column_metadata[4].set_name("doubles"); - expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(9); - expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); - - auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .write_v2_headers(is_v2) - .metadata(expected_metadata); - - 
cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - // TODO: Need to be able to return metadata in tree form from reader so they can be compared. - // Unfortunately the closest thing to a hierarchical schema is column_name_info which does not - // have any tests for it c++ or python. - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_P(ParquetV2Test, Strings) -{ - auto const is_v2 = GetParam(); - - std::vector strings{ - "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - auto const num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; - column_wrapper col1{strings.begin(), strings.end()}; - column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; - - auto expected = table_view{{col0, col1, col2}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - expected_metadata.column_metadata[1].set_name("col_string"); - expected_metadata.column_metadata[2].set_name("col_another"); - - auto filepath = temp_env->get_temp_filepath("Strings.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .write_v2_headers(is_v2) - .metadata(expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_P(ParquetV2Test, StringsAsBinary) -{ - auto const is_v2 = GetParam(); - std::vector unicode_strings{ - "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - std::vector ascii_strings{ - "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - - column_wrapper col0{ascii_strings.begin(), ascii_strings.end()}; - column_wrapper col1{unicode_strings.begin(), unicode_strings.end()}; - column_wrapper col2{ascii_strings.begin(), ascii_strings.end()}; - cudf::test::lists_column_wrapper col3{{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}; - cudf::test::lists_column_wrapper col4{ - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 200, 133, 'd', 'n', 200, 133, 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}; - - auto write_tbl = table_view{{col0, col1, col2, col3, col4}}; - - cudf::io::table_input_metadata expected_metadata(write_tbl); - expected_metadata.column_metadata[0].set_name("col_single").set_output_as_binary(true); - expected_metadata.column_metadata[1].set_name("col_string").set_output_as_binary(true); - expected_metadata.column_metadata[2].set_name("col_another").set_output_as_binary(true); - expected_metadata.column_metadata[3].set_name("col_binary"); - expected_metadata.column_metadata[4].set_name("col_binary2"); - - auto filepath = temp_env->get_temp_filepath("BinaryStrings.parquet"); - cudf::io::parquet_writer_options out_opts = - 
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl) - .write_v2_headers(is_v2) - .dictionary_policy(cudf::io::dictionary_policy::NEVER) - .metadata(expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema( - {cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema()), - cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema())}); - auto result = cudf::io::read_parquet(in_opts); - auto expected = table_view{{col3, col4, col3, col3, col4}}; - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_P(ParquetV2Test, SlicedTable) -{ - // This test checks for writing zero copy, offsetted views into existing cudf tables - - std::vector strings{ - "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - auto const num_rows = strings.size(); - auto const is_v2 = GetParam(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 != 0; }); - - column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; - column_wrapper col1{strings.begin(), strings.end()}; - column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; - - using lcw = cudf::test::lists_column_wrapper; - lcw col3{{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}}; - - // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] - // [NULL, [[13],[14,15,16]], NULL] - // [NULL, [], NULL, [[]]] - // NULL - // 
[[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] - // [NULL, [[13],[14,15,16]], NULL] - // [[[]]] - // [NULL, [], NULL, [[]]] - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - lcw col4{{ - {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, - {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, - {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, - lcw{lcw{lcw{}}}, - {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, - {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, - lcw{lcw{lcw{}}}, - {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, - }, - valids2}; - - // Struct column - auto ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351, 29, 15}, {1, 1, 1, 1, 1, 0, 1, 1}}; - - auto col5 = cudf::test::structs_column_wrapper{{ages_col}, {1, 1, 1, 1, 0, 1, 1, 1}}; - - // Struct/List mixed column - - // [] - // [NULL, 2, NULL] - // [4, 5] - // NULL - // [] - // [7, 8, 9] - // [10] - // [11, 12] - lcw land{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}, {10}, {11, 12}}, valids2}; - - // [] - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8], []] - // [[]] - // [[]] - // [[], [], []] - // [[10]] - // [[13, 14], [15]] - lcw flats{lcw{}, - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, - {{7, 8}, {}}, - lcw{lcw{}}, - lcw{lcw{}}, - lcw{lcw{}, lcw{}, lcw{}}, - {lcw{10}}, - {{13, 14}, {15}}}; - - auto struct_1 = cudf::test::structs_column_wrapper{land, flats}; - auto is_human = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, true, false, true, false}}; - auto col6 = cudf::test::structs_column_wrapper{{is_human, struct_1}}; - - auto expected = table_view({col0, col1, col2, col3, col4, col5, col6}); - - // auto expected_slice = expected; - auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows) - 1}); - - cudf::io::table_input_metadata 
expected_metadata(expected_slice); - expected_metadata.column_metadata[0].set_name("col_other"); - expected_metadata.column_metadata[1].set_name("col_string"); - expected_metadata.column_metadata[2].set_name("col_another"); - expected_metadata.column_metadata[3].set_name("col_list"); - expected_metadata.column_metadata[4].set_name("col_multi_level_list"); - expected_metadata.column_metadata[5].set_name("col_struct"); - expected_metadata.column_metadata[5].set_name("col_struct_list"); - expected_metadata.column_metadata[6].child(0).set_name("human?"); - expected_metadata.column_metadata[6].child(1).set_name("particulars"); - expected_metadata.column_metadata[6].child(1).child(0).set_name("land"); - expected_metadata.column_metadata[6].child(1).child(1).set_name("flats"); - - auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) - .write_v2_headers(is_v2) - .metadata(expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_slice, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_P(ParquetV2Test, ListColumn) -{ - auto const is_v2 = GetParam(); - - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // [NULL, 2, NULL] - // [] - // [4, 5] - // NULL - lcw col0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; - - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - lcw col1{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, 
lcw{lcw{}}}; - - // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - lcw col2{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}; - - // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] - // [[7, 8]] - // [] - // [[]] - using dlcw = cudf::test::lists_column_wrapper; - dlcw col3{{{{1., 2., 3.}, {}, {4., 5.}, {}, {{0., 6., 0.}, valids}}, valids2}, - {{7., 8.}}, - dlcw{}, - dlcw{dlcw{}}}; - - // TODO: uint16_t lists are not read properly in parquet reader - // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] - // [[7, 8]] - // [] - // NULL - // using ui16lcw = cudf::test::lists_column_wrapper; - // cudf::test::lists_column_wrapper col4{ - // {{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, ui16lcw{}, ui16lcw{ui16lcw{}}}, - // valids2}; - - // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] - // [[7, 8]] - // [] - // NULL - lcw col5{ - {{{{1, 2, 3}, {}, {4, 5}, {}, {{0, 6, 0}, valids}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}, - valids2}; - - using strlcw = cudf::test::lists_column_wrapper; - cudf::test::lists_column_wrapper col6{ - {{"Monday", "Monday", "Friday"}, {}, {"Monday", "Friday"}, {}, {"Sunday", "Funday"}}, - {{"bee", "sting"}}, - strlcw{}, - strlcw{strlcw{}}}; - - // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] - // [NULL, [[13],[14,15,16]], NULL] - // [NULL, [], NULL, [[]]] - // NULL - lcw col7{{ - {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, - {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, - {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, - lcw{lcw{lcw{}}}, - }, - valids2}; - - table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_list_int_0"); - expected_metadata.column_metadata[1].set_name("col_list_list_int_1"); - expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2"); - 
expected_metadata.column_metadata[3].set_name("col_list_list_nullable_double_nullable_3"); - // expected_metadata.column_metadata[0].set_name("col_list_list_uint16_4"); - expected_metadata.column_metadata[4].set_name("col_list_nullable_list_nullable_int_nullable_5"); - expected_metadata.column_metadata[5].set_name("col_list_list_string_6"); - expected_metadata.column_metadata[6].set_name("col_list_list_list_7"); - - auto filepath = temp_env->get_temp_filepath("ListColumn.parquet"); - auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .write_v2_headers(is_v2) - .metadata(expected_metadata) - .compression(cudf::io::compression_type::NONE); - - cudf::io::write_parquet(out_opts); - - auto in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, MultiIndex) -{ - constexpr auto num_rows = 100; - - auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - column_wrapper col0{col0_data.begin(), col0_data.end(), validity}; - column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; - column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; - column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; - column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; - - auto expected = table_view{{col0, col1, col2, col3, col4}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("int8s"); - 
expected_metadata.column_metadata[1].set_name("int16s"); - expected_metadata.column_metadata[2].set_name("int32s"); - expected_metadata.column_metadata[3].set_name("floats"); - expected_metadata.column_metadata[4].set_name("doubles"); - - auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata) - .key_value_metadata( - {{{"pandas", "\"index_columns\": [\"int8s\", \"int16s\"], \"column1\": [\"int32s\"]"}}}); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .use_pandas_metadata(true) - .columns({"int32s", "floats", "doubles"}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, BufferSource) -{ - constexpr auto num_rows = 100 << 10; - auto const seq_col = random_values(num_rows); - auto const validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - column_wrapper col{seq_col.begin(), seq_col.end(), validity}; - - auto const expected = table_view{{col}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - - std::vector out_buffer; - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) - .metadata(expected_metadata); - cudf::io::write_parquet(out_opts); - - // host buffer - { - cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info(out_buffer.data(), out_buffer.size())); - auto const result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, 
result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } - - // device buffer - { - auto const d_input = cudf::detail::make_device_uvector_sync( - cudf::host_span{reinterpret_cast(out_buffer.data()), - out_buffer.size()}, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); - auto const d_buffer = cudf::device_span( - reinterpret_cast(d_input.data()), d_input.size()); - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(d_buffer)); - auto const result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } -} - -TEST_F(ParquetWriterTest, ManyFragments) -{ - srand(31337); - auto const expected = create_random_fixed_table(10, 6'000'000, false); - - auto const filepath = temp_env->get_temp_filepath("ManyFragments.parquet"); - cudf::io::parquet_writer_options const args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected) - .max_page_size_bytes(8 * 1024); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetWriterTest, NonNullable) -{ - srand(31337); - auto expected = create_random_fixed_table(9, 9, false); - - auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - 
CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetWriterTest, Struct) -{ - // Struct> - - auto names = {"Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald", - "Cheery Littlebottom", - "Detritus", - "Mr Slant"}; - - // `Name` column has all valid values. - auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{names_col, ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto expected = table_view({*struct_2}); - - auto filepath = temp_env->get_temp_filepath("Struct.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); - cudf::io::read_parquet(read_args); -} - -TEST_P(ParquetV2Test, StructOfList) -{ - auto const is_v2 = GetParam(); - - // Struct>, - // flats:List> - // > - // > - - auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // [] - // [NULL, 2, NULL] - // [4, 5] - // NULL - // [] - // [7, 8, 9] - lcw land_unit{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}}, valids2}; - 
- // [] - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8], []] - // [[]] - // [[]] - // [[], [], []] - lcw flats{lcw{}, - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, - {{7, 8}, {}}, - lcw{lcw{}}, - lcw{lcw{}}, - lcw{lcw{}, lcw{}, lcw{}}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col, land_unit, flats}, - {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto expected = table_view({*struct_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); - expected_metadata.column_metadata[0].child(1).child(2).set_name("land_unit"); - expected_metadata.column_metadata[0].child(1).child(3).set_name("flats"); - - auto filepath = temp_env->get_temp_filepath("StructOfList.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .write_v2_headers(is_v2) - .metadata(expected_metadata); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); - auto const result = cudf::io::read_parquet(read_args); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_P(ParquetV2Test, ListOfStruct) -{ - auto const is_v2 = GetParam(); - - // List - // > - // > - - auto weight_col = 
cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{weight_col, ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto list_offsets_column = - cudf::test::fixed_width_column_wrapper{0, 2, 5, 5, 6}.release(); - auto num_list_rows = list_offsets_column->size() - 1; - - auto list_col = cudf::make_lists_column( - num_list_rows, std::move(list_offsets_column), std::move(struct_2), 0, {}); - - auto expected = table_view({*list_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("family"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .write_v2_headers(is_v2) - .metadata(expected_metadata); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); - auto const result = cudf::io::read_parquet(read_args); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -// custom data sink that supports device writes. uses plain file io. 
-class custom_test_data_sink : public cudf::io::data_sink { - public: - explicit custom_test_data_sink(std::string const& filepath) - { - outfile_.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); - CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file"); - } - - virtual ~custom_test_data_sink() { flush(); } - - void host_write(void const* data, size_t size) override - { - outfile_.write(static_cast(data), size); - } - - [[nodiscard]] bool supports_device_write() const override { return true; } - - void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override - { - this->device_write_async(gpu_data, size, stream).get(); - } - - std::future device_write_async(void const* gpu_data, - size_t size, - rmm::cuda_stream_view stream) override - { - return std::async(std::launch::deferred, [=] { - char* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDefault, stream.value())); - stream.synchronize(); - outfile_.write(ptr, size); - CUDF_CUDA_TRY(cudaFreeHost(ptr)); - }); - } - - void flush() override { outfile_.flush(); } - - size_t bytes_written() override { return outfile_.tellp(); } - - private: - std::ofstream outfile_; -}; - -TEST_F(ParquetWriterTest, CustomDataSink) -{ - auto filepath = temp_env->get_temp_filepath("CustomDataSink.parquet"); - custom_test_data_sink custom_sink(filepath); - - srand(31337); - auto expected = create_random_fixed_table(5, 10, false); - - // write out using the custom sink - { - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - } - - // write out using a memmapped sink - std::vector buf_sink; - { - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buf_sink}, *expected); - cudf::io::write_parquet(args); - } - - // read them back in and make sure 
everything matches - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); - - cudf::io::parquet_reader_options buf_args = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{buf_sink.data(), buf_sink.size()}); - auto buf_tbl = cudf::io::read_parquet(buf_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(buf_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterTest, DeviceWriteLargeishFile) -{ - auto filepath = temp_env->get_temp_filepath("DeviceWriteLargeishFile.parquet"); - custom_test_data_sink custom_sink(filepath); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_random_fixed_table(4, 4 * 1024 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterTest, PartitionedWrite) -{ - auto source = create_compressible_fixed_table(16, 4 * 1024 * 1024, 1000, false); - - auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); - auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); - - auto partition1 = cudf::io::partition_info{10, 1024 * 1024}; - auto partition2 = cudf::io::partition_info{20 * 1024 + 7, 3 * 1024 * 1024}; - - auto expected1 = - cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); - auto expected2 = - cudf::slice(*source, {partition2.start_row, partition2.start_row + 
partition2.num_rows}); - - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder( - cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) - .partitions({partition1, partition2}) - .compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(args); - - auto result1 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - - auto result2 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); -} - -TEST_P(ParquetV2Test, PartitionedWriteEmptyPartitions) -{ - auto const is_v2 = GetParam(); - - auto source = create_random_fixed_table(4, 4, false); - - auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); - auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); - - auto partition1 = cudf::io::partition_info{1, 0}; - auto partition2 = cudf::io::partition_info{1, 0}; - - auto expected1 = - cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); - auto expected2 = - cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder( - cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) - .partitions({partition1, partition2}) - .write_v2_headers(is_v2) - .compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(args); - - auto result1 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - - auto result2 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, 
result2.tbl->view()); -} - -TEST_P(ParquetV2Test, PartitionedWriteEmptyColumns) -{ - auto const is_v2 = GetParam(); - - auto source = create_random_fixed_table(0, 4, false); - - auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); - auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); - - auto partition1 = cudf::io::partition_info{1, 0}; - auto partition2 = cudf::io::partition_info{1, 0}; - - auto expected1 = - cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); - auto expected2 = - cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder( - cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) - .partitions({partition1, partition2}) - .write_v2_headers(is_v2) - .compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(args); - - auto result1 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - - auto result2 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); -} - -template -std::string create_parquet_file(int num_cols) -{ - srand(31337); - auto const table = create_random_fixed_table(num_cols, 10, true); - auto const filepath = - temp_env->get_temp_filepath(typeid(T).name() + std::to_string(num_cols) + ".parquet"); - cudf::io::parquet_writer_options const out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table->view()); - cudf::io::write_parquet(out_opts); - return filepath; -} - -TEST_F(ParquetWriterTest, MultipleMismatchedSources) -{ - auto const int5file = create_parquet_file(5); - { - auto const float5file = create_parquet_file(5); - std::vector files{int5file, 
float5file}; - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - } - { - auto const int10file = create_parquet_file(10); - std::vector files{int5file, int10file}; - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - } -} - -TEST_F(ParquetWriterTest, Slice) -{ - auto col = - cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}, {true, true, true, false, true}}; - std::vector indices{2, 5}; - std::vector result = cudf::slice(col, indices); - cudf::table_view tbl{result}; - - auto filepath = temp_env->get_temp_filepath("Slice.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto read_table = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); -} - -TEST_F(ParquetChunkedWriterTest, SingleTable) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedSingle.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); -} - -TEST_F(ParquetChunkedWriterTest, SimpleTable) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); 
- auto table2 = create_random_fixed_table(5, 5, true); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, LargeTables) -{ - srand(31337); - auto table1 = create_random_fixed_table(512, 4096, true); - auto table2 = create_random_fixed_table(512, 8192, true); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - auto md = cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); - ASSERT_EQ(md, nullptr); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ManyTables) -{ - srand(31337); - std::vector> tables; - std::vector table_views; - constexpr int num_tables = 96; - for (int idx = 0; idx < num_tables; idx++) { - auto tbl = create_random_fixed_table(16, 64, true); - table_views.push_back(*tbl); - tables.push_back(std::move(tbl)); - } - - auto expected = cudf::concatenate(table_views); - - auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.parquet"); - cudf::io::chunked_parquet_writer_options args = - 
cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { - writer.write(tbl); - }); - auto md = writer.close({"dummy/path"}); - ASSERT_NE(md, nullptr); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetChunkedWriterTest, Strings) -{ - std::vector> cols; - - bool mask1[] = {true, true, false, true, true, true, true}; - std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; - cudf::test::strings_column_wrapper strings1(h_strings1.begin(), h_strings1.end(), mask1); - cols.push_back(strings1.release()); - cudf::table tbl1(std::move(cols)); - - bool mask2[] = {false, true, true, true, true, true, true}; - std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; - cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), mask2); - cols.push_back(strings2.release()); - cudf::table tbl2(std::move(cols)); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetChunkedWriterTest, ListColumn) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto 
valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // COL0 (Same nullability) ==================== - // [NULL, 2, NULL] - // [] - // [4, 5] - // NULL - lcw col0_tbl0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; - - // [7, 8, 9] - // [] - // [NULL, 11] - // NULL - lcw col0_tbl1{{{7, 8, 9}, {}, {{10, 11}, valids}, {}}, valids2}; - - // COL1 (Nullability different in different chunks, test of merging nullability in writer) - // [NULL, 2, NULL] - // [] - // [4, 5] - // [] - lcw col1_tbl0{{{1, 2, 3}, valids}, {}, {4, 5}, {}}; - - // [7, 8, 9] - // [] - // [10, 11] - // NULL - lcw col1_tbl1{{{7, 8, 9}, {}, {10, 11}, {}}, valids2}; - - // COL2 (non-nested columns to test proper schema construction) - size_t num_rows_tbl0 = static_cast(col0_tbl0).size(); - size_t num_rows_tbl1 = static_cast(col0_tbl1).size(); - auto seq_col0 = random_values(num_rows_tbl0); - auto seq_col1 = random_values(num_rows_tbl1); - - column_wrapper col2_tbl0{seq_col0.begin(), seq_col0.end(), valids}; - column_wrapper col2_tbl1{seq_col1.begin(), seq_col1.end(), valids2}; - - auto tbl0 = table_view({col0_tbl0, col1_tbl0, col2_tbl0}); - auto tbl1 = table_view({col0_tbl1, col1_tbl1, col2_tbl1}); - - auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); - - auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl0).write(tbl1); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetChunkedWriterTest, ListOfStruct) -{ - // Table 1 - auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; - 
auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - - auto list_offsets_column_1 = - cudf::test::fixed_width_column_wrapper{0, 2, 3, 3}.release(); - auto num_list_rows_1 = list_offsets_column_1->size() - 1; - - auto list_col_1 = cudf::make_lists_column( - num_list_rows_1, std::move(list_offsets_column_1), struct_2_1.release(), 0, {}); - - auto table_1 = table_view({*list_col_1}); - - // Table 2 - auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}, {1, 1, 0}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - - auto list_offsets_column_2 = - cudf::test::fixed_width_column_wrapper{0, 1, 2, 3}.release(); - auto num_list_rows_2 = list_offsets_column_2->size() - 1; - - auto list_col_2 = cudf::make_lists_column( - num_list_rows_2, std::move(list_offsets_column_2), struct_2_2.release(), 0, {}); - - auto table_2 = table_view({*list_col_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("family"); - expected_metadata.column_metadata[0].child(1).set_nullability(false); - expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); - 
expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // Table 1 =========================== - - // [] - // [NULL, 2, NULL] - // [4, 5] - // NULL - lcw land_1{{{}, {{1, 2, 3}, valids}, {4, 5}, {}}, valids2}; - - // [] - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8], []] - // [[]] - lcw flats_1{lcw{}, {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}, {}}, lcw{lcw{}}}; - - auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3, 1.1}}; - auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5, 31}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1, land_1, flats_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - - auto list_offsets_column_1 = - cudf::test::fixed_width_column_wrapper{0, 2, 3, 4}.release(); - auto num_list_rows_1 = list_offsets_column_1->size() - 1; - - auto list_col_1 = cudf::make_lists_column( - 
num_list_rows_1, std::move(list_offsets_column_1), struct_2_1.release(), 0, {}); - - auto table_1 = table_view({*list_col_1}); - - // Table 2 =========================== - - // [] - // [7, 8, 9] - lcw land_2{{}, {7, 8, 9}}; - - // [[]] - // [[], [], []] - lcw flats_2{lcw{lcw{}}, lcw{lcw{}, lcw{}, lcw{}}}; - - auto weight_2 = cudf::test::fixed_width_column_wrapper{{-1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{351, 351}, {1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2, land_2, flats_2}, {0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false}, {1, 0}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - - auto list_offsets_column_2 = - cudf::test::fixed_width_column_wrapper{0, 1, 2}.release(); - auto num_list_rows_2 = list_offsets_column_2->size() - 1; - - auto list_col_2 = cudf::make_lists_column( - num_list_rows_2, std::move(list_offsets_column_2), struct_2_2.release(), 0, {}); - - auto table_2 = table_view({*list_col_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("family"); - expected_metadata.column_metadata[0].child(1).set_nullability(false); - expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); - expected_metadata.column_metadata[0].child(1).child(1).child(2).set_name("land_unit"); - expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats"); - - auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet"); - cudf::io::chunked_parquet_writer_options args = - 
cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - - // We specifically mentioned in input schema that struct_2 is non-nullable across chunked calls. - auto result_parent_list = result.tbl->get_column(0); - auto result_struct_2 = result_parent_list.child(cudf::lists_column_view::child_column_index); - EXPECT_EQ(result_struct_2.nullable(), false); -} - -TEST_F(ParquetChunkedWriterTest, MismatchedTypes) -{ - srand(31337); - auto table1 = create_random_fixed_table(4, 4, true); - auto table2 = create_random_fixed_table(4, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table1); - EXPECT_THROW(writer.write(*table2), cudf::logic_error); - writer.close(); -} - -TEST_F(ParquetChunkedWriterTest, ChunkedWriteAfterClosing) -{ - srand(31337); - auto table = create_random_fixed_table(4, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedWriteAfterClosing.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table).close(); - EXPECT_THROW(writer.write(*table), cudf::logic_error); -} - -TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) -{ - srand(31337); - auto table = create_random_fixed_table(4, 4, true); - - auto 
filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); -} - -TEST_F(ParquetChunkedWriterTest, MismatchedStructure) -{ - srand(31337); - auto table1 = create_random_fixed_table(4, 4, true); - auto table2 = create_random_fixed_table(3, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table1); - EXPECT_THROW(writer.write(*table2), cudf::logic_error); - writer.close(); -} - -TEST_F(ParquetChunkedWriterTest, MismatchedStructureList) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // COL0 (mismatched depth) ==================== - // [NULL, 2, NULL] - // [] - // [4, 5] - // NULL - lcw col00{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; - - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - lcw col01{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; - - // COL2 (non-nested columns to test proper schema construction) - size_t num_rows = static_cast(col00).size(); - auto seq_col0 = random_values(num_rows); - auto seq_col1 = random_values(num_rows); - - column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; - column_wrapper col11{seq_col1.begin(), seq_col1.end(), 
valids2}; - - auto tbl0 = table_view({col00, col10}); - auto tbl1 = table_view({col01, col11}); - - auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(tbl0); - EXPECT_THROW(writer.write(tbl1), cudf::logic_error); -} - -TEST_F(ParquetChunkedWriterTest, DifferentNullability) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, false); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) -{ - // Struct, - // age:int - // > (nullable) - // > (non-nullable) - - // Table 1: is_human and struct_1 are non-nullable but should be nullable when read back. 
- auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; - auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - auto table_1 = cudf::table_view({struct_2_1}); - - // Table 2: struct_1 and is_human are nullable now so if we hadn't assumed worst case (nullable) - // when writing table_1, we would have wrong pages for it. - auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}, {1, 1, 0}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - auto table_2 = cudf::table_view({struct_2_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetChunkedWriterTest, ForcedNullability) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, false); - auto table2 = create_random_fixed_table(5, 5, false); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet"); - - cudf::io::table_input_metadata metadata(*table1); - - // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst - // and considers all columns nullable. However cudf::concatenate will not force nulls in case no - // columns are nullable. To get the expected result, we tell the writer the nullability of all - // columns in advance. - for (auto& col_meta : metadata.column_metadata) { - col_meta.set_nullability(false); - } - - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) - .metadata(std::move(metadata)); - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) -{ - srand(31337); - - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // COL0 ==================== - // [1, 2, 3] - // [] - // [4, 5] - // NULL - lcw col00{{{1, 2, 3}, {}, {4, 5}, 
{}}, valids2}; - - // [7] - // [] - // [8, 9, 10, 11] - // NULL - lcw col01{{{7}, {}, {8, 9, 10, 11}, {}}, valids2}; - - // COL1 (non-nested columns to test proper schema construction) - size_t num_rows = static_cast(col00).size(); - auto seq_col0 = random_values(num_rows); - auto seq_col1 = random_values(num_rows); - - column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; - column_wrapper col11{seq_col1.begin(), seq_col1.end(), valids2}; - - auto table1 = table_view({col00, col10}); - auto table2 = table_view({col01, col11}); - - auto full_table = cudf::concatenate(std::vector({table1, table2})); - - cudf::io::table_input_metadata metadata(table1); - metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level - metadata.column_metadata[0].child(1).set_nullability( - false); // non-nullable at second (leaf) level - metadata.column_metadata[1].set_nullability(true); - - auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet"); - - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) - .metadata(std::move(metadata)); - cudf::io::parquet_chunked_writer(args).write(table1).write(table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) -{ - // Struct, - // age:int - // > (nullable) - // > (non-nullable) - - // Table 1: is_human and struct_2 are non-nullable and should stay that way when read back. 
- auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; - auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - auto table_1 = cudf::table_view({struct_2_1}); - - auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - auto table_2 = cudf::table_view({struct_2_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("being").set_nullability(false); - expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false); - expected_metadata.column_metadata[0].child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); - 
cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetChunkedWriterTest, ReadRowGroups) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, true); - - auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - { - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - } - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .row_groups({{1, 0, 1}}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedRowGroupsError.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).row_groups({{0, 1}}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - read_opts.set_row_groups({{-1}}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - read_opts.set_row_groups({{0}, {0}}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); -} - -TEST_F(ParquetWriterTest, DecimalWrite) -{ - constexpr cudf::size_type num_rows = 500; - auto seq_col0 = random_values(num_rows); - auto seq_col1 = random_values(num_rows); - - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { 
return i % 2 == 0; }); - - auto col0 = cudf::test::fixed_point_column_wrapper{ - seq_col0.begin(), seq_col0.end(), valids, numeric::scale_type{5}}; - auto col1 = cudf::test::fixed_point_column_wrapper{ - seq_col1.begin(), seq_col1.end(), valids, numeric::scale_type{-9}}; - - auto table = table_view({col0, col1}); - - auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table); - - cudf::io::table_input_metadata expected_metadata(table); - - // verify failure if too small a precision is given - expected_metadata.column_metadata[0].set_decimal_precision(7); - expected_metadata.column_metadata[1].set_decimal_precision(1); - args.set_metadata(expected_metadata); - EXPECT_THROW(cudf::io::write_parquet(args), cudf::logic_error); - - // verify success if equal precision is given - expected_metadata.column_metadata[0].set_decimal_precision(7); - expected_metadata.column_metadata[1].set_decimal_precision(9); - args.set_metadata(std::move(expected_metadata)); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table); -} - -TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) -{ - // write out two 31 row tables and make sure they get - // read back with all their validity bits in the right place - - using T = TypeParam; - - int num_els = 31; - std::vector> cols; - - bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, true}; - T c1a[num_els]; - std::fill(c1a, c1a + num_els, static_cast(5)); - T c1b[num_els]; - std::fill(c1b, c1b + num_els, static_cast(6)); - column_wrapper c1a_w(c1a, 
c1a + num_els, mask); - column_wrapper c1b_w(c1b, c1b + num_els, mask); - cols.push_back(c1a_w.release()); - cols.push_back(c1b_w.release()); - cudf::table tbl1(std::move(cols)); - - T c2a[num_els]; - std::fill(c2a, c2a + num_els, static_cast(8)); - T c2b[num_els]; - std::fill(c2b, c2b + num_els, static_cast(9)); - column_wrapper c2a_w(c2a, c2a + num_els, mask); - column_wrapper c2b_w(c2b, c2b + num_els, mask); - cols.push_back(c2a_w.release()); - cols.push_back(c2b_w.release()); - cudf::table tbl2(std::move(cols)); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) -{ - // write out two 33 row tables and make sure they get - // read back with all their validity bits in the right place - - using T = TypeParam; - - int num_els = 33; - std::vector> cols; - - bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true}; - - T c1a[num_els]; - std::fill(c1a, c1a + num_els, static_cast(5)); - T c1b[num_els]; - std::fill(c1b, c1b + num_els, static_cast(6)); - column_wrapper c1a_w(c1a, c1a + num_els, mask); - column_wrapper c1b_w(c1b, c1b + num_els, mask); - cols.push_back(c1a_w.release()); - cols.push_back(c1b_w.release()); - cudf::table tbl1(std::move(cols)); - - T c2a[num_els]; - std::fill(c2a, c2a + num_els, 
static_cast(8)); - T c2b[num_els]; - std::fill(c2b, c2b + num_els, static_cast(9)); - column_wrapper c2a_w(c2a, c2a + num_els, mask); - column_wrapper c2b_w(c2b, c2b + num_els, mask); - cols.push_back(c2a_w.release()); - cols.push_back(c2b_w.release()); - cudf::table tbl2(std::move(cols)); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -// custom mem mapped data sink that supports device writes -template -class custom_test_memmap_sink : public cudf::io::data_sink { - public: - explicit custom_test_memmap_sink(std::vector* mm_writer_buf) - { - mm_writer = cudf::io::data_sink::create(mm_writer_buf); - } - - virtual ~custom_test_memmap_sink() { mm_writer->flush(); } - - void host_write(void const* data, size_t size) override { mm_writer->host_write(data, size); } - - [[nodiscard]] bool supports_device_write() const override { return supports_device_writes; } - - void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override - { - this->device_write_async(gpu_data, size, stream).get(); - } - - std::future device_write_async(void const* gpu_data, - size_t size, - rmm::cuda_stream_view stream) override - { - return std::async(std::launch::deferred, [=] { - char* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDefault, stream.value())); - stream.synchronize(); - mm_writer->host_write(ptr, size); - CUDF_CUDA_TRY(cudaFreeHost(ptr)); - }); 
- } - - void flush() override { mm_writer->flush(); } - - size_t bytes_written() override { return mm_writer->bytes_written(); } - - private: - std::unique_ptr mm_writer; -}; - -TEST_F(ParquetWriterStressTest, LargeTableWeakCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, LargeTableGoodCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, LargeTableWithValids) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - 
// exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWeakCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableGoodCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); 
- cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWithValids) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetReaderTest, UserBounds) -{ - // trying to read more rows than there are should result in - // receiving the properly capped # of rows - { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("TooManyRows.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(16); - auto result = cudf::io::read_parquet(read_opts); - - // we should only get back 4 rows - EXPECT_EQ(result.tbl->view().column(0).size(), 4); - } - - // trying to read past the end of the # of 
actual rows should result - // in empty columns. - { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("PastBounds.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).skip_rows(4); - auto result = cudf::io::read_parquet(read_opts); - - // we should get empty columns back - EXPECT_EQ(result.tbl->view().num_columns(), 4); - EXPECT_EQ(result.tbl->view().column(0).size(), 0); - } - - // trying to read 0 rows should result in empty columns - { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("ZeroRows.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(0); - auto result = cudf::io::read_parquet(read_opts); - - EXPECT_EQ(result.tbl->view().num_columns(), 4); - EXPECT_EQ(result.tbl->view().column(0).size(), 0); - } - - // trying to read 0 rows past the end of the # of actual rows should result - // in empty columns. 
- { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("ZeroRowsPastBounds.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .skip_rows(4) - .num_rows(0); - auto result = cudf::io::read_parquet(read_opts); - - // we should get empty columns back - EXPECT_EQ(result.tbl->view().num_columns(), 4); - EXPECT_EQ(result.tbl->view().column(0).size(), 0); - } -} - -TEST_F(ParquetReaderTest, UserBoundsWithNulls) -{ - // clang-format off - cudf::test::fixed_width_column_wrapper col{{1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6, 7,7,7,7,7,7,7,7, 8,8,8,8,8,8,8,8} - ,{1,1,1,0,0,0,1,1, 1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0, 1,1,1,1,1,1,0,0, 1,0,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,0}}; - // clang-format on - cudf::table_view tbl({col}); - auto filepath = temp_env->get_temp_filepath("UserBoundsWithNulls.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // skip_rows / num_rows - // clang-format off - std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, - {31, -1}, {32, -1}, {33, -1}, - {31, 5}, {32, 5}, {33, 5}, - {-1, 7}, {-1, 31}, {-1, 32}, {-1, 33}, - {62, -1}, {63, -1}, - {62, 2}, {63, 1}}; - // clang-format on - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = 
cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? static_cast(col).size() - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(col, slice_indices); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); - } -} - -TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) -{ - constexpr int num_rows = 32 * 1024; - - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); - auto values = thrust::make_counting_iterator(0); - - // int64 - cudf::test::fixed_width_column_wrapper c0(values, values + num_rows, valids); - - // list - constexpr int floats_per_row = 4; - auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( - 0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; }); - cudf::test::fixed_width_column_wrapper c1_offsets(c1_offset_iter, - c1_offset_iter + num_rows + 1); - cudf::test::fixed_width_column_wrapper c1_floats( - values, values + (num_rows * floats_per_row), valids); - auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); - - auto _c1 = cudf::make_lists_column( - num_rows, c1_offsets.release(), c1_floats.release(), null_count, std::move(null_mask)); - auto c1 = cudf::purge_nonempty_nulls(*_c1); - - // list> - auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); - - // struct, int, float> - std::vector strings{ - "abc", "x", "bananas", "gpu", "minty", "backspace", "", "cayenne", "turbine", "soft"}; - std::uniform_int_distribution uni(0, strings.size() - 1); - auto string_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](cudf::size_type idx) { return strings[uni(gen)]; }); - constexpr int string_per_row = 3; - constexpr int num_string_rows = num_rows * string_per_row; - cudf::test::strings_column_wrapper 
string_col{string_iter, string_iter + num_string_rows}; - auto offset_iter = cudf::detail::make_counting_transform_iterator( - 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); - cudf::test::fixed_width_column_wrapper offsets(offset_iter, - offset_iter + num_rows + 1); - - auto _c3_valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 200; }); - std::vector c3_valids(num_rows); - std::copy(_c3_valids, _c3_valids + num_rows, c3_valids.begin()); - std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask(valids, valids + num_rows); - auto _c3_list = cudf::make_lists_column( - num_rows, offsets.release(), string_col.release(), null_count, std::move(null_mask)); - auto c3_list = cudf::purge_nonempty_nulls(*_c3_list); - cudf::test::fixed_width_column_wrapper c3_ints(values, values + num_rows, valids); - cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); - std::vector> c3_children; - c3_children.push_back(std::move(c3_list)); - c3_children.push_back(c3_ints.release()); - c3_children.push_back(c3_floats.release()); - cudf::test::structs_column_wrapper _c3(std::move(c3_children), c3_valids); - auto c3 = cudf::purge_nonempty_nulls(_c3); - - // write it out - cudf::table_view tbl({c0, *c1, *c2, *c3}); - auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // read it back - std::vector> params{ - {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}}; - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = 
cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? num_rows - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(tbl, slice_indices); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected[0]); - } -} - -TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) -{ - constexpr int num_rows = 30 * 1000000; - - std::mt19937 gen(6747); - std::bernoulli_distribution bn(0.7f); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); - auto values = thrust::make_counting_iterator(0); - - cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); - - // this file will have row groups of 1,000,000 each - cudf::table_view tbl({col}); - auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsLarge.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // skip_rows / num_rows - // clang-format off - std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {1613470, -1}, {1999999, -1}, - {31, 1}, {32, 1}, {33, 1}, - // deliberately span some row group boundaries - {999000, 1001}, {999000, 2000}, {2999999, 2}, {13999997, -1}, - {16785678, 3}, {22996176, 31}, - {24001231, 17}, {29000001, 989999}, {29999999, 1} }; - // clang-format on - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(col, slice_indices); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); - } -} - -TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) -{ - constexpr int num_rows = 5 * 1000000; - auto colp = make_parquet_list_list_col(0, num_rows, 5, 8, true); - cudf::column_view col = *colp; - - // this file will have row groups of 1,000,000 each - cudf::table_view tbl({col}); - auto filepath = temp_env->get_temp_filepath("ListUserBoundsWithNullsLarge.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // skip_rows / num_rows - // clang-format off - std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {161470, -1}, {4499997, -1}, - {31, 1}, {32, 1}, {33, 1}, - // deliberately span some row group boundaries - {999000, 1001}, {999000, 2000}, {2999999, 2}, - {1678567, 3}, {4299676, 31}, - {4001231, 17}, {1900000, 989999}, {4999999, 1} }; - // clang-format on - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(col, slice_indices); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); - } -} - -TEST_F(ParquetReaderTest, ReorderedColumns) -{ - { - auto a = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; - auto b = cudf::test::fixed_width_column_wrapper{1, 2, 3}; - - cudf::table_view tbl{{a, b}}; - auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet"); - cudf::io::table_input_metadata md(tbl); - md.column_metadata[0].set_name("a"); - md.column_metadata[1].set_name("b"); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(md); - cudf::io::write_parquet(opts); - - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"b", "a"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); - } - - { - auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3}; - auto b = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; - - cudf::table_view tbl{{a, b}}; - auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet"); - cudf::io::table_input_metadata md(tbl); - md.column_metadata[0].set_name("a"); - md.column_metadata[1].set_name("b"); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(md); - cudf::io::write_parquet(opts); - - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"b", "a"}); - auto result = cudf::io::read_parquet(read_opts); - - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); - } - - auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3, 10, 20, 30}; - auto b = cudf::test::strings_column_wrapper{{"a", "", "c", "cats", "dogs", "owls"}, - {true, false, true, true, false, true}}; - auto c = cudf::test::fixed_width_column_wrapper{{15, 16, 17, 25, 26, 32}, - {false, true, true, true, true, false}}; - auto d = cudf::test::strings_column_wrapper{"ducks", "sheep", "cows", "fish", "birds", "ants"}; - - cudf::table_view tbl{{a, b, c, d}}; - auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet"); - cudf::io::table_input_metadata md(tbl); - md.column_metadata[0].set_name("a"); - md.column_metadata[1].set_name("b"); - md.column_metadata[2].set_name("c"); - md.column_metadata[3].set_name("d"); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl) - .metadata(std::move(md)); - cudf::io::write_parquet(opts); - - { - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"d", "a", "b", "c"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), c); - } - - { - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"c", "d", "a", "b"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), c); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), d); - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), a); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), b); - } - - { - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"d", "c", "b", "a"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), c); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), a); - } -} - -TEST_F(ParquetReaderTest, SelectNestedColumn) -{ - // Struct>, - // flats:List> - // > - // > - - auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto input = table_view({*struct_2}); - - cudf::io::table_input_metadata input_metadata(input); - input_metadata.column_metadata[0].set_name("being"); - input_metadata.column_metadata[0].child(0).set_name("human?"); - input_metadata.column_metadata[0].child(1).set_name("particulars"); - input_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - input_metadata.column_metadata[0].child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("SelectNestedColumn.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) - .metadata(std::move(input_metadata)); - 
cudf::io::write_parquet(args); - - { // Test selecting a single leaf from the table - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) - .columns({"being.particulars.age"}); - auto const result = cudf::io::read_parquet(read_args); - - auto expect_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - auto expect_s_1 = cudf::test::structs_column_wrapper{{expect_ages_col}, {1, 1, 1, 1, 0, 1}}; - auto expect_s_2 = - cudf::test::structs_column_wrapper{{expect_s_1}, {0, 1, 1, 1, 1, 1}}.release(); - auto expected = table_view({*expect_s_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("particulars"); - expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } - - { // Test selecting a non-leaf and expecting all hierarchy from that node onwards - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) - .columns({"being.particulars"}); - auto const result = cudf::io::read_parquet(read_args); - - auto expected_weights_col = - cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto expected_s_1 = cudf::test::structs_column_wrapper{ - {expected_weights_col, expected_ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto expect_s_2 = - cudf::test::structs_column_wrapper{{expected_s_1}, {0, 1, 1, 1, 1, 1}}.release(); - auto expected = table_view({*expect_s_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); 
- expected_metadata.column_metadata[0].child(0).set_name("particulars"); - expected_metadata.column_metadata[0].child(0).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(0).child(1).set_name("age"); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } - - { // Test selecting struct children out of order - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) - .columns({"being.particulars.age", "being.particulars.weight", "being.human?"}); - auto const result = cudf::io::read_parquet(read_args); - - auto expected_weights_col = - cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto expected_is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto expect_s_1 = cudf::test::structs_column_wrapper{{expected_ages_col, expected_weights_col}, - {1, 1, 1, 1, 0, 1}}; - - auto expect_s_2 = - cudf::test::structs_column_wrapper{{expect_s_1, expected_is_human_col}, {0, 1, 1, 1, 1, 1}} - .release(); - - auto expected = table_view({*expect_s_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("particulars"); - expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); - expected_metadata.column_metadata[0].child(0).child(1).set_name("weight"); - expected_metadata.column_metadata[0].child(1).set_name("human?"); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } -} - -TEST_F(ParquetReaderTest, DecimalRead) -{ - { - /* We could add a dataset to include this 
file, but we don't want tests in cudf to have data. - This test is a temporary test until python gains the ability to write decimal, so we're - embedding - a parquet file directly into the code here to prevent issues with finding the file */ - unsigned char const decimals_parquet[] = { - 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xb0, 0x03, 0x15, 0xb8, 0x03, 0x2c, 0x15, 0x6a, - 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, - 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, 0x00, 0x00, 0x00, 0xd8, 0x01, 0xf0, 0xd7, 0x04, 0x00, - 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x68, 0x12, 0xdc, 0xff, 0xbd, 0x18, 0xfd, 0xff, 0x64, - 0x13, 0x80, 0x00, 0xb3, 0x5d, 0x62, 0x00, 0x90, 0x35, 0xa9, 0xff, 0xa2, 0xde, 0xe3, 0xff, - 0xe9, 0xbf, 0x96, 0xff, 0x1f, 0x8a, 0x98, 0xff, 0xb1, 0x50, 0x34, 0x00, 0x88, 0x24, 0x59, - 0x00, 0x2a, 0x33, 0xbe, 0xff, 0xd5, 0x16, 0xbc, 0xff, 0x13, 0x50, 0x8d, 0xff, 0xcb, 0x63, - 0x2d, 0x00, 0x80, 0x8f, 0xbe, 0xff, 0x82, 0x40, 0x10, 0x00, 0x84, 0x68, 0x70, 0xff, 0x9b, - 0x69, 0x78, 0x00, 0x14, 0x6c, 0x10, 0x00, 0x50, 0xd9, 0xe1, 0xff, 0xaa, 0xcd, 0x6a, 0x00, - 0xcf, 0xb1, 0x28, 0x00, 0x77, 0x57, 0x8d, 0x00, 0xee, 0x05, 0x79, 0x00, 0xf0, 0x15, 0xeb, - 0xff, 0x02, 0xe2, 0x06, 0x00, 0x87, 0x43, 0x86, 0x00, 0xf8, 0x2d, 0x2e, 0x00, 0xee, 0x2e, - 0x98, 0xff, 0x39, 0xcb, 0x4d, 0x00, 0x1e, 0x6b, 0xea, 0xff, 0x80, 0x8e, 0x6c, 0xff, 0x97, - 0x25, 0x26, 0x00, 0x4d, 0x0d, 0x0a, 0x00, 0xca, 0x64, 0x7f, 0x00, 0xf4, 0xbe, 0xa1, 0xff, - 0xe2, 0x12, 0x6c, 0xff, 0xbd, 0x77, 0xae, 0xff, 0xf9, 0x4b, 0x36, 0x00, 0xb0, 0xe3, 0x79, - 0xff, 0xa2, 0x2a, 0x29, 0x00, 0xcd, 0x06, 0xbc, 0xff, 0x2d, 0xa3, 0x7e, 0x00, 0xa9, 0x08, - 0xa1, 0xff, 0xbf, 0x81, 0xd0, 0xff, 0x4f, 0x03, 0x73, 0x00, 0xb0, 0x99, 0x0c, 0x00, 0xbd, - 0x6f, 0xf8, 0xff, 0x6b, 0x02, 0x05, 0x00, 0xc1, 0xe1, 0xba, 0xff, 0x81, 0x69, 0x67, 0xff, - 0x7f, 0x96, 0x98, 0x00, 0x15, 0x00, 0x15, 0xd0, 0x06, 0x15, 0xda, 0x06, 0x2c, 0x15, 0x6a, - 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 
0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, - 0xf3, 0x5a, 0x00, 0x00, 0x18, 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, - 0x00, 0x00, 0xa8, 0x03, 0xf4, 0xa7, 0x01, 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, - 0x55, 0x6f, 0xc5, 0xe4, 0x9f, 0x1a, 0x00, 0x00, 0x47, 0x89, 0x0a, 0xe8, 0x58, 0xf0, 0xff, - 0xff, 0x63, 0xee, 0x21, 0xdd, 0xdd, 0xca, 0xff, 0xff, 0xbe, 0x6f, 0x3b, 0xaa, 0xe9, 0x3d, - 0x00, 0x00, 0xd6, 0x91, 0x2a, 0xb7, 0x08, 0x02, 0x00, 0x00, 0x75, 0x45, 0x2c, 0xd7, 0x76, - 0x0c, 0x00, 0x00, 0x54, 0x49, 0x92, 0x44, 0x9c, 0xbf, 0xff, 0xff, 0x41, 0xa9, 0x6d, 0xec, - 0x7a, 0xd0, 0xff, 0xff, 0x27, 0xa0, 0x23, 0x41, 0x44, 0xc1, 0xff, 0xff, 0x18, 0xd4, 0xe1, - 0x30, 0xd3, 0xe0, 0xff, 0xff, 0x59, 0xac, 0x14, 0xf4, 0xec, 0x58, 0x00, 0x00, 0x2c, 0x17, - 0x29, 0x57, 0x44, 0x13, 0x00, 0x00, 0xa2, 0x0d, 0x4a, 0xcc, 0x63, 0xff, 0xff, 0xff, 0x81, - 0x33, 0xbc, 0xda, 0xd5, 0xda, 0xff, 0xff, 0x4c, 0x05, 0xf4, 0x78, 0x19, 0xea, 0xff, 0xff, - 0x06, 0x71, 0x25, 0xde, 0x5a, 0xaf, 0xff, 0xff, 0x95, 0x32, 0x5f, 0x76, 0x98, 0xb3, 0xff, - 0xff, 0xf1, 0x34, 0x3c, 0xbf, 0xa8, 0xbe, 0xff, 0xff, 0x27, 0x73, 0x40, 0x0c, 0x7d, 0xcd, - 0xff, 0xff, 0x68, 0xa9, 0xc2, 0xe9, 0x2c, 0x03, 0x00, 0x00, 0x3f, 0x79, 0xd9, 0x04, 0x8c, - 0xe5, 0xff, 0xff, 0x91, 0xb4, 0x9b, 0xe3, 0x8f, 0x21, 0x00, 0x00, 0xb8, 0x20, 0xc8, 0xc2, - 0x4d, 0xa6, 0xff, 0xff, 0x47, 0xfa, 0xde, 0x36, 0x4a, 0xf3, 0xff, 0xff, 0x72, 0x80, 0x94, - 0x59, 0xdd, 0x4e, 0x00, 0x00, 0x29, 0xe4, 0xd6, 0x43, 0xb0, 0xf0, 0xff, 0xff, 0x68, 0x36, - 0xbc, 0x2d, 0xd1, 0xa9, 0xff, 0xff, 0xbc, 0xe4, 0xbe, 0xd7, 0xed, 0x1b, 0x00, 0x00, 0x02, - 0x8b, 0xcb, 0xd7, 0xed, 0x47, 0x00, 0x00, 0x3c, 0x06, 0xe4, 0xda, 0xc7, 0x47, 0x00, 0x00, - 0xf3, 0x39, 0x55, 0x28, 0x97, 0xba, 0xff, 0xff, 0x07, 0x79, 0x38, 0x4e, 0xe0, 0x21, 0x00, - 0x00, 0xde, 0xed, 0x1c, 0x23, 0x09, 0x49, 0x00, 0x00, 0x49, 0x46, 0x49, 0x5d, 0x8f, 0x34, - 0x00, 0x00, 0x38, 0x18, 0x50, 0xf6, 0xa1, 0x11, 0x00, 0x00, 0xdf, 0xb8, 0x19, 0x14, 0xd1, - 0xe1, 0xff, 0xff, 0x2c, 0x56, 
0x72, 0x93, 0x64, 0x3f, 0x00, 0x00, 0x1c, 0xe0, 0xbe, 0x87, - 0x7d, 0xf9, 0xff, 0xff, 0x73, 0x0e, 0x3c, 0x01, 0x91, 0xf9, 0xff, 0xff, 0xb2, 0x37, 0x85, - 0x81, 0x5f, 0x54, 0x00, 0x00, 0x58, 0x44, 0xb0, 0x1a, 0xac, 0xbb, 0xff, 0xff, 0x36, 0xbf, - 0xbe, 0x5e, 0x22, 0xff, 0xff, 0xff, 0x06, 0x20, 0xa0, 0x23, 0x0d, 0x3b, 0x00, 0x00, 0x19, - 0xc6, 0x49, 0x0a, 0x00, 0xcf, 0xff, 0xff, 0x4f, 0xcd, 0xc6, 0x95, 0x4b, 0xf1, 0xff, 0xff, - 0xa3, 0x59, 0xaf, 0x65, 0xec, 0xe9, 0xff, 0xff, 0x58, 0xef, 0x05, 0x50, 0x63, 0xe4, 0xff, - 0xff, 0xc7, 0x6a, 0x9e, 0xf1, 0x69, 0x20, 0x00, 0x00, 0xd1, 0xb3, 0xc9, 0x14, 0xb2, 0x29, - 0x00, 0x00, 0x1d, 0x48, 0x16, 0x70, 0xf0, 0x40, 0x00, 0x00, 0x01, 0xc0, 0x85, 0xef, 0x0c, - 0xa5, 0xff, 0xff, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x15, 0x00, 0x15, 0x90, - 0x0d, 0x15, 0x9a, 0x0d, 0x2c, 0x15, 0x6a, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, - 0x02, 0x28, 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, - 0xff, 0xff, 0xff, 0xff, 0x18, 0x10, 0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, - 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xc8, 0x06, 0xf4, 0x47, 0x03, - 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x05, 0x49, 0xf7, 0xfc, 0x89, 0x3d, 0x3e, - 0x20, 0x07, 0x72, 0x3e, 0xa1, 0x66, 0x81, 0x67, 0x80, 0x23, 0x78, 0x06, 0x68, 0x0e, 0x78, - 0xf5, 0x08, 0xed, 0x20, 0xcd, 0x0e, 0x7f, 0x9c, 0x70, 0xa0, 0xb9, 0x16, 0x44, 0xb2, 0x41, - 0x62, 0xba, 0x82, 0xad, 0xe1, 0x12, 0x9b, 0xa6, 0x53, 0x8d, 0x20, 0x27, 0xd5, 0x84, 0x63, - 0xb8, 0x07, 0x4b, 0x5b, 0xa4, 0x1c, 0xa4, 0x1c, 0x17, 0xbf, 0x4b, 0x00, 0x24, 0x04, 0x56, - 0xa8, 0x52, 0xaf, 0x33, 0xf7, 0xad, 0x7c, 0xc8, 0x83, 0x25, 0x13, 0xaf, 0x80, 0x25, 0x6f, - 0xbd, 0xd1, 0x15, 0x69, 0x64, 0x20, 0x7b, 0xd7, 0x33, 0xba, 0x66, 0x29, 0x8a, 0x00, 0xda, - 0x42, 0x07, 0x2c, 0x6c, 0x39, 0x76, 0x9f, 0xdc, 0x17, 0xad, 0xb6, 0x58, 0xdf, 0x5f, 0x00, - 0x18, 0x3a, 0xae, 0x1c, 0xd6, 0x5f, 0x9d, 0x78, 0x8d, 0x73, 0xdd, 0x3e, 0xd6, 0x18, 0x33, - 0x40, 
0xe4, 0x36, 0xde, 0xb0, 0xb7, 0x33, 0x2a, 0x6b, 0x08, 0x03, 0x6c, 0x6d, 0x8f, 0x13, - 0x93, 0xd0, 0xd7, 0x87, 0x62, 0x63, 0x53, 0xfb, 0xd8, 0xbb, 0xc9, 0x54, 0x90, 0xd6, 0xa9, - 0x8f, 0xc8, 0x60, 0xbd, 0xec, 0x75, 0x23, 0x9a, 0x21, 0xec, 0xe4, 0x86, 0x43, 0xd7, 0xc1, - 0x88, 0xdc, 0x82, 0x00, 0x32, 0x79, 0xc9, 0x2b, 0x70, 0x85, 0xb7, 0x25, 0xa1, 0xcc, 0x7d, - 0x0b, 0x29, 0x03, 0xea, 0x80, 0xff, 0x9b, 0xf3, 0x24, 0x7f, 0xd1, 0xff, 0xf0, 0x22, 0x65, - 0x85, 0x99, 0x17, 0x63, 0xc2, 0xc0, 0xb7, 0x62, 0x05, 0xda, 0x7a, 0xa0, 0xc3, 0x2a, 0x6f, - 0x1f, 0xee, 0x1f, 0x31, 0xa8, 0x42, 0x80, 0xe4, 0xb7, 0x6c, 0xf6, 0xac, 0x47, 0xb0, 0x17, - 0x69, 0xcb, 0xff, 0x66, 0x8a, 0xd6, 0x25, 0x00, 0xf3, 0xcf, 0x0a, 0xaf, 0xf8, 0x92, 0x8a, - 0xa0, 0xdf, 0x71, 0x13, 0x8d, 0x9d, 0xff, 0x7e, 0xe0, 0x0a, 0x52, 0xf1, 0x97, 0x01, 0xa9, - 0x73, 0x27, 0xfd, 0x63, 0x58, 0x00, 0x32, 0xa6, 0xf6, 0x78, 0xb8, 0xe4, 0xfd, 0x20, 0x7c, - 0x90, 0xee, 0xad, 0x8c, 0xc9, 0x71, 0x35, 0x66, 0x71, 0x3c, 0xe0, 0xe4, 0x0b, 0xbb, 0xa0, - 0x50, 0xe9, 0xf2, 0x81, 0x1d, 0x3a, 0x95, 0x94, 0x00, 0xd5, 0x49, 0x00, 0x07, 0xdf, 0x21, - 0x53, 0x36, 0x8d, 0x9e, 0xd9, 0xa5, 0x52, 0x4d, 0x0d, 0x29, 0x74, 0xf0, 0x40, 0xbd, 0xda, - 0x63, 0x4e, 0xdd, 0x91, 0x8e, 0xa6, 0xa7, 0xf6, 0x78, 0x58, 0x3b, 0x0a, 0x5c, 0x60, 0x3c, - 0x15, 0x34, 0xf8, 0x2c, 0x21, 0xe3, 0x56, 0x1b, 0x9e, 0xd9, 0x56, 0xd3, 0x13, 0x2e, 0x80, - 0x2c, 0x36, 0xda, 0x1d, 0xc8, 0xfb, 0x52, 0xee, 0x17, 0xb3, 0x2b, 0xf3, 0xd2, 0xeb, 0x29, - 0xa0, 0x37, 0xa0, 0x12, 0xce, 0x1c, 0x50, 0x6a, 0xf4, 0x11, 0xcd, 0x96, 0x88, 0x3f, 0x43, - 0x78, 0xc0, 0x2c, 0x53, 0x6c, 0xa6, 0xdf, 0xb9, 0x9e, 0x93, 0xd4, 0x1e, 0xa9, 0x7f, 0x67, - 0xa6, 0xc1, 0x80, 0x46, 0x0f, 0x63, 0x7d, 0x15, 0xf2, 0x4c, 0xc5, 0xda, 0x11, 0x9a, 0x20, - 0x67, 0x27, 0xe8, 0x00, 0xec, 0x03, 0x1d, 0x15, 0xa7, 0x92, 0xb3, 0x1f, 0xda, 0x20, 0x92, - 0xd8, 0x00, 0xfb, 0x06, 0x80, 0xeb, 0x4b, 0x0c, 0xc1, 0x1f, 0x49, 0x40, 0x06, 0x8d, 0x8a, - 0xf8, 0x34, 0xb1, 0x0c, 0x1d, 0x20, 0xd0, 0x47, 0xe5, 0xb1, 0x7e, 0xf7, 
0xe4, 0xb4, 0x7e, - 0x9c, 0x84, 0x18, 0x61, 0x32, 0x4f, 0xc0, 0xc2, 0xb2, 0xcc, 0x63, 0xf6, 0xe1, 0x16, 0xd6, - 0xd9, 0x4b, 0x74, 0x13, 0x01, 0xa1, 0xe2, 0x00, 0xb7, 0x9e, 0xc1, 0x3a, 0xc5, 0xaf, 0xe8, - 0x54, 0x07, 0x2a, 0x20, 0xfd, 0x2c, 0x6f, 0xb9, 0x80, 0x18, 0x92, 0x87, 0xa0, 0x81, 0x24, - 0x60, 0x47, 0x17, 0x4f, 0xbc, 0xbe, 0xf5, 0x03, 0x69, 0x80, 0xe3, 0x10, 0x54, 0xd6, 0x68, - 0x7d, 0x75, 0xd3, 0x0a, 0x45, 0x38, 0x9e, 0xa9, 0xfd, 0x05, 0x40, 0xd2, 0x1e, 0x6f, 0x5c, - 0x30, 0x10, 0xfe, 0x9b, 0x9f, 0x6d, 0xc0, 0x9d, 0x6c, 0x17, 0x7d, 0x00, 0x09, 0xb6, 0x8a, - 0x31, 0x8e, 0x1b, 0x6b, 0x84, 0x1e, 0x79, 0xce, 0x10, 0x55, 0x59, 0x6a, 0x40, 0x16, 0xdc, - 0x9a, 0xcf, 0x4d, 0xb0, 0x8f, 0xac, 0xe3, 0x8d, 0xee, 0xd2, 0xef, 0x01, 0x8c, 0xe0, 0x2b, - 0x24, 0xe5, 0xb4, 0xe1, 0x86, 0x72, 0x00, 0x30, 0x07, 0xce, 0x02, 0x23, 0x41, 0x33, 0x40, - 0xf0, 0x9b, 0xc2, 0x2d, 0x30, 0xec, 0x3b, 0x17, 0xb2, 0x8f, 0x64, 0x7d, 0xcd, 0x70, 0x9e, - 0x80, 0x22, 0xb5, 0xdf, 0x6d, 0x2a, 0x43, 0xd4, 0x2b, 0x5a, 0xf6, 0x96, 0xa6, 0xea, 0x91, - 0x62, 0x80, 0x39, 0xf2, 0x5a, 0x8e, 0xc0, 0xb9, 0x29, 0x99, 0x17, 0xe7, 0x35, 0x2c, 0xf6, - 0x4d, 0x18, 0x00, 0x48, 0x10, 0x85, 0xb4, 0x3f, 0x89, 0x60, 0x49, 0x6e, 0xf0, 0xcd, 0x9d, - 0x92, 0xeb, 0x96, 0x80, 0xcf, 0xf9, 0xf1, 0x46, 0x1d, 0xc0, 0x49, 0xb3, 0x36, 0x2e, 0x24, - 0xc8, 0xdb, 0x41, 0x72, 0x20, 0xf5, 0xde, 0x5c, 0xf9, 0x4a, 0x6e, 0xa0, 0x0b, 0x13, 0xfc, - 0x2d, 0x17, 0x07, 0x16, 0x5e, 0x00, 0x3c, 0x54, 0x41, 0x0e, 0xa2, 0x0d, 0xf3, 0x48, 0x12, - 0x2e, 0x7c, 0xab, 0x3c, 0x59, 0x1c, 0x40, 0xca, 0xb0, 0x71, 0xc7, 0x29, 0xf0, 0xbb, 0x9f, - 0xf4, 0x3f, 0x25, 0x49, 0xad, 0xc2, 0x8f, 0x80, 0x04, 0x38, 0x6d, 0x35, 0x02, 0xca, 0xe6, - 0x02, 0x83, 0x89, 0x4e, 0x74, 0xdb, 0x08, 0x5a, 0x80, 0x13, 0x99, 0xd4, 0x26, 0xc1, 0x27, - 0xce, 0xb0, 0x98, 0x99, 0xca, 0xf6, 0x3e, 0x50, 0x49, 0xd0, 0xbf, 0xcb, 0x6f, 0xbe, 0x5b, - 0x92, 0x63, 0xde, 0x94, 0xd3, 0x8f, 0x07, 0x06, 0x0f, 0x2b, 0x80, 0x36, 0xf1, 0x77, 0xf6, - 0x29, 0x33, 0x13, 0xa9, 0x4a, 0x55, 0x3d, 0x6c, 
0xca, 0xdb, 0x4e, 0x40, 0xc4, 0x95, 0x54, - 0xf4, 0xe2, 0x8c, 0x1b, 0xa0, 0xfe, 0x30, 0x50, 0x9d, 0x62, 0xbc, 0x5c, 0x00, 0xb4, 0xc4, - 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x4b, - 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, 0xff, 0xff, - 0x15, 0x02, 0x19, 0x4c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, - 0x65, 0x6d, 0x61, 0x15, 0x06, 0x00, 0x15, 0x02, 0x25, 0x02, 0x18, 0x06, 0x64, 0x65, 0x63, - 0x37, 0x70, 0x34, 0x25, 0x0a, 0x15, 0x08, 0x15, 0x0e, 0x00, 0x15, 0x04, 0x25, 0x02, 0x18, - 0x07, 0x64, 0x65, 0x63, 0x31, 0x34, 0x70, 0x35, 0x25, 0x0a, 0x15, 0x0a, 0x15, 0x1c, 0x00, - 0x15, 0x0e, 0x15, 0x20, 0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, - 0x38, 0x25, 0x0a, 0x15, 0x24, 0x15, 0x4c, 0x00, 0x16, 0x6a, 0x19, 0x1c, 0x19, 0x3c, 0x26, - 0x08, 0x1c, 0x15, 0x02, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, 0x64, 0x65, 0x63, - 0x37, 0x70, 0x34, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xf6, 0x03, 0x16, 0xfe, 0x03, 0x26, 0x08, - 0x3c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, - 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0x86, 0x04, - 0x1c, 0x15, 0x04, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x31, - 0x34, 0x70, 0x35, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xa6, 0x07, 0x16, 0xb0, 0x07, 0x26, 0x86, - 0x04, 0x3c, 0x36, 0x02, 0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x18, - 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, - 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xb6, 0x0b, 0x1c, 0x15, 0x0e, 0x19, 0x35, 0x06, - 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x15, 0x02, - 0x16, 0x6a, 0x16, 0x86, 0x0e, 0x16, 0x90, 0x0e, 0x26, 0xb6, 0x0b, 0x3c, 0x36, 0x02, 0x28, - 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, - 0xff, 0xff, 0x18, 0x10, 
0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, - 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, - 0x00, 0x00, 0x16, 0xa2, 0x19, 0x16, 0x6a, 0x00, 0x19, 0x2c, 0x18, 0x18, 0x6f, 0x72, 0x67, - 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x2e, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x33, 0x2e, 0x30, 0x2e, 0x31, 0x00, 0x18, - 0x29, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, - 0x72, 0x6b, 0x2e, 0x73, 0x71, 0x6c, 0x2e, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2e, - 0x72, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xf4, 0x01, - 0x7b, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, - 0x22, 0x2c, 0x22, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x22, 0x3a, 0x5b, 0x7b, 0x22, 0x6e, - 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x34, 0x22, 0x2c, 0x22, - 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, - 0x37, 0x2c, 0x34, 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, - 0x22, 0x3a, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x22, 0x3a, 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, - 0x22, 0x64, 0x65, 0x63, 0x31, 0x34, 0x70, 0x35, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, - 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x31, 0x34, 0x2c, 0x35, - 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, - 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, - 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, - 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, - 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x33, 0x38, 0x2c, 0x31, 0x38, 0x29, - 
0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, 0x72, - 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, 0x7b, - 0x7d, 0x7d, 0x5d, 0x7d, 0x00, 0x18, 0x4a, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2d, - 0x6d, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x31, 0x30, - 0x2e, 0x31, 0x20, 0x28, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, - 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, - 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, - 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3c, 0x1c, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x1c, - 0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - unsigned int decimals_parquet_len = 2366; - - cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); - auto result = cudf::io::read_parquet(read_opts); - - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 50; }); - - EXPECT_EQ(result.tbl->view().num_columns(), 3); - - int32_t col0_data[] = { - -2354584, -190275, 8393572, 6446515, -5687920, -1843550, -6897687, -6780385, 3428529, - 5842056, -4312278, -4450603, -7516141, 2974667, -4288640, 1065090, -9410428, 7891355, - 1076244, -1975984, 6999466, 2666959, 9262967, 7931374, -1370640, 451074, 8799111, - 3026424, -6803730, 5098297, -1414370, -9662848, 2499991, 658765, 8348874, -6177036, - -9694494, -5343299, 3558393, -8789072, 2697890, -4454707, 8299309, -6223703, -3112513, - 7537487, 825776, -495683, 328299, -4529727, 0, -9999999, 9999999}; - - EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), - sizeof(col0_data) / sizeof(col0_data[0])); - cudf::test::fixed_point_column_wrapper col0( - std::begin(col0_data), std::end(col0_data), validity, 
numeric::scale_type{-4}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); - - int64_t col1_data[] = {29274040266581, -17210335917753, -58420730139037, - 68073792696254, 2236456014294, 13704555677045, - -70797090469548, -52248605513407, -68976081919961, - -34277313883112, 97774730521689, 21184241014572, - -670882460254, -40862944054399, -24079852370612, - -88670167797498, -84007574359403, -71843004533519, - -55538016554201, 3491435293032, -29085437167297, - 36901882672273, -98622066122568, -13974902998457, - 86712597643378, -16835133643735, -94759096142232, - 30708340810940, 79086853262082, 78923696440892, - -76316597208589, 37247268714759, 80303592631774, - 57790350050889, 19387319851064, -33186875066145, - 69701203023404, -7157433049060, -7073790423437, - 92769171617714, -75127120182184, -951893180618, - 64927618310150, -53875897154023, -16168039035569, - -24273449166429, -30359781249192, 35639397345991, - 45844829680593, 71401416837149, 0, - -99999999999999, 99999999999999}; - - EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), - sizeof(col1_data) / sizeof(col1_data[0])); - cudf::test::fixed_point_column_wrapper col1( - std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); - - cudf::io::parquet_reader_options read_strict_opts = read_opts; - read_strict_opts.set_columns({"dec7p4", "dec14p5"}); - EXPECT_NO_THROW(cudf::io::read_parquet(read_strict_opts)); - } - { - // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4) - // dec12p11: Decimal(precision=12, scale=11) backed by FIXED_LENGTH_BYTE_ARRAY(length = 6) - // dec20p1: Decimal(precision=20, scale=1) backed by FIXED_LENGTH_BYTE_ARRAY(length = 9) - unsigned char const fixed_len_bytes_decimal_parquet[] = { - 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xA8, 0x01, 0x15, 0xAE, 0x01, 0x2C, 0x15, 0x28, - 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 
0x04, 0x00, 0x97, 0x45, 0x72, - 0x18, 0x04, 0x00, 0x01, 0x81, 0x3B, 0x00, 0x00, 0x00, 0x54, 0xF0, 0x53, 0x04, 0x00, 0x00, - 0x00, 0x26, 0x01, 0x03, 0x00, 0x00, 0x61, 0x10, 0xCF, 0x00, 0x0A, 0xA9, 0x08, 0x00, 0x77, - 0x58, 0x6F, 0x00, 0x6B, 0xEE, 0xA4, 0x00, 0x92, 0xF8, 0x94, 0x00, 0x2E, 0x18, 0xD4, 0x00, - 0x4F, 0x45, 0x33, 0x00, 0x97, 0x45, 0x72, 0x00, 0x0D, 0xC2, 0x75, 0x00, 0x76, 0xAA, 0xAA, - 0x00, 0x30, 0x9F, 0x86, 0x00, 0x4B, 0x9D, 0xB1, 0x00, 0x4E, 0x4B, 0x3B, 0x00, 0x01, 0x81, - 0x3B, 0x00, 0x22, 0xD4, 0x53, 0x00, 0x72, 0xC4, 0xAF, 0x00, 0x43, 0x9B, 0x72, 0x00, 0x1D, - 0x91, 0xC3, 0x00, 0x45, 0x27, 0x48, 0x15, 0x00, 0x15, 0xF4, 0x01, 0x15, 0xFA, 0x01, 0x2C, - 0x15, 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, - 0xD7, 0x31, 0x99, 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x00, 0x00, - 0x7A, 0xF0, 0x79, 0x04, 0x00, 0x00, 0x00, 0x24, 0x01, 0x03, 0x02, 0x00, 0x54, 0x23, 0xCF, - 0x13, 0x0A, 0x00, 0x07, 0x22, 0xB1, 0x21, 0x7E, 0x00, 0x64, 0x19, 0xD6, 0xD2, 0xA5, 0x00, - 0x61, 0x7F, 0xF6, 0xB9, 0xB0, 0x00, 0xD0, 0x7F, 0x9C, 0xA9, 0xE9, 0x00, 0x65, 0x58, 0xF0, - 0xAD, 0xFB, 0x00, 0xBC, 0x61, 0xE2, 0x03, 0xDA, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, - 0x63, 0x4B, 0x4C, 0xFE, 0x45, 0x00, 0x7A, 0xA0, 0xD8, 0xD1, 0xC0, 0x00, 0xC0, 0x63, 0xF7, - 0x9D, 0x0A, 0x00, 0x88, 0x22, 0x0F, 0x1B, 0x25, 0x00, 0x1A, 0x80, 0x56, 0x34, 0xC7, 0x00, - 0x5F, 0x48, 0x61, 0x09, 0x7C, 0x00, 0x61, 0xEF, 0x92, 0x42, 0x2F, 0x00, 0xD5, 0xD7, 0x31, - 0x99, 0xA6, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x71, 0xDD, 0xE2, 0x22, 0x7B, 0x00, - 0x54, 0xBF, 0xAE, 0xE9, 0x3C, 0x15, 0x00, 0x15, 0xD4, 0x02, 0x15, 0xDC, 0x02, 0x2C, 0x15, - 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x04, 0x28, 0x09, 0x00, 0x7D, 0xFE, - 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, - 0xF1, 0xE3, 0x00, 0x00, 0x00, 0xAA, 0x01, 0xF0, 0xA9, 0x04, 0x00, 0x00, 0x00, 0x07, 0xBF, - 0xBF, 0x0F, 0x00, 0x7D, 0xFE, 0x02, 
0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x00, 0x7D, 0x9A, 0xCB, - 0xDA, 0x4B, 0x10, 0x8B, 0xAC, 0x00, 0x20, 0xBA, 0x97, 0x87, 0x2E, 0x3B, 0x4E, 0x04, 0x00, - 0x15, 0xBB, 0xC2, 0xDF, 0x2D, 0x25, 0x08, 0xB6, 0x00, 0x5C, 0x67, 0x0E, 0x36, 0x30, 0xF1, - 0xAC, 0xA4, 0x00, 0x44, 0xF1, 0x8E, 0xFB, 0x17, 0x5E, 0xE1, 0x96, 0x00, 0x64, 0x69, 0xF9, - 0x66, 0x3F, 0x11, 0xED, 0xB9, 0x00, 0x45, 0xB5, 0xDA, 0x14, 0x9C, 0xA3, 0xFA, 0x64, 0x00, - 0x26, 0x5F, 0xDE, 0xD7, 0x67, 0x95, 0xEF, 0xB1, 0x00, 0x35, 0xDB, 0x9B, 0x88, 0x46, 0xD0, - 0xA1, 0x0E, 0x00, 0x45, 0xA9, 0x92, 0x8E, 0x89, 0xD1, 0xAC, 0x4C, 0x00, 0x4C, 0xF1, 0xCB, - 0x27, 0x82, 0x3A, 0x7D, 0xB7, 0x00, 0x64, 0xD3, 0xD2, 0x2F, 0x9C, 0x83, 0x16, 0x75, 0x00, - 0x15, 0xDF, 0xC2, 0xA9, 0x63, 0xB8, 0x33, 0x65, 0x00, 0x27, 0x40, 0x28, 0x97, 0x05, 0x8E, - 0xE3, 0x46, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x22, 0x23, 0xF5, - 0xE8, 0x9D, 0x55, 0xD4, 0x9C, 0x00, 0x25, 0xB9, 0xD8, 0x87, 0x2D, 0xF1, 0xF2, 0x17, 0x15, - 0x02, 0x19, 0x4C, 0x48, 0x0C, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x5F, 0x73, 0x63, 0x68, 0x65, - 0x6D, 0x61, 0x15, 0x06, 0x00, 0x15, 0x0E, 0x15, 0x08, 0x15, 0x02, 0x18, 0x06, 0x64, 0x65, - 0x63, 0x37, 0x70, 0x33, 0x25, 0x0A, 0x15, 0x06, 0x15, 0x0E, 0x00, 0x15, 0x0E, 0x15, 0x0C, - 0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x25, 0x0A, 0x15, - 0x16, 0x15, 0x18, 0x00, 0x15, 0x0E, 0x15, 0x12, 0x15, 0x02, 0x18, 0x07, 0x64, 0x65, 0x63, - 0x32, 0x30, 0x70, 0x31, 0x25, 0x0A, 0x15, 0x02, 0x15, 0x28, 0x00, 0x16, 0x28, 0x19, 0x1C, - 0x19, 0x3C, 0x26, 0x08, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, - 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x15, 0x02, 0x16, 0x28, 0x16, 0xEE, 0x01, 0x16, 0xF4, - 0x01, 0x26, 0x08, 0x3C, 0x36, 0x02, 0x28, 0x04, 0x00, 0x97, 0x45, 0x72, 0x18, 0x04, 0x00, - 0x01, 0x81, 0x3B, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, - 0x26, 0xFC, 0x01, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, - 0x65, 0x63, 
0x31, 0x32, 0x70, 0x31, 0x31, 0x15, 0x02, 0x16, 0x28, 0x16, 0xC2, 0x02, 0x16, - 0xC8, 0x02, 0x26, 0xFC, 0x01, 0x3C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, 0xD7, 0x31, 0x99, - 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, - 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xC4, 0x04, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, - 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x15, 0x02, 0x16, - 0x28, 0x16, 0xAE, 0x03, 0x16, 0xB6, 0x03, 0x26, 0xC4, 0x04, 0x3C, 0x36, 0x04, 0x28, 0x09, - 0x00, 0x7D, 0xFE, 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, - 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, - 0x00, 0x00, 0x16, 0xDE, 0x07, 0x16, 0x28, 0x00, 0x19, 0x2C, 0x18, 0x18, 0x6F, 0x72, 0x67, - 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x2E, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x18, 0x05, 0x33, 0x2E, 0x30, 0x2E, 0x31, 0x00, 0x18, - 0x29, 0x6F, 0x72, 0x67, 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, - 0x72, 0x6B, 0x2E, 0x73, 0x71, 0x6C, 0x2E, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2E, - 0x72, 0x6F, 0x77, 0x2E, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xF4, 0x01, - 0x7B, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, - 0x22, 0x2C, 0x22, 0x66, 0x69, 0x65, 0x6C, 0x64, 0x73, 0x22, 0x3A, 0x5B, 0x7B, 0x22, 0x6E, - 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x22, 0x2C, 0x22, - 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, - 0x37, 0x2C, 0x33, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, - 0x22, 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, - 0x22, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x22, 0x2C, 0x22, 0x74, 
0x79, 0x70, - 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, 0x31, 0x32, 0x2C, - 0x31, 0x31, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, - 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, - 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x22, 0x2C, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, - 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, 0x32, 0x30, 0x2C, 0x31, 0x29, - 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, 0x3A, 0x74, 0x72, - 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3A, 0x7B, - 0x7D, 0x7D, 0x5D, 0x7D, 0x00, 0x18, 0x4A, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2D, - 0x6D, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x20, 0x31, 0x2E, 0x31, 0x30, - 0x2E, 0x31, 0x20, 0x28, 0x62, 0x75, 0x69, 0x6C, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, - 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, - 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, - 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3C, 0x1C, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x1C, - 0x00, 0x00, 0x00, 0xC5, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31, - }; - - unsigned int parquet_len = 1226; - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{ - reinterpret_cast(fixed_len_bytes_decimal_parquet), parquet_len}); - auto result = cudf::io::read_parquet(read_opts); - EXPECT_EQ(result.tbl->view().num_columns(), 3); - - auto validity_c0 = cudf::test::iterators::nulls_at({19}); - int32_t col0_data[] = {6361295, 698632, 7821423, 7073444, 9631892, 3021012, 5195059, - 9913714, 901749, 7776938, 3186566, 4955569, 5131067, 98619, - 2282579, 7521455, 4430706, 1937859, 4532040, 0}; - - 
EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), - sizeof(col0_data) / sizeof(col0_data[0])); - cudf::test::fixed_point_column_wrapper col0( - std::begin(col0_data), std::end(col0_data), validity_c0, numeric::scale_type{-3}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); - - auto validity_c1 = cudf::test::iterators::nulls_at({18}); - int64_t col1_data[] = {361378026250, - 30646804862, - 429930238629, - 418758703536, - 895494171113, - 435283865083, - 809096053722, - -999999999999, - 426465099333, - 526684574144, - 826310892810, - 584686967589, - 113822282951, - 409236212092, - 420631167535, - 918438386086, - -999999999999, - 489053889147, - 0, - 363993164092}; - - EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), - sizeof(col1_data) / sizeof(col1_data[0])); - cudf::test::fixed_point_column_wrapper col1( - std::begin(col1_data), std::end(col1_data), validity_c1, numeric::scale_type{-11}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); - - auto validity_c2 = cudf::test::iterators::nulls_at({6, 14}); - __int128_t col2_data[] = {9078697037144433659, - 9050770539577117612, - 2358363961733893636, - 1566059559232276662, - 6658306200002735268, - 4967909073046397334, - 0, - 7235588493887532473, - 5023160741463849572, - 2765173712965988273, - 3880866513515749646, - 5019704400576359500, - 5544435986818825655, - 7265381725809874549, - 0, - 1576192427381240677, - 2828305195087094598, - 260308667809395171, - 2460080200895288476, - 2718441925197820439}; - - EXPECT_EQ(static_cast(result.tbl->view().column(2).size()), - sizeof(col2_data) / sizeof(col2_data[0])); - cudf::test::fixed_point_column_wrapper<__int128_t> col2( - std::begin(col2_data), std::end(col2_data), validity_c2, numeric::scale_type{-1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), col2); - } -} - -TEST_F(ParquetReaderTest, EmptyOutput) -{ - cudf::test::fixed_width_column_wrapper c0; - cudf::test::strings_column_wrapper c1; - 
cudf::test::fixed_point_column_wrapper c2({}, numeric::scale_type{2}); - cudf::test::lists_column_wrapper _c3{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}; - auto c3 = cudf::empty_like(_c3); - - cudf::test::fixed_width_column_wrapper sc0; - cudf::test::strings_column_wrapper sc1; - cudf::test::lists_column_wrapper _sc2{{1, 2}}; - std::vector> struct_children; - struct_children.push_back(sc0.release()); - struct_children.push_back(sc1.release()); - struct_children.push_back(cudf::empty_like(_sc2)); - cudf::test::structs_column_wrapper c4(std::move(struct_children)); - - table_view expected({c0, c1, c2, *c3, c4}); - - // set precision on the decimal column - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[2].set_decimal_precision(1); - - auto filepath = temp_env->get_temp_filepath("EmptyOutput.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - out_args.set_metadata(std::move(expected_metadata)); - cudf::io::write_parquet(out_args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_args); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, RowGroupSizeInvalid) -{ - auto const unused_table = std::make_unique(); - std::vector out_buffer; - - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .row_group_size_rows(0), - cudf::logic_error); - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .max_page_size_rows(0), - cudf::logic_error); - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .row_group_size_bytes(3 << 8), - cudf::logic_error); - 
EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .max_page_size_bytes(3 << 8), - cudf::logic_error); - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .max_page_size_bytes(0xFFFF'FFFFUL), - cudf::logic_error); - - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .row_group_size_rows(0), - cudf::logic_error); - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .max_page_size_rows(0), - cudf::logic_error); - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .row_group_size_bytes(3 << 8), - cudf::logic_error); - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .max_page_size_bytes(3 << 8), - cudf::logic_error); - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .max_page_size_bytes(0xFFFF'FFFFUL), - cudf::logic_error); -} - -TEST_F(ParquetWriterTest, RowGroupPageSizeMatch) -{ - auto const unused_table = std::make_unique
(); - std::vector out_buffer; - - auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .row_group_size_bytes(128 * 1024) - .max_page_size_bytes(512 * 1024) - .row_group_size_rows(10000) - .max_page_size_rows(20000) - .build(); - EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); - EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); -} - -TEST_F(ParquetChunkedWriterTest, RowGroupPageSizeMatch) -{ - std::vector out_buffer; - - auto options = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .row_group_size_bytes(128 * 1024) - .max_page_size_bytes(512 * 1024) - .row_group_size_rows(10000) - .max_page_size_rows(20000) - .build(); - EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); - EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); -} - -TEST_F(ParquetWriterTest, EmptyList) -{ - auto L1 = cudf::make_lists_column(0, - cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), - cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), - 0, - {}); - auto L0 = cudf::make_lists_column( - 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); - - auto filepath = temp_env->get_temp_filepath("EmptyList.parquet"); - cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), - cudf::table_view({*L0}))); - - auto result = cudf::io::read_parquet( - cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); - - using lcw = cudf::test::lists_column_wrapper; - auto expected = lcw{lcw{}, lcw{}, lcw{}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), expected); -} - -TEST_F(ParquetWriterTest, DeepEmptyList) -{ - // Make a list column LLLi st only L is valid and LLi are all null. 
This tests whether we can - // handle multiple nullptr offsets - - auto L2 = cudf::make_lists_column(0, - cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), - cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), - 0, - {}); - auto L1 = cudf::make_lists_column( - 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(L2), 0, {}); - auto L0 = cudf::make_lists_column( - 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); - - auto filepath = temp_env->get_temp_filepath("DeepEmptyList.parquet"); - cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), - cudf::table_view({*L0}))); - - auto result = cudf::io::read_parquet( - cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); -} - -TEST_F(ParquetWriterTest, EmptyListWithStruct) -{ - auto L2 = cudf::make_lists_column(0, - cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), - cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), - 0, - {}); - - auto children = std::vector>{}; - children.push_back(std::move(L2)); - auto S2 = cudf::make_structs_column(0, std::move(children), 0, {}); - auto L1 = cudf::make_lists_column( - 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(S2), 0, {}); - auto L0 = cudf::make_lists_column( - 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); - - auto filepath = temp_env->get_temp_filepath("EmptyListWithStruct.parquet"); - cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), - cudf::table_view({*L0}))); - auto result = cudf::io::read_parquet( - cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); -} - -TEST_F(ParquetWriterTest, CheckPageRows) -{ - auto sequence = 
thrust::make_counting_iterator(0); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - constexpr auto page_rows = 5000; - constexpr auto num_rows = 2 * page_rows; - column_wrapper col(sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto const filepath = temp_env->get_temp_filepath("CheckPageRows.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .max_page_size_rows(page_rows); - cudf::io::write_parquet(out_opts); - - // check first page header and make sure it has only page_rows values - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - ASSERT_GT(fmd.row_groups.size(), 0); - ASSERT_EQ(fmd.row_groups[0].columns.size(), 1); - auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; - ASSERT_GT(first_chunk.data_page_offset, 0); - - // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded - // version should be smaller than size of the struct. 
- auto const ph = read_page_header( - source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); - - EXPECT_EQ(ph.data_page_header.num_values, page_rows); -} - -TEST_F(ParquetWriterTest, CheckPageRowsAdjusted) -{ - // enough for a few pages with the default 20'000 rows/page - constexpr auto rows_per_page = 20'000; - constexpr auto num_rows = 3 * rows_per_page; - const std::string s1(32, 'a'); - auto col0_elements = - cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s1; }); - auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + num_rows); - - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("CheckPageRowsAdjusted.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .max_page_size_rows(rows_per_page); - cudf::io::write_parquet(out_opts); - - // check first page header and make sure it has only page_rows values - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - ASSERT_GT(fmd.row_groups.size(), 0); - ASSERT_EQ(fmd.row_groups[0].columns.size(), 1); - auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; - ASSERT_GT(first_chunk.data_page_offset, 0); - - // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded - // version should be smaller than size of the struct. 
- auto const ph = read_page_header( - source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); - - EXPECT_LE(ph.data_page_header.num_values, rows_per_page); -} - -TEST_F(ParquetWriterTest, CheckPageRowsTooSmall) -{ - constexpr auto rows_per_page = 1'000; - constexpr auto fragment_size = 5'000; - constexpr auto num_rows = 3 * rows_per_page; - const std::string s1(32, 'a'); - auto col0_elements = - cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s1; }); - auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + num_rows); - - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("CheckPageRowsTooSmall.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .max_page_fragment_size(fragment_size) - .max_page_size_rows(rows_per_page); - cudf::io::write_parquet(out_opts); - - // check that file is written correctly when rows/page < fragment size - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - ASSERT_TRUE(fmd.row_groups.size() > 0); - ASSERT_TRUE(fmd.row_groups[0].columns.size() == 1); - auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; - ASSERT_TRUE(first_chunk.data_page_offset > 0); - - // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded - // version should be smaller than size of the struct. 
- auto const ph = read_page_header( - source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); - - // there should be only one page since the fragment size is larger than rows_per_page - EXPECT_EQ(ph.data_page_header.num_values, num_rows); -} - -TEST_F(ParquetWriterTest, Decimal128Stats) -{ - // check that decimal128 min and max statistics are written in network byte order - // this is negative, so should be the min - std::vector expected_min{ - 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - std::vector expected_max{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6}; - - __int128_t val0 = 0xa1b2'c3d4'e5f6ULL; - __int128_t val1 = val0 << 80; - column_wrapper col0{{numeric::decimal128(val0, numeric::scale_type{0}), - numeric::decimal128(val1, numeric::scale_type{0})}}; - - auto expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("Decimal128Stats.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - auto const stats = get_statistics(fmd.row_groups[0].columns[0]); - - EXPECT_EQ(expected_min, stats.min_value); - EXPECT_EQ(expected_max, stats.max_value); -} - -// ============================================================================= -// ---- test data for stats sort order tests -// need at least 3 pages, and min page count is 5000, so need at least 15000 values. -// use 20000 to be safe. -static constexpr int num_ordered_rows = 20000; -static constexpr int page_size_for_ordered_tests = 5000; - -namespace { -namespace testdata { -// ----- most numerics. 
scale by 100 so all values fit in a single byte - -template -std::enable_if_t && !std::is_same_v, - cudf::test::fixed_width_column_wrapper> -ascending() -{ - int start = std::is_signed_v ? -num_ordered_rows / 2 : 0; - auto elements = - cudf::detail::make_counting_transform_iterator(start, [](auto i) { return i / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t && !std::is_same_v, - cudf::test::fixed_width_column_wrapper> -descending() -{ - if (std::is_signed_v) { - auto elements = cudf::detail::make_counting_transform_iterator(-num_ordered_rows / 2, - [](auto i) { return -i / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } else { - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return (num_ordered_rows - i) / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } -} - -template -std::enable_if_t && !std::is_same_v, - cudf::test::fixed_width_column_wrapper> -unordered() -{ - if (std::is_signed_v) { - auto elements = cudf::detail::make_counting_transform_iterator( - -num_ordered_rows / 2, [](auto i) { return (i % 2 ? i : -i) / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } else { - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return (i % 2 ? 
i : num_ordered_rows - i) / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } -} - -// ----- bool - -template -std::enable_if_t, cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i >= num_ordered_rows / 2; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i < num_ordered_rows / 2; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - switch (i / page_size_for_ordered_tests) { - case 0: return true; - case 1: return false; - case 2: return true; - default: return false; - } - }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- fixed point types - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - -num_ordered_rows / 2, [](auto i) { return T(i, numeric::scale_type{0}); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - -num_ordered_rows / 2, [](auto i) { return T(-i, numeric::scale_type{0}); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 
-num_ordered_rows / 2, [](auto i) { return T(i % 2 ? i : -i, numeric::scale_type{0}); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- chrono types -// ----- timstamp - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(typename T::duration(i)); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(typename T::duration(num_ordered_rows - i)); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(typename T::duration(i % 2 ? i : num_ordered_rows - i)); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- duration - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return T(i); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(num_ordered_rows - i); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(i % 2 ? 
i : num_ordered_rows - i); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- string_view - -template -std::enable_if_t, cudf::test::strings_column_wrapper> -ascending() -{ - char buf[10]; - auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", i); - return std::string(buf); - }); - return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::strings_column_wrapper> -descending() -{ - char buf[10]; - auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", num_ordered_rows - i); - return std::string(buf); - }); - return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::strings_column_wrapper> -unordered() -{ - char buf[10]; - auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", (i % 2 == 0) ? 
i : (num_ordered_rows - i)); - return std::string(buf); - }); - return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); -} - -} // namespace testdata -} // anonymous namespace - -TYPED_TEST(ParquetWriterComparableTypeTest, ThreeColumnSorted) -{ - using T = TypeParam; - - auto col0 = testdata::ascending(); - auto col1 = testdata::descending(); - auto col2 = testdata::unordered(); - - auto const expected = table_view{{col0, col1, col2}}; - - auto const filepath = temp_env->get_temp_filepath("ThreeColumnSorted.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .max_page_size_rows(page_size_for_ordered_tests) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - ASSERT_GT(fmd.row_groups.size(), 0); - - auto const& columns = fmd.row_groups[0].columns; - ASSERT_EQ(columns.size(), static_cast(expected.num_columns())); - - // now check that the boundary order for chunk 1 is ascending, - // chunk 2 is descending, and chunk 3 is unordered - cudf::io::parquet::detail::BoundaryOrder expected_orders[] = { - cudf::io::parquet::detail::BoundaryOrder::ASCENDING, - cudf::io::parquet::detail::BoundaryOrder::DESCENDING, - cudf::io::parquet::detail::BoundaryOrder::UNORDERED}; - - for (std::size_t i = 0; i < columns.size(); i++) { - auto const ci = read_column_index(source, columns[i]); - EXPECT_EQ(ci.boundary_order, expected_orders[i]); - } -} - -// utility functions for column index tests - -// compare two values. return -1 if v1 < v2, -// 0 if v1 == v2, and 1 if v1 > v2. -template -int32_t compare(T& v1, T& v2) -{ - return (v1 > v2) - (v1 < v2); -} - -// compare two binary statistics blobs based on their physical -// and converted types. 
returns -1 if v1 < v2, 0 if v1 == v2, and -// 1 if v1 > v2. -int32_t compare_binary(std::vector const& v1, - std::vector const& v2, - cudf::io::parquet::detail::Type ptype, - thrust::optional const& ctype) -{ - auto ctype_val = ctype.value_or(cudf::io::parquet::detail::UNKNOWN); - switch (ptype) { - case cudf::io::parquet::detail::INT32: - switch (ctype_val) { - case cudf::io::parquet::detail::UINT_8: - case cudf::io::parquet::detail::UINT_16: - case cudf::io::parquet::detail::UINT_32: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - default: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - } - - case cudf::io::parquet::detail::INT64: - if (ctype_val == cudf::io::parquet::detail::UINT_64) { - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - } - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - - case cudf::io::parquet::detail::FLOAT: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - - case cudf::io::parquet::detail::DOUBLE: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - - case cudf::io::parquet::detail::BYTE_ARRAY: { - int32_t v1sz = v1.size(); - int32_t v2sz = v2.size(); - int32_t ret = memcmp(v1.data(), v2.data(), std::min(v1sz, v2sz)); - if (ret != 0 or v1sz == v2sz) { return ret; } - return v1sz - v2sz; - } - - default: CUDF_FAIL("Invalid type in compare_binary"); - } - - return 0; -} - -TEST_P(ParquetV2Test, LargeColumnIndex) -{ - // create a file large enough to be written in 2 batches (currently 1GB per batch) - // pick fragment size that num_rows is divisible by, so we'll get equal sized row groups - const std::string s1(1000, 'a'); - const std::string s2(1000, 'b'); - constexpr auto num_rows = 512 * 1024; - constexpr auto frag_size = num_rows / 128; - auto const is_v2 = GetParam(); - - auto col0_elements = 
cudf::detail::make_counting_transform_iterator( - 0, [&](auto i) { return (i < num_rows) ? s1 : s2; }); - auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + 2 * num_rows); - - auto const expected = table_view{{col0, col0}}; - - auto const filepath = temp_env->get_temp_filepath("LargeColumnIndex.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .compression(cudf::io::compression_type::NONE) - .dictionary_policy(cudf::io::dictionary_policy::NEVER) - .write_v2_headers(is_v2) - .max_page_fragment_size(frag_size) - .row_group_size_bytes(1024 * 1024 * 1024) - .row_group_size_rows(num_rows); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - for (auto const& rg : fmd.row_groups) { - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - - // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); - EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); - } - } -} - -TEST_P(ParquetV2Test, CheckColumnOffsetIndex) -{ - constexpr auto num_rows = 100000; - auto const is_v2 = GetParam(); - auto const expected_hdr_type = is_v2 ? 
cudf::io::parquet::detail::PageType::DATA_PAGE_V2 - : cudf::io::parquet::detail::PageType::DATA_PAGE; - - // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); - - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_data = random_values(num_rows); - - auto col1 = cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end()); - auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); - auto col3 = cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end()); - auto col4 = cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end()); - auto col5 = cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end()); - auto col6 = cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end()); - - // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); - }); - auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); - - auto const expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndex.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .write_v2_headers(is_v2) - .max_page_size_rows(20000); - cudf::io::write_parquet(out_opts); - - auto const source = 
cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, expected_hdr_type); - EXPECT_EQ(page_loc.first_row_index, num_vals); - num_vals += is_v2 ? ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each page. - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - ASSERT_TRUE(ci.null_counts.has_value()); - - // schema indexing starts at 1 - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - // null_pages should always be false - EXPECT_FALSE(ci.null_pages[p]); - // null_counts should always be 0 - EXPECT_EQ(ci.null_counts.value()[p], 0); - EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); - } - for (size_t p = 0; p < ci.max_values.size(); p++) - EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); - } - } -} - -TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) -{ - constexpr auto num_rows = 100000; - auto const is_v2 = GetParam(); - auto const expected_hdr_type = is_v2 ? 
cudf::io::parquet::detail::PageType::DATA_PAGE_V2 - : cudf::io::parquet::detail::PageType::DATA_PAGE; - - // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); - - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_data = random_values(num_rows); - - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - - // add null values for all but first column - auto col1 = - cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); - auto col2 = - cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), valids); - auto col3 = - cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end(), valids); - auto col4 = - cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end(), valids); - auto col5 = - cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end(), valids); - auto col6 = - cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end(), valids); - - // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); - }); - auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows, valids); - - auto expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNulls.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, 
expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .write_v2_headers(is_v2) - .max_page_size_rows(20000); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, expected_hdr_type); - EXPECT_EQ(page_loc.first_row_index, num_vals); - num_vals += is_v2 ? ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each page. - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - - // should be half nulls, except no nulls in column 0 - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - ASSERT_TRUE(stats.null_count.has_value()); - EXPECT_EQ(stats.null_count.value(), c == 0 ? 
0 : num_rows / 2); - ASSERT_TRUE(ci.null_counts.has_value()); - - // schema indexing starts at 1 - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - EXPECT_FALSE(ci.null_pages[p]); - if (c > 0) { // first column has no nulls - EXPECT_GT(ci.null_counts.value()[p], 0); - } else { - EXPECT_EQ(ci.null_counts.value()[p], 0); - } - EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); - } - for (size_t p = 0; p < ci.max_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); - } - } - } -} - -TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) -{ - constexpr auto num_rows = 100000; - auto const is_v2 = GetParam(); - auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 - : cudf::io::parquet::detail::PageType::DATA_PAGE; - - // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); - - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - - // col1 is all nulls - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return false; }); - auto col1 = - cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); - auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); - - // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); - }); - auto col3 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); - - auto expected = table_view{{col0, col1, col2, col3}}; - - auto const 
filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNullColumn.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .write_v2_headers(is_v2) - .max_page_size_rows(20000); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, expected_hdr_type); - EXPECT_EQ(page_loc.first_row_index, num_vals); - num_vals += is_v2 ? ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each non-empty page. - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - - // there should be no nulls except column 1 which is all nulls - if (c != 1) { - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - } - ASSERT_TRUE(stats.null_count.has_value()); - EXPECT_EQ(stats.null_count.value(), c == 1 ? 
num_rows : 0); - ASSERT_TRUE(ci.null_counts.has_value()); - - // schema indexing starts at 1 - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - // check tnat null_pages is true for column 1 - if (c == 1) { - EXPECT_TRUE(ci.null_pages[p]); - EXPECT_GT(ci.null_counts.value()[p], 0); - } - if (not ci.null_pages[p]) { - EXPECT_EQ(ci.null_counts.value()[p], 0); - EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); - } - } - for (size_t p = 0; p < ci.max_values.size(); p++) { - if (not ci.null_pages[p]) { - EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); - } - } - } - } -} - -TEST_P(ParquetV2Test, CheckColumnOffsetIndexStruct) -{ - auto const is_v2 = GetParam(); - auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 - : cudf::io::parquet::detail::PageType::DATA_PAGE; - - auto c0 = testdata::ascending(); - - auto sc0 = testdata::ascending(); - auto sc1 = testdata::descending(); - auto sc2 = testdata::unordered(); - - std::vector> struct_children; - struct_children.push_back(sc0.release()); - struct_children.push_back(sc1.release()); - struct_children.push_back(sc2.release()); - cudf::test::structs_column_wrapper c1(std::move(struct_children)); - - auto listgen = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
i / 2 : num_ordered_rows - (i / 2); }); - auto list = - cudf::test::fixed_width_column_wrapper(listgen, listgen + 2 * num_ordered_rows); - auto offgen = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; }); - auto offsets = - cudf::test::fixed_width_column_wrapper(offgen, offgen + num_ordered_rows + 1); - - auto c2 = cudf::make_lists_column(num_ordered_rows, offsets.release(), list.release(), 0, {}); - - table_view expected({c0, c1, *c2}); - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexStruct.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .write_v2_headers(is_v2) - .max_page_size_rows(page_size_for_ordered_tests); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - // hard coded schema indices. - // TODO find a way to do this without magic - size_t const colidxs[] = {1, 3, 4, 5, 8}; - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - size_t colidx = colidxs[c]; - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, expected_hdr_type); - EXPECT_EQ(page_loc.first_row_index, num_vals); - // last column has 2 values per row - num_vals += is_v2 ? ph.data_page_header_v2.num_rows - : ph.data_page_header.num_values / (c == rg.columns.size() - 1 ? 
2 : 1); - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each page. - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - - auto const ptype = fmd.schema[colidx].type; - auto const ctype = fmd.schema[colidx].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); - } - for (size_t p = 0; p < ci.max_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); - } - } - } -} - -TEST_P(ParquetV2Test, CheckColumnOffsetIndexStructNulls) -{ - auto const is_v2 = GetParam(); - auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 - : cudf::io::parquet::detail::PageType::DATA_PAGE; - - auto validity2 = - cudf::detail::make_counting_transform_iterator(0, [](cudf::size_type i) { return i % 2; }); - auto validity3 = cudf::detail::make_counting_transform_iterator( - 0, [](cudf::size_type i) { return (i % 3) != 0; }); - auto validity4 = cudf::detail::make_counting_transform_iterator( - 0, [](cudf::size_type i) { return (i % 4) != 0; }); - auto validity5 = cudf::detail::make_counting_transform_iterator( - 0, [](cudf::size_type i) { return (i % 5) != 0; }); - - auto c0 = testdata::ascending(); - - auto col1_data = random_values(num_ordered_rows); - auto col2_data = random_values(num_ordered_rows); - auto col3_data = random_values(num_ordered_rows); - - // col1 is all nulls - auto col1 = - cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), validity2); - auto col2 = - cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), validity3); - auto col3 = - cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), validity4); - - 
std::vector> struct_children; - struct_children.push_back(col1.release()); - struct_children.push_back(col2.release()); - struct_children.push_back(col3.release()); - auto struct_validity = std::vector(validity5, validity5 + num_ordered_rows); - cudf::test::structs_column_wrapper c1(std::move(struct_children), struct_validity); - table_view expected({c0, c1}); - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexStructNulls.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .write_v2_headers(is_v2) - .max_page_size_rows(page_size_for_ordered_tests); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - // all struct columns will have num_ordered_rows / 5 nulls at level 0. - // col1 will have num_ordered_rows / 2 nulls total - // col2 will have num_ordered_rows / 3 nulls total - // col3 will have num_ordered_rows / 4 nulls total - int const null_mods[] = {0, 2, 3, 4}; - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - auto const ci = read_column_index(source, chunk); - - // check definition level histogram (repetition will not be present) - if (c != 0) { - ASSERT_TRUE(chunk.meta_data.size_statistics.has_value()); - ASSERT_TRUE(chunk.meta_data.size_statistics->definition_level_histogram.has_value()); - // there are no lists so there should be no repetition level histogram - EXPECT_FALSE(chunk.meta_data.size_statistics->repetition_level_histogram.has_value()); 
- auto const& def_hist = chunk.meta_data.size_statistics->definition_level_histogram.value(); - ASSERT_TRUE(def_hist.size() == 3L); - auto const l0_nulls = num_ordered_rows / 5; - auto const l1_l0_nulls = num_ordered_rows / (5 * null_mods[c]); - auto const l1_nulls = num_ordered_rows / null_mods[c] - l1_l0_nulls; - auto const l2_vals = num_ordered_rows - l1_nulls - l0_nulls; - EXPECT_EQ(def_hist[0], l0_nulls); - EXPECT_EQ(def_hist[1], l1_nulls); - EXPECT_EQ(def_hist[2], l2_vals); - } else { - // column 0 has no lists and no nulls and no strings, so there should be no size stats - EXPECT_FALSE(chunk.meta_data.size_statistics.has_value()); - } - - int64_t num_vals = 0; - - if (is_v2) { ASSERT_TRUE(ci.null_counts.has_value()); } - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, expected_hdr_type); - EXPECT_EQ(page_loc.first_row_index, num_vals); - num_vals += is_v2 ? ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; - // check that null counts match - if (is_v2) { EXPECT_EQ(ci.null_counts.value()[o], ph.data_page_header_v2.num_nulls); } - } - } - } -} - -TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls) -{ - auto const is_v2 = GetParam(); - auto const expected_hdr_type = is_v2 ? 
cudf::io::parquet::detail::PageType::DATA_PAGE_V2 - : cudf::io::parquet::detail::PageType::DATA_PAGE; - - using cudf::test::iterators::null_at; - using cudf::test::iterators::nulls_at; - using lcw = cudf::test::lists_column_wrapper; - - // 4 nulls - // [NULL, 2, NULL] - // [] - // [4, 5] - // NULL - // def histogram [1, 1, 2, 3] - // rep histogram [4, 3] - lcw col0{{{{1, 2, 3}, nulls_at({0, 2})}, {}, {4, 5}, {}}, null_at(3)}; - - // 4 nulls - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - // def histogram [1, 3, 10] - // rep histogram [4, 4, 6] - lcw col1{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; - - // 4 nulls - // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - // def histogram [1, 1, 2, 10] - // rep histogram [4, 4, 6] - lcw col2{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, null_at(3)}, {{7, 8}}, lcw{}, lcw{lcw{}}}; - - // 6 nulls - // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] - // [[7, 8]] - // [] - // [[]] - // def histogram [1, 1, 2, 2, 8] - // rep histogram [4, 4, 6] - using dlcw = cudf::test::lists_column_wrapper; - dlcw col3{{{{1., 2., 3.}, {}, {4., 5.}, {}, {{0., 6., 0.}, nulls_at({0, 2})}}, null_at(3)}, - {{7., 8.}}, - dlcw{}, - dlcw{dlcw{}}}; - - // 4 nulls - // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] - // [[7, 8]] - // [] - // NULL - // def histogram [1, 1, 1, 1, 10] - // rep histogram [4, 4, 6] - using ui16lcw = cudf::test::lists_column_wrapper; - cudf::test::lists_column_wrapper col4{ - {{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, null_at(3)}, {{7, 8}}, ui16lcw{}, ui16lcw{ui16lcw{}}}, - null_at(3)}; - - // 6 nulls - // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] - // [[7, 8]] - // [] - // NULL - // def histogram [1, 1, 1, 1, 2, 8] - // rep histogram [4, 4, 6] - lcw col5{{{{{1, 2, 3}, {}, {4, 5}, {}, {{0, 6, 0}, nulls_at({0, 2})}}, null_at(3)}, - {{7, 8}}, - lcw{}, - lcw{lcw{}}}, - null_at(3)}; - - // 4 nulls - // def histogram [1, 3, 9] - // rep histogram [4, 4, 5] 
- using strlcw = cudf::test::lists_column_wrapper; - cudf::test::lists_column_wrapper col6{ - {{"Monday", "Monday", "Friday"}, {}, {"Monday", "Friday"}, {}, {"Sunday", "Funday"}}, - {{"bee", "sting"}}, - strlcw{}, - strlcw{strlcw{}}}; - - // 5 nulls - // def histogram [1, 3, 1, 8] - // rep histogram [4, 4, 5] - using strlcw = cudf::test::lists_column_wrapper; - cudf::test::lists_column_wrapper col7{{{"Monday", "Monday", "Friday"}, - {}, - {{"Monday", "Friday"}, null_at(1)}, - {}, - {"Sunday", "Funday"}}, - {{"bee", "sting"}}, - strlcw{}, - strlcw{strlcw{}}}; - - // 11 nulls - // D 5 6 5 6 5 6 5 6 6 - // R 0 3 3 3 1 3 3 2 3 - // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] - // D 2 6 6 6 6 2 - // R 0 1 2 3 3 1 - // [NULL, [[13],[14,15,16]], NULL] - // D 2 3 2 4 - // R 0 1 1 1 - // [NULL, [], NULL, [[]]] - // D 0 - // R 0 - // NULL - // def histogram [1, 0, 4, 1, 1, 4, 9] - // rep histogram [4, 6, 2, 8] - lcw col8{{ - {{{{1, 2, 3, 4}, nulls_at({0, 2})}}, {{{5, 6, 7}, nulls_at({0, 2})}, {8, 9}}}, - {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, nulls_at({0, 2})}, - {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, nulls_at({0, 2})}, - lcw{lcw{lcw{}}}, - }, - null_at(3)}; - - table_view expected({col0, col1, col2, col3, col4, col5, col6, col7}); - - int64_t const expected_null_counts[] = {4, 4, 4, 6, 4, 6, 4, 5, 11}; - std::vector const expected_def_hists[] = {{1, 1, 2, 3}, - {1, 3, 10}, - {1, 1, 2, 10}, - {1, 1, 2, 2, 8}, - {1, 1, 1, 1, 10}, - {1, 1, 1, 1, 2, 8}, - {1, 3, 9}, - {1, 3, 1, 8}, - {1, 0, 4, 1, 1, 4, 9}}; - std::vector const expected_rep_hists[] = {{4, 3}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 5}, - {4, 4, 5}, - {4, 6, 2, 8}}; - - auto const filepath = temp_env->get_temp_filepath("ColumnIndexListWithNulls.parquet"); - auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .write_v2_headers(is_v2) - 
.compression(cudf::io::compression_type::NONE); - - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - ASSERT_TRUE(chunk.meta_data.size_statistics.has_value()); - ASSERT_TRUE(chunk.meta_data.size_statistics->definition_level_histogram.has_value()); - ASSERT_TRUE(chunk.meta_data.size_statistics->repetition_level_histogram.has_value()); - // there is only one page, so chunk stats should match the page stats - EXPECT_EQ(chunk.meta_data.size_statistics->definition_level_histogram.value(), - expected_def_hists[c]); - EXPECT_EQ(chunk.meta_data.size_statistics->repetition_level_histogram.value(), - expected_rep_hists[c]); - // only column 6 has string data - if (c == 6) { - ASSERT_TRUE(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.has_value()); - EXPECT_EQ(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.value(), 50L); - } else if (c == 7) { - ASSERT_TRUE(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.has_value()); - EXPECT_EQ(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.value(), 44L); - } else { - EXPECT_FALSE(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.has_value()); - } - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, expected_hdr_type); - // check null counts in V2 header - if (is_v2) { EXPECT_EQ(ph.data_page_header_v2.num_nulls, expected_null_counts[c]); } - } - - // 
check null counts in column chunk stats and page indexes - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - EXPECT_EQ(stats.null_count, expected_null_counts[c]); - - // should only be one page - EXPECT_FALSE(ci.null_pages[0]); - ASSERT_TRUE(ci.null_counts.has_value()); - EXPECT_EQ(ci.null_counts.value()[0], expected_null_counts[c]); - - ASSERT_TRUE(ci.definition_level_histogram.has_value()); - EXPECT_EQ(ci.definition_level_histogram.value(), expected_def_hists[c]); - - ASSERT_TRUE(ci.repetition_level_histogram.has_value()); - EXPECT_EQ(ci.repetition_level_histogram.value(), expected_rep_hists[c]); - - if (c == 6) { - ASSERT_TRUE(oi.unencoded_byte_array_data_bytes.has_value()); - EXPECT_EQ(oi.unencoded_byte_array_data_bytes.value()[0], 50L); - } else if (c == 7) { - ASSERT_TRUE(oi.unencoded_byte_array_data_bytes.has_value()); - EXPECT_EQ(oi.unencoded_byte_array_data_bytes.value()[0], 44L); - } else { - EXPECT_FALSE(oi.unencoded_byte_array_data_bytes.has_value()); - } - } - } -} - -TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) -{ - char const* coldata[] = { - // in-range 7 bit. should truncate to "yyyyyyyz" - "yyyyyyyyy", - // max 7 bit. should truncate to "x7fx7fx7fx7fx7fx7fx7fx80", since it's - // considered binary, not UTF-8. If UTF-8 it should not truncate. - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", - // max binary. this should not truncate - "\xff\xff\xff\xff\xff\xff\xff\xff\xff", - // in-range 2-byte UTF8 (U+00E9). should truncate to "éééê" - "ééééé", - // max 2-byte UTF8 (U+07FF). should not truncate - "߿߿߿߿߿", - // in-range 3-byte UTF8 (U+0800). should truncate to "ࠀࠁ" - "ࠀࠀࠀ", - // max 3-byte UTF8 (U+FFFF). should not truncate - "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", - // in-range 4-byte UTF8 (U+10000). should truncate to "𐀀𐀁" - "𐀀𐀀𐀀", - // max unicode (U+10FFFF). should truncate to \xf4\x8f\xbf\xbf\xf4\x90\x80\x80, - // which is no longer valid unicode, but is still ok UTF-8??? 
- "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", - // max 4-byte UTF8 (U+1FFFFF). should not truncate - "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; - - // NOTE: UTF8 min is initialized with 0xf7bfbfbf. Binary values larger - // than that will not become minimum value (when written as UTF-8). - char const* truncated_min[] = {"yyyyyyyy", - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", - "\xf7\xbf\xbf\xbf", - "éééé", - "߿߿߿߿", - "ࠀࠀ", - "\xef\xbf\xbf\xef\xbf\xbf", - "𐀀𐀀", - "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", - "\xf7\xbf\xbf\xbf"}; - - char const* truncated_max[] = {"yyyyyyyz", - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x80", - "\xff\xff\xff\xff\xff\xff\xff\xff\xff", - "éééê", - "߿߿߿߿߿", - "ࠀࠁ", - "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", - "𐀀𐀁", - "\xf4\x8f\xbf\xbf\xf4\x90\x80\x80", - "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; - - auto cols = [&]() { - using string_wrapper = column_wrapper; - std::vector> cols; - for (auto const str : coldata) { - cols.push_back(string_wrapper{str}.release()); - } - return cols; - }(); - auto expected = std::make_unique
(std::move(cols)); - - auto const filepath = temp_env->get_temp_filepath("CheckColumnIndexTruncation.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected->view()) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .column_index_truncate_length(8); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - - // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); - EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); - - // check that truncated values == expected - EXPECT_EQ(memcmp(ci.min_values[0].data(), truncated_min[c], ci.min_values[0].size()), 0); - EXPECT_EQ(memcmp(ci.max_values[0].data(), truncated_max[c], ci.max_values[0].size()), 0); - } - } -} - -TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) -{ - std::vector truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, - {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - - std::vector truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, - {0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - - cudf::test::lists_column_wrapper col0{ - {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 
0xfe}}; - cudf::test::lists_column_wrapper col1{ - {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - cudf::test::lists_column_wrapper col2{ - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - - auto expected = table_view{{col0, col1, col2}}; - - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); - output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); - output_metadata.column_metadata[2].set_name("col_binary2").set_output_as_binary(true); - - auto const filepath = temp_env->get_temp_filepath("BinaryColumnIndexTruncation.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(output_metadata)) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .column_index_truncate_length(8); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - auto const ci = read_column_index(source, chunk); - auto const stats = get_statistics(chunk); - - // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); - EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); - - // check that truncated values == expected - EXPECT_EQ(ci.min_values[0], truncated_min[c]); - EXPECT_EQ(ci.max_values[0], 
truncated_max[c]); - } - } -} - -TEST_F(ParquetReaderTest, EmptyColumnsParam) -{ - srand(31337); - auto const expected = create_random_fixed_table(2, 4, false); - - std::vector out_buffer; - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder( - cudf::io::source_info{out_buffer.data(), out_buffer.size()}) - .columns({}); - auto const result = cudf::io::read_parquet(read_opts); - - EXPECT_EQ(result.tbl->num_columns(), 0); - EXPECT_EQ(result.tbl->num_rows(), 0); -} - -TEST_F(ParquetReaderTest, BinaryAsStrings) -{ - std::vector strings{ - "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - auto const num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto seq_col3 = random_values(num_rows); - auto validity = cudf::test::iterators::no_nulls(); - - column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; - column_wrapper string_col{strings.begin(), strings.end()}; - column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; - cudf::test::lists_column_wrapper list_int_col{ - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}; - - auto output = table_view{{int_col, string_col, float_col, string_col, list_int_col}}; - cudf::io::table_input_metadata output_metadata(output); - output_metadata.column_metadata[0].set_name("col_other"); - output_metadata.column_metadata[1].set_name("col_string"); - output_metadata.column_metadata[2].set_name("col_float"); - 
output_metadata.column_metadata[3].set_name("col_string2").set_output_as_binary(true); - output_metadata.column_metadata[4].set_name("col_binary").set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("BinaryReadStrings.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output) - .metadata(std::move(output_metadata)); - cudf::io::write_parquet(out_opts); - - auto expected_string = table_view{{int_col, string_col, float_col, string_col, string_col}}; - auto expected_mixed = table_view{{int_col, string_col, float_col, list_int_col, list_int_col}}; - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema({{}, {}, {}, {}, {}}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - - std::vector md{ - {}, - {}, - {}, - cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().set_convert_binary_to_strings(false)}; - - cudf::io::parquet_reader_options mixed_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema(md); - result = cudf::io::read_parquet(mixed_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_mixed, result.tbl->view()); -} - -TEST_F(ParquetReaderTest, NestedByteArray) -{ - constexpr auto num_rows = 8; - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto seq_col3 = random_values(num_rows); - auto const validity = cudf::test::iterators::no_nulls(); - - column_wrapper int_col{seq_col0.begin(), seq_col0.end(), 
validity}; - column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; - cudf::test::lists_column_wrapper list_list_int_col{ - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}}; - - auto const expected = table_view{{int_col, float_col, list_list_int_col}}; - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0].set_name("col_other"); - output_metadata.column_metadata[1].set_name("col_float"); - output_metadata.column_metadata[2].set_name("col_binary").child(1).set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("NestedByteArray.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(output_metadata)); - cudf::io::write_parquet(out_opts); - - auto source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); - - std::vector md{ - {}, - {}, - cudf::io::reader_column_schema().add_child( - cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - - 
cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema(md); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, ByteArrayStats) -{ - // check that byte array min and max statistics are written as expected. If a byte array is - // written as a string, max utf8 is 0xf7bfbfbf and so the minimum value will be set to that value - // instead of a potential minimum higher than that. - std::vector expected_col0_min{0xf0}; - std::vector expected_col0_max{0xf0, 0xf5, 0xf5}; - std::vector expected_col1_min{0xfe, 0xfe, 0xfe}; - std::vector expected_col1_max{0xfe, 0xfe, 0xfe}; - - cudf::test::lists_column_wrapper list_int_col0{ - {0xf0}, {0xf0, 0xf5, 0xf3}, {0xf0, 0xf5, 0xf5}}; - cudf::test::lists_column_wrapper list_int_col1{ - {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}}; - - auto expected = table_view{{list_int_col0, list_int_col1}}; - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); - output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("ByteArrayStats.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(output_metadata)); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema({{}, {}}); - auto result = cudf::io::read_parquet(in_opts); - - auto source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - - EXPECT_EQ(fmd.schema[1].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); - 
EXPECT_EQ(fmd.schema[2].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); - - auto const stats0 = get_statistics(fmd.row_groups[0].columns[0]); - auto const stats1 = get_statistics(fmd.row_groups[0].columns[1]); - - EXPECT_EQ(expected_col0_min, stats0.min_value); - EXPECT_EQ(expected_col0_max, stats0.max_value); - EXPECT_EQ(expected_col1_min, stats1.min_value); - EXPECT_EQ(expected_col1_max, stats1.max_value); -} - -TEST_F(ParquetReaderTest, StructByteArray) -{ - constexpr auto num_rows = 100; - - auto seq_col0 = random_values(num_rows); - auto const validity = cudf::test::iterators::no_nulls(); - - column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; - cudf::test::lists_column_wrapper list_of_int{{seq_col0.begin(), seq_col0.begin() + 50}, - {seq_col0.begin() + 50, seq_col0.end()}}; - auto struct_col = cudf::test::structs_column_wrapper{{list_of_int}, validity}; - - auto const expected = table_view{{struct_col}}; - EXPECT_EQ(1, expected.num_columns()); - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0] - .set_name("struct_binary") - .child(0) - .set_name("a") - .set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("StructByteArray.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(output_metadata)); - cudf::io::write_parquet(out_opts); - - std::vector md{cudf::io::reader_column_schema().add_child( - cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema(md); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetReaderTest, NestingOptimizationTest) -{ - // test nesting levels > 
cudf::io::parquet::detail::max_cacheable_nesting_decode_info deep. - constexpr cudf::size_type num_nesting_levels = 16; - static_assert(num_nesting_levels > cudf::io::parquet::detail::max_cacheable_nesting_decode_info); - constexpr cudf::size_type rows_per_level = 2; - - constexpr cudf::size_type num_values = (1 << num_nesting_levels) * rows_per_level; - auto value_iter = thrust::make_counting_iterator(0); - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](cudf::size_type i) { return i % 2; }); - cudf::test::fixed_width_column_wrapper values(value_iter, value_iter + num_values, validity); - - // ~256k values with num_nesting_levels = 16 - int total_values_produced = num_values; - auto prev_col = values.release(); - for (int idx = 0; idx < num_nesting_levels; idx++) { - auto const depth = num_nesting_levels - idx; - auto const num_rows = (1 << (num_nesting_levels - idx)); - - auto offsets_iter = cudf::detail::make_counting_transform_iterator( - 0, [depth, rows_per_level](cudf::size_type i) { return i * rows_per_level; }); - total_values_produced += (num_rows + 1); - - cudf::test::fixed_width_column_wrapper offsets(offsets_iter, - offsets_iter + num_rows + 1); - auto c = cudf::make_lists_column(num_rows, offsets.release(), std::move(prev_col), 0, {}); - prev_col = std::move(c); - } - auto const& expect = prev_col; - - auto filepath = temp_env->get_temp_filepath("NestingDecodeCache.parquet"); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table_view{{*expect}}); - cudf::io::write_parquet(opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expect, result.tbl->get_column(0)); -} - -TEST_F(ParquetWriterTest, SingleValueDictionaryTest) -{ - constexpr unsigned int expected_bits = 1; - constexpr unsigned int nrows = 
1'000'000U; - - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return "a unique string value suffixed with 1"; }); - auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("SingleValueDictionaryTest.parquet"); - // set row group size so that there will be only one row group - // no compression so we can easily read page data - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::NONE) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .row_group_size_rows(nrows); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - - // make sure dictionary was used - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - auto used_dict = [&fmd]() { - for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { - return true; - } - } - return false; - }; - EXPECT_TRUE(used_dict()); - - // and check that the correct number of bits was used - auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); - auto const nbits = read_dict_bits(source, oi.page_locations[0]); - EXPECT_EQ(nbits, expected_bits); -} - -TEST_F(ParquetWriterTest, DictionaryNeverTest) -{ - constexpr unsigned int nrows = 1'000U; - - // only one value, so would normally use dictionary - auto elements = cudf::detail::make_counting_transform_iterator( - 0, 
[](auto i) { return "a unique string value suffixed with 1"; }); - auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("DictionaryNeverTest.parquet"); - // no compression so we can easily read page data - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::NONE) - .dictionary_policy(cudf::io::dictionary_policy::NEVER); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - - // make sure dictionary was not used - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - auto used_dict = [&fmd]() { - for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { - return true; - } - } - return false; - }; - EXPECT_FALSE(used_dict()); -} - -TEST_F(ParquetWriterTest, DictionaryAdaptiveTest) -{ - constexpr unsigned int nrows = 65'536U; - // cardinality is chosen to result in a dictionary > 1MB in size - constexpr unsigned int cardinality = 32'768U; - - // single value will have a small dictionary - auto elements0 = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return "a unique string value suffixed with 1"; }); - auto const col0 = cudf::test::strings_column_wrapper(elements0, elements0 + nrows); - - // high cardinality will have a large dictionary - auto elements1 = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { - return 
"a unique string value suffixed with " + std::to_string(i % cardinality); - }); - auto const col1 = cudf::test::strings_column_wrapper(elements1, elements1 + nrows); - - auto const expected = table_view{{col0, col1}}; - - auto const filepath = temp_env->get_temp_filepath("DictionaryAdaptiveTest.parquet"); - // no compression so we can easily read page data - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::ZSTD) - .dictionary_policy(cudf::io::dictionary_policy::ADAPTIVE); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - - // make sure dictionary was used as expected. col0 should use one, - // col1 should not. - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - auto used_dict = [&fmd](int col) { - for (auto enc : fmd.row_groups[0].columns[col].meta_data.encodings) { - if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { - return true; - } - } - return false; - }; - EXPECT_TRUE(used_dict(0)); - EXPECT_FALSE(used_dict(1)); -} - -TEST_F(ParquetWriterTest, DictionaryAlwaysTest) -{ - constexpr unsigned int nrows = 65'536U; - // cardinality is chosen to result in a dictionary > 1MB in size - constexpr unsigned int cardinality = 32'768U; - - // single value will have a small dictionary - auto elements0 = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return "a unique string value suffixed with 1"; }); - auto const col0 = cudf::test::strings_column_wrapper(elements0, elements0 + nrows); - - // high cardinality will have a 
large dictionary - auto elements1 = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { - return "a unique string value suffixed with " + std::to_string(i % cardinality); - }); - auto const col1 = cudf::test::strings_column_wrapper(elements1, elements1 + nrows); - - auto const expected = table_view{{col0, col1}}; - - auto const filepath = temp_env->get_temp_filepath("DictionaryAlwaysTest.parquet"); - // no compression so we can easily read page data - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::ZSTD) - .dictionary_policy(cudf::io::dictionary_policy::ALWAYS); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - - // make sure dictionary was used for both columns - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - auto used_dict = [&fmd](int col) { - for (auto enc : fmd.row_groups[0].columns[col].meta_data.encodings) { - if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { - return true; - } - } - return false; - }; - EXPECT_TRUE(used_dict(0)); - EXPECT_TRUE(used_dict(1)); -} - -TEST_F(ParquetWriterTest, DictionaryPageSizeEst) -{ - // one page - constexpr unsigned int nrows = 20'000U; - - // this test is creating a pattern of repeating then non-repeating values to trigger - // a "worst-case" for page size estimation in the presence of a dictionary. have confirmed - // that this fails for values over 16 in the final term of `max_RLE_page_size()`. 
- // The output of the iterator will be 'CCCCCRRRRRCCCCCRRRRR...` where 'C' is a changing - // value, and 'R' repeats. The encoder will turn this into a literal run of 8 values - // (`CCCCCRRR`) followed by a repeated run of 2 (`RR`). This pattern then repeats, getting - // as close as possible to a condition of repeated 8 value literal runs. - auto elements0 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - if ((i / 5) % 2 == 1) { - return std::string("non-unique string"); - } else { - return "a unique string value suffixed with " + std::to_string(i); - } - }); - auto const col0 = cudf::test::strings_column_wrapper(elements0, elements0 + nrows); - - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("DictionaryPageSizeEst.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::ZSTD) - .dictionary_policy(cudf::io::dictionary_policy::ALWAYS); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_P(ParquetSizedTest, DictionaryTest) -{ - unsigned int const cardinality = (1 << (GetParam() - 1)) + 1; - unsigned int const nrows = std::max(cardinality * 3 / 2, 3'000'000U); - - auto elements = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { - return "a unique string value suffixed with " + std::to_string(i % cardinality); - }); - auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("DictionaryTest.parquet"); - // set row group size so that there will be only one row group - // no 
compression so we can easily read page data - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::NONE) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .dictionary_policy(cudf::io::dictionary_policy::ALWAYS) - .row_group_size_rows(nrows) - .row_group_size_bytes(512 * 1024 * 1024); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - - // make sure dictionary was used - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - auto used_dict = [&fmd]() { - for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { - return true; - } - } - return false; - }; - EXPECT_TRUE(used_dict()); - - // and check that the correct number of bits was used - auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); - auto const nbits = read_dict_bits(source, oi.page_locations[0]); - EXPECT_EQ(nbits, GetParam()); -} - -TYPED_TEST(ParquetReaderSourceTest, BufferSourceTypes) -{ - using T = TypeParam; - - srand(31337); - auto table = create_random_fixed_table(5, 5, true); - - std::vector out_buffer; - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), *table); - cudf::io::write_parquet(out_opts); - - { - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info( - cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size()))); - auto const 
result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*table, result.tbl->view()); - } - - { - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(cudf::host_span( - reinterpret_cast(out_buffer.data()), out_buffer.size()))); - auto const result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*table, result.tbl->view()); - } -} - -TYPED_TEST(ParquetReaderSourceTest, BufferSourceArrayTypes) -{ - using T = TypeParam; - - srand(31337); - auto table = create_random_fixed_table(5, 5, true); - - std::vector out_buffer; - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), *table); - cudf::io::write_parquet(out_opts); - - auto full_table = cudf::concatenate(std::vector({*table, *table})); - - { - auto spans = std::vector>{ - cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size()), - cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size())}; - cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info(cudf::host_span>(spans.data(), spans.size()))); - auto const result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*full_table, result.tbl->view()); - } - - { - auto spans = std::vector>{ - cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size()), - cudf::host_span(reinterpret_cast(out_buffer.data()), out_buffer.size())}; - cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info(cudf::host_span>(spans.data(), spans.size()))); - auto const result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*full_table, result.tbl->view()); - } -} - -TEST_F(ParquetWriterTest, UserNullability) -{ - auto weight_col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; - auto ages_col = cudf::test::fixed_width_column_wrapper{{30, 
27, 5}}; - auto struct_col = cudf::test::structs_column_wrapper{weight_col, ages_col}; - - auto expected = table_view({struct_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_nullability(false); - expected_metadata.column_metadata[0].child(0).set_nullability(true); - - auto filepath = temp_env->get_temp_filepath("SingleWriteNullable.parquet"); - cudf::io::parquet_writer_options write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_parquet(write_opts); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - EXPECT_FALSE(result.tbl->view().column(0).nullable()); - EXPECT_TRUE(result.tbl->view().column(0).child(0).nullable()); - EXPECT_FALSE(result.tbl->view().column(0).child(1).nullable()); -} - -TEST_F(ParquetWriterTest, UserNullabilityInvalid) -{ - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); - auto col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}, valids}; - auto expected = table_view({col}); - - auto filepath = temp_env->get_temp_filepath("SingleWriteNullableInvalid.parquet"); - cudf::io::parquet_writer_options write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - // Should work without the nullability option - EXPECT_NO_THROW(cudf::io::write_parquet(write_opts)); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_nullability(false); - write_opts.set_metadata(std::move(expected_metadata)); - // Can't write a column with nulls as not nullable - EXPECT_THROW(cudf::io::write_parquet(write_opts), cudf::logic_error); -} - -TEST_F(ParquetReaderTest, SingleLevelLists) -{ - unsigned char list_bytes[] = { - 
0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0x28, 0x15, 0x28, 0x15, 0xa7, 0xce, 0x91, 0x8c, 0x06, - 0x1c, 0x15, 0x04, 0x15, 0x00, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, - 0x02, 0x02, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, - 0x02, 0x19, 0x3c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, - 0x61, 0x15, 0x02, 0x00, 0x35, 0x00, 0x18, 0x01, 0x66, 0x15, 0x02, 0x15, 0x06, 0x4c, 0x3c, 0x00, - 0x00, 0x00, 0x15, 0x02, 0x25, 0x04, 0x18, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x00, 0x16, 0x02, - 0x19, 0x1c, 0x19, 0x1c, 0x26, 0x08, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x06, 0x19, 0x28, 0x01, - 0x66, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x15, 0x00, 0x16, 0x04, 0x16, 0x56, 0x16, 0x56, 0x26, - 0x08, 0x3c, 0x18, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, - 0x28, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x1c, 0x15, - 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x16, 0x56, 0x16, 0x02, 0x26, 0x08, 0x16, 0x56, - 0x14, 0x00, 0x00, 0x28, 0x13, 0x52, 0x41, 0x50, 0x49, 0x44, 0x53, 0x20, 0x53, 0x70, 0x61, 0x72, - 0x6b, 0x20, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x19, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x9f, 0x00, - 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - - // read single level list reproducing parquet file - cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast(list_bytes), sizeof(list_bytes)}); - auto table = cudf::io::read_parquet(read_opts); - - auto const c0 = table.tbl->get_column(0); - EXPECT_TRUE(c0.type().id() == cudf::type_id::LIST); - - auto const lc = cudf::lists_column_view(c0); - auto const child = lc.child(); - EXPECT_TRUE(child.type().id() == cudf::type_id::INT32); -} - -TEST_F(ParquetReaderTest, ChunkedSingleLevelLists) -{ - unsigned char list_bytes[] = { - 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0x28, 0x15, 0x28, 0x15, 0xa7, 0xce, 
0x91, 0x8c, 0x06, - 0x1c, 0x15, 0x04, 0x15, 0x00, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, - 0x02, 0x02, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, - 0x02, 0x19, 0x3c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, - 0x61, 0x15, 0x02, 0x00, 0x35, 0x00, 0x18, 0x01, 0x66, 0x15, 0x02, 0x15, 0x06, 0x4c, 0x3c, 0x00, - 0x00, 0x00, 0x15, 0x02, 0x25, 0x04, 0x18, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x00, 0x16, 0x02, - 0x19, 0x1c, 0x19, 0x1c, 0x26, 0x08, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x06, 0x19, 0x28, 0x01, - 0x66, 0x05, 0x61, 0x72, 0x72, 0x61, 0x79, 0x15, 0x00, 0x16, 0x04, 0x16, 0x56, 0x16, 0x56, 0x26, - 0x08, 0x3c, 0x18, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, - 0x28, 0x04, 0x01, 0x00, 0x00, 0x00, 0x18, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x1c, 0x15, - 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x16, 0x56, 0x16, 0x02, 0x26, 0x08, 0x16, 0x56, - 0x14, 0x00, 0x00, 0x28, 0x13, 0x52, 0x41, 0x50, 0x49, 0x44, 0x53, 0x20, 0x53, 0x70, 0x61, 0x72, - 0x6b, 0x20, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x19, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x9f, 0x00, - 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - - auto reader = cudf::io::chunked_parquet_reader( - 1L << 31, - cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast(list_bytes), sizeof(list_bytes)})); - int iterations = 0; - while (reader.has_next() && iterations < 10) { - auto chunk = reader.read_chunk(); - } - EXPECT_TRUE(iterations < 10); -} - -TEST_F(ParquetWriterTest, CompStats) -{ - auto table = create_random_fixed_table(1, 100000, true); - - auto const stats = std::make_shared(); - - std::vector unused_buffer; - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}, table->view()) - .compression_statistics(stats); - cudf::io::write_parquet(opts); - - EXPECT_NE(stats->num_compressed_bytes(), 0); - 
EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); - EXPECT_FALSE(std::isnan(stats->compression_ratio())); -} - -TEST_F(ParquetChunkedWriterTest, CompStats) -{ - auto table = create_random_fixed_table(1, 100000, true); - - auto const stats = std::make_shared(); - - std::vector unused_buffer; - cudf::io::chunked_parquet_writer_options opts = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}) - .compression_statistics(stats); - cudf::io::parquet_chunked_writer(opts).write(*table); - - EXPECT_NE(stats->num_compressed_bytes(), 0); - EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); - EXPECT_FALSE(std::isnan(stats->compression_ratio())); - - auto const single_table_comp_stats = *stats; - cudf::io::parquet_chunked_writer(opts).write(*table); - - EXPECT_EQ(stats->compression_ratio(), single_table_comp_stats.compression_ratio()); - EXPECT_EQ(stats->num_compressed_bytes(), 2 * single_table_comp_stats.num_compressed_bytes()); - - EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); -} - -void expect_compression_stats_empty(std::shared_ptr stats) -{ - EXPECT_EQ(stats->num_compressed_bytes(), 0); - EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); - EXPECT_TRUE(std::isnan(stats->compression_ratio())); -} - -TEST_F(ParquetWriterTest, CompStatsEmptyTable) -{ - auto table_no_rows = create_random_fixed_table(20, 0, false); - - auto const stats = std::make_shared(); - - std::vector unused_buffer; - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}, - table_no_rows->view()) - .compression_statistics(stats); - cudf::io::write_parquet(opts); - - expect_compression_stats_empty(stats); -} - -TEST_F(ParquetChunkedWriterTest, CompStatsEmptyTable) -{ - auto table_no_rows = create_random_fixed_table(20, 0, false); - - auto const stats = 
std::make_shared(); - - std::vector unused_buffer; - cudf::io::chunked_parquet_writer_options opts = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}) - .compression_statistics(stats); - cudf::io::parquet_chunked_writer(opts).write(*table_no_rows); - - expect_compression_stats_empty(stats); -} - -TEST_F(ParquetReaderTest, ReorderedReadMultipleFiles) -{ - constexpr auto num_rows = 50'000; - constexpr auto cardinality = 20'000; - - // table 1 - auto str1 = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return "cat " + std::to_string(i % cardinality); }); - auto cols1 = cudf::test::strings_column_wrapper(str1, str1 + num_rows); - - auto int1 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % cardinality; }); - auto coli1 = cudf::test::fixed_width_column_wrapper(int1, int1 + num_rows); - - auto const expected1 = table_view{{cols1, coli1}}; - auto const swapped1 = table_view{{coli1, cols1}}; - - auto const filepath1 = temp_env->get_temp_filepath("LargeReorderedRead1.parquet"); - auto out_opts1 = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath1}, expected1) - .compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(out_opts1); - - // table 2 - auto str2 = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return "dog " + std::to_string(i % cardinality); }); - auto cols2 = cudf::test::strings_column_wrapper(str2, str2 + num_rows); - - auto int2 = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return (i % cardinality) + cardinality; }); - auto coli2 = cudf::test::fixed_width_column_wrapper(int2, int2 + num_rows); - - auto const expected2 = table_view{{cols2, coli2}}; - auto const swapped2 = table_view{{coli2, cols2}}; - - auto const filepath2 = temp_env->get_temp_filepath("LargeReorderedRead2.parquet"); - auto out_opts2 = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath2}, expected2) - 
.compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(out_opts2); - - // read in both files swapping the columns - auto read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}}) - .columns({"_col1", "_col0"}); - auto result = cudf::io::read_parquet(read_opts); - auto sliced = cudf::slice(result.tbl->view(), {0, num_rows, num_rows, 2 * num_rows}); - CUDF_TEST_EXPECT_TABLES_EQUAL(sliced[0], swapped1); - CUDF_TEST_EXPECT_TABLES_EQUAL(sliced[1], swapped2); -} - -// Test fixture for metadata tests -struct ParquetMetadataReaderTest : public cudf::test::BaseFixture { - std::string print(cudf::io::parquet_column_schema schema, int depth = 0) - { - std::string child_str; - for (auto const& child : schema.children()) { - child_str += print(child, depth + 1); - } - return std::string(depth, ' ') + schema.name() + "\n" + child_str; - } -}; - -TEST_F(ParquetMetadataReaderTest, TestBasic) -{ - auto const num_rows = 1200; - - auto ints = random_values(num_rows); - auto floats = random_values(num_rows); - column_wrapper int_col(ints.begin(), ints.end()); - column_wrapper float_col(floats.begin(), floats.end()); - - table_view expected({int_col, float_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("int_col"); - expected_metadata.column_metadata[1].set_name("float_col"); - - auto filepath = temp_env->get_temp_filepath("MetadataTest.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_parquet(out_opts); - - auto meta = read_parquet_metadata(cudf::io::source_info{filepath}); - EXPECT_EQ(meta.num_rows(), num_rows); - - std::string expected_schema = R"(schema - int_col - float_col -)"; - EXPECT_EQ(expected_schema, print(meta.schema().root())); - - EXPECT_EQ(meta.schema().root().name(), "schema"); - 
EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); - ASSERT_EQ(meta.schema().root().num_children(), 2); - - EXPECT_EQ(meta.schema().root().child(0).name(), "int_col"); - EXPECT_EQ(meta.schema().root().child(1).name(), "float_col"); -} - -TEST_F(ParquetMetadataReaderTest, TestNested) -{ - auto const num_rows = 1200; - auto const lists_per_row = 4; - auto const num_child_rows = num_rows * lists_per_row; - - auto keys = random_values(num_child_rows); - auto vals = random_values(num_child_rows); - column_wrapper keys_col(keys.begin(), keys.end()); - column_wrapper vals_col(vals.begin(), vals.end()); - auto s_col = cudf::test::structs_column_wrapper({keys_col, vals_col}).release(); - - std::vector row_offsets(num_rows + 1); - for (int idx = 0; idx < num_rows + 1; ++idx) { - row_offsets[idx] = idx * lists_per_row; - } - column_wrapper offsets(row_offsets.begin(), row_offsets.end()); - - auto list_col = - cudf::make_lists_column(num_rows, offsets.release(), std::move(s_col), 0, rmm::device_buffer{}); - - table_view expected({*list_col, *list_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("maps"); - expected_metadata.column_metadata[0].set_list_column_as_map(); - expected_metadata.column_metadata[1].set_name("lists"); - expected_metadata.column_metadata[1].child(1).child(0).set_name("int_field"); - expected_metadata.column_metadata[1].child(1).child(1).set_name("float_field"); - - auto filepath = temp_env->get_temp_filepath("MetadataTest.orc"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_parquet(out_opts); - - auto meta = read_parquet_metadata(cudf::io::source_info{filepath}); - EXPECT_EQ(meta.num_rows(), num_rows); - - std::string expected_schema = R"(schema - maps - key_value - key - value - lists - list - element - 
int_field - float_field -)"; - EXPECT_EQ(expected_schema, print(meta.schema().root())); - - EXPECT_EQ(meta.schema().root().name(), "schema"); - EXPECT_EQ(meta.schema().root().type_kind(), - cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // struct - ASSERT_EQ(meta.schema().root().num_children(), 2); - - auto const& out_map_col = meta.schema().root().child(0); - EXPECT_EQ(out_map_col.name(), "maps"); - EXPECT_EQ(out_map_col.type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // map - - ASSERT_EQ(out_map_col.num_children(), 1); - EXPECT_EQ(out_map_col.child(0).name(), "key_value"); // key_value (named in parquet writer) - ASSERT_EQ(out_map_col.child(0).num_children(), 2); - EXPECT_EQ(out_map_col.child(0).child(0).name(), "key"); // key (named in parquet writer) - EXPECT_EQ(out_map_col.child(0).child(1).name(), "value"); // value (named in parquet writer) - EXPECT_EQ(out_map_col.child(0).child(0).type_kind(), cudf::io::parquet::TypeKind::INT32); // int - EXPECT_EQ(out_map_col.child(0).child(1).type_kind(), - cudf::io::parquet::TypeKind::FLOAT); // float - - auto const& out_list_col = meta.schema().root().child(1); - EXPECT_EQ(out_list_col.name(), "lists"); - EXPECT_EQ(out_list_col.type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // list - // TODO repetition type? 
- ASSERT_EQ(out_list_col.num_children(), 1); - EXPECT_EQ(out_list_col.child(0).name(), "list"); // list (named in parquet writer) - ASSERT_EQ(out_list_col.child(0).num_children(), 1); - - auto const& out_list_struct_col = out_list_col.child(0).child(0); - EXPECT_EQ(out_list_struct_col.name(), "element"); // elements (named in parquet writer) - EXPECT_EQ(out_list_struct_col.type_kind(), - cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // struct - ASSERT_EQ(out_list_struct_col.num_children(), 2); - - auto const& out_int_col = out_list_struct_col.child(0); - EXPECT_EQ(out_int_col.name(), "int_field"); - EXPECT_EQ(out_int_col.type_kind(), cudf::io::parquet::TypeKind::INT32); - - auto const& out_float_col = out_list_struct_col.child(1); - EXPECT_EQ(out_float_col.name(), "float_field"); - EXPECT_EQ(out_float_col.type_kind(), cudf::io::parquet::TypeKind::FLOAT); -} - -TEST_F(ParquetWriterTest, NoNullsAsNonNullable) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - column_wrapper col{{1, 2, 3}, valids}; - table_view expected({col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_nullability(false); - - auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - // Writer should be able to write a column without nulls as non-nullable - EXPECT_NO_THROW(cudf::io::write_parquet(out_opts)); -} - -TEST_F(ParquetReaderTest, FilterSimple) -{ - srand(31337); - auto written_table = create_random_fixed_table(9, 9, false); - - auto filepath = temp_env->get_temp_filepath("FilterSimple.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *written_table); - cudf::io::write_parquet(args); - - // Filtering AST - table[0] < 
RAND_MAX/2 - auto literal_value = cudf::numeric_scalar(RAND_MAX / 2); - auto literal = cudf::ast::literal(literal_value); - auto col_ref_0 = cudf::ast::column_reference(0); - auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); - - auto predicate = cudf::compute_column(*written_table, filter_expression); - EXPECT_EQ(predicate->view().type().id(), cudf::type_id::BOOL8) - << "Predicate filter should return a boolean"; - auto expected = cudf::apply_boolean_mask(*written_table, *predicate); - // To make sure AST filters out some elements - EXPECT_LT(expected->num_rows(), written_table->num_rows()); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .filter(filter_expression); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -auto create_parquet_with_stats(std::string const& filename) -{ - auto col0 = testdata::ascending(); - auto col1 = testdata::descending(); - auto col2 = testdata::unordered(); - - auto const expected = table_view{{col0, col1, col2}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_uint32"); - expected_metadata.column_metadata[1].set_name("col_int64"); - expected_metadata.column_metadata[2].set_name("col_double"); - - auto const filepath = temp_env->get_temp_filepath(filename); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)) - .row_group_size_rows(8000) - .stats_level(cudf::io::statistics_freq::STATISTICS_ROWGROUP); - cudf::io::write_parquet(out_opts); - - std::vector> columns; - columns.push_back(col0.release()); - columns.push_back(col1.release()); - columns.push_back(col2.release()); - - return std::pair{cudf::table{std::move(columns)}, filepath}; -} - 
-TEST_F(ParquetReaderTest, FilterIdentity) -{ - auto [src, filepath] = create_parquet_with_stats("FilterIdentity.parquet"); - - // Filtering AST - identity function, always true. - auto literal_value = cudf::numeric_scalar(true); - auto literal = cudf::ast::literal(literal_value); - auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::IDENTITY, literal); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .filter(filter_expression); - auto result = cudf::io::read_parquet(read_opts); - - cudf::io::parquet_reader_options read_opts2 = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result2 = cudf::io::read_parquet(read_opts2); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *result2.tbl); -} - -TEST_F(ParquetReaderTest, FilterReferenceExpression) -{ - auto [src, filepath] = create_parquet_with_stats("FilterReferenceExpression.parquet"); - // Filtering AST - table[0] < 150 - auto literal_value = cudf::numeric_scalar(150); - auto literal = cudf::ast::literal(literal_value); - auto col_ref_0 = cudf::ast::column_reference(0); - auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); - - // Expected result - auto predicate = cudf::compute_column(src, filter_expression); - auto expected = cudf::apply_boolean_mask(src, *predicate); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .filter(filter_expression); - auto result = cudf::io::read_parquet(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetReaderTest, FilterNamedExpression) -{ - auto [src, filepath] = create_parquet_with_stats("NamedExpression.parquet"); - // Filtering AST - table["col_uint32"] < 150 - auto literal_value = cudf::numeric_scalar(150); - auto literal = cudf::ast::literal(literal_value); - auto col_name_0 = 
cudf::ast::column_name_reference("col_uint32"); - auto parquet_filter = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_name_0, literal); - auto col_ref_0 = cudf::ast::column_reference(0); - auto table_filter = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); - - // Expected result - auto predicate = cudf::compute_column(src, table_filter); - auto expected = cudf::apply_boolean_mask(src, *predicate); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .filter(parquet_filter); - auto result = cudf::io::read_parquet(read_opts); - - // tests - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -// Test for Types - numeric, chrono, string. -template -struct ParquetReaderPredicatePushdownTest : public ParquetReaderTest {}; - -// These chrono types are not supported because parquet writer does not have a type to represent -// them. -using UnsupportedChronoTypes = - cudf::test::Types; -// Also fixed point types unsupported, because AST does not support them yet. 
-using SupportedTestTypes = cudf::test::RemoveIf, - cudf::test::ComparableTypes>; - -TYPED_TEST_SUITE(ParquetReaderPredicatePushdownTest, SupportedTestTypes); - -template -auto create_parquet_typed_with_stats(std::string const& filename) -{ - auto col0 = testdata::ascending(); - auto col1 = testdata::descending(); - auto col2 = testdata::unordered(); - - auto const written_table = table_view{{col0, col1, col2}}; - auto const filepath = temp_env->get_temp_filepath("FilterTyped.parquet"); - { - cudf::io::table_input_metadata expected_metadata(written_table); - expected_metadata.column_metadata[0].set_name("col0"); - expected_metadata.column_metadata[1].set_name("col1"); - expected_metadata.column_metadata[2].set_name("col2"); - - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) - .metadata(std::move(expected_metadata)) - .row_group_size_rows(8000); - cudf::io::write_parquet(out_opts); - } - - std::vector> columns; - columns.push_back(col0.release()); - columns.push_back(col1.release()); - columns.push_back(col2.release()); - - return std::pair{cudf::table{std::move(columns)}, filepath}; -} - -TYPED_TEST(ParquetReaderPredicatePushdownTest, FilterTyped) -{ - using T = TypeParam; - - auto const [src, filepath] = create_parquet_typed_with_stats("FilterTyped.parquet"); - auto const written_table = src.view(); - - // Filtering AST - auto literal_value = []() { - if constexpr (cudf::is_timestamp()) { - // table[0] < 10000 timestamp days/seconds/milliseconds/microseconds/nanoseconds - return cudf::timestamp_scalar(T(typename T::duration(10000))); // i (0-20,000) - } else if constexpr (cudf::is_duration()) { - // table[0] < 10000 day/seconds/milliseconds/microseconds/nanoseconds - return cudf::duration_scalar(T(10000)); // i (0-20,000) - } else if constexpr (std::is_same_v) { - // table[0] < "000010000" - return cudf::string_scalar("000010000"); // i (0-20,000) - } else { - // table[0] 
< 0 or 100u - return cudf::numeric_scalar((100 - 100 * std::is_signed_v)); // i/100 (-100-100/ 0-200) - } - }(); - auto literal = cudf::ast::literal(literal_value); - auto col_name_0 = cudf::ast::column_name_reference("col0"); - auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_name_0, literal); - auto col_ref_0 = cudf::ast::column_reference(0); - auto ref_filter = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal); - - // Expected result - auto predicate = cudf::compute_column(written_table, ref_filter); - EXPECT_EQ(predicate->view().type().id(), cudf::type_id::BOOL8) - << "Predicate filter should return a boolean"; - auto expected = cudf::apply_boolean_mask(written_table, *predicate); - - // Reading with Predicate Pushdown - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .filter(filter_expression); - auto result = cudf::io::read_parquet(read_opts); - auto result_table = result.tbl->view(); - - // tests - EXPECT_EQ(int(written_table.column(0).type().id()), int(result_table.column(0).type().id())) - << "col0 type mismatch"; - // To make sure AST filters out some elements - EXPECT_LT(expected->num_rows(), written_table.num_rows()); - EXPECT_EQ(result_table.num_rows(), expected->num_rows()); - EXPECT_EQ(result_table.num_columns(), expected->num_columns()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result_table); -} - -TEST_F(ParquetReaderTest, FilterMultiple1) -{ - using T = cudf::string_view; - - auto const [src, filepath] = create_parquet_typed_with_stats("FilterMultiple1.parquet"); - auto const written_table = src.view(); - - // Filtering AST - 10000 < table[0] < 12000 - std::string const low = "000010000"; - std::string const high = "000012000"; - auto lov = cudf::string_scalar(low, true); - auto hiv = cudf::string_scalar(high, true); - auto filter_col = cudf::ast::column_reference(0); - auto lo_lit = cudf::ast::literal(lov); - 
auto hi_lit = cudf::ast::literal(hiv); - auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col, lo_lit); - auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col, hi_lit); - auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); - - // Expected result - auto predicate = cudf::compute_column(written_table, expr_3); - auto expected = cudf::apply_boolean_mask(written_table, *predicate); - - auto si = cudf::io::source_info(filepath); - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_3); - auto table_with_metadata = cudf::io::read_parquet(builder); - auto result = table_with_metadata.tbl->view(); - - // tests - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); -} - -TEST_F(ParquetReaderTest, FilterMultiple2) -{ - // multiple conditions on same column. - using T = cudf::string_view; - - auto const [src, filepath] = create_parquet_typed_with_stats("FilterMultiple2.parquet"); - auto const written_table = src.view(); - // 0-8000, 8001-16000, 16001-20000 - - // Filtering AST - // (table[0] >= "000010000" AND table[0] < "000012000") OR - // (table[0] >= "000017000" AND table[0] < "000019000") - std::string const low1 = "000010000"; - std::string const high1 = "000012000"; - auto lov = cudf::string_scalar(low1, true); - auto hiv = cudf::string_scalar(high1, true); - auto filter_col = cudf::ast::column_reference(0); - auto lo_lit = cudf::ast::literal(lov); - auto hi_lit = cudf::ast::literal(hiv); - auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col, lo_lit); - auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col, hi_lit); - auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); - std::string const low2 = "000017000"; - std::string const high2 = "000019000"; - auto lov2 = cudf::string_scalar(low2, true); - auto hiv2 = cudf::string_scalar(high2, true); - auto lo_lit2 = 
cudf::ast::literal(lov2); - auto hi_lit2 = cudf::ast::literal(hiv2); - auto expr_4 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col, lo_lit2); - auto expr_5 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col, hi_lit2); - auto expr_6 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4, expr_5); - auto expr_7 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6); - - // Expected result - auto predicate = cudf::compute_column(written_table, expr_7); - auto expected = cudf::apply_boolean_mask(written_table, *predicate); - - auto si = cudf::io::source_info(filepath); - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_7); - auto table_with_metadata = cudf::io::read_parquet(builder); - auto result = table_with_metadata.tbl->view(); - - // tests - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); -} - -TEST_F(ParquetReaderTest, FilterMultiple3) -{ - // multiple conditions with reference to multiple columns. - // index and name references mixed. - using T = uint32_t; - auto const [src, filepath] = create_parquet_typed_with_stats("FilterMultiple3.parquet"); - auto const written_table = src.view(); - - // Filtering AST - (table[0] >= 70 AND table[0] < 90) OR (table[1] >= 100 AND table[1] < 120) - // row groups min, max: - // table[0] 0-80, 81-160, 161-200. - // table[1] 200-121, 120-41, 40-0. 
- auto filter_col1 = cudf::ast::column_reference(0); - auto filter_col2 = cudf::ast::column_name_reference("col1"); - T constexpr low1 = 70; - T constexpr high1 = 90; - T constexpr low2 = 100; - T constexpr high2 = 120; - auto lov = cudf::numeric_scalar(low1, true); - auto hiv = cudf::numeric_scalar(high1, true); - auto lo_lit1 = cudf::ast::literal(lov); - auto hi_lit1 = cudf::ast::literal(hiv); - auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col1, lo_lit1); - auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, hi_lit1); - auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); - auto lov2 = cudf::numeric_scalar(low2, true); - auto hiv2 = cudf::numeric_scalar(high2, true); - auto lo_lit2 = cudf::ast::literal(lov2); - auto hi_lit2 = cudf::ast::literal(hiv2); - auto expr_4 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col2, lo_lit2); - auto expr_5 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2, hi_lit2); - auto expr_6 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4, expr_5); - // expression to test - auto expr_7 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6); - - // Expected result - auto filter_col2_ref = cudf::ast::column_reference(1); - auto expr_4_ref = - cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col2_ref, lo_lit2); - auto expr_5_ref = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2_ref, hi_lit2); - auto expr_6_ref = - cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4_ref, expr_5_ref); - auto expr_7_ref = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6_ref); - auto predicate = cudf::compute_column(written_table, expr_7_ref); - auto expected = cudf::apply_boolean_mask(written_table, *predicate); - - auto si = cudf::io::source_info(filepath); - auto builder = 
cudf::io::parquet_reader_options::builder(si).filter(expr_7); - auto table_with_metadata = cudf::io::read_parquet(builder); - auto result = table_with_metadata.tbl->view(); - - // tests - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); -} - -TEST_F(ParquetReaderTest, FilterSupported) -{ - using T = uint32_t; - auto const [src, filepath] = create_parquet_typed_with_stats("FilterSupported.parquet"); - auto const written_table = src.view(); - - // Filtering AST - ((table[0] > 70 AND table[0] <= 90) OR (table[1] >= 100 AND table[1] < 120)) - // AND (table[1] != 110) - // row groups min, max: - // table[0] 0-80, 81-160, 161-200. - // table[1] 200-121, 120-41, 40-0. - auto filter_col1 = cudf::ast::column_reference(0); - auto filter_col2 = cudf::ast::column_reference(1); - T constexpr low1 = 70; - T constexpr high1 = 90; - T constexpr low2 = 100; - T constexpr high2 = 120; - T constexpr skip_value = 110; - auto lov = cudf::numeric_scalar(low1, true); - auto hiv = cudf::numeric_scalar(high1, true); - auto lo_lit1 = cudf::ast::literal(lov); - auto hi_lit1 = cudf::ast::literal(hiv); - auto expr_1 = cudf::ast::operation(cudf::ast::ast_operator::GREATER, filter_col1, lo_lit1); - auto expr_2 = cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, filter_col1, hi_lit1); - auto expr_3 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_1, expr_2); - auto lov2 = cudf::numeric_scalar(low2, true); - auto hiv2 = cudf::numeric_scalar(high2, true); - auto lo_lit2 = cudf::ast::literal(lov2); - auto hi_lit2 = cudf::ast::literal(hiv2); - auto expr_4 = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, filter_col2, lo_lit2); - auto expr_5 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2, hi_lit2); - auto expr_6 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_4, expr_5); - auto expr_7 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_OR, expr_3, expr_6); - auto skip_ov = cudf::numeric_scalar(skip_value, true); - 
auto skip_lit = cudf::ast::literal(skip_ov); - auto expr_8 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col2, skip_lit); - auto expr_9 = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_7, expr_8); - - // Expected result - auto predicate = cudf::compute_column(written_table, expr_9); - auto expected = cudf::apply_boolean_mask(written_table, *predicate); - - auto si = cudf::io::source_info(filepath); - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_9); - auto table_with_metadata = cudf::io::read_parquet(builder); - auto result = table_with_metadata.tbl->view(); - - // tests - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); -} - -TEST_F(ParquetReaderTest, FilterSupported2) -{ - using T = uint32_t; - constexpr auto num_rows = 4000; - auto elements0 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 2000; }); - auto elements1 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 1000; }); - auto elements2 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 500; }); - auto col0 = cudf::test::fixed_width_column_wrapper(elements0, elements0 + num_rows); - auto col1 = cudf::test::fixed_width_column_wrapper(elements1, elements1 + num_rows); - auto col2 = cudf::test::fixed_width_column_wrapper(elements2, elements2 + num_rows); - auto const written_table = table_view{{col0, col1, col2}}; - auto const filepath = temp_env->get_temp_filepath("FilterSupported2.parquet"); - { - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) - .row_group_size_rows(1000); - cudf::io::write_parquet(out_opts); - } - auto si = cudf::io::source_info(filepath); - auto filter_col0 = cudf::ast::column_reference(0); - auto filter_col1 = cudf::ast::column_reference(1); - auto filter_col2 = cudf::ast::column_reference(2); - auto s_value = cudf::numeric_scalar(1, true); - 
auto lit_value = cudf::ast::literal(s_value); - - auto test_expr = [&](auto& expr) { - // Expected result - auto predicate = cudf::compute_column(written_table, expr); - auto expected = cudf::apply_boolean_mask(written_table, *predicate); - - // tests - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); - auto table_with_metadata = cudf::io::read_parquet(builder); - auto result = table_with_metadata.tbl->view(); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); - }; - - // row groups min, max: - // table[0] 0-0, 0-0, 1-1, 1-1 - // table[1] 0-0, 1-1, 2-2, 3-3 - // table[2] 0-1, 2-3, 4-5, 6-7 - - // Filtering AST - table[i] == 1 - { - auto expr0 = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col0, lit_value); - test_expr(expr0); - - auto expr1 = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col1, lit_value); - test_expr(expr1); - - auto expr2 = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col2, lit_value); - test_expr(expr2); - } - // Filtering AST - table[i] != 1 - { - auto expr0 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col0, lit_value); - test_expr(expr0); - - auto expr1 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col1, lit_value); - test_expr(expr1); - - auto expr2 = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col2, lit_value); - test_expr(expr2); - } -} - -// Error types - type mismatch, invalid column name, invalid literal type, invalid operator, -// non-bool filter output type. 
-TEST_F(ParquetReaderTest, FilterErrors) -{ - using T = uint32_t; - auto const [src, filepath] = create_parquet_typed_with_stats("FilterErrors.parquet"); - auto const written_table = src.view(); - auto si = cudf::io::source_info(filepath); - - // Filtering AST - invalid column index - { - auto filter_col1 = cudf::ast::column_reference(3); - T constexpr low = 100; - auto lov = cudf::numeric_scalar(low, true); - auto low_lot = cudf::ast::literal(lov); - auto expr = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); - - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); - EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); - } - - // Filtering AST - invalid column name - { - auto filter_col1 = cudf::ast::column_name_reference("col3"); - T constexpr low = 100; - auto lov = cudf::numeric_scalar(low, true); - auto low_lot = cudf::ast::literal(lov); - auto expr = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); - EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); - } - - // Filtering AST - incompatible literal type - { - auto filter_col1 = cudf::ast::column_name_reference("col0"); - auto filter_col2 = cudf::ast::column_reference(1); - int64_t constexpr low = 100; - auto lov = cudf::numeric_scalar(low, true); - auto low_lot = cudf::ast::literal(lov); - auto expr1 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); - auto expr2 = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col2, low_lot); - auto builder1 = cudf::io::parquet_reader_options::builder(si).filter(expr1); - EXPECT_THROW(cudf::io::read_parquet(builder1), cudf::logic_error); - - auto builder2 = cudf::io::parquet_reader_options::builder(si).filter(expr2); - EXPECT_THROW(cudf::io::read_parquet(builder2), cudf::logic_error); - } - - // Filtering AST - "table[0] + 110" is invalid filter expression - { - 
auto filter_col1 = cudf::ast::column_reference(0); - T constexpr add_value = 110; - auto add_v = cudf::numeric_scalar(add_value, true); - auto add_lit = cudf::ast::literal(add_v); - auto expr_8 = cudf::ast::operation(cudf::ast::ast_operator::ADD, filter_col1, add_lit); - - auto si = cudf::io::source_info(filepath); - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr_8); - EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); - - // Expected result throw to show that the filter expression is invalid, - // not a limitation of the parquet predicate pushdown. - auto predicate = cudf::compute_column(written_table, expr_8); - EXPECT_THROW(cudf::apply_boolean_mask(written_table, *predicate), cudf::logic_error); - } - - // Filtering AST - INT64(table[0] < 100) non-bool expression - { - auto filter_col1 = cudf::ast::column_reference(0); - T constexpr low = 100; - auto lov = cudf::numeric_scalar(low, true); - auto low_lot = cudf::ast::literal(lov); - auto bool_expr = cudf::ast::operation(cudf::ast::ast_operator::LESS, filter_col1, low_lot); - auto cast = cudf::ast::operation(cudf::ast::ast_operator::CAST_TO_INT64, bool_expr); - - auto builder = cudf::io::parquet_reader_options::builder(si).filter(cast); - EXPECT_THROW(cudf::io::read_parquet(builder), cudf::logic_error); - EXPECT_NO_THROW(cudf::compute_column(written_table, cast)); - auto predicate = cudf::compute_column(written_table, cast); - EXPECT_NE(predicate->view().type().id(), cudf::type_id::BOOL8); - } -} - -// Filter without stats information in file. 
-TEST_F(ParquetReaderTest, FilterNoStats) -{ - using T = uint32_t; - constexpr auto num_rows = 16000; - auto elements = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 1000; }); - auto col0 = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); - auto const written_table = table_view{{col0}}; - auto const filepath = temp_env->get_temp_filepath("FilterNoStats.parquet"); - { - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) - .row_group_size_rows(8000) - .stats_level(cudf::io::statistics_freq::STATISTICS_NONE); - cudf::io::write_parquet(out_opts); - } - auto si = cudf::io::source_info(filepath); - auto filter_col0 = cudf::ast::column_reference(0); - auto s_value = cudf::numeric_scalar(1, true); - auto lit_value = cudf::ast::literal(s_value); - - // row groups min, max: - // table[0] 0-0, 1-1, 2-2, 3-3 - // Filtering AST - table[0] > 1 - auto expr = cudf::ast::operation(cudf::ast::ast_operator::GREATER, filter_col0, lit_value); - - // Expected result - auto predicate = cudf::compute_column(written_table, expr); - auto expected = cudf::apply_boolean_mask(written_table, *predicate); - - // tests - auto builder = cudf::io::parquet_reader_options::builder(si).filter(expr); - auto table_with_metadata = cudf::io::read_parquet(builder); - auto result = table_with_metadata.tbl->view(); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result); -} - -// Filter for float column with NaN values -TEST_F(ParquetReaderTest, FilterFloatNAN) -{ - constexpr auto num_rows = 24000; - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [num_rows](auto i) { return i > num_rows / 2 ? 
NAN : i; }); - auto col0 = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); - auto col1 = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); - - auto const written_table = table_view{{col0, col1}}; - auto const filepath = temp_env->get_temp_filepath("FilterFloatNAN.parquet"); - { - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written_table) - .row_group_size_rows(8000); - cudf::io::write_parquet(out_opts); - } - auto si = cudf::io::source_info(filepath); - auto filter_col0 = cudf::ast::column_reference(0); - auto filter_col1 = cudf::ast::column_reference(1); - auto s0_value = cudf::numeric_scalar(NAN, true); - auto lit0_value = cudf::ast::literal(s0_value); - auto s1_value = cudf::numeric_scalar(NAN, true); - auto lit1_value = cudf::ast::literal(s1_value); - - // row groups min, max: - // table[0] 0-0, 1-1, 2-2, 3-3 - // Filtering AST - table[0] == NAN, table[1] != NAN - auto expr_eq = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, filter_col0, lit0_value); - auto expr_neq = cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, filter_col1, lit1_value); - - // Expected result - auto predicate0 = cudf::compute_column(written_table, expr_eq); - auto expected0 = cudf::apply_boolean_mask(written_table, *predicate0); - auto predicate1 = cudf::compute_column(written_table, expr_neq); - auto expected1 = cudf::apply_boolean_mask(written_table, *predicate1); - - // tests - auto builder0 = cudf::io::parquet_reader_options::builder(si).filter(expr_eq); - auto table_with_metadata0 = cudf::io::read_parquet(builder0); - auto result0 = table_with_metadata0.tbl->view(); - auto builder1 = cudf::io::parquet_reader_options::builder(si).filter(expr_neq); - auto table_with_metadata1 = cudf::io::read_parquet(builder1); - auto result1 = table_with_metadata1.tbl->view(); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected0->view(), result0); - 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected1->view(), result1); -} - -TEST_F(ParquetWriterTest, TimestampMicrosINT96NoOverflow) -{ - using namespace cuda::std::chrono; - using namespace cudf::io; - - column_wrapper big_ts_col{ - sys_days{year{3023} / month{7} / day{14}} + 7h + 38min + 45s + 418688us, - sys_days{year{723} / month{3} / day{21}} + 14h + 20min + 13s + microseconds{781ms}}; - - table_view expected({big_ts_col}); - auto filepath = temp_env->get_temp_filepath("BigINT96Timestamp.parquet"); - - auto const out_opts = - parquet_writer_options::builder(sink_info{filepath}, expected).int96_timestamps(true).build(); - write_parquet(out_opts); - - auto const in_opts = parquet_reader_options::builder(source_info(filepath)) - .timestamp_type(cudf::data_type(cudf::type_id::TIMESTAMP_MICROSECONDS)) - .build(); - auto const result = read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, PreserveNullability) -{ - constexpr auto num_rows = 100; - - auto const col0_data = random_values(num_rows); - auto const col1_data = random_values(num_rows); - - auto const col0_validity = cudf::test::iterators::no_nulls(); - auto const col1_validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - - column_wrapper col0{col0_data.begin(), col0_data.end(), col0_validity}; - column_wrapper col1{col1_data.begin(), col1_data.end(), col1_validity}; - auto const col2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); - - auto const expected = table_view{{col0, col1, *col2}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("mandatory"); - expected_metadata.column_metadata[0].set_nullability(false); - expected_metadata.column_metadata[1].set_name("optional"); - expected_metadata.column_metadata[1].set_nullability(true); - expected_metadata.column_metadata[2].set_name("lists"); - 
expected_metadata.column_metadata[2].set_nullability(true); - // offsets is a cudf thing that's not part of the parquet schema so it won't have nullability set - expected_metadata.column_metadata[2].child(0).set_name("offsets"); - expected_metadata.column_metadata[2].child(1).set_name("element"); - expected_metadata.column_metadata[2].child(1).set_nullability(false); - expected_metadata.column_metadata[2].child(1).child(0).set_name("offsets"); - expected_metadata.column_metadata[2].child(1).child(1).set_name("element"); - expected_metadata.column_metadata[2].child(1).child(1).set_nullability(true); - - auto const filepath = temp_env->get_temp_filepath("PreserveNullability.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata); - - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options const in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(in_opts); - auto const read_metadata = cudf::io::table_input_metadata{result.metadata}; - - // test that expected_metadata matches read_metadata - std::function - compare_names_and_nullability = [&](auto lhs, auto rhs) { - EXPECT_EQ(lhs.get_name(), rhs.get_name()); - ASSERT_EQ(lhs.is_nullability_defined(), rhs.is_nullability_defined()); - if (lhs.is_nullability_defined()) { EXPECT_EQ(lhs.nullable(), rhs.nullable()); } - ASSERT_EQ(lhs.num_children(), rhs.num_children()); - for (int i = 0; i < lhs.num_children(); ++i) { - compare_names_and_nullability(lhs.child(i), rhs.child(i)); - } - }; - - ASSERT_EQ(expected_metadata.column_metadata.size(), read_metadata.column_metadata.size()); - - for (size_t i = 0; i < expected_metadata.column_metadata.size(); ++i) { - compare_names_and_nullability(expected_metadata.column_metadata[i], - read_metadata.column_metadata[i]); - } -} - -TEST_P(ParquetV2Test, CheckEncodings) -{ - 
using cudf::io::parquet::detail::Encoding; - constexpr auto num_rows = 100'000; - auto const is_v2 = GetParam(); - - auto const validity = cudf::test::iterators::no_nulls(); - // data should be PLAIN for v1, RLE for V2 - auto col0_data = - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 2 == 0; }); - // data should be PLAIN for v1, DELTA_BINARY_PACKED for v2 - auto col1_data = random_values(num_rows); - // data should be PLAIN_DICTIONARY for v1, PLAIN and RLE_DICTIONARY for v2 - auto col2_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 1; }); - - cudf::test::fixed_width_column_wrapper col0{col0_data, col0_data + num_rows, validity}; - column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; - column_wrapper col2{col2_data, col2_data + num_rows, validity}; - - auto expected = table_view{{col0, col1, col2}}; - - auto const filename = is_v2 ? "CheckEncodingsV2.parquet" : "CheckEncodingsV1.parquet"; - auto filepath = temp_env->get_temp_filepath(filename); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .max_page_size_rows(num_rows) - .write_v2_headers(is_v2); - cudf::io::write_parquet(out_opts); - - // make sure the expected encodings are present - auto contains = [](auto const& vec, auto const& enc) { - return std::find(vec.begin(), vec.end(), enc) != vec.end(); - }; - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - - read_footer(source, &fmd); - auto const& chunk0_enc = fmd.row_groups[0].columns[0].meta_data.encodings; - auto const& chunk1_enc = fmd.row_groups[0].columns[1].meta_data.encodings; - auto const& chunk2_enc = fmd.row_groups[0].columns[2].meta_data.encodings; - if (is_v2) { - // col0 should have RLE for rep/def and data - EXPECT_TRUE(chunk0_enc.size() == 1); - EXPECT_TRUE(contains(chunk0_enc, Encoding::RLE)); - // col1 should have RLE for 
rep/def and DELTA_BINARY_PACKED for data - EXPECT_TRUE(chunk1_enc.size() == 2); - EXPECT_TRUE(contains(chunk1_enc, Encoding::RLE)); - EXPECT_TRUE(contains(chunk1_enc, Encoding::DELTA_BINARY_PACKED)); - // col2 should have RLE for rep/def, PLAIN for dict, and RLE_DICTIONARY for data - EXPECT_TRUE(chunk2_enc.size() == 3); - EXPECT_TRUE(contains(chunk2_enc, Encoding::RLE)); - EXPECT_TRUE(contains(chunk2_enc, Encoding::PLAIN)); - EXPECT_TRUE(contains(chunk2_enc, Encoding::RLE_DICTIONARY)); - } else { - // col0 should have RLE for rep/def and PLAIN for data - EXPECT_TRUE(chunk0_enc.size() == 2); - EXPECT_TRUE(contains(chunk0_enc, Encoding::RLE)); - EXPECT_TRUE(contains(chunk0_enc, Encoding::PLAIN)); - // col1 should have RLE for rep/def and PLAIN for data - EXPECT_TRUE(chunk1_enc.size() == 2); - EXPECT_TRUE(contains(chunk1_enc, Encoding::RLE)); - EXPECT_TRUE(contains(chunk1_enc, Encoding::PLAIN)); - // col2 should have RLE for rep/def and PLAIN_DICTIONARY for data and dict - EXPECT_TRUE(chunk2_enc.size() == 2); - EXPECT_TRUE(contains(chunk2_enc, Encoding::RLE)); - EXPECT_TRUE(contains(chunk2_enc, Encoding::PLAIN_DICTIONARY)); - } -} - -// removing duration_D, duration_s, and timestamp_s as they don't appear to be supported properly. -// see definition of UnsupportedChronoTypes above. 
-using DeltaDecimalTypes = cudf::test::Types; -using DeltaBinaryTypes = - cudf::test::Concat; -using SupportedDeltaTestTypes = - cudf::test::RemoveIf, DeltaBinaryTypes>; -TYPED_TEST_SUITE(ParquetWriterDeltaTest, SupportedDeltaTestTypes); - -TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaTestTypes) -{ - using T = TypeParam; - auto col0 = testdata::ascending(); - auto col1 = testdata::unordered(); - - auto const expected = table_view{{col0, col1}}; - - auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPacked.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .write_v2_headers(true) - .dictionary_policy(cudf::io::dictionary_policy::NEVER); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaTestTypesSliced) -{ - using T = TypeParam; - constexpr int num_rows = 4'000; - auto col0 = testdata::ascending(); - auto col1 = testdata::unordered(); - - auto const expected = table_view{{col0, col1}}; - auto expected_slice = cudf::slice(expected, {num_rows, 2 * num_rows}); - ASSERT_EQ(expected_slice[0].num_rows(), num_rows); - - auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPackedSliced.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) - .write_v2_headers(true) - .dictionary_policy(cudf::io::dictionary_policy::NEVER); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, 
result.tbl->view()); -} - -TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaListSliced) -{ - using T = TypeParam; - - constexpr int num_slice = 4'000; - constexpr int num_rows = 32 * 1024; - - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); - auto values = thrust::make_counting_iterator(0); - - // list - constexpr int vals_per_row = 4; - auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( - 0, [vals_per_row](cudf::size_type idx) { return idx * vals_per_row; }); - cudf::test::fixed_width_column_wrapper c1_offsets(c1_offset_iter, - c1_offset_iter + num_rows + 1); - cudf::test::fixed_width_column_wrapper c1_vals( - values, values + (num_rows * vals_per_row), valids); - auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); - - auto _c1 = cudf::make_lists_column( - num_rows, c1_offsets.release(), c1_vals.release(), null_count, std::move(null_mask)); - auto c1 = cudf::purge_nonempty_nulls(*_c1); - - auto const expected = table_view{{*c1}}; - auto expected_slice = cudf::slice(expected, {num_slice, 2 * num_slice}); - ASSERT_EQ(expected_slice[0].num_rows(), num_slice); - - auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPackedListSliced.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) - .write_v2_headers(true) - .dictionary_policy(cudf::io::dictionary_policy::NEVER); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, EmptyMinStringStatistics) -{ - char const* const min_val = ""; - char const* const max_val = "zzz"; - std::vector 
strings{min_val, max_val, "pining", "for", "the", "fjords"}; - - column_wrapper string_col{strings.begin(), strings.end()}; - auto const output = table_view{{string_col}}; - auto const filepath = temp_env->get_temp_filepath("EmptyMinStringStatistics.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::detail::FileMetaData fmd; - read_footer(source, &fmd); - - ASSERT_TRUE(fmd.row_groups.size() > 0); - ASSERT_TRUE(fmd.row_groups[0].columns.size() > 0); - auto const& chunk = fmd.row_groups[0].columns[0]; - auto const stats = get_statistics(chunk); - - ASSERT_TRUE(stats.min_value.has_value()); - ASSERT_TRUE(stats.max_value.has_value()); - auto const min_value = std::string{reinterpret_cast(stats.min_value.value().data()), - stats.min_value.value().size()}; - auto const max_value = std::string{reinterpret_cast(stats.max_value.value().data()), - stats.max_value.value().size()}; - EXPECT_EQ(min_value, std::string(min_val)); - EXPECT_EQ(max_value, std::string(max_val)); -} - -TEST_F(ParquetReaderTest, RepeatedNoAnnotations) -{ - constexpr unsigned char repeated_bytes[] = { - 0x50, 0x41, 0x52, 0x31, 0x15, 0x04, 0x15, 0x30, 0x15, 0x30, 0x4c, 0x15, 0x0c, 0x15, 0x00, 0x12, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x15, 0x0a, 0x15, 0x0a, - 0x2c, 0x15, 0x0c, 0x15, 0x10, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x03, 0x03, 0x88, 0xc6, 0x02, - 0x26, 0x80, 0x01, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x10, 0x19, 0x18, 0x02, 0x69, 0x64, 0x15, - 0x00, 0x16, 0x0c, 0x16, 0x78, 0x16, 0x78, 0x26, 0x54, 0x26, 0x08, 0x00, 0x00, 0x15, 0x04, 0x15, - 0x40, 0x15, 0x40, 0x4c, 0x15, 0x08, 0x15, 0x00, 0x12, 0x00, 0x00, 0xe3, 0x0c, 0x23, 0x4b, 0x01, - 0x00, 0x00, 0x00, 
0xc7, 0x35, 0x3a, 0x42, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x6b, 0x74, 0x84, 0x00, - 0x00, 0x00, 0x00, 0x55, 0xa1, 0xae, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x15, 0x22, 0x15, - 0x22, 0x2c, 0x15, 0x10, 0x15, 0x10, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x03, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x03, 0x90, 0xaa, 0x02, 0x03, 0x94, 0x03, 0x26, 0xda, 0x02, - 0x1c, 0x15, 0x04, 0x19, 0x25, 0x00, 0x10, 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, - 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x06, 0x6e, 0x75, 0x6d, - 0x62, 0x65, 0x72, 0x15, 0x00, 0x16, 0x10, 0x16, 0xa0, 0x01, 0x16, 0xa0, 0x01, 0x26, 0x96, 0x02, - 0x26, 0xba, 0x01, 0x00, 0x00, 0x15, 0x04, 0x15, 0x24, 0x15, 0x24, 0x4c, 0x15, 0x04, 0x15, 0x00, - 0x12, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x68, 0x6f, 0x6d, 0x65, 0x06, 0x00, 0x00, 0x00, 0x6d, - 0x6f, 0x62, 0x69, 0x6c, 0x65, 0x15, 0x00, 0x15, 0x20, 0x15, 0x20, 0x2c, 0x15, 0x10, 0x15, 0x10, - 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0xc0, 0x03, 0x00, 0x00, 0x00, - 0x03, 0x90, 0xef, 0x01, 0x03, 0x04, 0x26, 0xcc, 0x04, 0x1c, 0x15, 0x0c, 0x19, 0x25, 0x00, 0x10, - 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, - 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x15, 0x00, 0x16, 0x10, 0x16, 0x82, - 0x01, 0x16, 0x82, 0x01, 0x26, 0x8a, 0x04, 0x26, 0xca, 0x03, 0x00, 0x00, 0x15, 0x02, 0x19, 0x6c, - 0x48, 0x04, 0x75, 0x73, 0x65, 0x72, 0x15, 0x04, 0x00, 0x15, 0x02, 0x25, 0x00, 0x18, 0x02, 0x69, - 0x64, 0x00, 0x35, 0x02, 0x18, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, - 0x72, 0x73, 0x15, 0x02, 0x00, 0x35, 0x04, 0x18, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x15, 0x04, - 0x00, 0x15, 0x04, 0x25, 0x00, 0x18, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x00, 0x15, 0x0c, - 0x25, 0x02, 0x18, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x25, 0x00, 0x00, 0x16, 0x00, 0x19, 0x1c, 0x19, - 0x3c, 0x26, 0x80, 0x01, 0x1c, 0x15, 0x02, 0x19, 0x25, 
0x00, 0x10, 0x19, 0x18, 0x02, 0x69, 0x64, - 0x15, 0x00, 0x16, 0x0c, 0x16, 0x78, 0x16, 0x78, 0x26, 0x54, 0x26, 0x08, 0x00, 0x00, 0x26, 0xda, - 0x02, 0x1c, 0x15, 0x04, 0x19, 0x25, 0x00, 0x10, 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, - 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x06, 0x6e, 0x75, - 0x6d, 0x62, 0x65, 0x72, 0x15, 0x00, 0x16, 0x10, 0x16, 0xa0, 0x01, 0x16, 0xa0, 0x01, 0x26, 0x96, - 0x02, 0x26, 0xba, 0x01, 0x00, 0x00, 0x26, 0xcc, 0x04, 0x1c, 0x15, 0x0c, 0x19, 0x25, 0x00, 0x10, - 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, - 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x15, 0x00, 0x16, 0x10, 0x16, 0x82, - 0x01, 0x16, 0x82, 0x01, 0x26, 0x8a, 0x04, 0x26, 0xca, 0x03, 0x00, 0x00, 0x16, 0x9a, 0x03, 0x16, - 0x0c, 0x00, 0x28, 0x49, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2d, 0x72, 0x73, 0x20, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x30, 0x2e, 0x33, 0x2e, 0x30, 0x20, 0x28, 0x62, 0x75, - 0x69, 0x6c, 0x64, 0x20, 0x62, 0x34, 0x35, 0x63, 0x65, 0x37, 0x63, 0x62, 0x61, 0x32, 0x31, 0x39, - 0x39, 0x66, 0x32, 0x32, 0x64, 0x39, 0x33, 0x32, 0x36, 0x39, 0x63, 0x31, 0x35, 0x30, 0x64, 0x38, - 0x61, 0x38, 0x33, 0x39, 0x31, 0x36, 0x63, 0x36, 0x39, 0x62, 0x35, 0x65, 0x29, 0x00, 0x32, 0x01, - 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - - auto read_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast(repeated_bytes), sizeof(repeated_bytes)}); - auto result = cudf::io::read_parquet(read_opts); - - EXPECT_EQ(result.tbl->view().column(0).size(), 6); - EXPECT_EQ(result.tbl->view().num_columns(), 2); - - column_wrapper col0{1, 2, 3, 4, 5, 6}; - column_wrapper child0{{5555555555l, 1111111111l, 1111111111l, 2222222222l, 3333333333l}}; - cudf::test::strings_column_wrapper child1{{"-", "home", "home", "-", "mobile"}, {0, 1, 1, 0, 1}}; - auto struct_col = cudf::test::structs_column_wrapper{{child0, child1}}; - - auto list_offsets_column = 
- cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 1, 2, 5}.release(); - auto num_list_rows = list_offsets_column->size() - 1; - - auto mask = cudf::create_null_mask(6, cudf::mask_state::ALL_VALID); - cudf::set_null_mask(static_cast(mask.data()), 0, 2, false); - - auto list_col = cudf::make_lists_column( - num_list_rows, std::move(list_offsets_column), struct_col.release(), 2, std::move(mask)); - - std::vector> struct_children; - struct_children.push_back(std::move(list_col)); - - auto outer_struct = - cudf::test::structs_column_wrapper{{std::move(struct_children)}, {0, 0, 1, 1, 1, 1}}; - table_view expected{{col0, outer_struct}}; - - CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), expected); -} - -inline auto random_validity(std::mt19937& engine) -{ - static std::bernoulli_distribution bn(0.7f); - return cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(engine); }); -} - -template -std::unique_ptr make_parquet_list_col(std::mt19937& engine, - int num_rows, - int max_vals_per_row, - bool include_validity) -{ - std::vector row_sizes(num_rows); - - auto const min_values_per_row = include_validity ? 
0 : 1; - std::uniform_int_distribution dist{min_values_per_row, max_vals_per_row}; - std::generate_n(row_sizes.begin(), num_rows, [&]() { return cudf::size_type{dist(engine)}; }); - - std::vector offsets(num_rows + 1); - std::exclusive_scan(row_sizes.begin(), row_sizes.end(), offsets.begin(), 0); - offsets[num_rows] = offsets[num_rows - 1] + row_sizes.back(); - - std::vector values = random_values(offsets[num_rows]); - cudf::test::fixed_width_column_wrapper offsets_col(offsets.begin(), - offsets.end()); - - if (include_validity) { - auto valids = random_validity(engine); - auto values_col = - cudf::test::fixed_width_column_wrapper(values.begin(), values.end(), valids); - auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); - - auto col = cudf::make_lists_column( - num_rows, offsets_col.release(), values_col.release(), null_count, std::move(null_mask)); - return cudf::purge_nonempty_nulls(*col); - } else { - auto values_col = cudf::test::fixed_width_column_wrapper(values.begin(), values.end()); - return cudf::make_lists_column(num_rows, - offsets_col.release(), - values_col.release(), - 0, - cudf::create_null_mask(num_rows, cudf::mask_state::ALL_VALID)); - } -} - -std::vector string_values(std::mt19937& engine, int num_rows, int max_string_len) -{ - static std::uniform_int_distribution char_dist{'a', 'z'}; - static std::uniform_int_distribution strlen_dist{1, max_string_len}; - - std::vector values(num_rows); - std::generate_n(values.begin(), values.size(), [&]() { - int str_len = strlen_dist(engine); - std::string res = ""; - for (int i = 0; i < str_len; i++) { - res += char_dist(engine); - } - return res; - }); - - return values; -} - -// make a random list column, with random string lengths of 0..max_string_len, -// and up to max_vals_per_row strings in each list. 
-std::unique_ptr make_parquet_string_list_col(std::mt19937& engine, - int num_rows, - int max_vals_per_row, - int max_string_len, - bool include_validity) -{ - auto const range_min = include_validity ? 0 : 1; - - std::uniform_int_distribution dist{range_min, max_vals_per_row}; - - std::vector row_sizes(num_rows); - std::generate_n(row_sizes.begin(), num_rows, [&]() { return cudf::size_type{dist(engine)}; }); - - std::vector offsets(num_rows + 1); - std::exclusive_scan(row_sizes.begin(), row_sizes.end(), offsets.begin(), 0); - offsets[num_rows] = offsets[num_rows - 1] + row_sizes.back(); - - std::uniform_int_distribution strlen_dist{range_min, max_string_len}; - auto const values = string_values(engine, offsets[num_rows], max_string_len); - - cudf::test::fixed_width_column_wrapper offsets_col(offsets.begin(), - offsets.end()); - - if (include_validity) { - auto valids = random_validity(engine); - auto values_col = cudf::test::strings_column_wrapper(values.begin(), values.end(), valids); - auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); - - auto col = cudf::make_lists_column( - num_rows, offsets_col.release(), values_col.release(), null_count, std::move(null_mask)); - return cudf::purge_nonempty_nulls(*col); - } else { - auto values_col = cudf::test::strings_column_wrapper(values.begin(), values.end()); - return cudf::make_lists_column(num_rows, - offsets_col.release(), - values_col.release(), - 0, - cudf::create_null_mask(num_rows, cudf::mask_state::ALL_VALID)); - } -} - -TEST_F(ParquetReaderTest, DeltaSkipRowsWithNulls) -{ - constexpr int num_rows = 50'000; - constexpr auto seed = 21337; - - std::mt19937 engine{seed}; - auto int32_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); - auto int32_list = make_parquet_list_col(engine, num_rows, 5, false); - auto int64_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); - auto int64_list = make_parquet_list_col(engine, num_rows, 5, false); - auto 
int16_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); - auto int16_list = make_parquet_list_col(engine, num_rows, 5, false); - auto int8_list_nulls = make_parquet_list_col(engine, num_rows, 5, true); - auto int8_list = make_parquet_list_col(engine, num_rows, 5, false); - - auto str_list_nulls = make_parquet_string_list_col(engine, num_rows, 5, 32, true); - auto str_list = make_parquet_string_list_col(engine, num_rows, 5, 32, false); - auto big_str_list_nulls = make_parquet_string_list_col(engine, num_rows, 5, 256, true); - auto big_str_list = make_parquet_string_list_col(engine, num_rows, 5, 256, false); - - auto int32_data = random_values(num_rows); - auto int64_data = random_values(num_rows); - auto int16_data = random_values(num_rows); - auto int8_data = random_values(num_rows); - auto str_data = string_values(engine, num_rows, 32); - auto big_str_data = string_values(engine, num_rows, 256); - - auto const validity = random_validity(engine); - auto const no_nulls = cudf::test::iterators::no_nulls(); - column_wrapper int32_nulls_col{int32_data.begin(), int32_data.end(), validity}; - column_wrapper int32_col{int32_data.begin(), int32_data.end(), no_nulls}; - column_wrapper int64_nulls_col{int64_data.begin(), int64_data.end(), validity}; - column_wrapper int64_col{int64_data.begin(), int64_data.end(), no_nulls}; - - auto str_col = cudf::test::strings_column_wrapper(str_data.begin(), str_data.end(), no_nulls); - auto str_col_nulls = cudf::purge_nonempty_nulls( - cudf::test::strings_column_wrapper(str_data.begin(), str_data.end(), validity)); - auto big_str_col = - cudf::test::strings_column_wrapper(big_str_data.begin(), big_str_data.end(), no_nulls); - auto big_str_col_nulls = cudf::purge_nonempty_nulls( - cudf::test::strings_column_wrapper(big_str_data.begin(), big_str_data.end(), validity)); - - cudf::table_view tbl({int32_col, int32_nulls_col, *int32_list, *int32_list_nulls, - int64_col, int64_nulls_col, *int64_list, *int64_list_nulls, - 
*int16_list, *int16_list_nulls, *int8_list, *int8_list_nulls, - str_col, *str_col_nulls, *str_list, *str_list_nulls, - big_str_col, *big_str_col_nulls, *big_str_list, *big_str_list_nulls}); - - auto const filepath = temp_env->get_temp_filepath("DeltaSkipRowsWithNulls.parquet"); - auto const out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .compression(cudf::io::compression_type::NONE) - .dictionary_policy(cudf::io::dictionary_policy::NEVER) - .max_page_size_rows(20'000) - .write_v2_headers(true) - .build(); - cudf::io::write_parquet(out_opts); - - // skip_rows / num_rows - // clang-format off - std::vector> params{ - // skip and then read rest of file - {-1, -1}, {1, -1}, {2, -1}, {32, -1}, {33, -1}, {128, -1}, {1000, -1}, - // no skip but read fewer rows - {0, 1}, {0, 2}, {0, 31}, {0, 32}, {0, 33}, {0, 128}, {0, 129}, {0, 130}, - // skip and truncate - {1, 32}, {1, 33}, {32, 32}, {33, 139}, - // cross page boundaries - {10'000, 20'000} - }; - - // clang-format on - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? 
num_rows - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - std::vector expected = cudf::slice(tbl, slice_indices); - - CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), expected[0]); - - // test writing the result back out as a further check of the delta writer's correctness - std::vector out_buffer; - cudf::io::parquet_writer_options out_opts2 = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, - result.tbl->view()) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .compression(cudf::io::compression_type::NONE) - .dictionary_policy(cudf::io::dictionary_policy::NEVER) - .max_page_size_rows(20'000) - .write_v2_headers(true); - cudf::io::write_parquet(out_opts2); - - cudf::io::parquet_reader_options default_in_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{out_buffer.data(), out_buffer.size()}); - auto const result2 = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), result2.tbl->view()); - } -} +// NOTE: this file exists to define the parquet test's `main()` function. +// `main()` is kept in its own compilation unit to keep the compilation time for +// PARQUET_TEST at a minimum. +// +// Do not add any test definitions to this file. CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/parquet_v2_test.cpp b/cpp/tests/io/parquet_v2_test.cpp new file mode 100644 index 00000000000..f2b50639a4d --- /dev/null +++ b/cpp/tests/io/parquet_v2_test.cpp @@ -0,0 +1,1528 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "parquet_common.hpp" + +#include +#include +#include +#include + +#include + +// Base test fixture for V2 header tests +class ParquetV2Test : public ::cudf::test::BaseFixtureWithParam {}; + +INSTANTIATE_TEST_SUITE_P(ParquetV2ReadWriteTest, + ParquetV2Test, + testing::Bool(), + testing::PrintToStringParamName()); + +TEST_P(ParquetV2Test, MultiColumn) +{ + constexpr auto num_rows = 100000; + auto const is_v2 = GetParam(); + + // auto col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_vals = random_values(num_rows); + auto col7_vals = random_values(num_rows); + auto col8_vals = random_values(num_rows); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { + return numeric::decimal32{col6_vals[i], numeric::scale_type{5}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { + return numeric::decimal64{col7_vals[i], numeric::scale_type{-5}}; + }); + auto col8_data = cudf::detail::make_counting_transform_iterator(0, [col8_vals](auto i) { + return numeric::decimal128{col8_vals[i], numeric::scale_type{-6}}; + }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + // column_wrapper col0{ + // col0_data.begin(), col0_data.end(), validity}; + column_wrapper col1{col1_data.begin(), col1_data.end(), 
validity}; + column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; + column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; + column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; + column_wrapper col5{col5_data.begin(), col5_data.end(), validity}; + column_wrapper col6{col6_data, col6_data + num_rows, validity}; + column_wrapper col7{col7_data, col7_data + num_rows, validity}; + column_wrapper col8{col8_data, col8_data + num_rows, validity}; + + auto expected = table_view{{col1, col2, col3, col4, col5, col6, col7, col8}}; + + cudf::io::table_input_metadata expected_metadata(expected); + // expected_metadata.column_metadata[0].set_name( "bools"); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(10); + expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); + expected_metadata.column_metadata[7].set_name("decimal128s").set_decimal_precision(40); + + auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(is_v2) + .metadata(expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, MultiColumnWithNulls) +{ + constexpr auto num_rows = 100; + auto const is_v2 = GetParam(); + + // auto 
col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_vals = random_values(num_rows); + auto col7_vals = random_values(num_rows); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { + return numeric::decimal32{col6_vals[i], numeric::scale_type{-2}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { + return numeric::decimal64{col7_vals[i], numeric::scale_type{-8}}; + }); + // auto col0_mask = cudf::detail::make_counting_transform_iterator( + // 0, [](auto i) { return (i % 2); }); + auto col1_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i < 10); }); + auto col2_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + auto col3_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); + auto col4_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); + auto col5_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); + auto col6_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 5); }); + auto col7_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i != 55); }); + + // column_wrapper col0{ + // col0_data.begin(), col0_data.end(), col0_mask}; + column_wrapper col1{col1_data.begin(), col1_data.end(), col1_mask}; + column_wrapper col2{col2_data.begin(), col2_data.end(), col2_mask}; + column_wrapper col3{col3_data.begin(), col3_data.end(), col3_mask}; + column_wrapper col4{col4_data.begin(), col4_data.end(), col4_mask}; + column_wrapper col5{col5_data.begin(), col5_data.end(), col5_mask}; + column_wrapper 
col6{col6_data, col6_data + num_rows, col6_mask}; + column_wrapper col7{col7_data, col7_data + num_rows, col7_mask}; + + auto expected = table_view{{/*col0, */ col1, col2, col3, col4, col5, col6, col7}}; + + cudf::io::table_input_metadata expected_metadata(expected); + // expected_metadata.column_names.emplace_back("bools"); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(9); + expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); + + auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(is_v2) + .metadata(expected_metadata); + + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + // TODO: Need to be able to return metadata in tree form from reader so they can be compared. + // Unfortunately the closest thing to a hierarchical schema is column_name_info which does not + // have any tests for it c++ or python. 
+ cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, Strings) +{ + auto const is_v2 = GetParam(); + + std::vector strings{ + "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + auto const num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper col1{strings.begin(), strings.end()}; + column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; + + auto expected = table_view{{col0, col1, col2}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + + auto filepath = temp_env->get_temp_filepath("Strings.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(is_v2) + .metadata(expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, StringsAsBinary) +{ + auto const is_v2 = GetParam(); + std::vector unicode_strings{ + "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + std::vector ascii_strings{ + "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + + column_wrapper col0{ascii_strings.begin(), ascii_strings.end()}; + column_wrapper 
col1{unicode_strings.begin(), unicode_strings.end()}; + column_wrapper col2{ascii_strings.begin(), ascii_strings.end()}; + cudf::test::lists_column_wrapper col3{{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}; + cudf::test::lists_column_wrapper col4{ + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 200, 133, 'd', 'n', 200, 133, 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}; + + auto write_tbl = table_view{{col0, col1, col2, col3, col4}}; + + cudf::io::table_input_metadata expected_metadata(write_tbl); + expected_metadata.column_metadata[0].set_name("col_single").set_output_as_binary(true); + expected_metadata.column_metadata[1].set_name("col_string").set_output_as_binary(true); + expected_metadata.column_metadata[2].set_name("col_another").set_output_as_binary(true); + expected_metadata.column_metadata[3].set_name("col_binary"); + expected_metadata.column_metadata[4].set_name("col_binary2"); + + auto filepath = temp_env->get_temp_filepath("BinaryStrings.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl) + .write_v2_headers(is_v2) + .dictionary_policy(cudf::io::dictionary_policy::NEVER) + .metadata(expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema( + {cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + 
cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema()), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema())}); + auto result = cudf::io::read_parquet(in_opts); + auto expected = table_view{{col3, col4, col3, col3, col4}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, SlicedTable) +{ + // This test checks for writing zero copy, offsetted views into existing cudf tables + + std::vector strings{ + "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + auto const num_rows = strings.size(); + auto const is_v2 = GetParam(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 != 0; }); + + column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper col1{strings.begin(), strings.end()}; + column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; + + using lcw = cudf::test::lists_column_wrapper; + lcw col3{{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}}; + + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // [NULL, [[13],[14,15,16]], NULL] + // [NULL, [], NULL, [[]]] + // NULL + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // [NULL, [[13],[14,15,16]], NULL] + // [[[]]] + // [NULL, [], NULL, [[]]] + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + lcw col4{{ + {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, + {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, + lcw{lcw{lcw{}}}, + {{{{1, 2, 3, 
4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, + lcw{lcw{lcw{}}}, + {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, + }, + valids2}; + + // Struct column + auto ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351, 29, 15}, {1, 1, 1, 1, 1, 0, 1, 1}}; + + auto col5 = cudf::test::structs_column_wrapper{{ages_col}, {1, 1, 1, 1, 0, 1, 1, 1}}; + + // Struct/List mixed column + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + // [] + // [7, 8, 9] + // [10] + // [11, 12] + lcw land{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}, {10}, {11, 12}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + // [[]] + // [[], [], []] + // [[10]] + // [[13, 14], [15]] + lcw flats{lcw{}, + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}, + {lcw{10}}, + {{13, 14}, {15}}}; + + auto struct_1 = cudf::test::structs_column_wrapper{land, flats}; + auto is_human = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, true, false, true, false}}; + auto col6 = cudf::test::structs_column_wrapper{{is_human, struct_1}}; + + auto expected = table_view({col0, col1, col2, col3, col4, col5, col6}); + + // auto expected_slice = expected; + auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows) - 1}); + + cudf::io::table_input_metadata expected_metadata(expected_slice); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + expected_metadata.column_metadata[3].set_name("col_list"); + expected_metadata.column_metadata[4].set_name("col_multi_level_list"); + expected_metadata.column_metadata[5].set_name("col_struct"); + expected_metadata.column_metadata[5].set_name("col_struct_list"); + 
expected_metadata.column_metadata[6].child(0).set_name("human?"); + expected_metadata.column_metadata[6].child(1).set_name("particulars"); + expected_metadata.column_metadata[6].child(1).child(0).set_name("land"); + expected_metadata.column_metadata[6].child(1).child(1).set_name("flats"); + + auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) + .write_v2_headers(is_v2) + .metadata(expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_slice, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, ListColumn) +{ + auto const is_v2 = GetParam(); + + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + lcw col0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; + + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col1{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col2{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // [[]] + using dlcw = cudf::test::lists_column_wrapper; + dlcw col3{{{{1., 2., 3.}, {}, {4., 5.}, {}, {{0., 6., 0.}, valids}}, valids2}, + {{7., 8.}}, + dlcw{}, + dlcw{dlcw{}}}; + + // TODO: uint16_t lists are not read properly in 
parquet reader + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // NULL + // using ui16lcw = cudf::test::lists_column_wrapper; + // cudf::test::lists_column_wrapper col4{ + // {{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, ui16lcw{}, ui16lcw{ui16lcw{}}}, + // valids2}; + + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // NULL + lcw col5{ + {{{{1, 2, 3}, {}, {4, 5}, {}, {{0, 6, 0}, valids}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}, + valids2}; + + using strlcw = cudf::test::lists_column_wrapper; + cudf::test::lists_column_wrapper col6{ + {{"Monday", "Monday", "Friday"}, {}, {"Monday", "Friday"}, {}, {"Sunday", "Funday"}}, + {{"bee", "sting"}}, + strlcw{}, + strlcw{strlcw{}}}; + + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // [NULL, [[13],[14,15,16]], NULL] + // [NULL, [], NULL, [[]]] + // NULL + lcw col7{{ + {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, + {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, + lcw{lcw{lcw{}}}, + }, + valids2}; + + table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_list_int_0"); + expected_metadata.column_metadata[1].set_name("col_list_list_int_1"); + expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2"); + expected_metadata.column_metadata[3].set_name("col_list_list_nullable_double_nullable_3"); + // expected_metadata.column_metadata[0].set_name("col_list_list_uint16_4"); + expected_metadata.column_metadata[4].set_name("col_list_nullable_list_nullable_int_nullable_5"); + expected_metadata.column_metadata[5].set_name("col_list_list_string_6"); + expected_metadata.column_metadata[6].set_name("col_list_list_list_7"); + + auto filepath = temp_env->get_temp_filepath("ListColumn.parquet"); + auto out_opts = 
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(is_v2) + .metadata(expected_metadata) + .compression(cudf::io::compression_type::NONE); + + cudf::io::write_parquet(out_opts); + + auto in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, StructOfList) +{ + auto const is_v2 = GetParam(); + + // Struct>, + // flats:List> + // > + // > + + auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + // [] + // [7, 8, 9] + lcw land_unit{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + // [[]] + // [[], [], []] + lcw flats{lcw{}, + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col, land_unit, flats}, + {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto expected = table_view({*struct_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + 
expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); + expected_metadata.column_metadata[0].child(1).child(2).set_name("land_unit"); + expected_metadata.column_metadata[0].child(1).child(3).set_name("flats"); + + auto filepath = temp_env->get_temp_filepath("StructOfList.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(is_v2) + .metadata(expected_metadata); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + auto const result = cudf::io::read_parquet(read_args); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, ListOfStruct) +{ + auto const is_v2 = GetParam(); + + // List + // > + // > + + auto weight_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weight_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper{0, 2, 5, 5, 6}.release(); + auto num_list_rows = list_offsets_column->size() - 1; + + auto list_col = cudf::make_lists_column( + num_list_rows, 
std::move(list_offsets_column), std::move(struct_2), 0, {}); + + auto expected = table_view({*list_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("family"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(is_v2) + .metadata(expected_metadata); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + auto const result = cudf::io::read_parquet(read_args); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_P(ParquetV2Test, PartitionedWriteEmptyPartitions) +{ + auto const is_v2 = GetParam(); + + auto source = create_random_fixed_table(4, 4, false); + + auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); + auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); + + auto partition1 = cudf::io::partition_info{1, 0}; + auto partition2 = cudf::io::partition_info{1, 0}; + + auto expected1 = + cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); + auto expected2 = + cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); + + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) + .partitions({partition1, 
partition2}) + .write_v2_headers(is_v2) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); + + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); + + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); +} + +TEST_P(ParquetV2Test, PartitionedWriteEmptyColumns) +{ + auto const is_v2 = GetParam(); + + auto source = create_random_fixed_table(0, 4, false); + + auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); + auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); + + auto partition1 = cudf::io::partition_info{1, 0}; + auto partition2 = cudf::io::partition_info{1, 0}; + + auto expected1 = + cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); + auto expected2 = + cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); + + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) + .partitions({partition1, partition2}) + .write_v2_headers(is_v2) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); + + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); + + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); +} + +TEST_P(ParquetV2Test, LargeColumnIndex) +{ + // create a file large enough to be written in 2 batches (currently 1GB per batch) + // pick fragment size that num_rows is 
divisible by, so we'll get equal sized row groups + const std::string s1(1000, 'a'); + const std::string s2(1000, 'b'); + constexpr auto num_rows = 512 * 1024; + constexpr auto frag_size = num_rows / 128; + auto const is_v2 = GetParam(); + + auto col0_elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return (i < num_rows) ? s1 : s2; }); + auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + 2 * num_rows); + + auto const expected = table_view{{col0, col0}}; + + auto const filepath = temp_env->get_temp_filepath("LargeColumnIndex.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .compression(cudf::io::compression_type::NONE) + .dictionary_policy(cudf::io::dictionary_policy::NEVER) + .write_v2_headers(is_v2) + .max_page_fragment_size(frag_size) + .row_group_size_bytes(1024 * 1024 * 1024) + .row_group_size_rows(num_rows); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + for (auto const& rg : fmd.row_groups) { + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + + // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); + } + } +} + +TEST_P(ParquetV2Test, CheckColumnOffsetIndex) +{ + constexpr auto num_rows = 100000; + 
auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; + + // fixed length strings + auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%012d", i); + return std::string(buf); + }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_data = random_values(num_rows); + + auto col1 = cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end()); + auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); + auto col3 = cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end()); + auto col4 = cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end()); + auto col5 = cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end()); + auto col6 = cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end()); + + // mixed length strings + auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%d", i); + return std::string(buf); + }); + auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); + + auto const expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndex.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .write_v2_headers(is_v2) + .max_page_size_rows(20000); + 
cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, expected_hdr_type); + EXPECT_EQ(page_loc.first_row_index, num_vals); + num_vals += is_v2 ? ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each page. 
+ auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + ASSERT_TRUE(ci.null_counts.has_value()); + + // schema indexing starts at 1 + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + // null_pages should always be false + EXPECT_FALSE(ci.null_pages[p]); + // null_counts should always be 0 + EXPECT_EQ(ci.null_counts.value()[p], 0); + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); + } + for (size_t p = 0; p < ci.max_values.size(); p++) + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); + } + } +} + +TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) +{ + constexpr auto num_rows = 100000; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; + + // fixed length strings + auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%012d", i); + return std::string(buf); + }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_data = random_values(num_rows); + + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + + // add null values for all but first column + auto col1 = + cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); + auto col2 = + cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), valids); + 
auto col3 = + cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end(), valids); + auto col4 = + cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end(), valids); + auto col5 = + cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end(), valids); + auto col6 = + cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end(), valids); + + // mixed length strings + auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%d", i); + return std::string(buf); + }); + auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows, valids); + + auto expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNulls.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .write_v2_headers(is_v2) + .max_page_size_rows(20000); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, expected_hdr_type); + EXPECT_EQ(page_loc.first_row_index, num_vals); + num_vals += is_v2 ? 
ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each page. + auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + + // should be half nulls, except no nulls in column 0 + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + ASSERT_TRUE(stats.null_count.has_value()); + EXPECT_EQ(stats.null_count.value(), c == 0 ? 0 : num_rows / 2); + ASSERT_TRUE(ci.null_counts.has_value()); + + // schema indexing starts at 1 + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + EXPECT_FALSE(ci.null_pages[p]); + if (c > 0) { // first column has no nulls + EXPECT_GT(ci.null_counts.value()[p], 0); + } else { + EXPECT_EQ(ci.null_counts.value()[p], 0); + } + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); + } + for (size_t p = 0; p < ci.max_values.size(); p++) { + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); + } + } + } +} + +TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) +{ + constexpr auto num_rows = 100000; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? 
cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; + + // fixed length strings + auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%012d", i); + return std::string(buf); + }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + + // col1 is all nulls + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return false; }); + auto col1 = + cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); + auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); + + // mixed length strings + auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%d", i); + return std::string(buf); + }); + auto col3 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); + + auto expected = table_view{{col0, col1, col2, col3}}; + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNullColumn.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .write_v2_headers(is_v2) + .max_page_size_rows(20000); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = 
read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, expected_hdr_type); + EXPECT_EQ(page_loc.first_row_index, num_vals); + num_vals += is_v2 ? ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each non-empty page. + auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + + // there should be no nulls except column 1 which is all nulls + if (c != 1) { + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + } + ASSERT_TRUE(stats.null_count.has_value()); + EXPECT_EQ(stats.null_count.value(), c == 1 ? num_rows : 0); + ASSERT_TRUE(ci.null_counts.has_value()); + + // schema indexing starts at 1 + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + // check tnat null_pages is true for column 1 + if (c == 1) { + EXPECT_TRUE(ci.null_pages[p]); + EXPECT_GT(ci.null_counts.value()[p], 0); + } + if (not ci.null_pages[p]) { + EXPECT_EQ(ci.null_counts.value()[p], 0); + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); + } + } + for (size_t p = 0; p < ci.max_values.size(); p++) { + if (not ci.null_pages[p]) { + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); + } + } + } + } +} + +TEST_P(ParquetV2Test, CheckColumnOffsetIndexStruct) +{ + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? 
cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; + + auto c0 = testdata::ascending(); + + auto sc0 = testdata::ascending(); + auto sc1 = testdata::descending(); + auto sc2 = testdata::unordered(); + + std::vector> struct_children; + struct_children.push_back(sc0.release()); + struct_children.push_back(sc1.release()); + struct_children.push_back(sc2.release()); + cudf::test::structs_column_wrapper c1(std::move(struct_children)); + + auto listgen = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? i / 2 : num_ordered_rows - (i / 2); }); + auto list = + cudf::test::fixed_width_column_wrapper(listgen, listgen + 2 * num_ordered_rows); + auto offgen = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; }); + auto offsets = + cudf::test::fixed_width_column_wrapper(offgen, offgen + num_ordered_rows + 1); + + auto c2 = cudf::make_lists_column(num_ordered_rows, offsets.release(), list.release(), 0, {}); + + table_view expected({c0, c1, *c2}); + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexStruct.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .write_v2_headers(is_v2) + .max_page_size_rows(page_size_for_ordered_tests); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + // hard coded schema indices. 
+ // TODO find a way to do this without magic + size_t const colidxs[] = {1, 3, 4, 5, 8}; + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + size_t colidx = colidxs[c]; + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, expected_hdr_type); + EXPECT_EQ(page_loc.first_row_index, num_vals); + // last column has 2 values per row + num_vals += is_v2 ? ph.data_page_header_v2.num_rows + : ph.data_page_header.num_values / (c == rg.columns.size() - 1 ? 2 : 1); + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each page. + auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + + auto const ptype = fmd.schema[colidx].type; + auto const ctype = fmd.schema[colidx].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); + } + for (size_t p = 0; p < ci.max_values.size(); p++) { + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); + } + } + } +} + +TEST_P(ParquetV2Test, CheckColumnOffsetIndexStructNulls) +{ + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? 
cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; + + auto validity2 = + cudf::detail::make_counting_transform_iterator(0, [](cudf::size_type i) { return i % 2; }); + auto validity3 = cudf::detail::make_counting_transform_iterator( + 0, [](cudf::size_type i) { return (i % 3) != 0; }); + auto validity4 = cudf::detail::make_counting_transform_iterator( + 0, [](cudf::size_type i) { return (i % 4) != 0; }); + auto validity5 = cudf::detail::make_counting_transform_iterator( + 0, [](cudf::size_type i) { return (i % 5) != 0; }); + + auto c0 = testdata::ascending(); + + auto col1_data = random_values(num_ordered_rows); + auto col2_data = random_values(num_ordered_rows); + auto col3_data = random_values(num_ordered_rows); + + // col1 is all nulls + auto col1 = + cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), validity2); + auto col2 = + cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), validity3); + auto col3 = + cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), validity4); + + std::vector> struct_children; + struct_children.push_back(col1.release()); + struct_children.push_back(col2.release()); + struct_children.push_back(col3.release()); + auto struct_validity = std::vector(validity5, validity5 + num_ordered_rows); + cudf::test::structs_column_wrapper c1(std::move(struct_children), struct_validity); + table_view expected({c0, c1}); + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexStructNulls.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .write_v2_headers(is_v2) + .max_page_size_rows(page_size_for_ordered_tests); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData 
fmd; + + read_footer(source, &fmd); + + // all struct columns will have num_ordered_rows / 5 nulls at level 0. + // col1 will have num_ordered_rows / 2 nulls total + // col2 will have num_ordered_rows / 3 nulls total + // col3 will have num_ordered_rows / 4 nulls total + int const null_mods[] = {0, 2, 3, 4}; + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + auto const ci = read_column_index(source, chunk); + + // check definition level histogram (repetition will not be present) + if (c != 0) { + ASSERT_TRUE(chunk.meta_data.size_statistics.has_value()); + ASSERT_TRUE(chunk.meta_data.size_statistics->definition_level_histogram.has_value()); + // there are no lists so there should be no repetition level histogram + EXPECT_FALSE(chunk.meta_data.size_statistics->repetition_level_histogram.has_value()); + auto const& def_hist = chunk.meta_data.size_statistics->definition_level_histogram.value(); + ASSERT_TRUE(def_hist.size() == 3L); + auto const l0_nulls = num_ordered_rows / 5; + auto const l1_l0_nulls = num_ordered_rows / (5 * null_mods[c]); + auto const l1_nulls = num_ordered_rows / null_mods[c] - l1_l0_nulls; + auto const l2_vals = num_ordered_rows - l1_nulls - l0_nulls; + EXPECT_EQ(def_hist[0], l0_nulls); + EXPECT_EQ(def_hist[1], l1_nulls); + EXPECT_EQ(def_hist[2], l2_vals); + } else { + // column 0 has no lists and no nulls and no strings, so there should be no size stats + EXPECT_FALSE(chunk.meta_data.size_statistics.has_value()); + } + + int64_t num_vals = 0; + + if (is_v2) { ASSERT_TRUE(ci.null_counts.has_value()); } + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = 
read_page_header(source, page_loc); + EXPECT_EQ(ph.type, expected_hdr_type); + EXPECT_EQ(page_loc.first_row_index, num_vals); + num_vals += is_v2 ? ph.data_page_header_v2.num_rows : ph.data_page_header.num_values; + // check that null counts match + if (is_v2) { EXPECT_EQ(ci.null_counts.value()[o], ph.data_page_header_v2.num_nulls); } + } + } + } +} + +TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls) +{ + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; + + using cudf::test::iterators::null_at; + using cudf::test::iterators::nulls_at; + using lcw = cudf::test::lists_column_wrapper; + + // 4 nulls + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + // def histogram [1, 1, 2, 3] + // rep histogram [4, 3] + lcw col0{{{{1, 2, 3}, nulls_at({0, 2})}, {}, {4, 5}, {}}, null_at(3)}; + + // 4 nulls + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + // def histogram [1, 3, 10] + // rep histogram [4, 4, 6] + lcw col1{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // 4 nulls + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + // def histogram [1, 1, 2, 10] + // rep histogram [4, 4, 6] + lcw col2{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, null_at(3)}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // 6 nulls + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // [[]] + // def histogram [1, 1, 2, 2, 8] + // rep histogram [4, 4, 6] + using dlcw = cudf::test::lists_column_wrapper; + dlcw col3{{{{1., 2., 3.}, {}, {4., 5.}, {}, {{0., 6., 0.}, nulls_at({0, 2})}}, null_at(3)}, + {{7., 8.}}, + dlcw{}, + dlcw{dlcw{}}}; + + // 4 nulls + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // NULL + // def histogram [1, 1, 1, 1, 10] + // rep histogram [4, 4, 6] + using ui16lcw = cudf::test::lists_column_wrapper; + cudf::test::lists_column_wrapper col4{ + 
{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, null_at(3)}, {{7, 8}}, ui16lcw{}, ui16lcw{ui16lcw{}}}, + null_at(3)}; + + // 6 nulls + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // NULL + // def histogram [1, 1, 1, 1, 2, 8] + // rep histogram [4, 4, 6] + lcw col5{{{{{1, 2, 3}, {}, {4, 5}, {}, {{0, 6, 0}, nulls_at({0, 2})}}, null_at(3)}, + {{7, 8}}, + lcw{}, + lcw{lcw{}}}, + null_at(3)}; + + // 4 nulls + // def histogram [1, 3, 9] + // rep histogram [4, 4, 5] + using strlcw = cudf::test::lists_column_wrapper; + cudf::test::lists_column_wrapper col6{ + {{"Monday", "Monday", "Friday"}, {}, {"Monday", "Friday"}, {}, {"Sunday", "Funday"}}, + {{"bee", "sting"}}, + strlcw{}, + strlcw{strlcw{}}}; + + // 5 nulls + // def histogram [1, 3, 1, 8] + // rep histogram [4, 4, 5] + using strlcw = cudf::test::lists_column_wrapper; + cudf::test::lists_column_wrapper col7{{{"Monday", "Monday", "Friday"}, + {}, + {{"Monday", "Friday"}, null_at(1)}, + {}, + {"Sunday", "Funday"}}, + {{"bee", "sting"}}, + strlcw{}, + strlcw{strlcw{}}}; + + // 11 nulls + // D 5 6 5 6 5 6 5 6 6 + // R 0 3 3 3 1 3 3 2 3 + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // D 2 6 6 6 6 2 + // R 0 1 2 3 3 1 + // [NULL, [[13],[14,15,16]], NULL] + // D 2 3 2 4 + // R 0 1 1 1 + // [NULL, [], NULL, [[]]] + // D 0 + // R 0 + // NULL + // def histogram [1, 0, 4, 1, 1, 4, 9] + // rep histogram [4, 6, 2, 8] + lcw col8{{ + {{{{1, 2, 3, 4}, nulls_at({0, 2})}}, {{{5, 6, 7}, nulls_at({0, 2})}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, nulls_at({0, 2})}, + {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, nulls_at({0, 2})}, + lcw{lcw{lcw{}}}, + }, + null_at(3)}; + + table_view expected({col0, col1, col2, col3, col4, col5, col6, col7}); + + int64_t const expected_null_counts[] = {4, 4, 4, 6, 4, 6, 4, 5, 11}; + std::vector const expected_def_hists[] = {{1, 1, 2, 3}, + {1, 3, 10}, + {1, 1, 2, 10}, + {1, 1, 2, 2, 8}, + {1, 1, 1, 1, 10}, + {1, 1, 1, 1, 2, 8}, + {1, 3, 9}, + {1, 3, 1, 8}, 
+ {1, 0, 4, 1, 1, 4, 9}}; + std::vector const expected_rep_hists[] = {{4, 3}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 5}, + {4, 4, 5}, + {4, 6, 2, 8}}; + + auto const filepath = temp_env->get_temp_filepath("ColumnIndexListWithNulls.parquet"); + auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .write_v2_headers(is_v2) + .compression(cudf::io::compression_type::NONE); + + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + ASSERT_TRUE(chunk.meta_data.size_statistics.has_value()); + ASSERT_TRUE(chunk.meta_data.size_statistics->definition_level_histogram.has_value()); + ASSERT_TRUE(chunk.meta_data.size_statistics->repetition_level_histogram.has_value()); + // there is only one page, so chunk stats should match the page stats + EXPECT_EQ(chunk.meta_data.size_statistics->definition_level_histogram.value(), + expected_def_hists[c]); + EXPECT_EQ(chunk.meta_data.size_statistics->repetition_level_histogram.value(), + expected_rep_hists[c]); + // only column 6 has string data + if (c == 6) { + ASSERT_TRUE(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.has_value()); + EXPECT_EQ(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.value(), 50L); + } else if (c == 7) { + ASSERT_TRUE(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.has_value()); + EXPECT_EQ(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.value(), 44L); + } else { + EXPECT_FALSE(chunk.meta_data.size_statistics->unencoded_byte_array_data_bytes.has_value()); + } + + // loop over offsets, read each page 
header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, expected_hdr_type); + // check null counts in V2 header + if (is_v2) { EXPECT_EQ(ph.data_page_header_v2.num_nulls, expected_null_counts[c]); } + } + + // check null counts in column chunk stats and page indexes + auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + EXPECT_EQ(stats.null_count, expected_null_counts[c]); + + // should only be one page + EXPECT_FALSE(ci.null_pages[0]); + ASSERT_TRUE(ci.null_counts.has_value()); + EXPECT_EQ(ci.null_counts.value()[0], expected_null_counts[c]); + + ASSERT_TRUE(ci.definition_level_histogram.has_value()); + EXPECT_EQ(ci.definition_level_histogram.value(), expected_def_hists[c]); + + ASSERT_TRUE(ci.repetition_level_histogram.has_value()); + EXPECT_EQ(ci.repetition_level_histogram.value(), expected_rep_hists[c]); + + if (c == 6) { + ASSERT_TRUE(oi.unencoded_byte_array_data_bytes.has_value()); + EXPECT_EQ(oi.unencoded_byte_array_data_bytes.value()[0], 50L); + } else if (c == 7) { + ASSERT_TRUE(oi.unencoded_byte_array_data_bytes.has_value()); + EXPECT_EQ(oi.unencoded_byte_array_data_bytes.value()[0], 44L); + } else { + EXPECT_FALSE(oi.unencoded_byte_array_data_bytes.has_value()); + } + } + } +} + +TEST_P(ParquetV2Test, CheckEncodings) +{ + using cudf::io::parquet::detail::Encoding; + constexpr auto num_rows = 100'000; + auto const is_v2 = GetParam(); + + auto const validity = cudf::test::iterators::no_nulls(); + // data should be PLAIN for v1, RLE for V2 + auto col0_data = + cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 2 == 0; }); + // data should be PLAIN for v1, DELTA_BINARY_PACKED for v2 + auto col1_data = random_values(num_rows); + // 
data should be PLAIN_DICTIONARY for v1, PLAIN and RLE_DICTIONARY for v2 + auto col2_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 1; }); + + cudf::test::fixed_width_column_wrapper col0{col0_data, col0_data + num_rows, validity}; + column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; + column_wrapper col2{col2_data, col2_data + num_rows, validity}; + + auto expected = table_view{{col0, col1, col2}}; + + auto const filename = is_v2 ? "CheckEncodingsV2.parquet" : "CheckEncodingsV1.parquet"; + auto filepath = temp_env->get_temp_filepath(filename); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_size_rows(num_rows) + .write_v2_headers(is_v2); + cudf::io::write_parquet(out_opts); + + // make sure the expected encodings are present + auto contains = [](auto const& vec, auto const& enc) { + return std::find(vec.begin(), vec.end(), enc) != vec.end(); + }; + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + auto const& chunk0_enc = fmd.row_groups[0].columns[0].meta_data.encodings; + auto const& chunk1_enc = fmd.row_groups[0].columns[1].meta_data.encodings; + auto const& chunk2_enc = fmd.row_groups[0].columns[2].meta_data.encodings; + if (is_v2) { + // col0 should have RLE for rep/def and data + EXPECT_TRUE(chunk0_enc.size() == 1); + EXPECT_TRUE(contains(chunk0_enc, Encoding::RLE)); + // col1 should have RLE for rep/def and DELTA_BINARY_PACKED for data + EXPECT_TRUE(chunk1_enc.size() == 2); + EXPECT_TRUE(contains(chunk1_enc, Encoding::RLE)); + EXPECT_TRUE(contains(chunk1_enc, Encoding::DELTA_BINARY_PACKED)); + // col2 should have RLE for rep/def, PLAIN for dict, and RLE_DICTIONARY for data + EXPECT_TRUE(chunk2_enc.size() == 3); + EXPECT_TRUE(contains(chunk2_enc, Encoding::RLE)); + EXPECT_TRUE(contains(chunk2_enc, Encoding::PLAIN)); + 
EXPECT_TRUE(contains(chunk2_enc, Encoding::RLE_DICTIONARY)); + } else { + // col0 should have RLE for rep/def and PLAIN for data + EXPECT_TRUE(chunk0_enc.size() == 2); + EXPECT_TRUE(contains(chunk0_enc, Encoding::RLE)); + EXPECT_TRUE(contains(chunk0_enc, Encoding::PLAIN)); + // col1 should have RLE for rep/def and PLAIN for data + EXPECT_TRUE(chunk1_enc.size() == 2); + EXPECT_TRUE(contains(chunk1_enc, Encoding::RLE)); + EXPECT_TRUE(contains(chunk1_enc, Encoding::PLAIN)); + // col2 should have RLE for rep/def and PLAIN_DICTIONARY for data and dict + EXPECT_TRUE(chunk2_enc.size() == 2); + EXPECT_TRUE(contains(chunk2_enc, Encoding::RLE)); + EXPECT_TRUE(contains(chunk2_enc, Encoding::PLAIN_DICTIONARY)); + } +} diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp new file mode 100644 index 00000000000..51190b5de9e --- /dev/null +++ b/cpp/tests/io/parquet_writer_test.cpp @@ -0,0 +1,1720 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "parquet_common.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +template +void test_durations(mask_op_t mask_op) +{ + std::default_random_engine generator; + std::uniform_int_distribution distribution_d(0, 30); + auto sequence_d = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return distribution_d(generator); }); + + std::uniform_int_distribution distribution_s(0, 86400); + auto sequence_s = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return distribution_s(generator); }); + + std::uniform_int_distribution distribution(0, 86400 * 1000); + auto sequence = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return distribution(generator); }); + + auto mask = cudf::detail::make_counting_transform_iterator(0, mask_op); + + constexpr auto num_rows = 100; + // Durations longer than a day are not exactly valid, but cudf should be able to round trip + auto durations_d = cudf::test::fixed_width_column_wrapper( + sequence_d, sequence_d + num_rows, mask); + auto durations_s = cudf::test::fixed_width_column_wrapper( + sequence_s, sequence_s + num_rows, mask); + auto durations_ms = cudf::test::fixed_width_column_wrapper( + sequence, sequence + num_rows, mask); + auto durations_us = cudf::test::fixed_width_column_wrapper( + sequence, sequence + num_rows, mask); + auto durations_ns = cudf::test::fixed_width_column_wrapper( + sequence, sequence + num_rows, mask); + + auto expected = table_view{{durations_d, durations_s, durations_ms, durations_us, durations_ns}}; + + auto filepath = temp_env->get_temp_filepath("Durations.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = 
cudf::io::read_parquet(in_opts); + + auto durations_d_got = + cudf::cast(result.tbl->view().column(0), cudf::data_type{cudf::type_id::DURATION_DAYS}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_d, durations_d_got->view()); + + auto durations_s_got = + cudf::cast(result.tbl->view().column(1), cudf::data_type{cudf::type_id::DURATION_SECONDS}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_s, durations_s_got->view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_ms, result.tbl->view().column(2)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_us, result.tbl->view().column(3)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_ns, result.tbl->view().column(4)); +} + +TEST_F(ParquetWriterTest, Durations) +{ + test_durations([](auto i) { return true; }); + test_durations([](auto i) { return (i % 2) != 0; }); + test_durations([](auto i) { return (i % 3) != 0; }); + test_durations([](auto i) { return false; }); +} + +TEST_F(ParquetWriterTest, MultiIndex) +{ + constexpr auto num_rows = 100; + + auto col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + column_wrapper col0{col0_data.begin(), col0_data.end(), validity}; + column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; + column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; + column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; + column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; + + auto expected = table_view{{col0, col1, col2, col3, col4}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + 
expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + + auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(expected_metadata) + .key_value_metadata( + {{{"pandas", "\"index_columns\": [\"int8s\", \"int16s\"], \"column1\": [\"int32s\"]"}}}); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .use_pandas_metadata(true) + .columns({"int32s", "floats", "doubles"}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, BufferSource) +{ + constexpr auto num_rows = 100 << 10; + auto const seq_col = random_values(num_rows); + auto const validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + column_wrapper col{seq_col.begin(), seq_col.end(), validity}; + + auto const expected = table_view{{col}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + + std::vector out_buffer; + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) + .metadata(expected_metadata); + cudf::io::write_parquet(out_opts); + + // host buffer + { + cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info(out_buffer.data(), out_buffer.size())); + auto const result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } + + // device buffer + { + auto 
const d_input = cudf::detail::make_device_uvector_sync( + cudf::host_span{reinterpret_cast(out_buffer.data()), + out_buffer.size()}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + auto const d_buffer = cudf::device_span( + reinterpret_cast(d_input.data()), d_input.size()); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(d_buffer)); + auto const result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } +} + +TEST_F(ParquetWriterTest, ManyFragments) +{ + srand(31337); + auto const expected = create_random_fixed_table(10, 6'000'000, false); + + auto const filepath = temp_env->get_temp_filepath("ManyFragments.parquet"); + cudf::io::parquet_writer_options const args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected) + .max_page_size_bytes(8 * 1024); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetWriterTest, NonNullable) +{ + srand(31337); + auto expected = create_random_fixed_table(9, 9, false); + + auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetWriterTest, Struct) +{ + // Struct> + + auto names = {"Samuel Vimes", + "Carrot 
Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{names_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto expected = table_view({*struct_2}); + + auto filepath = temp_env->get_temp_filepath("Struct.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + cudf::io::read_parquet(read_args); +} + +// custom data sink that supports device writes. uses plain file io. 
+class custom_test_data_sink : public cudf::io::data_sink { + public: + explicit custom_test_data_sink(std::string const& filepath) + { + outfile_.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); + CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file"); + } + + virtual ~custom_test_data_sink() { flush(); } + + void host_write(void const* data, size_t size) override + { + outfile_.write(static_cast(data), size); + } + + [[nodiscard]] bool supports_device_write() const override { return true; } + + void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + { + this->device_write_async(gpu_data, size, stream).get(); + } + + std::future device_write_async(void const* gpu_data, + size_t size, + rmm::cuda_stream_view stream) override + { + return std::async(std::launch::deferred, [=] { + char* ptr = nullptr; + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDefault, stream.value())); + stream.synchronize(); + outfile_.write(ptr, size); + CUDF_CUDA_TRY(cudaFreeHost(ptr)); + }); + } + + void flush() override { outfile_.flush(); } + + size_t bytes_written() override { return outfile_.tellp(); } + + private: + std::ofstream outfile_; +}; + +TEST_F(ParquetWriterTest, CustomDataSink) +{ + auto filepath = temp_env->get_temp_filepath("CustomDataSink.parquet"); + custom_test_data_sink custom_sink(filepath); + + srand(31337); + auto expected = create_random_fixed_table(5, 10, false); + + // write out using the custom sink + { + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + } + + // write out using a memmapped sink + std::vector buf_sink; + { + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buf_sink}, *expected); + cudf::io::write_parquet(args); + } + + // read them back in and make sure 
everything matches + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); + + cudf::io::parquet_reader_options buf_args = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{buf_sink.data(), buf_sink.size()}); + auto buf_tbl = cudf::io::read_parquet(buf_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(buf_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterTest, DeviceWriteLargeishFile) +{ + auto filepath = temp_env->get_temp_filepath("DeviceWriteLargeishFile.parquet"); + custom_test_data_sink custom_sink(filepath); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_random_fixed_table(4, 4 * 1024 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterTest, PartitionedWrite) +{ + auto source = create_compressible_fixed_table(16, 4 * 1024 * 1024, 1000, false); + + auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); + auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); + + auto partition1 = cudf::io::partition_info{10, 1024 * 1024}; + auto partition2 = cudf::io::partition_info{20 * 1024 + 7, 3 * 1024 * 1024}; + + auto expected1 = + cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); + auto expected2 = + cudf::slice(*source, {partition2.start_row, partition2.start_row + 
partition2.num_rows}); + + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) + .partitions({partition1, partition2}) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); + + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); + + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); +} + +template +std::string create_parquet_file(int num_cols) +{ + srand(31337); + auto const table = create_random_fixed_table(num_cols, 10, true); + auto const filepath = + temp_env->get_temp_filepath(typeid(T).name() + std::to_string(num_cols) + ".parquet"); + cudf::io::parquet_writer_options const out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table->view()); + cudf::io::write_parquet(out_opts); + return filepath; +} + +TEST_F(ParquetWriterTest, MultipleMismatchedSources) +{ + auto const int5file = create_parquet_file(5); + { + auto const float5file = create_parquet_file(5); + std::vector files{int5file, float5file}; + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + } + { + auto const int10file = create_parquet_file(10); + std::vector files{int5file, int10file}; + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + } +} + +TEST_F(ParquetWriterTest, Slice) +{ + auto col = + cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}, {true, true, true, false, true}}; + 
std::vector indices{2, 5}; + std::vector result = cudf::slice(col, indices); + cudf::table_view tbl{result}; + + auto filepath = temp_env->get_temp_filepath("Slice.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto read_table = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); +} + +TEST_F(ParquetWriterTest, DecimalWrite) +{ + constexpr cudf::size_type num_rows = 500; + auto seq_col0 = random_values(num_rows); + auto seq_col1 = random_values(num_rows); + + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + + auto col0 = cudf::test::fixed_point_column_wrapper{ + seq_col0.begin(), seq_col0.end(), valids, numeric::scale_type{5}}; + auto col1 = cudf::test::fixed_point_column_wrapper{ + seq_col1.begin(), seq_col1.end(), valids, numeric::scale_type{-9}}; + + auto table = table_view({col0, col1}); + + auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table); + + cudf::io::table_input_metadata expected_metadata(table); + + // verify failure if too small a precision is given + expected_metadata.column_metadata[0].set_decimal_precision(7); + expected_metadata.column_metadata[1].set_decimal_precision(1); + args.set_metadata(expected_metadata); + EXPECT_THROW(cudf::io::write_parquet(args), cudf::logic_error); + + // verify success if equal precision is given + expected_metadata.column_metadata[0].set_decimal_precision(7); + expected_metadata.column_metadata[1].set_decimal_precision(9); + args.set_metadata(std::move(expected_metadata)); + cudf::io::write_parquet(args); + + 
cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table); +} + +TEST_F(ParquetWriterTest, RowGroupSizeInvalid) +{ + auto const unused_table = std::make_unique
(); + std::vector out_buffer; + + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .row_group_size_rows(0), + cudf::logic_error); + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .max_page_size_rows(0), + cudf::logic_error); + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .row_group_size_bytes(3 << 8), + cudf::logic_error); + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .max_page_size_bytes(3 << 8), + cudf::logic_error); + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .max_page_size_bytes(0xFFFF'FFFFUL), + cudf::logic_error); + + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .row_group_size_rows(0), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .max_page_size_rows(0), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .row_group_size_bytes(3 << 8), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .max_page_size_bytes(3 << 8), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .max_page_size_bytes(0xFFFF'FFFFUL), + cudf::logic_error); +} + +TEST_F(ParquetWriterTest, RowGroupPageSizeMatch) +{ + auto const unused_table = std::make_unique
(); + std::vector out_buffer; + + auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .row_group_size_bytes(128 * 1024) + .max_page_size_bytes(512 * 1024) + .row_group_size_rows(10000) + .max_page_size_rows(20000) + .build(); + EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); + EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); +} + +TEST_F(ParquetWriterTest, EmptyList) +{ + auto L1 = cudf::make_lists_column(0, + cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), + cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), + 0, + {}); + auto L0 = cudf::make_lists_column( + 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); + + auto filepath = temp_env->get_temp_filepath("EmptyList.parquet"); + cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), + cudf::table_view({*L0}))); + + auto result = cudf::io::read_parquet( + cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); + + using lcw = cudf::test::lists_column_wrapper; + auto expected = lcw{lcw{}, lcw{}, lcw{}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), expected); +} + +TEST_F(ParquetWriterTest, DeepEmptyList) +{ + // Make a list column LLLi st only L is valid and LLi are all null. 
This tests whether we can + // handle multiple nullptr offsets + + auto L2 = cudf::make_lists_column(0, + cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), + cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), + 0, + {}); + auto L1 = cudf::make_lists_column( + 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(L2), 0, {}); + auto L0 = cudf::make_lists_column( + 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); + + auto filepath = temp_env->get_temp_filepath("DeepEmptyList.parquet"); + cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), + cudf::table_view({*L0}))); + + auto result = cudf::io::read_parquet( + cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); +} + +TEST_F(ParquetWriterTest, EmptyListWithStruct) +{ + auto L2 = cudf::make_lists_column(0, + cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), + cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), + 0, + {}); + + auto children = std::vector>{}; + children.push_back(std::move(L2)); + auto S2 = cudf::make_structs_column(0, std::move(children), 0, {}); + auto L1 = cudf::make_lists_column( + 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(S2), 0, {}); + auto L0 = cudf::make_lists_column( + 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); + + auto filepath = temp_env->get_temp_filepath("EmptyListWithStruct.parquet"); + cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), + cudf::table_view({*L0}))); + auto result = cudf::io::read_parquet( + cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); +} + +TEST_F(ParquetWriterTest, CheckPageRows) +{ + auto sequence = 
thrust::make_counting_iterator(0); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto page_rows = 5000; + constexpr auto num_rows = 2 * page_rows; + column_wrapper col(sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto const filepath = temp_env->get_temp_filepath("CheckPageRows.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_size_rows(page_rows); + cudf::io::write_parquet(out_opts); + + // check first page header and make sure it has only page_rows values + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + ASSERT_GT(fmd.row_groups.size(), 0); + ASSERT_EQ(fmd.row_groups[0].columns.size(), 1); + auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; + ASSERT_GT(first_chunk.data_page_offset, 0); + + // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded + // version should be smaller than size of the struct. 
+ auto const ph = read_page_header( + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); + + EXPECT_EQ(ph.data_page_header.num_values, page_rows); +} + +TEST_F(ParquetWriterTest, CheckPageRowsAdjusted) +{ + // enough for a few pages with the default 20'000 rows/page + constexpr auto rows_per_page = 20'000; + constexpr auto num_rows = 3 * rows_per_page; + const std::string s1(32, 'a'); + auto col0_elements = + cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s1; }); + auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + num_rows); + + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("CheckPageRowsAdjusted.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_size_rows(rows_per_page); + cudf::io::write_parquet(out_opts); + + // check first page header and make sure it has only page_rows values + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + ASSERT_GT(fmd.row_groups.size(), 0); + ASSERT_EQ(fmd.row_groups[0].columns.size(), 1); + auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; + ASSERT_GT(first_chunk.data_page_offset, 0); + + // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded + // version should be smaller than size of the struct. 
+ auto const ph = read_page_header( + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); + + EXPECT_LE(ph.data_page_header.num_values, rows_per_page); +} + +TEST_F(ParquetWriterTest, CheckPageRowsTooSmall) +{ + constexpr auto rows_per_page = 1'000; + constexpr auto fragment_size = 5'000; + constexpr auto num_rows = 3 * rows_per_page; + const std::string s1(32, 'a'); + auto col0_elements = + cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s1; }); + auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + num_rows); + + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("CheckPageRowsTooSmall.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_fragment_size(fragment_size) + .max_page_size_rows(rows_per_page); + cudf::io::write_parquet(out_opts); + + // check that file is written correctly when rows/page < fragment size + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + ASSERT_TRUE(fmd.row_groups.size() > 0); + ASSERT_TRUE(fmd.row_groups[0].columns.size() == 1); + auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; + ASSERT_TRUE(first_chunk.data_page_offset > 0); + + // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded + // version should be smaller than size of the struct. 
+ auto const ph = read_page_header( + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); + + // there should be only one page since the fragment size is larger than rows_per_page + EXPECT_EQ(ph.data_page_header.num_values, num_rows); +} + +TEST_F(ParquetWriterTest, Decimal128Stats) +{ + // check that decimal128 min and max statistics are written in network byte order + // this is negative, so should be the min + std::vector expected_min{ + 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::vector expected_max{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6}; + + __int128_t val0 = 0xa1b2'c3d4'e5f6ULL; + __int128_t val1 = val0 << 80; + column_wrapper col0{{numeric::decimal128(val0, numeric::scale_type{0}), + numeric::decimal128(val1, numeric::scale_type{0})}}; + + auto expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("Decimal128Stats.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + auto const stats = get_statistics(fmd.row_groups[0].columns[0]); + + EXPECT_EQ(expected_min, stats.min_value); + EXPECT_EQ(expected_max, stats.max_value); +} + +TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) +{ + char const* coldata[] = { + // in-range 7 bit. should truncate to "yyyyyyyz" + "yyyyyyyyy", + // max 7 bit. should truncate to "x7fx7fx7fx7fx7fx7fx7fx80", since it's + // considered binary, not UTF-8. If UTF-8 it should not truncate. + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", + // max binary. this should not truncate + "\xff\xff\xff\xff\xff\xff\xff\xff\xff", + // in-range 2-byte UTF8 (U+00E9). should truncate to "éééê" + "ééééé", + // max 2-byte UTF8 (U+07FF). 
should not truncate + "߿߿߿߿߿", + // in-range 3-byte UTF8 (U+0800). should truncate to "ࠀࠁ" + "ࠀࠀࠀ", + // max 3-byte UTF8 (U+FFFF). should not truncate + "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", + // in-range 4-byte UTF8 (U+10000). should truncate to "𐀀𐀁" + "𐀀𐀀𐀀", + // max unicode (U+10FFFF). should truncate to \xf4\x8f\xbf\xbf\xf4\x90\x80\x80, + // which is no longer valid unicode, but is still ok UTF-8??? + "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", + // max 4-byte UTF8 (U+1FFFFF). should not truncate + "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; + + // NOTE: UTF8 min is initialized with 0xf7bfbfbf. Binary values larger + // than that will not become minimum value (when written as UTF-8). + char const* truncated_min[] = {"yyyyyyyy", + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", + "\xf7\xbf\xbf\xbf", + "éééé", + "߿߿߿߿", + "ࠀࠀ", + "\xef\xbf\xbf\xef\xbf\xbf", + "𐀀𐀀", + "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", + "\xf7\xbf\xbf\xbf"}; + + char const* truncated_max[] = {"yyyyyyyz", + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x80", + "\xff\xff\xff\xff\xff\xff\xff\xff\xff", + "éééê", + "߿߿߿߿߿", + "ࠀࠁ", + "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", + "𐀀𐀁", + "\xf4\x8f\xbf\xbf\xf4\x90\x80\x80", + "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; + + auto cols = [&]() { + using string_wrapper = column_wrapper; + std::vector> cols; + for (auto const str : coldata) { + cols.push_back(string_wrapper{str}.release()); + } + return cols; + }(); + auto expected = std::make_unique
(std::move(cols)); + + auto const filepath = temp_env->get_temp_filepath("CheckColumnIndexTruncation.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected->view()) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .column_index_truncate_length(8); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + + // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); + + // check that truncated values == expected + EXPECT_EQ(memcmp(ci.min_values[0].data(), truncated_min[c], ci.min_values[0].size()), 0); + EXPECT_EQ(memcmp(ci.max_values[0].data(), truncated_max[c], ci.max_values[0].size()), 0); + } + } +} + +TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) +{ + std::vector truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, + {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + + std::vector truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, + {0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + + cudf::test::lists_column_wrapper col0{ + {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 
0xfe}}; + cudf::test::lists_column_wrapper col1{ + {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + cudf::test::lists_column_wrapper col2{ + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + + auto expected = table_view{{col0, col1, col2}}; + + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); + output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); + output_metadata.column_metadata[2].set_name("col_binary2").set_output_as_binary(true); + + auto const filepath = temp_env->get_temp_filepath("BinaryColumnIndexTruncation.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(output_metadata)) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .column_index_truncate_length(8); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + auto const ci = read_column_index(source, chunk); + auto const stats = get_statistics(chunk); + + // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); + + // check that truncated values == expected + EXPECT_EQ(ci.min_values[0], truncated_min[c]); + EXPECT_EQ(ci.max_values[0], 
truncated_max[c]); + } + } +} + +TEST_F(ParquetWriterTest, ByteArrayStats) +{ + // check that byte array min and max statistics are written as expected. If a byte array is + // written as a string, max utf8 is 0xf7bfbfbf and so the minimum value will be set to that value + // instead of a potential minimum higher than that. + std::vector expected_col0_min{0xf0}; + std::vector expected_col0_max{0xf0, 0xf5, 0xf5}; + std::vector expected_col1_min{0xfe, 0xfe, 0xfe}; + std::vector expected_col1_max{0xfe, 0xfe, 0xfe}; + + cudf::test::lists_column_wrapper list_int_col0{ + {0xf0}, {0xf0, 0xf5, 0xf3}, {0xf0, 0xf5, 0xf5}}; + cudf::test::lists_column_wrapper list_int_col1{ + {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}}; + + auto expected = table_view{{list_int_col0, list_int_col1}}; + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); + output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("ByteArrayStats.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(output_metadata)); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema({{}, {}}); + auto result = cudf::io::read_parquet(in_opts); + + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + + EXPECT_EQ(fmd.schema[1].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); + EXPECT_EQ(fmd.schema[2].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); + + auto const stats0 = get_statistics(fmd.row_groups[0].columns[0]); + auto const stats1 = get_statistics(fmd.row_groups[0].columns[1]); + + EXPECT_EQ(expected_col0_min, 
stats0.min_value); + EXPECT_EQ(expected_col0_max, stats0.max_value); + EXPECT_EQ(expected_col1_min, stats1.min_value); + EXPECT_EQ(expected_col1_max, stats1.max_value); +} + +TEST_F(ParquetWriterTest, SingleValueDictionaryTest) +{ + constexpr unsigned int expected_bits = 1; + constexpr unsigned int nrows = 1'000'000U; + + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return "a unique string value suffixed with 1"; }); + auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("SingleValueDictionaryTest.parquet"); + // set row group size so that there will be only one row group + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::NONE) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .row_group_size_rows(nrows); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + + // make sure dictionary was used + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd]() { + for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_TRUE(used_dict()); + + // and check that the correct number of bits was used + auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); + auto const nbits 
= read_dict_bits(source, oi.page_locations[0]); + EXPECT_EQ(nbits, expected_bits); +} + +TEST_F(ParquetWriterTest, DictionaryNeverTest) +{ + constexpr unsigned int nrows = 1'000U; + + // only one value, so would normally use dictionary + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return "a unique string value suffixed with 1"; }); + auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("DictionaryNeverTest.parquet"); + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::NONE) + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + + // make sure dictionary was not used + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd]() { + for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_FALSE(used_dict()); +} + +TEST_F(ParquetWriterTest, DictionaryAdaptiveTest) +{ + constexpr unsigned int nrows = 65'536U; + // cardinality is chosen to result in a dictionary > 1MB in size + constexpr unsigned int cardinality = 32'768U; + + // single value will have a small dictionary + auto elements0 = cudf::detail::make_counting_transform_iterator( + 0, 
[](auto i) { return "a unique string value suffixed with 1"; }); + auto const col0 = cudf::test::strings_column_wrapper(elements0, elements0 + nrows); + + // high cardinality will have a large dictionary + auto elements1 = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { + return "a unique string value suffixed with " + std::to_string(i % cardinality); + }); + auto const col1 = cudf::test::strings_column_wrapper(elements1, elements1 + nrows); + + auto const expected = table_view{{col0, col1}}; + + auto const filepath = temp_env->get_temp_filepath("DictionaryAdaptiveTest.parquet"); + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::ZSTD) + .dictionary_policy(cudf::io::dictionary_policy::ADAPTIVE); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + + // make sure dictionary was used as expected. col0 should use one, + // col1 should not. 
+ auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd](int col) { + for (auto enc : fmd.row_groups[0].columns[col].meta_data.encodings) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_TRUE(used_dict(0)); + EXPECT_FALSE(used_dict(1)); +} + +TEST_F(ParquetWriterTest, DictionaryAlwaysTest) +{ + constexpr unsigned int nrows = 65'536U; + // cardinality is chosen to result in a dictionary > 1MB in size + constexpr unsigned int cardinality = 32'768U; + + // single value will have a small dictionary + auto elements0 = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return "a unique string value suffixed with 1"; }); + auto const col0 = cudf::test::strings_column_wrapper(elements0, elements0 + nrows); + + // high cardinality will have a large dictionary + auto elements1 = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { + return "a unique string value suffixed with " + std::to_string(i % cardinality); + }); + auto const col1 = cudf::test::strings_column_wrapper(elements1, elements1 + nrows); + + auto const expected = table_view{{col0, col1}}; + + auto const filepath = temp_env->get_temp_filepath("DictionaryAlwaysTest.parquet"); + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::ZSTD) + .dictionary_policy(cudf::io::dictionary_policy::ALWAYS); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, 
result.tbl->view()); + + // make sure dictionary was used for both columns + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd](int col) { + for (auto enc : fmd.row_groups[0].columns[col].meta_data.encodings) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_TRUE(used_dict(0)); + EXPECT_TRUE(used_dict(1)); +} + +TEST_F(ParquetWriterTest, DictionaryPageSizeEst) +{ + // one page + constexpr unsigned int nrows = 20'000U; + + // this test is creating a pattern of repeating then non-repeating values to trigger + // a "worst-case" for page size estimation in the presence of a dictionary. have confirmed + // that this fails for values over 16 in the final term of `max_RLE_page_size()`. + // The output of the iterator will be 'CCCCCRRRRRCCCCCRRRRR...` where 'C' is a changing + // value, and 'R' repeats. The encoder will turn this into a literal run of 8 values + // (`CCCCCRRR`) followed by a repeated run of 2 (`RR`). This pattern then repeats, getting + // as close as possible to a condition of repeated 8 value literal runs. 
+ auto elements0 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + if ((i / 5) % 2 == 1) { + return std::string("non-unique string"); + } else { + return "a unique string value suffixed with " + std::to_string(i); + } + }); + auto const col0 = cudf::test::strings_column_wrapper(elements0, elements0 + nrows); + + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("DictionaryPageSizeEst.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::ZSTD) + .dictionary_policy(cudf::io::dictionary_policy::ALWAYS); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetWriterTest, UserNullability) +{ + auto weight_col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_col = cudf::test::structs_column_wrapper{weight_col, ages_col}; + + auto expected = table_view({struct_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + expected_metadata.column_metadata[0].child(0).set_nullability(true); + + auto filepath = temp_env->get_temp_filepath("SingleWriteNullable.parquet"); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_parquet(write_opts); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + 
EXPECT_FALSE(result.tbl->view().column(0).nullable()); + EXPECT_TRUE(result.tbl->view().column(0).child(0).nullable()); + EXPECT_FALSE(result.tbl->view().column(0).child(1).nullable()); +} + +TEST_F(ParquetWriterTest, UserNullabilityInvalid) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); + auto col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}, valids}; + auto expected = table_view({col}); + + auto filepath = temp_env->get_temp_filepath("SingleWriteNullableInvalid.parquet"); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + // Should work without the nullability option + EXPECT_NO_THROW(cudf::io::write_parquet(write_opts)); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + write_opts.set_metadata(std::move(expected_metadata)); + // Can't write a column with nulls as not nullable + EXPECT_THROW(cudf::io::write_parquet(write_opts), cudf::logic_error); +} + +TEST_F(ParquetWriterTest, CompStats) +{ + auto table = create_random_fixed_table(1, 100000, true); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}, table->view()) + .compression_statistics(stats); + cudf::io::write_parquet(opts); + + EXPECT_NE(stats->num_compressed_bytes(), 0); + EXPECT_EQ(stats->num_failed_bytes(), 0); + EXPECT_EQ(stats->num_skipped_bytes(), 0); + EXPECT_FALSE(std::isnan(stats->compression_ratio())); +} + +TEST_F(ParquetWriterTest, CompStatsEmptyTable) +{ + auto table_no_rows = create_random_fixed_table(20, 0, false); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}, + 
table_no_rows->view()) + .compression_statistics(stats); + cudf::io::write_parquet(opts); + + expect_compression_stats_empty(stats); +} + +TEST_F(ParquetWriterTest, NoNullsAsNonNullable) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + column_wrapper col{{1, 2, 3}, valids}; + table_view expected({col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + + auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + // Writer should be able to write a column without nulls as non-nullable + EXPECT_NO_THROW(cudf::io::write_parquet(out_opts)); +} + +TEST_F(ParquetWriterTest, TimestampMicrosINT96NoOverflow) +{ + using namespace cuda::std::chrono; + using namespace cudf::io; + + column_wrapper big_ts_col{ + sys_days{year{3023} / month{7} / day{14}} + 7h + 38min + 45s + 418688us, + sys_days{year{723} / month{3} / day{21}} + 14h + 20min + 13s + microseconds{781ms}}; + + table_view expected({big_ts_col}); + auto filepath = temp_env->get_temp_filepath("BigINT96Timestamp.parquet"); + + auto const out_opts = + parquet_writer_options::builder(sink_info{filepath}, expected).int96_timestamps(true).build(); + write_parquet(out_opts); + + auto const in_opts = parquet_reader_options::builder(source_info(filepath)) + .timestamp_type(cudf::data_type(cudf::type_id::TIMESTAMP_MICROSECONDS)) + .build(); + auto const result = read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetWriterTest, PreserveNullability) +{ + constexpr auto num_rows = 100; + + auto const col0_data = random_values(num_rows); + auto const col1_data = random_values(num_rows); + + auto const col0_validity = cudf::test::iterators::no_nulls(); + auto const 
col1_validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + + column_wrapper col0{col0_data.begin(), col0_data.end(), col0_validity}; + column_wrapper col1{col1_data.begin(), col1_data.end(), col1_validity}; + auto const col2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); + + auto const expected = table_view{{col0, col1, *col2}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("mandatory"); + expected_metadata.column_metadata[0].set_nullability(false); + expected_metadata.column_metadata[1].set_name("optional"); + expected_metadata.column_metadata[1].set_nullability(true); + expected_metadata.column_metadata[2].set_name("lists"); + expected_metadata.column_metadata[2].set_nullability(true); + // offsets is a cudf thing that's not part of the parquet schema so it won't have nullability set + expected_metadata.column_metadata[2].child(0).set_name("offsets"); + expected_metadata.column_metadata[2].child(1).set_name("element"); + expected_metadata.column_metadata[2].child(1).set_nullability(false); + expected_metadata.column_metadata[2].child(1).child(0).set_name("offsets"); + expected_metadata.column_metadata[2].child(1).child(1).set_name("element"); + expected_metadata.column_metadata[2].child(1).child(1).set_nullability(true); + + auto const filepath = temp_env->get_temp_filepath("PreserveNullability.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(expected_metadata); + + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options const in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(in_opts); + auto const read_metadata = cudf::io::table_input_metadata{result.metadata}; + + // test that expected_metadata matches read_metadata + std::function + 
compare_names_and_nullability = [&](auto lhs, auto rhs) { + EXPECT_EQ(lhs.get_name(), rhs.get_name()); + ASSERT_EQ(lhs.is_nullability_defined(), rhs.is_nullability_defined()); + if (lhs.is_nullability_defined()) { EXPECT_EQ(lhs.nullable(), rhs.nullable()); } + ASSERT_EQ(lhs.num_children(), rhs.num_children()); + for (int i = 0; i < lhs.num_children(); ++i) { + compare_names_and_nullability(lhs.child(i), rhs.child(i)); + } + }; + + ASSERT_EQ(expected_metadata.column_metadata.size(), read_metadata.column_metadata.size()); + + for (size_t i = 0; i < expected_metadata.column_metadata.size(); ++i) { + compare_names_and_nullability(expected_metadata.column_metadata[i], + read_metadata.column_metadata[i]); + } +} + +TEST_F(ParquetWriterTest, EmptyMinStringStatistics) +{ + char const* const min_val = ""; + char const* const max_val = "zzz"; + std::vector strings{min_val, max_val, "pining", "for", "the", "fjords"}; + + column_wrapper string_col{strings.begin(), strings.end()}; + auto const output = table_view{{string_col}}; + auto const filepath = temp_env->get_temp_filepath("EmptyMinStringStatistics.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + read_footer(source, &fmd); + + ASSERT_TRUE(fmd.row_groups.size() > 0); + ASSERT_TRUE(fmd.row_groups[0].columns.size() > 0); + auto const& chunk = fmd.row_groups[0].columns[0]; + auto const stats = get_statistics(chunk); + + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + auto const min_value = std::string{reinterpret_cast(stats.min_value.value().data()), + stats.min_value.value().size()}; + auto const max_value = std::string{reinterpret_cast(stats.max_value.value().data()), + stats.max_value.value().size()}; + EXPECT_EQ(min_value, std::string(min_val)); 
+ EXPECT_EQ(max_value, std::string(max_val)); +} + +// custom mem mapped data sink that supports device writes +template +class custom_test_memmap_sink : public cudf::io::data_sink { + public: + explicit custom_test_memmap_sink(std::vector* mm_writer_buf) + { + mm_writer = cudf::io::data_sink::create(mm_writer_buf); + } + + virtual ~custom_test_memmap_sink() { mm_writer->flush(); } + + void host_write(void const* data, size_t size) override { mm_writer->host_write(data, size); } + + [[nodiscard]] bool supports_device_write() const override { return supports_device_writes; } + + void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + { + this->device_write_async(gpu_data, size, stream).get(); + } + + std::future device_write_async(void const* gpu_data, + size_t size, + rmm::cuda_stream_view stream) override + { + return std::async(std::launch::deferred, [=] { + char* ptr = nullptr; + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDefault, stream.value())); + stream.synchronize(); + mm_writer->host_write(ptr, size); + CUDF_CUDA_TRY(cudaFreeHost(ptr)); + }); + } + + void flush() override { mm_writer->flush(); } + + size_t bytes_written() override { return mm_writer->bytes_written(); } + + private: + std::unique_ptr mm_writer; +}; + +//////////////////////// +// Numeric type tests + +// Typed test fixture for numeric type tests +template +struct ParquetWriterNumericTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +TYPED_TEST_SUITE(ParquetWriterNumericTypeTest, SupportedTypes); + +TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumn) +{ + auto sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i % 400); }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 800; + column_wrapper col(sequence, sequence 
+ num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("SingleColumn.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls) +{ + auto sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i); }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); + + constexpr auto num_rows = 100; + column_wrapper col(sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("SingleColumnWithNulls.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +///////////////////////// +// timestamp type tests + +// Typed test fixture for timestamp type tests +template +struct ParquetWriterTimestampTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +TYPED_TEST_SUITE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes); + +TYPED_TEST(ParquetWriterTimestampTypeTest, Timestamps) +{ + auto sequence = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); + auto validity = 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("Timestamps.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .timestamp_type(this->type()); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampsWithNulls) +{ + auto sequence = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 30) && (i < 60); }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("TimestampsWithNulls.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .timestamp_type(this->type()); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow) +{ + constexpr int64_t max = std::numeric_limits::max(); + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); + auto validity = 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("ParquetTimestampOverflow.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .timestamp_type(this->type()); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +////////////////////////////// +// writer stress tests + +// Base test fixture for "stress" tests +struct ParquetWriterStressTest : public cudf::test::BaseFixture {}; + +TEST_F(ParquetWriterStressTest, LargeTableWeakCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, LargeTableGoodCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = 
create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, LargeTableWithValids) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWeakCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args 
= + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableGoodCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWithValids) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} diff --git a/cpp/tests/io/row_selection_test.cpp b/cpp/tests/io/row_selection_test.cpp 
index b4583ac4f17..0c259c81a23 100644 --- a/cpp/tests/io/row_selection_test.cpp +++ b/cpp/tests/io/row_selection_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp index e186dcda9cb..6f46df20633 100644 --- a/cpp/tests/io/text/data_chunk_source_test.cpp +++ b/cpp/tests/io/text/data_chunk_source_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp index afa02741be1..b207c3f15a6 100644 --- a/cpp/tests/io/text/multibyte_split_test.cpp +++ b/cpp/tests/io/text/multibyte_split_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu index a14e7ecf5b3..3bb15a59aa3 100644 --- a/cpp/tests/io/type_inference_test.cu +++ b/cpp/tests/io/type_inference_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index d70a074692e..98befb0a3ee 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ */ #include +#include + #include #include diff --git a/cpp/tests/iterator/pair_iterator_test_numeric.cu b/cpp/tests/iterator/pair_iterator_test_numeric.cu index 420b3d4aff5..3447aa0dde6 100644 --- a/cpp/tests/iterator/pair_iterator_test_numeric.cu +++ b/cpp/tests/iterator/pair_iterator_test_numeric.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ */ #include +#include + #include #include diff --git a/cpp/tests/iterator/scalar_iterator_test.cu b/cpp/tests/iterator/scalar_iterator_test.cu index b867703535e..8d24dc41e56 100644 --- a/cpp/tests/iterator/scalar_iterator_test.cu +++ b/cpp/tests/iterator/scalar_iterator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +14,8 @@ */ #include +#include + #include #include diff --git a/cpp/tests/iterator/value_iterator.cpp b/cpp/tests/iterator/value_iterator.cpp index a25b19e99d8..22bc7475dbe 100644 --- a/cpp/tests/iterator/value_iterator.cpp +++ b/cpp/tests/iterator/value_iterator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,5 +14,6 @@ */ #include +#include CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/iterator/value_iterator_test_transform.cu b/cpp/tests/iterator/value_iterator_test_transform.cu index a309cfd6327..417233e759b 100644 --- a/cpp/tests/iterator/value_iterator_test_transform.cu +++ b/cpp/tests/iterator/value_iterator_test_transform.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ */ #include +#include + #include #include #include diff --git a/cpp/tests/jit/parse_ptx_function.cpp b/cpp/tests/jit/parse_ptx_function.cpp index 5f00c5f561a..f6df2abc01a 100644 --- a/cpp/tests/jit/parse_ptx_function.cpp +++ b/cpp/tests/jit/parse_ptx_function.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include +#include #include struct JitParseTest : public ::testing::Test {}; diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index a416df0c7c3..651e44511fb 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/labeling/label_bins_tests.cpp b/cpp/tests/labeling/label_bins_tests.cpp index 91aa13a2cd2..2356c7e5ce1 100644 --- a/cpp/tests/labeling/label_bins_tests.cpp +++ b/cpp/tests/labeling/label_bins_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp index 017cd471e01..92dd5df5ec7 100644 --- a/cpp/tests/lists/extract_tests.cpp +++ b/cpp/tests/lists/extract_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 3558e5676dd..d73c3192549 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/partitioning/hash_partition_test.cpp b/cpp/tests/partitioning/hash_partition_test.cpp index f3d131ae50c..f1486a49bf9 100644 --- a/cpp/tests/partitioning/hash_partition_test.cpp +++ b/cpp/tests/partitioning/hash_partition_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/quantiles/quantile_test.cpp b/cpp/tests/quantiles/quantile_test.cpp index a607376b0a5..b25a4d6c666 100644 --- a/cpp/tests/quantiles/quantile_test.cpp +++ b/cpp/tests/quantiles/quantile_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 7644ac48892..905cd67bc95 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/replace/clamp_test.cpp b/cpp/tests/replace/clamp_test.cpp index a13829c5abc..74ee27137ed 100644 --- a/cpp/tests/replace/clamp_test.cpp +++ b/cpp/tests/replace/clamp_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/replace/normalize_replace_tests.cpp b/cpp/tests/replace/normalize_replace_tests.cpp index 80b9416eaaa..50736940520 100644 --- a/cpp/tests/replace/normalize_replace_tests.cpp +++ b/cpp/tests/replace/normalize_replace_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include // This is the main test fixture struct ReplaceTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/replace/replace_nans_tests.cpp b/cpp/tests/replace/replace_nans_tests.cpp index c74f6d6e6a6..35232204db7 100644 --- a/cpp/tests/replace/replace_nans_tests.cpp +++ b/cpp/tests/replace/replace_nans_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index a7c54145708..6c23dd6bdc8 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018 BlazingDB, Inc. * Copyright 2018 Alexander Ocsa @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index 63460b0cb15..8b953079d34 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018 BlazingDB, Inc. * Copyright 2018 Cristhian Alberto Gonzales Castillo @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index e8ea9d619c5..bc7488bbf9e 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index d0181974479..c0307000f5c 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp index f97bb7a5323..64d08fa7338 100644 --- a/cpp/tests/round/round_tests.cpp +++ b/cpp/tests/round/round_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index 7da5c408a48..5f132f3ace9 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp index d2f2b5d6a2e..cb689abb8d8 100644 --- a/cpp/tests/scalar/scalar_test.cpp +++ b/cpp/tests/scalar/scalar_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/search/search_test.cpp b/cpp/tests/search/search_test.cpp index efff7cd31c6..7550cc27161 100644 --- a/cpp/tests/search/search_test.cpp +++ b/cpp/tests/search/search_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/sort/is_sorted_tests.cpp b/cpp/tests/sort/is_sorted_tests.cpp index ba7c9d82533..271b119ff80 100644 --- a/cpp/tests/sort/is_sorted_tests.cpp +++ b/cpp/tests/sort/is_sorted_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index e0828b7a0db..e84275f41ef 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp index 7ee090cc13f..cb7d11dab35 100644 --- a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp +++ b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index c7ceb899833..d1e0dfb1ff1 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp index b7617fc5724..fb25c67b763 100644 --- a/cpp/tests/structs/structs_column_tests.cpp +++ b/cpp/tests/structs/structs_column_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/table/table_tests.cpp b/cpp/tests/table/table_tests.cpp index f8d94afdb20..0d6b870c33b 100644 --- a/cpp/tests/table/table_tests.cpp +++ b/cpp/tests/table/table_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/text/ngrams_tests.cpp b/cpp/tests/text/ngrams_tests.cpp index 7b179588385..c5a5a342471 100644 --- a/cpp/tests/text/ngrams_tests.cpp +++ b/cpp/tests/text/ngrams_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp index b7950052cf9..ce8ed9285fe 100644 --- a/cpp/tests/transform/bools_to_mask_test.cpp +++ b/cpp/tests/transform/bools_to_mask_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp index eff327e75be..5fa02d9978a 100644 --- a/cpp/tests/transform/integration/unary_transform_test.cpp +++ b/cpp/tests/transform/integration/unary_transform_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/transform/row_conversion.cpp b/cpp/tests/transform/row_conversion.cpp index 6d8f714401c..e54929f1651 100644 --- a/cpp/tests/transform/row_conversion.cpp +++ b/cpp/tests/transform/row_conversion.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/transpose/transpose_test.cpp b/cpp/tests/transpose/transpose_test.cpp index cf46dd74138..59094db6cc3 100644 --- a/cpp/tests/transpose/transpose_test.cpp +++ b/cpp/tests/transpose/transpose_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/types/traits_test.cpp b/cpp/tests/types/traits_test.cpp index 32d55624fc6..53bf224649e 100644 --- a/cpp/tests/types/traits_test.cpp +++ b/cpp/tests/types/traits_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index 7b2384d38cb..d7df8f03ec1 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/unary/unary_ops_test.cpp b/cpp/tests/unary/unary_ops_test.cpp index 76d1f769856..19c18a8b0c1 100644 --- a/cpp/tests/unary/unary_ops_test.cpp +++ b/cpp/tests/unary/unary_ops_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/utilities_tests/column_utilities_tests.cpp b/cpp/tests/utilities_tests/column_utilities_tests.cpp index 9c2aac0a0b5..f5b7a499243 100644 --- a/cpp/tests/utilities_tests/column_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/column_utilities_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include diff --git a/cpp/tests/utilities_tests/column_wrapper_tests.cpp b/cpp/tests/utilities_tests/column_wrapper_tests.cpp index 9d2cc257f02..da17e33e11a 100644 --- a/cpp/tests/utilities_tests/column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/column_wrapper_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index 881a237de72..870528d306c 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu index f7d3df18ffd..4086c5a91bb 100644 --- a/cpp/tests/wrappers/timestamps_test.cu +++ b/cpp/tests/wrappers/timestamps_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include From 433bdc32eeecf8fa2fcd900214a9005eb4a07992 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 9 Jan 2024 14:54:16 -0800 Subject: [PATCH 02/23] Convert `FieldType` to scoped enum (#14642) Switch to scoped enum (`enum class`); they are better because, well, values now have a scope. Another benefit in this case - values are now named consistently with compact protocol. De-duplicated some code, now that more static_casts are required and duplication stands out more. 
Authors: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) Approvers: - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14642 --- .../io/parquet/compact_protocol_reader.cpp | 110 ++++++++++-------- .../io/parquet/compact_protocol_writer.cpp | 43 +++---- .../io/parquet/compact_protocol_writer.hpp | 11 +- cpp/src/io/parquet/page_enc.cu | 49 ++++---- cpp/src/io/parquet/page_hdr.cu | 34 +++--- cpp/src/io/parquet/parquet_common.hpp | 29 ++--- 6 files changed, 153 insertions(+), 123 deletions(-) diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 55848802f12..d39d832c18c 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,28 +45,37 @@ class parquet_field { std::string field_type_string(FieldType type) { switch (type) { - case ST_FLD_TRUE: return "bool(true)"; - case ST_FLD_FALSE: return "bool(false)"; - case ST_FLD_BYTE: return "int8"; - case ST_FLD_I16: return "int16"; - case ST_FLD_I32: return "int32"; - case ST_FLD_I64: return "int64"; - case ST_FLD_DOUBLE: return "double"; - case ST_FLD_BINARY: return "binary"; - case ST_FLD_STRUCT: return "struct"; - case ST_FLD_LIST: return "list"; - case ST_FLD_SET: return "set"; - default: return "unknown(" + std::to_string(type) + ")"; + case FieldType::BOOLEAN_TRUE: return "bool(true)"; + case FieldType::BOOLEAN_FALSE: return "bool(false)"; + case FieldType::I8: return "int8"; + case FieldType::I16: return "int16"; + case FieldType::I32: return "int32"; + case FieldType::I64: return "int64"; + case FieldType::DOUBLE: return "double"; + case FieldType::BINARY: return "binary"; + case FieldType::LIST: return "list"; + case FieldType::SET: return "set"; + case FieldType::MAP: return "map"; + case FieldType::STRUCT: return "struct"; + case FieldType::UUID: return "UUID"; + default: return "unknown(" + std::to_string(static_cast(type)) + ")"; } } void assert_field_type(int type, FieldType expected) { - CUDF_EXPECTS(type == expected, + CUDF_EXPECTS(type == static_cast(expected), "expected " + field_type_string(expected) + " field, got " + field_type_string(static_cast(type)) + " field instead"); } +void assert_bool_field_type(int type) +{ + auto const field_type = static_cast(type); + CUDF_EXPECTS(field_type == FieldType::BOOLEAN_TRUE || field_type == FieldType::BOOLEAN_FALSE, + "expected bool field, got " + field_type_string(field_type) + " field instead"); +} + /** * @brief Abstract base class for list functors. 
*/ @@ -86,7 +95,7 @@ class parquet_field_list : public parquet_field { public: inline void operator()(CompactProtocolReader* cpr, int field_type) { - assert_field_type(field_type, ST_FLD_LIST); + assert_field_type(field_type, FieldType::LIST); auto const [t, n] = cpr->get_listh(); assert_field_type(t, EXPECTED_ELEM_TYPE); val.resize(n); @@ -111,8 +120,8 @@ class parquet_field_bool : public parquet_field { inline void operator()(CompactProtocolReader* cpr, int field_type) { - CUDF_EXPECTS(field_type == ST_FLD_TRUE || field_type == ST_FLD_FALSE, "expected bool field"); - val = field_type == ST_FLD_TRUE; + assert_bool_field_type(field_type); + val = field_type == static_cast(FieldType::BOOLEAN_TRUE); } }; @@ -122,14 +131,13 @@ class parquet_field_bool : public parquet_field { * @return True if field types mismatch or if the process of reading a * bool fails */ -struct parquet_field_bool_list : public parquet_field_list { +struct parquet_field_bool_list : public parquet_field_list { parquet_field_bool_list(int f, std::vector& v) : parquet_field_list(f, v) { auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { auto const current_byte = cpr->getb(); - CUDF_EXPECTS(current_byte == ST_FLD_TRUE || current_byte == ST_FLD_FALSE, - "expected bool field"); - this->val[i] = current_byte == ST_FLD_TRUE; + assert_bool_field_type(current_byte); + this->val[i] = current_byte == static_cast(FieldType::BOOLEAN_TRUE); }; bind_read_func(read_value); } @@ -162,9 +170,9 @@ class parquet_field_int : public parquet_field { } }; -using parquet_field_int8 = parquet_field_int; -using parquet_field_int32 = parquet_field_int; -using parquet_field_int64 = parquet_field_int; +using parquet_field_int8 = parquet_field_int; +using parquet_field_int32 = parquet_field_int; +using parquet_field_int64 = parquet_field_int; /** * @brief Functor to read a vector of integers from CompactProtocolReader @@ -183,7 +191,7 @@ struct parquet_field_int_list : public parquet_field_list { } }; 
-using parquet_field_int64_list = parquet_field_int_list; +using parquet_field_int64_list = parquet_field_int_list; /** * @brief Functor to read a string from CompactProtocolReader @@ -199,7 +207,7 @@ class parquet_field_string : public parquet_field { inline void operator()(CompactProtocolReader* cpr, int field_type) { - assert_field_type(field_type, ST_FLD_BINARY); + assert_field_type(field_type, FieldType::BINARY); auto const n = cpr->get_u32(); CUDF_EXPECTS(n < static_cast(cpr->m_end - cpr->m_cur), "string length mismatch"); @@ -214,7 +222,7 @@ class parquet_field_string : public parquet_field { * @return True if field types mismatch or if the process of reading a * string fails */ -struct parquet_field_string_list : public parquet_field_list { +struct parquet_field_string_list : public parquet_field_list { parquet_field_string_list(int f, std::vector& v) : parquet_field_list(f, v) { auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { @@ -241,7 +249,7 @@ class parquet_field_enum : public parquet_field { parquet_field_enum(int f, Enum& v) : parquet_field(f), val(v) {} inline void operator()(CompactProtocolReader* cpr, int field_type) { - assert_field_type(field_type, ST_FLD_I32); + assert_field_type(field_type, FieldType::I32); val = static_cast(cpr->get_i32()); } }; @@ -253,8 +261,9 @@ class parquet_field_enum : public parquet_field { * enum fails */ template -struct parquet_field_enum_list : public parquet_field_list { - parquet_field_enum_list(int f, std::vector& v) : parquet_field_list(f, v) +struct parquet_field_enum_list : public parquet_field_list { + parquet_field_enum_list(int f, std::vector& v) + : parquet_field_list(f, v) { auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { this->val[i] = static_cast(cpr->get_i32()); @@ -278,7 +287,7 @@ class parquet_field_struct : public parquet_field { inline void operator()(CompactProtocolReader* cpr, int field_type) { - assert_field_type(field_type, ST_FLD_STRUCT); + 
assert_field_type(field_type, FieldType::STRUCT); cpr->read(&val); } }; @@ -324,7 +333,7 @@ class parquet_field_union_enumerator : public parquet_field { inline void operator()(CompactProtocolReader* cpr, int field_type) { - assert_field_type(field_type, ST_FLD_STRUCT); + assert_field_type(field_type, FieldType::STRUCT); cpr->skip_struct_field(field_type); val = static_cast(field()); } @@ -337,8 +346,9 @@ class parquet_field_union_enumerator : public parquet_field { * struct fails */ template -struct parquet_field_struct_list : public parquet_field_list { - parquet_field_struct_list(int f, std::vector& v) : parquet_field_list(f, v) +struct parquet_field_struct_list : public parquet_field_list { + parquet_field_struct_list(int f, std::vector& v) + : parquet_field_list(f, v) { auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { cpr->read(&this->val[i]); @@ -361,7 +371,7 @@ class parquet_field_binary : public parquet_field { inline void operator()(CompactProtocolReader* cpr, int field_type) { - assert_field_type(field_type, ST_FLD_BINARY); + assert_field_type(field_type, FieldType::BINARY); auto const n = cpr->get_u32(); CUDF_EXPECTS(n <= static_cast(cpr->m_end - cpr->m_cur), "binary length mismatch"); @@ -377,7 +387,8 @@ class parquet_field_binary : public parquet_field { * @return True if field types mismatch or if the process of reading a * binary fails */ -struct parquet_field_binary_list : public parquet_field_list, ST_FLD_BINARY> { +struct parquet_field_binary_list + : public parquet_field_list, FieldType::BINARY> { parquet_field_binary_list(int f, std::vector>& v) : parquet_field_list(f, v) { auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { @@ -404,7 +415,7 @@ class parquet_field_struct_blob : public parquet_field { parquet_field_struct_blob(int f, std::vector& v) : parquet_field(f), val(v) {} inline void operator()(CompactProtocolReader* cpr, int field_type) { - assert_field_type(field_type, ST_FLD_STRUCT); + 
assert_field_type(field_type, FieldType::STRUCT); uint8_t const* const start = cpr->m_cur; cpr->skip_struct_field(field_type); if (cpr->m_cur > start) { val.assign(start, cpr->m_cur - 1); } @@ -439,24 +450,25 @@ class parquet_field_optional : public parquet_field { */ void CompactProtocolReader::skip_struct_field(int t, int depth) { - switch (t) { - case ST_FLD_TRUE: - case ST_FLD_FALSE: break; - case ST_FLD_I16: - case ST_FLD_I32: - case ST_FLD_I64: get_u64(); break; - case ST_FLD_BYTE: skip_bytes(1); break; - case ST_FLD_DOUBLE: skip_bytes(8); break; - case ST_FLD_BINARY: skip_bytes(get_u32()); break; - case ST_FLD_LIST: [[fallthrough]]; - case ST_FLD_SET: { + auto const t_enum = static_cast(t); + switch (t_enum) { + case FieldType::BOOLEAN_TRUE: + case FieldType::BOOLEAN_FALSE: break; + case FieldType::I16: + case FieldType::I32: + case FieldType::I64: get_u64(); break; + case FieldType::I8: skip_bytes(1); break; + case FieldType::DOUBLE: skip_bytes(8); break; + case FieldType::BINARY: skip_bytes(get_u32()); break; + case FieldType::LIST: + case FieldType::SET: { auto const [t, n] = get_listh(); CUDF_EXPECTS(depth <= 10, "struct nesting too deep"); for (uint32_t i = 0; i < n; i++) { skip_struct_field(t, depth + 1); } } break; - case ST_FLD_STRUCT: + case FieldType::STRUCT: for (;;) { int const c = getb(); t = c & 0xf; diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index f857b75f707..d610ec6c546 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -268,39 +268,40 @@ uint32_t CompactProtocolFieldWriter::put_int(int64_t v) return put_uint(((v ^ -s) << 1) + s); } -void CompactProtocolFieldWriter::put_field_header(int f, int cur, int t) +void CompactProtocolFieldWriter::put_field_header(int f, int cur, FieldType t) { if (f > cur && f <= cur + 15) - put_byte(((f - cur) << 4) | t); + put_packed_type_byte(f - cur, t); else { - put_byte(t); + put_byte(static_cast(t)); put_int(f); } } inline void CompactProtocolFieldWriter::field_bool(int field, bool b) { - put_field_header(field, current_field_value, b ? ST_FLD_TRUE : ST_FLD_FALSE); + put_field_header( + field, current_field_value, b ? FieldType::BOOLEAN_TRUE : FieldType::BOOLEAN_FALSE); current_field_value = field; } inline void CompactProtocolFieldWriter::field_int8(int field, int8_t val) { - put_field_header(field, current_field_value, ST_FLD_BYTE); + put_field_header(field, current_field_value, FieldType::I8); put_byte(val); current_field_value = field; } inline void CompactProtocolFieldWriter::field_int(int field, int32_t val) { - put_field_header(field, current_field_value, ST_FLD_I32); + put_field_header(field, current_field_value, FieldType::I32); put_int(val); current_field_value = field; } inline void CompactProtocolFieldWriter::field_int(int field, int64_t val) { - put_field_header(field, current_field_value, ST_FLD_I64); + put_field_header(field, current_field_value, FieldType::I64); put_int(val); current_field_value = field; } @@ -309,8 +310,8 @@ template <> inline void CompactProtocolFieldWriter::field_int_list(int field, std::vector const& val) { - put_field_header(field, current_field_value, ST_FLD_LIST); - put_byte(static_cast((std::min(val.size(), 0xfUL) << 4) | ST_FLD_I64)); + put_field_header(field, current_field_value, FieldType::LIST); + put_packed_type_byte(val.size(), FieldType::I64); if (val.size() >= 0xfUL) { put_uint(val.size()); } for (auto const v : val) { put_int(v); @@ -321,8 +322,8 @@ inline void 
CompactProtocolFieldWriter::field_int_list(int field, template inline void CompactProtocolFieldWriter::field_int_list(int field, std::vector const& val) { - put_field_header(field, current_field_value, ST_FLD_LIST); - put_byte(static_cast((std::min(val.size(), 0xfUL) << 4) | ST_FLD_I32)); + put_field_header(field, current_field_value, FieldType::LIST); + put_packed_type_byte(val.size(), FieldType::I32); if (val.size() >= 0xfUL) { put_uint(val.size()); } for (auto const& v : val) { put_int(static_cast(v)); @@ -333,7 +334,7 @@ inline void CompactProtocolFieldWriter::field_int_list(int field, std::vector inline void CompactProtocolFieldWriter::field_struct(int field, T const& val) { - put_field_header(field, current_field_value, ST_FLD_STRUCT); + put_field_header(field, current_field_value, FieldType::STRUCT); if constexpr (not std::is_empty_v) { writer.write(val); // write the struct if it's not empty } else { @@ -344,7 +345,7 @@ inline void CompactProtocolFieldWriter::field_struct(int field, T const& val) inline void CompactProtocolFieldWriter::field_empty_struct(int field) { - put_field_header(field, current_field_value, ST_FLD_STRUCT); + put_field_header(field, current_field_value, FieldType::STRUCT); put_byte(0); // add a stop field current_field_value = field; } @@ -352,8 +353,8 @@ inline void CompactProtocolFieldWriter::field_empty_struct(int field) template inline void CompactProtocolFieldWriter::field_struct_list(int field, std::vector const& val) { - put_field_header(field, current_field_value, ST_FLD_LIST); - put_byte((uint8_t)((std::min(val.size(), (size_t)0xfu) << 4) | ST_FLD_STRUCT)); + put_field_header(field, current_field_value, FieldType::LIST); + put_packed_type_byte(val.size(), FieldType::STRUCT); if (val.size() >= 0xf) put_uint(val.size()); for (auto& v : val) { writer.write(v); @@ -370,7 +371,7 @@ inline size_t CompactProtocolFieldWriter::value() inline void CompactProtocolFieldWriter::field_struct_blob(int field, std::vector const& val) { - 
put_field_header(field, current_field_value, ST_FLD_STRUCT); + put_field_header(field, current_field_value, FieldType::STRUCT); put_byte(val.data(), static_cast(val.size())); put_byte(0); current_field_value = field; @@ -378,7 +379,7 @@ inline void CompactProtocolFieldWriter::field_struct_blob(int field, inline void CompactProtocolFieldWriter::field_binary(int field, std::vector const& val) { - put_field_header(field, current_field_value, ST_FLD_BINARY); + put_field_header(field, current_field_value, FieldType::BINARY); put_uint(val.size()); put_byte(val.data(), static_cast(val.size())); current_field_value = field; @@ -386,7 +387,7 @@ inline void CompactProtocolFieldWriter::field_binary(int field, std::vector(val.data()), static_cast(val.size())); @@ -396,8 +397,8 @@ inline void CompactProtocolFieldWriter::field_string(int field, std::string cons inline void CompactProtocolFieldWriter::field_string_list(int field, std::vector const& val) { - put_field_header(field, current_field_value, ST_FLD_LIST); - put_byte((uint8_t)((std::min(val.size(), (size_t)0xfu) << 4) | ST_FLD_BINARY)); + put_field_header(field, current_field_value, FieldType::LIST); + put_packed_type_byte(val.size(), FieldType::BINARY); if (val.size() >= 0xf) put_uint(val.size()); for (auto& v : val) { put_uint(v.size()); diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index a2ed0f1f4dc..2ed7c078f8b 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -78,7 +78,14 @@ class CompactProtocolFieldWriter { uint32_t put_int(int64_t v); - void put_field_header(int f, int cur, int t); + template + void put_packed_type_byte(T high_bits, FieldType t) + { + uint8_t const clamped_high_bits = std::min(std::max(high_bits, T{0}), T{0xf}); + put_byte((clamped_high_bits << 4) | static_cast(t)); + } + + void put_field_header(int f, int cur, FieldType t); inline void field_bool(int field, bool b); diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 8e1c0682ffd..e16551024d1 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2207,13 +2207,14 @@ inline __device__ uint8_t* cpw_put_int64(uint8_t* p, int64_t v) return cpw_put_uint64(p, (v ^ -s) * 2 + s); } -inline __device__ uint8_t* cpw_put_fldh(uint8_t* p, int f, int cur, int t) +inline __device__ uint8_t* cpw_put_fldh(uint8_t* p, int f, int cur, FieldType t) { + auto const t_num = static_cast(t); if (f > cur && f <= cur + 15) { - *p++ = ((f - cur) << 4) | t; + *p++ = ((f - cur) << 4) | t_num; return p; } else { - *p++ = t; + *p++ = t_num; return cpw_put_int32(p, f); } } @@ -2231,7 +2232,7 @@ class header_encoder { inline __device__ void field_struct_begin(int field) { current_header_ptr = - cpw_put_fldh(current_header_ptr, field, current_field_index, ST_FLD_STRUCT); + cpw_put_fldh(current_header_ptr, field, current_field_index, FieldType::STRUCT); current_field_index = 0; } @@ -2241,11 +2242,13 @@ class header_encoder { current_field_index = field; } - inline __device__ void field_list_begin(int field, size_t len, int type) + inline __device__ void field_list_begin(int field, size_t len, FieldType type) { - current_header_ptr = cpw_put_fldh(current_header_ptr, field, 
current_field_index, ST_FLD_LIST); + current_header_ptr = + cpw_put_fldh(current_header_ptr, field, current_field_index, FieldType::LIST); + auto const t_num = static_cast(type); current_header_ptr = cpw_put_uint8( - current_header_ptr, static_cast((std::min(len, size_t{0xfu}) << 4) | type)); + current_header_ptr, static_cast((std::min(len, size_t{0xfu}) << 4) | t_num)); if (len >= 0xf) { current_header_ptr = cpw_put_uint32(current_header_ptr, len); } current_field_index = 0; } @@ -2254,7 +2257,9 @@ class header_encoder { inline __device__ void put_bool(bool value) { - current_header_ptr = cpw_put_uint8(current_header_ptr, value ? ST_FLD_TRUE : ST_FLD_FALSE); + auto const type_byte = + static_cast(value ? FieldType::BOOLEAN_TRUE : FieldType::BOOLEAN_FALSE); + current_header_ptr = cpw_put_uint8(current_header_ptr, type_byte); } inline __device__ void put_binary(void const* value, uint32_t length) @@ -2272,15 +2277,18 @@ class header_encoder { inline __device__ void field_bool(int field, bool value) { - current_header_ptr = cpw_put_fldh( - current_header_ptr, field, current_field_index, value ? ST_FLD_TRUE : ST_FLD_FALSE); + current_header_ptr = cpw_put_fldh(current_header_ptr, + field, + current_field_index, + value ? 
FieldType::BOOLEAN_TRUE : FieldType::BOOLEAN_FALSE); current_field_index = field; } template inline __device__ void field_int32(int field, T value) { - current_header_ptr = cpw_put_fldh(current_header_ptr, field, current_field_index, ST_FLD_I32); + current_header_ptr = + cpw_put_fldh(current_header_ptr, field, current_field_index, FieldType::I32); current_header_ptr = cpw_put_int32(current_header_ptr, static_cast(value)); current_field_index = field; } @@ -2288,7 +2296,8 @@ class header_encoder { template inline __device__ void field_int64(int field, T value) { - current_header_ptr = cpw_put_fldh(current_header_ptr, field, current_field_index, ST_FLD_I64); + current_header_ptr = + cpw_put_fldh(current_header_ptr, field, current_field_index, FieldType::I64); current_header_ptr = cpw_put_int64(current_header_ptr, static_cast(value)); current_field_index = field; } @@ -2296,7 +2305,7 @@ class header_encoder { inline __device__ void field_binary(int field, void const* value, uint32_t length) { current_header_ptr = - cpw_put_fldh(current_header_ptr, field, current_field_index, ST_FLD_BINARY); + cpw_put_fldh(current_header_ptr, field, current_field_index, FieldType::BINARY); current_header_ptr = cpw_put_uint32(current_header_ptr, length); memcpy(current_header_ptr, value, length); current_header_ptr += length; @@ -2868,13 +2877,13 @@ __global__ void __launch_bounds__(1) : align8(ck_g->column_index_blob + ck_g->column_index_size - column_index_truncate_length); // null_pages - encoder.field_list_begin(1, num_data_pages, ST_FLD_TRUE); + encoder.field_list_begin(1, num_data_pages, FieldType::BOOLEAN_TRUE); for (uint32_t page = first_data_page; page < num_pages; page++) { encoder.put_bool(column_stats[pageidx + page].non_nulls == 0); } encoder.field_list_end(1); // min_values - encoder.field_list_begin(2, num_data_pages, ST_FLD_BINARY); + encoder.field_list_begin(2, num_data_pages, FieldType::BINARY); for (uint32_t page = first_data_page; page < num_pages; page++) { auto 
const [min_ptr, min_size] = get_extremum(&column_stats[pageidx + page].min_value, col_g.stats_dtype, @@ -2885,7 +2894,7 @@ __global__ void __launch_bounds__(1) } encoder.field_list_end(2); // max_values - encoder.field_list_begin(3, num_data_pages, ST_FLD_BINARY); + encoder.field_list_begin(3, num_data_pages, FieldType::BINARY); for (uint32_t page = first_data_page; page < num_pages; page++) { auto const [max_ptr, max_size] = get_extremum(&column_stats[pageidx + page].max_value, col_g.stats_dtype, @@ -2902,7 +2911,7 @@ __global__ void __launch_bounds__(1) col_g.converted_type, num_pages - first_data_page)); // null_counts - encoder.field_list_begin(5, num_data_pages, ST_FLD_I64); + encoder.field_list_begin(5, num_data_pages, FieldType::I64); for (uint32_t page = first_data_page; page < num_pages; page++) { encoder.put_int64(column_stats[pageidx + page].null_count); } @@ -2918,7 +2927,7 @@ __global__ void __launch_bounds__(1) // optionally encode histograms and sum var_bytes. if (cd->max_rep_level > REP_LVL_HIST_CUTOFF) { - encoder.field_list_begin(6, num_data_pages * (cd->max_rep_level + 1), ST_FLD_I64); + encoder.field_list_begin(6, num_data_pages * (cd->max_rep_level + 1), FieldType::I64); thrust::for_each(thrust::seq, page_start, page_end, [&] __device__(auto const& page) { for (int i = 0; i < cd->max_rep_level + 1; i++) { encoder.put_int64(page.rep_histogram[i]); @@ -2929,7 +2938,7 @@ __global__ void __launch_bounds__(1) } if (cd->max_def_level > DEF_LVL_HIST_CUTOFF) { - encoder.field_list_begin(7, num_data_pages * (cd->max_def_level + 1), ST_FLD_I64); + encoder.field_list_begin(7, num_data_pages * (cd->max_def_level + 1), FieldType::I64); thrust::for_each(thrust::seq, page_start, page_end, [&] __device__(auto const& page) { for (int i = 0; i < cd->max_def_level + 1; i++) { encoder.put_int64(page.def_histogram[i]); diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 36157f725e3..cc3f584422d 100644 --- 
a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -114,28 +114,28 @@ __device__ void skip_struct_field(byte_stream_s* bs, int field_type) field_type = c & 0xf; if (!(c & 0xf0)) get_i32(bs); } - switch (field_type) { - case ST_FLD_TRUE: - case ST_FLD_FALSE: break; - case ST_FLD_I16: - case ST_FLD_I32: - case ST_FLD_I64: get_u32(bs); break; - case ST_FLD_BYTE: skip_bytes(bs, 1); break; - case ST_FLD_DOUBLE: skip_bytes(bs, 8); break; - case ST_FLD_BINARY: skip_bytes(bs, get_u32(bs)); break; - case ST_FLD_LIST: - case ST_FLD_SET: { // NOTE: skipping a list of lists is not handled + switch (static_cast(field_type)) { + case FieldType::BOOLEAN_TRUE: + case FieldType::BOOLEAN_FALSE: break; + case FieldType::I16: + case FieldType::I32: + case FieldType::I64: get_u32(bs); break; + case FieldType::I8: skip_bytes(bs, 1); break; + case FieldType::DOUBLE: skip_bytes(bs, 8); break; + case FieldType::BINARY: skip_bytes(bs, get_u32(bs)); break; + case FieldType::LIST: + case FieldType::SET: { // NOTE: skipping a list of lists is not handled auto const c = getb(bs); int n = c >> 4; if (n == 0xf) { n = get_u32(bs); } field_type = c & 0xf; - if (field_type == ST_FLD_STRUCT) { + if (static_cast(field_type) == FieldType::STRUCT) { struct_depth += n; } else { rep_cnt = n; } } break; - case ST_FLD_STRUCT: struct_depth++; break; + case FieldType::STRUCT: struct_depth++; break; } } while (rep_cnt || struct_depth); } @@ -180,7 +180,7 @@ struct ParquetFieldInt32 { inline __device__ bool operator()(byte_stream_s* bs, int field_type) { val = get_i32(bs); - return (field_type != ST_FLD_I32); + return (static_cast(field_type) != FieldType::I32); } }; @@ -199,7 +199,7 @@ struct ParquetFieldEnum { inline __device__ bool 
operator()(byte_stream_s* bs, int field_type) { val = static_cast(get_i32(bs)); - return (field_type != ST_FLD_I32); + return (static_cast(field_type) != FieldType::I32); } }; @@ -218,7 +218,7 @@ struct ParquetFieldStruct { inline __device__ bool operator()(byte_stream_s* bs, int field_type) { - return ((field_type != ST_FLD_STRUCT) || !op(bs)); + return ((static_cast(field_type) != FieldType::STRUCT) || !op(bs)); } }; diff --git a/cpp/src/io/parquet/parquet_common.hpp b/cpp/src/io/parquet/parquet_common.hpp index 50736197eb9..a680e44f360 100644 --- a/cpp/src/io/parquet/parquet_common.hpp +++ b/cpp/src/io/parquet/parquet_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -140,19 +140,20 @@ enum BoundaryOrder { /** * @brief Thrift compact protocol struct field types */ -enum FieldType { - ST_FLD_TRUE = 1, - ST_FLD_FALSE = 2, - ST_FLD_BYTE = 3, - ST_FLD_I16 = 4, - ST_FLD_I32 = 5, - ST_FLD_I64 = 6, - ST_FLD_DOUBLE = 7, - ST_FLD_BINARY = 8, - ST_FLD_LIST = 9, - ST_FLD_SET = 10, - ST_FLD_MAP = 11, - ST_FLD_STRUCT = 12, +enum class FieldType : uint8_t { + BOOLEAN_TRUE = 1, + BOOLEAN_FALSE = 2, + I8 = 3, + I16 = 4, + I32 = 5, + I64 = 6, + DOUBLE = 7, + BINARY = 8, + LIST = 9, + SET = 10, + MAP = 11, + STRUCT = 12, + UUID = 13, }; } // namespace cudf::io::parquet::detail From 8df33eed224bf33ad6013179459dfe41e5f26b2a Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 9 Jan 2024 18:47:38 -0600 Subject: [PATCH 03/23] Expand JIT groupby test suite (#13813) This PR reorganizes and expands the test suite for groupby apply functions using the JIT engine to include nan cases and cases where the groups are larger than a single thread block. 
Authors: - https://github.com/brandon-b-miller Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/13813 --- python/cudf/cudf/tests/test_groupby.py | 371 ++++++++++++++++--------- 1 file changed, 238 insertions(+), 133 deletions(-) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 57aa6e72eae..b46949faa06 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. import collections import datetime @@ -392,19 +392,70 @@ def emulate(df): @pytest.fixture(scope="module") -def groupby_jit_data(): - np.random.seed(0) +def groupby_jit_data_small(): + """ + Return a small dataset for testing JIT Groupby Apply. The dataframe + contains 4 groups of size 1, 2, 3, 4 as well as an additional key + column that can be used to test subgroups within groups. This data + is useful for smoke testing basic numeric results + """ + rng = np.random.default_rng(42) df = DataFrame() - nelem = 20 - df["key1"] = np.random.randint(0, 3, nelem) - df["key2"] = np.random.randint(0, 2, nelem) - df["val1"] = np.random.random(nelem) - df["val2"] = np.random.random(nelem) - df["val3"] = np.random.randint(0, 10, nelem) - df["val4"] = np.random.randint(0, 10, nelem) + key1 = [1] + [2] * 2 + [3] * 3 + [4] * 4 + key2 = [1, 2] * 5 + df["key1"] = key1 + df["key2"] = key2 + + df["val1"] = rng.integers(0, 10, len(key1)) + df["val2"] = rng.integers(0, 10, len(key1)) + + # randomly permute data + df = df.sample(frac=1, ignore_index=True) return df +@pytest.fixture(scope="module") +def groupby_jit_data_large(groupby_jit_data_small): + """ + Larger version of groupby_jit_data_small which contains enough data + to require more than one block per group. 
This data is useful for + testing if JIT GroupBy algorithms scale to larger datasets without + manifesting numerical issues such as overflow. + """ + max_tpb = 1024 + factor = ( + max_tpb + 1 + ) # bigger than a block but not always an exact multiple + df = cudf.concat([groupby_jit_data_small] * factor) + + return df + + +@pytest.fixture(scope="module") +def groupby_jit_data_nans(groupby_jit_data_small): + """ + Returns a modified version of groupby_jit_data_small which contains + nan values. + """ + + df = groupby_jit_data_small.sort_values(["key1", "key2"]) + df["val1"] = df["val1"].astype("float64") + df["val1"][::2] = np.nan + df = df.sample(frac=1, ignore_index=True) + return df + + +@pytest.fixture(scope="module") +def groupby_jit_datasets( + groupby_jit_data_small, groupby_jit_data_large, groupby_jit_data_nans +): + return { + "small": groupby_jit_data_small, + "large": groupby_jit_data_large, + "nans": groupby_jit_data_nans, + } + + + def run_groupby_apply_jit_test(data, func, keys, *args): expect_groupby_obj = data.to_pandas().groupby(keys) got_groupby_obj = data.groupby(keys) @@ -415,6 +466,30 @@ def run_groupby_apply_jit_test(data, func, keys, *args): assert_groupby_results_equal(cudf_jit_result, pandas_result) +def groupby_apply_jit_reductions_test_inner(func, data, dtype): + # ideally we'd just have: + # lambda group: getattr(group, func)() + # but the current kernel caching mechanism relies on pickle which + # does not play nice with local functions. 
What's below uses + # exec as a workaround to write the test functions dynamically + + funcstr = textwrap.dedent( + f""" + def func(df): + return df['val1'].{func}() + """ + ) + lcl = {} + exec(funcstr, lcl) + func = lcl["func"] + + data["val1"] = data["val1"].astype(dtype) + data["val2"] = data["val2"].astype(dtype) + + run_groupby_apply_jit_test(data, func, ["key1"]) + + +# test unary reductions @pytest.mark.parametrize( "dtype", SUPPORTED_GROUPBY_NUMPY_TYPES, @@ -423,13 +498,41 @@ def run_groupby_apply_jit_test(data, func, keys, *args): @pytest.mark.parametrize( "func", ["min", "max", "sum", "mean", "var", "std", "idxmin", "idxmax"] ) -def test_groupby_apply_jit_reductions(func, groupby_jit_data, dtype): - # ideally we'd just have: - # lambda group: getattr(group, func)() - # but the current kernel caching mechanism relies on pickle which - # does not play nice with local functions. What's below uses - # exec as a workaround to write the test functions dynamically +@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) +def test_groupby_apply_jit_unary_reductions( + func, dtype, dataset, groupby_jit_datasets +): + dataset = groupby_jit_datasets[dataset] + + groupby_apply_jit_reductions_test_inner(func, dataset, dtype) + + +# test unary reductions for special values +def groupby_apply_jit_reductions_special_vals_inner( + func, data, dtype, special_val +): + funcstr = textwrap.dedent( + f""" + def func(df): + return df['val1'].{func}() + """ + ) + lcl = {} + exec(funcstr, lcl) + func = lcl["func"] + data["val1"] = data["val1"].astype(dtype) + data["val2"] = data["val2"].astype(dtype) + data["val1"] = special_val + data["val2"] = special_val + + run_groupby_apply_jit_test(data, func, ["key1"]) + + +# test unary index reductions for special values +def groupby_apply_jit_idx_reductions_special_vals_inner( + func, data, dtype, special_val +): funcstr = textwrap.dedent( f""" def func(df): @@ -440,36 +543,129 @@ def func(df): exec(funcstr, lcl) func = 
lcl["func"] - groupby_jit_data["val1"] = groupby_jit_data["val1"].astype(dtype) - groupby_jit_data["val2"] = groupby_jit_data["val2"].astype(dtype) + data["val1"] = data["val1"].astype(dtype) + data["val2"] = data["val2"].astype(dtype) + data["val1"] = special_val + data["val2"] = special_val - run_groupby_apply_jit_test(groupby_jit_data, func, ["key1"]) + run_groupby_apply_jit_test(data, func, ["key1"]) -@pytest.mark.parametrize("dtype", SUPPORTED_GROUPBY_NUMPY_TYPES) -def test_groupby_apply_jit_correlation(groupby_jit_data, dtype): +@pytest.mark.parametrize("dtype", ["float64", "float32"]) +@pytest.mark.parametrize("func", ["min", "max", "sum", "mean", "var", "std"]) +@pytest.mark.parametrize("special_val", [np.nan, np.inf, -np.inf]) +@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) +def test_groupby_apply_jit_reductions_special_vals( + func, dtype, dataset, groupby_jit_datasets, special_val +): + dataset = groupby_jit_datasets[dataset] + groupby_apply_jit_reductions_special_vals_inner( + func, dataset, dtype, special_val + ) - groupby_jit_data["val3"] = groupby_jit_data["val3"].astype(dtype) - groupby_jit_data["val4"] = groupby_jit_data["val4"].astype(dtype) - keys = ["key1", "key2"] +@pytest.mark.parametrize("dtype", ["float64"]) +@pytest.mark.parametrize("func", ["idxmax", "idxmin"]) +@pytest.mark.parametrize( + "special_val", + [ + pytest.param( + np.nan, + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/13832" + ), + ), + np.inf, + -np.inf, + ], +) +@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) +def test_groupby_apply_jit_idx_reductions_special_vals( + func, dtype, dataset, groupby_jit_datasets, special_val +): + dataset = groupby_jit_datasets[dataset] + groupby_apply_jit_idx_reductions_special_vals_inner( + func, dataset, dtype, special_val + ) + + +@pytest.mark.parametrize("dtype", ["int32"]) +def test_groupby_apply_jit_sum_integer_overflow(dtype): + max = np.iinfo(dtype).max + + data = DataFrame( 
+ { + "a": [0, 0, 0], + "b": [max, max, max], + } + ) def func(group): - return group["val3"].corr(group["val4"]) + return group["b"].sum() - if dtype.kind == "f": + run_groupby_apply_jit_test(data, func, ["a"]) + + +@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) +@pytest.mark.parametrize( + "dataset", + [ + pytest.param( + "small", + marks=[ + pytest.mark.filterwarnings( + "ignore:Degrees of Freedom <= 0 for slice" + ), + pytest.mark.filterwarnings( + "ignore:divide by zero encountered in divide" + ), + ], + ), + "large", + ], +) +def test_groupby_apply_jit_correlation(dataset, groupby_jit_datasets, dtype): + + dataset = groupby_jit_datasets[dataset] + + dataset["val1"] = dataset["val1"].astype(dtype) + dataset["val2"] = dataset["val2"].astype(dtype) + + keys = ["key1"] + + def func(group): + return group["val1"].corr(group["val2"]) + + if np.dtype(dtype).kind == "f": + # Correlation of floating types is not yet supported: + # https://github.com/rapidsai/cudf/issues/13839 m = ( f"Series.corr\\(Series\\) is not " f"supported for \\({dtype}, {dtype}\\)" ) with pytest.raises(UDFError, match=m): - run_groupby_apply_jit_test(groupby_jit_data, func, keys) + run_groupby_apply_jit_test(dataset, func, keys) return - run_groupby_apply_jit_test(groupby_jit_data, func, keys) + run_groupby_apply_jit_test(dataset, func, keys) + + +@pytest.mark.parametrize("dtype", ["int32", "int64"]) +def test_groupby_apply_jit_correlation_zero_variance(dtype): + # pearson correlation is undefined when the variance of either + # variable is zero. This test ensures that the jit implementation + # returns the same result as pandas in this case. 
+ data = DataFrame( + {"a": [0, 0, 0, 0, 0], "b": [1, 1, 1, 1, 1], "c": [2, 2, 2, 2, 2]} + ) + + def func(group): + return group["b"].corr(group["c"]) + + run_groupby_apply_jit_test(data, func, ["a"]) @pytest.mark.parametrize("op", unary_ops) -def test_groupby_apply_jit_invalid_unary_ops_error(groupby_jit_data, op): +def test_groupby_apply_jit_invalid_unary_ops_error(groupby_jit_data_small, op): keys = ["key1"] def func(group): @@ -479,11 +675,13 @@ def func(group): UDFError, match=f"{op.__name__}\\(Series\\) is not supported by JIT GroupBy", ): - run_groupby_apply_jit_test(groupby_jit_data, func, keys) + run_groupby_apply_jit_test(groupby_jit_data_small, func, keys) @pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_groupby_apply_jit_invalid_binary_ops_error(groupby_jit_data, op): +def test_groupby_apply_jit_invalid_binary_ops_error( + groupby_jit_data_small, op +): keys = ["key1"] def func(group): @@ -493,10 +691,10 @@ def func(group): UDFError, match=f"{op.__name__}\\(Series, Series\\) is not supported", ): - run_groupby_apply_jit_test(groupby_jit_data, func, keys) + run_groupby_apply_jit_test(groupby_jit_data_small, func, keys) -def test_groupby_apply_jit_no_df_ops(groupby_jit_data): +def test_groupby_apply_jit_no_df_ops(groupby_jit_data_small): # DataFrame level operations are not yet supported. def func(group): return group.sum() @@ -505,7 +703,7 @@ def func(group): UDFError, match="JIT GroupBy.apply\\(\\) does not support DataFrame.sum\\(\\)", ): - run_groupby_apply_jit_test(groupby_jit_data, func, ["key1"]) + run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1"]) @pytest.mark.parametrize("dtype", ["uint8", "str"]) @@ -529,101 +727,6 @@ def func(group): run_groupby_apply_jit_test(df, func, ["a"]) -@pytest.mark.parametrize("dtype", ["int32", "int64"]) -def test_groupby_apply_jit_correlation_zero_variance(dtype): - # pearson correlation is undefined when the variance of either - # variable is zero. 
This test ensures that the jit implementation - # returns the same result as pandas in this case. - data = DataFrame( - {"a": [0, 0, 0, 0, 0], "b": [1, 1, 1, 1, 1], "c": [2, 2, 2, 2, 2]} - ) - - def func(group): - return group["b"].corr(group["c"]) - - run_groupby_apply_jit_test(data, func, ["a"]) - - -@pytest.mark.parametrize("dtype", ["int32"]) -def test_groupby_apply_jit_sum_integer_overflow(dtype): - max = np.iinfo(dtype).max - - data = DataFrame( - { - "a": [0, 0, 0], - "b": [max, max, max], - } - ) - - def func(group): - return group["b"].sum() - - run_groupby_apply_jit_test(data, func, ["a"]) - - -@pytest.mark.parametrize("dtype", ["float64"]) -@pytest.mark.parametrize("func", ["min", "max", "sum", "mean", "var", "std"]) -@pytest.mark.parametrize("special_val", [np.nan, np.inf, -np.inf]) -def test_groupby_apply_jit_reductions_special_vals( - func, groupby_jit_data, dtype, special_val -): - # dynamically generate to avoid pickling error. - # see test_groupby_apply_jit_reductions for details. - funcstr = textwrap.dedent( - f""" - def func(df): - return df['val1'].{func}() - """ - ) - lcl = {} - exec(funcstr, lcl) - func = lcl["func"] - - groupby_jit_data["val1"] = special_val - groupby_jit_data["val1"] = groupby_jit_data["val1"].astype(dtype) - - run_groupby_apply_jit_test(groupby_jit_data, func, ["key1"]) - - -@pytest.mark.parametrize("dtype", ["float64"]) -@pytest.mark.parametrize("func", ["idxmax", "idxmin"]) -@pytest.mark.parametrize( - "special_val", - [ - pytest.param( - np.nan, - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/13832" - ), - ), - np.inf, - -np.inf, - ], -) -def test_groupby_apply_jit_idx_reductions_special_vals( - func, groupby_jit_data, dtype, special_val -): - # dynamically generate to avoid pickling error. - # see test_groupby_apply_jit_reductions for details. 
- funcstr = textwrap.dedent( - f""" - def func(df): - return df['val1'].{func}() - """ - ) - lcl = {} - exec(funcstr, lcl) - func = lcl["func"] - - groupby_jit_data["val1"] = special_val - groupby_jit_data["val1"] = groupby_jit_data["val1"].astype(dtype) - - expect = groupby_jit_data.to_pandas().groupby("key1").apply(func) - got = groupby_jit_data.groupby("key1").apply(func, engine="jit") - - assert_eq(expect, got, check_dtype=False) - - @pytest.mark.parametrize( "func", [ @@ -632,8 +735,8 @@ def func(df): lambda df: df["val1"].mean() + df["val2"].std(), ], ) -def test_groupby_apply_jit_basic(func, groupby_jit_data): - run_groupby_apply_jit_test(groupby_jit_data, func, ["key1", "key2"]) +def test_groupby_apply_jit_basic(func, groupby_jit_data_small): + run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1", "key2"]) def create_test_groupby_apply_jit_args_params(): @@ -652,8 +755,10 @@ def f3(df, k, L, m): @pytest.mark.parametrize( "func,args", create_test_groupby_apply_jit_args_params() ) -def test_groupby_apply_jit_args(func, args, groupby_jit_data): - run_groupby_apply_jit_test(groupby_jit_data, func, ["key1", "key2"], *args) +def test_groupby_apply_jit_args(func, args, groupby_jit_data_small): + run_groupby_apply_jit_test( + groupby_jit_data_small, func, ["key1", "key2"], *args + ) def test_groupby_apply_jit_block_divergence(): From 6a23775db29dc4b38820994297c94201c9287aaf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:28:34 -1000 Subject: [PATCH 04/23] Ensure column.fillna signatures are consistent (#14724) Aligns the definitions of `Columns.fillna` among all subclasses. `dtype` looks to only needed in certain instances to cast the fill value so can do that separately. 
A `fill_nan` can be avoided with its single usage in a `can_cast` routine by checking for nan first Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14724 --- python/cudf/cudf/core/column/categorical.py | 9 ++++---- python/cudf/cudf/core/column/column.py | 9 ++++---- python/cudf/cudf/core/column/datetime.py | 5 +++-- python/cudf/cudf/core/column/decimal.py | 22 +++++++++---------- python/cudf/cudf/core/column/numerical.py | 19 ++++++++-------- python/cudf/cudf/core/column/string.py | 12 +++++------ python/cudf/cudf/core/column/timedelta.py | 24 ++++++++++----------- python/cudf/cudf/tests/test_timedelta.py | 3 +-- 8 files changed, 50 insertions(+), 53 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 7036a9ee870..c7e7cf2bf7e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -1236,9 +1236,8 @@ def notnull(self) -> ColumnBase: def fillna( self, fill_value: Any = None, - method: Any = None, - dtype: Optional[Dtype] = None, - ) -> CategoricalColumn: + method: Optional[str] = None, + ) -> Self: """ Fill null values with *fill_value* """ @@ -1276,7 +1275,7 @@ def fillna( self.codes.dtype ) - return super().fillna(value=fill_value, method=method) + return super().fillna(fill_value, method=method) def indices_of( self, value: ScalarLike diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 296fd6a41b0..440ac855691 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
from __future__ import annotations @@ -710,16 +710,15 @@ def _check_scatter_key_length( def fillna( self, - value: Any = None, + fill_value: Any = None, method: Optional[str] = None, - dtype: Optional[Dtype] = None, ) -> Self: """Fill null values with ``value``. Returns a copy with null filled. """ return libcudf.replace.replace_nulls( - input_col=self, replacement=value, method=method, dtype=dtype + input_col=self, replacement=fill_value, method=method )._with_type_metadata(self.dtype) def isnull(self) -> ColumnBase: @@ -929,7 +928,7 @@ def _obtain_isin_result(self, rhs: ColumnBase) -> ColumnBase: # https://github.com/rapidsai/cudf/issues/14515 by # providing a mode in which cudf::contains does not mask # the result. - result = result.fillna(rhs.null_count > 0, dtype=bool) + result = result.fillna(cudf.Scalar(rhs.null_count > 0)) return result def as_mask(self) -> Buffer: diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 466ea3220c8..5aa75365389 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -11,6 +11,7 @@ import numpy as np import pandas as pd import pyarrow as pa +from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -598,12 +599,12 @@ def fillna( self, fill_value: Any = None, method: Optional[str] = None, - dtype: Optional[Dtype] = None, - ) -> DatetimeColumn: + ) -> Self: if fill_value is not None: if cudf.utils.utils._isnat(fill_value): return self.copy(deep=True) if is_scalar(fill_value): + # TODO: Add cast checking like TimedeltaColumn.fillna if not isinstance(fill_value, cudf.Scalar): fill_value = cudf.Scalar(fill_value, dtype=self.dtype) else: diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 02e03f92745..299875f0091 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA 
CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. import warnings from decimal import Decimal @@ -7,6 +7,7 @@ import cupy as cp import numpy as np import pyarrow as pa +from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -125,29 +126,28 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): def fillna( self, - value: Any = None, + fill_value: Any = None, method: Optional[str] = None, - dtype: Optional[Dtype] = None, - ): + ) -> Self: """Fill null values with ``value``. Returns a copy with null filled. """ - if isinstance(value, (int, Decimal)): - value = cudf.Scalar(value, dtype=self.dtype) + if isinstance(fill_value, (int, Decimal)): + fill_value = cudf.Scalar(fill_value, dtype=self.dtype) elif ( - isinstance(value, DecimalBaseColumn) - or isinstance(value, cudf.core.column.NumericalColumn) - and is_integer_dtype(value.dtype) + isinstance(fill_value, DecimalBaseColumn) + or isinstance(fill_value, cudf.core.column.NumericalColumn) + and is_integer_dtype(fill_value.dtype) ): - value = value.astype(self.dtype) + fill_value = fill_value.astype(self.dtype) else: raise TypeError( "Decimal columns only support using fillna with decimal and " "integer values" ) - return super().fillna(value=value, method=method) + return super().fillna(fill_value, method=method) def normalize_binop_value(self, other): if isinstance(other, ColumnBase): diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index f40886bf153..e848c86897f 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
from __future__ import annotations @@ -16,6 +16,7 @@ import cupy as cp import numpy as np import pandas as pd +from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -291,7 +292,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) - def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: + def nans_to_nulls(self: Self) -> Self: # Only floats can contain nan. if self.dtype.kind != "f" or self.nan_count == 0: return self @@ -533,13 +534,11 @@ def fillna( self, fill_value: Any = None, method: Optional[str] = None, - dtype: Optional[Dtype] = None, - fill_nan: bool = True, - ) -> NumericalColumn: + ) -> Self: """ Fill null values with *fill_value* """ - col = self.nans_to_nulls() if fill_nan else self + col = self.nans_to_nulls() if col.null_count == 0: return col @@ -574,8 +573,8 @@ def fillna( if not (new_fill_value == fill_value).all(): raise TypeError( f"Cannot safely cast non-equivalent " - f"{col.dtype.type.__name__} to " - f"{cudf.dtype(dtype).type.__name__}" + f"{fill_value.dtype.type.__name__} to " + f"{col.dtype.type.__name__}" ) fill_value = new_fill_value else: @@ -652,12 +651,14 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool: # want to cast float to int: elif self.dtype.kind == "f" and to_dtype.kind in {"i", "u"}: + if self.nan_count > 0: + return False iinfo = np.iinfo(to_dtype) min_, max_ = iinfo.min, iinfo.max # best we can do is hope to catch it here and avoid compare if (self.min() >= min_) and (self.max() <= max_): - filled = self.fillna(0, fill_nan=False) + filled = self.fillna(0) return (cudf.Series(filled) % 1 == 0).all() else: return False diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 7bf81f3e2d3..06b5ac31ca6 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2019-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -21,6 +21,7 @@ import pandas as pd import pyarrow as pa from numba import cuda +from typing_extensions import Self import cudf import cudf.api.types @@ -5824,17 +5825,16 @@ def fillna( self, fill_value: Any = None, method: Optional[str] = None, - dtype: Optional[Dtype] = None, - ) -> StringColumn: + ) -> Self: if fill_value is not None: if not is_scalar(fill_value): fill_value = column.as_column(fill_value, dtype=self.dtype) elif cudf._lib.scalar._is_null_host_scalar(fill_value): # Trying to fill with value? Return copy. return self.copy(deep=True) - return super().fillna(value=fill_value, dtype="object") - else: - return super().fillna(method=method) + else: + fill_value = cudf.Scalar(fill_value, dtype=self.dtype) + return super().fillna(fill_value, method=method) def normalize_binop_value( self, other diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 572b3b894dc..d664b0f18df 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from __future__ import annotations @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pyarrow as pa +from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -281,24 +282,21 @@ def fillna( self, fill_value: Any = None, method: Optional[str] = None, - dtype: Optional[Dtype] = None, - ) -> TimeDeltaColumn: + ) -> Self: if fill_value is not None: if cudf.utils.utils._isnat(fill_value): return self.copy(deep=True) - col: ColumnBase = self if is_scalar(fill_value): - if isinstance(fill_value, np.timedelta64): - dtype = determine_out_dtype(self.dtype, fill_value.dtype) - fill_value = fill_value.astype(dtype) - col = col.astype(dtype) - if not isinstance(fill_value, cudf.Scalar): - fill_value = cudf.Scalar(fill_value, dtype=dtype) + fill_value = cudf.Scalar(fill_value) + dtype = determine_out_dtype(self.dtype, fill_value.dtype) + fill_value = fill_value.astype(dtype) + if self.dtype != dtype: + return cast( + Self, self.astype(dtype).fillna(fill_value, method) + ) else: fill_value = column.as_column(fill_value, nan_as_null=False) - return cast(TimeDeltaColumn, ColumnBase.fillna(col, fill_value)) - else: - return super().fillna(method=method) + return super().fillna(fill_value, method) def as_numerical_column( self, dtype: Dtype, **kwargs diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 139ce1c4ca3..d86612d3143 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
import datetime import operator @@ -1024,7 +1024,6 @@ def local_assert(expected, actual): [ np.timedelta64(4, "s"), np.timedelta64(456, "D"), - np.timedelta64(46, "h"), np.timedelta64("nat"), np.timedelta64(1, "s"), np.timedelta64(1, "ms"), From fa37e13db360e0b685bc6af020aa7510f1fbbdbd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 10 Jan 2024 08:18:15 -1000 Subject: [PATCH 05/23] Replace as_numerical with as_numerical_column/codes (#14719) * For datetime/timedelta, replaced with `as_numerical_column("int64")` * For categorical, replaced with `codes` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Michael Wang (https://github.com/isVoid) URL: https://github.com/rapidsai/cudf/pull/14719 --- python/cudf/cudf/core/column/categorical.py | 25 ++++--------- python/cudf/cudf/core/column/datetime.py | 41 +++++++++------------ python/cudf/cudf/core/column/timedelta.py | 39 +++++++++----------- python/cudf/cudf/core/dataframe.py | 8 ++-- 4 files changed, 46 insertions(+), 67 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index c7e7cf2bf7e..f3f2be0cc45 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -754,7 +754,7 @@ def __contains__(self, item: ScalarLike) -> bool: self._encode(item) except ValueError: return False - return self._encode(item) in self.as_numerical + return self._encode(item) in self.codes def set_base_data(self, value): if value is not None: @@ -799,15 +799,6 @@ def children(self) -> Tuple[NumericalColumn]: self._children = (codes_column,) return self._children - @property - def as_numerical(self) -> NumericalColumn: - return cast( - cudf.core.column.NumericalColumn, - column.build_column( - data=self.codes.data, dtype=self.codes.dtype, mask=self.mask - ), - ) - @property def categories(self) -> ColumnBase: return self.dtype.categories._values @@ 
-925,7 +916,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: "The only binary operations supported by unordered " "categorical columns are equality and inequality." ) - return self.as_numerical._binaryop(other.as_numerical, op) + return self.codes._binaryop(other.codes, op) def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: if isinstance(other, column.ColumnBase): @@ -950,7 +941,7 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: def sort_values( self, ascending: bool = True, na_position="last" ) -> CategoricalColumn: - codes = self.as_numerical.sort_values(ascending, na_position) + codes = self.codes.sort_values(ascending, na_position) col = column.build_categorical_column( categories=self.dtype.categories._values, codes=column.build_column(codes.base_data, dtype=codes.dtype), @@ -961,7 +952,7 @@ def sort_values( return col def element_indexing(self, index: int) -> ScalarLike: - val = self.as_numerical.element_indexing(index) + val = self.codes.element_indexing(index) return self._decode(int(val)) if val is not None else val @property @@ -1053,7 +1044,7 @@ def data_array_view( return self.codes.data_array_view(mode=mode) def unique(self) -> CategoricalColumn: - codes = self.as_numerical.unique() + codes = self.codes.unique() return column.build_categorical_column( categories=self.categories, codes=column.build_column(codes.base_data, dtype=codes.dtype), @@ -1280,15 +1271,15 @@ def fillna( def indices_of( self, value: ScalarLike ) -> cudf.core.column.NumericalColumn: - return self.as_numerical.indices_of(self._encode(value)) + return self.codes.indices_of(self._encode(value)) @property def is_monotonic_increasing(self) -> bool: - return bool(self.ordered) and self.as_numerical.is_monotonic_increasing + return bool(self.ordered) and self.codes.is_monotonic_increasing @property def is_monotonic_decreasing(self) -> bool: - return bool(self.ordered) and self.as_numerical.is_monotonic_decreasing 
+ return bool(self.ordered) and self.codes.is_monotonic_decreasing def as_categorical_column(self, dtype: Dtype) -> CategoricalColumn: if isinstance(dtype, str) and dtype == "category": diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 5aa75365389..2b44b46bb9e 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -268,7 +268,9 @@ def __contains__(self, item: ScalarLike) -> bool: # np.datetime64 raises ValueError, hence `item` # cannot exist in `self`. return False - return item_as_dt64.astype("int64") in self.as_numerical + return item_as_dt64.astype("int64") in self.as_numerical_column( + "int64" + ) @property def time_unit(self) -> str: @@ -397,19 +399,6 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: return NotImplemented - @property - def as_numerical(self) -> "cudf.core.column.NumericalColumn": - return cast( - "cudf.core.column.NumericalColumn", - column.build_column( - data=self.base_data, - dtype=np.int64, - mask=self.base_mask, - offset=self.offset, - size=self.size, - ), - ) - @property def __cuda_array_interface__(self) -> Mapping[str, Any]: output = { @@ -449,9 +438,14 @@ def as_timedelta_column( def as_numerical_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.NumericalColumn": - return cast( - "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) + col = column.build_column( + data=self.base_data, + dtype=np.int64, + mask=self.base_mask, + offset=self.offset, + size=self.size, ) + return cast("cudf.core.column.NumericalColumn", col.astype(dtype)) def as_string_column( self, dtype: Dtype, format=None, **kwargs @@ -484,7 +478,7 @@ def mean( self, skipna=None, min_count: int = 0, dtype=np.float64 ) -> ScalarLike: return pd.Timestamp( - self.as_numerical.mean( + self.as_numerical_column("int64").mean( skipna=skipna, min_count=min_count, dtype=dtype ), unit=self.time_unit, @@ -498,7 +492,7 @@ def std( 
ddof: int = 1, ) -> pd.Timedelta: return pd.Timedelta( - self.as_numerical.std( + self.as_numerical_column("int64").std( skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof ) * _unit_to_nanoseconds_conversion[self.time_unit], @@ -506,7 +500,8 @@ def std( def median(self, skipna: Optional[bool] = None) -> pd.Timestamp: return pd.Timestamp( - self.as_numerical.median(skipna=skipna), unit=self.time_unit + self.as_numerical_column("int64").median(skipna=skipna), + unit=self.time_unit, ) def quantile( @@ -516,7 +511,7 @@ def quantile( exact: bool, return_scalar: bool, ) -> ColumnBase: - result = self.as_numerical.quantile( + result = self.as_numerical_column("int64").quantile( q=q, interpolation=interpolation, exact=exact, @@ -617,12 +612,12 @@ def indices_of( ) -> cudf.core.column.NumericalColumn: value = column.as_column( pd.to_datetime(value), dtype=self.dtype - ).as_numerical - return self.as_numerical.indices_of(value) + ).as_numerical_column("int64") + return self.as_numerical_column("int64").indices_of(value) @property def is_unique(self) -> bool: - return self.as_numerical.is_unique + return self.as_numerical_column("int64").is_unique def isin(self, values: Sequence) -> ColumnBase: return cudf.core.tools.datetimes._isin_datetimelike(self, values) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index d664b0f18df..2f842130f48 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -115,7 +115,7 @@ def __contains__(self, item: DatetimeLikeScalar) -> bool: # np.timedelta64 raises ValueError, hence `item` # cannot exist in `self`. 
return False - return item.view("int64") in self.as_numerical + return item.view("int64") in self.as_numerical_column("int64") @property def values(self): @@ -134,7 +134,9 @@ def to_arrow(self) -> pa.Array: self.mask_array_view(mode="read").copy_to_host() ) data = pa.py_buffer( - self.as_numerical.data_array_view(mode="read").copy_to_host() + self.as_numerical_column("int64") + .data_array_view(mode="read") + .copy_to_host() ) pa_dtype = np_to_pa_dtype(self.dtype) return pa.Array.from_buffers( @@ -261,19 +263,6 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand: return cudf.Scalar(other) return NotImplemented - @property - def as_numerical(self) -> "cudf.core.column.NumericalColumn": - return cast( - "cudf.core.column.NumericalColumn", - column.build_column( - data=self.base_data, - dtype=np.int64, - mask=self.base_mask, - offset=self.offset, - size=self.size, - ), - ) - @property def time_unit(self) -> str: return self._time_unit @@ -301,9 +290,14 @@ def fillna( def as_numerical_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.NumericalColumn": - return cast( - "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) + col = column.build_column( + data=self.base_data, + dtype=np.int64, + mask=self.base_mask, + offset=self.offset, + size=self.size, ) + return cast("cudf.core.column.NumericalColumn", col.astype(dtype)) def as_datetime_column( self, dtype: Dtype, **kwargs @@ -337,13 +331,14 @@ def as_timedelta_column(self, dtype: Dtype, **kwargs) -> TimeDeltaColumn: def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta: return pd.Timedelta( - self.as_numerical.mean(skipna=skipna, dtype=dtype), + self.as_numerical_column("int64").mean(skipna=skipna, dtype=dtype), unit=self.time_unit, ) def median(self, skipna: Optional[bool] = None) -> pd.Timedelta: return pd.Timedelta( - self.as_numerical.median(skipna=skipna), unit=self.time_unit + self.as_numerical_column("int64").median(skipna=skipna), + unit=self.time_unit, ) 
def isin(self, values: Sequence) -> ColumnBase: @@ -356,7 +351,7 @@ def quantile( exact: bool, return_scalar: bool, ) -> ColumnBase: - result = self.as_numerical.quantile( + result = self.as_numerical_column("int64").quantile( q=q, interpolation=interpolation, exact=exact, @@ -376,7 +371,7 @@ def sum( # Since sum isn't overridden in Numerical[Base]Column, mypy only # sees the signature from Reducible (which doesn't have the extra # parameters from ColumnBase._reduce) so we have to ignore this. - self.as_numerical.sum( # type: ignore + self.as_numerical_column("int64").sum( # type: ignore skipna=skipna, min_count=min_count, dtype=dtype ), unit=self.time_unit, @@ -390,7 +385,7 @@ def std( ddof: int = 1, ) -> pd.Timedelta: return pd.Timedelta( - self.as_numerical.std( + self.as_numerical_column("int64").std( skipna=skipna, min_count=min_count, ddof=ddof, dtype=dtype ), unit=self.time_unit, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 01935fec8c3..8dd81c92994 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -2379,15 +2379,13 @@ def scatter_by_map( # Convert string or categorical to integer if isinstance(map_index, cudf.core.column.StringColumn): - map_index = map_index.as_categorical_column( - "category" - ).as_numerical + map_index = map_index.as_categorical_column("category").codes warnings.warn( "Using StringColumn for map_index in scatter_by_map. " "Use an integer array/column for better performance." ) elif isinstance(map_index, cudf.core.column.CategoricalColumn): - map_index = map_index.as_numerical + map_index = map_index.codes warnings.warn( "Using CategoricalColumn for map_index in scatter_by_map. " "Use an integer array/column for better performance." 
From 3f19d04c1889e7114cbed9081359f3bdcb9f6e64 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 10 Jan 2024 11:11:28 -1000 Subject: [PATCH 06/23] Remove unneeded methods in Column (#14730) * `valid_count` can be composed of `null_count` or where checked `has_nulls` * `contains_na_entries` is redundant with `has_nulls` * Better typing in `searchsorted` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14730 --- python/cudf/cudf/core/_base_index.py | 16 +++++++++++----- python/cudf/cudf/core/column/categorical.py | 4 +++- python/cudf/cudf/core/column/column.py | 20 ++++++-------------- python/cudf/cudf/core/column/numerical.py | 6 +----- python/cudf/cudf/core/dataframe.py | 11 ++++++++--- python/cudf/cudf/core/frame.py | 9 +++++++-- python/cudf/cudf/core/index.py | 11 ++++++----- python/cudf/cudf/core/series.py | 2 +- python/cudf/cudf/tests/test_categorical.py | 4 ++-- python/cudf/cudf/tests/test_orc.py | 8 +++++--- python/cudf/cudf/utils/dtypes.py | 4 ++-- 11 files changed, 52 insertions(+), 43 deletions(-) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 8d2506403d4..2aef77b6c99 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -2,11 +2,10 @@ from __future__ import annotations -import builtins import pickle import warnings from functools import cached_property -from typing import Any, Set, Tuple +from typing import Any, Literal, Set, Tuple import pandas as pd from typing_extensions import Self @@ -1702,6 +1701,8 @@ def find_label_range(self, loc: slice) -> slice: start = loc.start stop = loc.stop step = 1 if loc.step is None else loc.step + start_side: Literal["left", "right"] + stop_side: Literal["left", "right"] if step < 0: start_side, stop_side = "right", "left" else: @@ -1725,9 +1726,9 @@ def 
find_label_range(self, loc: slice) -> slice: def searchsorted( self, value, - side: builtins.str = "left", + side: Literal["left", "right"] = "left", ascending: bool = True, - na_position: builtins.str = "last", + na_position: Literal["first", "last"] = "last", ): """Find index where elements should be inserted to maintain order @@ -1754,7 +1755,12 @@ def searchsorted( """ raise NotImplementedError - def get_slice_bound(self, label, side: builtins.str, kind=None) -> int: + def get_slice_bound( + self, + label, + side: Literal["left", "right"], + kind: Literal["ix", "loc", "getitem", None] = None, + ) -> int: """ Calculate slice bound that corresponds to given label. Returns leftmost (one-past-the-rightmost if ``side=='right'``) position diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index f3f2be0cc45..59fd4631067 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1372,7 +1372,9 @@ def _concat( # improved as the concatenation API is solidified. 
# Find the first non-null column: - head = next((obj for obj in objs if obj.valid_count), objs[0]) + head = next( + (obj for obj in objs if not obj.null_count != len(obj)), objs[0] + ) # Combine and de-dupe the categories cats = column.concat_columns([o.categories for o in objs]).unique() diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 440ac855691..6af39dd3558 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -12,6 +12,7 @@ Any, Dict, List, + Literal, MutableSequence, Optional, Sequence, @@ -428,11 +429,6 @@ def _fill( def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase: return libcudf.copying.shift(self, offset, fill_value) - @property - def valid_count(self) -> int: - """Number of non-null values""" - return len(self) - self.null_count - @property def nullmask(self) -> Buffer: """The gpu buffer for the null-mask""" @@ -1159,9 +1155,9 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def searchsorted( self, value, - side: str = "left", + side: Literal["left", "right"] = "left", ascending: bool = True, - na_position: str = "last", + na_position: Literal["first", "last"] = "last", ) -> Self: if not isinstance(value, ColumnBase) or value.dtype != self.dtype: raise ValueError( @@ -1304,10 +1300,6 @@ def _reduce( return libcudf.reduce.reduce(op, preprocessed, **kwargs) return preprocessed - @property - def contains_na_entries(self) -> bool: - return self.null_count != 0 - def _process_for_reduction( self, skipna: Optional[bool] = None, min_count: int = 0 ) -> Union[ColumnBase, ScalarLike]: @@ -2742,7 +2734,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: # If all columns are `NumericalColumn` with different dtypes, # we cast them to a common dtype. 
# Notice, we can always cast pure null columns - not_null_col_dtypes = [o.dtype for o in objs if o.valid_count] + not_null_col_dtypes = [o.dtype for o in objs if o.null_count != len(o)] if len(not_null_col_dtypes) and all( _is_non_decimal_numeric_dtype(dtyp) and np.issubdtype(dtyp, np.datetime64) @@ -2754,13 +2746,13 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: objs = [obj.astype(common_dtype) for obj in objs] # Find the first non-null column: - head = next((obj for obj in objs if obj.valid_count), objs[0]) + head = next((obj for obj in objs if obj.null_count != len(obj)), objs[0]) for i, obj in enumerate(objs): # Check that all columns are the same type: if not is_dtype_equal(obj.dtype, head.dtype): # if all null, cast to appropriate dtype - if obj.valid_count == 0: + if obj.null_count == len(obj): objs[i] = column_empty_like( head, dtype=head.dtype, masked=True, newsize=len(obj) ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index e848c86897f..6ef3a6abacc 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -140,7 +140,7 @@ def indices_of(self, value: ScalarLike) -> NumericalColumn: else: return super().indices_of(value) - def has_nulls(self, include_nan=False): + def has_nulls(self, include_nan: bool = False) -> bool: return bool(self.null_count != 0) or ( include_nan and bool(self.nan_count != 0) ) @@ -425,10 +425,6 @@ def dropna(self, drop_nan: bool = False) -> NumericalColumn: col = self.nans_to_nulls() if drop_nan else self return drop_nulls([col])[0] - @property - def contains_na_entries(self) -> bool: - return (self.nan_count != 0) or (self.null_count != 0) - def _process_values_for_isin( self, values: Sequence ) -> Tuple[ColumnBase, ColumnBase]: diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8dd81c92994..a3373951a06 100644 --- a/python/cudf/cudf/core/dataframe.py +++ 
b/python/cudf/cudf/core/dataframe.py @@ -5997,9 +5997,14 @@ def count(self, axis=0, level=None, numeric_only=False, **kwargs): axis = self._get_axis_from_axis_arg(axis) if axis != 0: raise NotImplementedError("Only axis=0 is currently supported.") - + length = len(self) return Series._from_data( - {None: [self._data[col].valid_count for col in self._data.names]}, + { + None: [ + length - self._data[col].null_count + for col in self._data.names + ] + }, as_index(self._data.names), ) @@ -8091,7 +8096,7 @@ def _get_non_null_cols_and_dtypes(col_idxs, list_of_columns): # non-null Column with the same name is found. if idx not in dtypes: dtypes[idx] = cols[idx].dtype - if cols[idx].valid_count > 0: + if cols[idx].null_count != len(cols[idx]): if idx not in non_null_columns: non_null_columns[idx] = [cols[idx]] else: diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 123f13f8733..5f7a86e86d8 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -13,6 +13,7 @@ Callable, Dict, List, + Literal, MutableMapping, Optional, Tuple, @@ -882,7 +883,7 @@ def fillna( replace_val = None should_fill = ( col_name in value - and col.contains_na_entries + and col.has_nulls(include_nan=True) and not libcudf.scalar._is_null_host_scalar(replace_val) ) or method is not None if should_fill: @@ -1354,7 +1355,11 @@ def notna(self): @_cudf_nvtx_annotate def searchsorted( - self, values, side="left", ascending=True, na_position="last" + self, + values, + side: Literal["left", "right"] = "left", + ascending: bool = True, + na_position: Literal["first", "last"] = "last", ): """Find indices where elements should be inserted to maintain order diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 6bc632e0a53..5c33cd09ad1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
from __future__ import annotations @@ -11,6 +11,7 @@ Any, Dict, List, + Literal, MutableMapping, Optional, Sequence, @@ -233,9 +234,9 @@ def _copy_type_metadata( def searchsorted( self, value: int, - side: str = "left", + side: Literal["left", "right"] = "left", ascending: bool = True, - na_position: str = "last", + na_position: Literal["first", "last"] = "last", ): assert (len(self) <= 1) or ( ascending == (self._step > 0) @@ -2205,9 +2206,9 @@ def copy(self, name=None, deep=False, dtype=None, names=None): def searchsorted( self, value, - side: str = "left", + side: Literal["left", "right"] = "left", ascending: bool = True, - na_position: str = "last", + na_position: Literal["first", "last"] = "last", ): value = self.dtype.type(value) return super().searchsorted( diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index fcb4e77f6a5..8739a61dd8b 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1710,7 +1710,7 @@ def _concat(cls, objs, axis=0, index=True): @_cudf_nvtx_annotate def valid_count(self): """Number of non-null values""" - return self._column.valid_count + return len(self) - self._column.null_count @property # type: ignore @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 49eeff01bee..52b7236b965 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
import operator import string @@ -217,7 +217,7 @@ def test_categorical_masking(): got_masked = sr[got_matches] assert len(expect_masked) == len(got_masked) - assert len(expect_masked) == got_masked.valid_count + assert got_masked.null_count == 0 assert_eq(got_masked, expect_masked) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 7407da9c4ac..4630b6eef0a 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import datetime import decimal @@ -812,7 +812,7 @@ def test_orc_write_bool_statistics(tmpdir, datadir, nrows): if "number_of_values" in file_stats[0][col]: stats_valid_count = file_stats[0][col]["number_of_values"] - actual_valid_count = gdf[col].valid_count + actual_valid_count = len(gdf[col]) - gdf[col].null_count assert normalized_equals(actual_valid_count, stats_valid_count) # compare stripe statistics with actual min/max @@ -827,7 +827,9 @@ def test_orc_write_bool_statistics(tmpdir, datadir, nrows): assert normalized_equals(actual_true_count, stats_true_count) if "number_of_values" in stripes_stats[stripe_idx][col]: - actual_valid_count = stripe_df[col].valid_count + actual_valid_count = ( + len(stripe_df[col]) - stripe_df[col].null_count + ) stats_valid_count = stripes_stats[stripe_idx][col][ "number_of_values" ] diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index c65404445cb..72721b5197f 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
import datetime from collections import namedtuple @@ -401,7 +401,7 @@ def min_column_type(x, expected_type): if not isinstance(x, cudf.core.column.NumericalColumn): raise TypeError("Argument x must be of type column.NumericalColumn") - if x.valid_count == 0: + if x.null_count == len(x): return x.dtype if np.issubdtype(x.dtype, np.floating): From 1078326535c9989a2e904d78ceb708a097be989b Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 10 Jan 2024 13:43:58 -0800 Subject: [PATCH 07/23] Write cuDF version in Parquet "created_by" metadata field (#14721) Populate the informational `created_by` field in the Parquet file metadata. Identifying the source of a parquet file can help with tracking down interoperability problems. Authors: - Ed Seidl (https://github.com/etseidl) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14721 --- cpp/CMakeLists.txt | 8 +++++++- cpp/src/io/parquet/writer_impl.cu | 9 ++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a7c34ca489c..cb1fdb1f557 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -658,6 +658,12 @@ set_source_files_properties( PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64" ) +set_property( + SOURCE src/io/parquet/writer_impl.cu + APPEND + PROPERTY COMPILE_DEFINITIONS "CUDF_VERSION=${PROJECT_VERSION}" +) + set_target_properties( cudf PROPERTIES BUILD_RPATH "\$ORIGIN" diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index c452f632cd6..279a814a4e1 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,6 +55,10 @@ #include #include +#ifndef CUDF_VERSION +#error "CUDF_VERSION is not defined" +#endif + namespace cudf::io::parquet::detail { using namespace cudf::io::detail; @@ -108,7 +112,7 @@ struct aggregate_writer_metadata { meta.num_rows = this->files[part].num_rows; meta.row_groups = this->files[part].row_groups; meta.key_value_metadata = this->files[part].key_value_metadata; - meta.created_by = this->created_by; + meta.created_by = "cudf version " CUDF_STRINGIFY(CUDF_VERSION); meta.column_orders = this->column_orders; return meta; } @@ -171,7 +175,6 @@ struct aggregate_writer_metadata { std::vector> column_indexes; }; std::vector files; - std::string created_by = ""; thrust::optional> column_orders = thrust::nullopt; }; From 516559e76cfed67d58dcc4c2116ed93bfcf193ca Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 06:18:55 -1000 Subject: [PATCH 08/23] Explicitly pass .dtype into is_foo_dtype functions (#14657) Just promotes more explicitness when using dtype checking functions Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: 
https://github.com/rapidsai/cudf/pull/14657 --- python/cudf/cudf/core/_internals/where.py | 2 +- python/cudf/cudf/core/column/numerical.py | 4 ++-- python/cudf/cudf/core/dataframe.py | 6 +++--- python/cudf/cudf/testing/testing.py | 10 +++++++--- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index f76802c8b7d..ef6b10f66c1 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -103,7 +103,7 @@ def _check_and_cast_columns_with_other( other = cudf.Scalar(other) if is_mixed_with_object_dtype(other, source_col) or ( - is_bool_dtype(source_col) and not is_bool_dtype(common_dtype) + is_bool_dtype(source_dtype) and not is_bool_dtype(common_dtype) ): raise TypeError(mixed_err) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 6ef3a6abacc..148fa252fad 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -272,13 +272,13 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: out_dtype = "bool" if op in {"__and__", "__or__", "__xor__"}: - if is_float_dtype(self.dtype) or is_float_dtype(other): + if is_float_dtype(self.dtype) or is_float_dtype(other.dtype): raise TypeError( f"Operation 'bitwise {op[2:-2]}' not supported between " f"{self.dtype.type.__name__} and " f"{other.dtype.type.__name__}" ) - if is_bool_dtype(self.dtype) or is_bool_dtype(other): + if is_bool_dtype(self.dtype) or is_bool_dtype(other.dtype): out_dtype = "bool" if ( diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index a3373951a06..813ecc32069 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -324,7 +324,7 @@ def _getitem_tuple_arg(self, arg): tmp_arg[1], ) - if is_bool_dtype(tmp_arg[0]): + if is_bool_dtype(tmp_arg[0].dtype): df = 
columns_df._apply_boolean_mask( BooleanMask(tmp_arg[0], len(columns_df)) ) @@ -6032,7 +6032,7 @@ def _reduce( numeric_cols = ( name for name in self._data.names - if is_numeric_dtype(self._data[name]) + if is_numeric_dtype(self._data[name].dtype) ) source = self._get_columns_by_label(numeric_cols) if source.empty: @@ -6078,7 +6078,7 @@ def _reduce( numeric_cols = ( name for name in self._data.names - if is_numeric_dtype(self._data[name]) + if is_numeric_dtype(self._data[name].dtype) ) source = self._get_columns_by_label(numeric_cols) if source.empty: diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index a45733a0f83..6c2f073b7ac 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -232,10 +232,10 @@ def assert_column_equal( elif not ( ( not dtype_can_compare_equal_to_other(left.dtype) - and is_numeric_dtype(right) + and is_numeric_dtype(right.dtype) ) or ( - is_numeric_dtype(left) + is_numeric_dtype(left.dtype) and not dtype_can_compare_equal_to_other(right.dtype) ) ): @@ -245,7 +245,11 @@ def assert_column_equal( left.isnull().values == right.isnull().values ) - if columns_equal and not check_exact and is_numeric_dtype(left): + if ( + columns_equal + and not check_exact + and is_numeric_dtype(left.dtype) + ): # non-null values must be the same columns_equal = cp.allclose( left.apply_boolean_mask( From d26ea6de90ad28e409ffeae4a3a470c41316e969 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 06:23:41 -1000 Subject: [PATCH 09/23] Fix nan_as_null not being respected when passing arrow object (#14688) Similar to https://github.com/rapidsai/cudf/pull/14687, nan was not being interpreted as null when `nan_as_null=True` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14688 --- python/cudf/cudf/core/column/column.py | 
13 +++++++++++-- python/cudf/cudf/tests/test_series.py | 10 ++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 6af39dd3558..5b638b1f4ad 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -25,6 +25,7 @@ import numpy as np import pandas as pd import pyarrow as pa +import pyarrow.compute as pc from numba import cuda from typing_extensions import Self @@ -1997,11 +1998,19 @@ def as_column( return col elif isinstance(arbitrary, (pa.Array, pa.ChunkedArray)): - if isinstance(arbitrary, pa.lib.HalfFloatArray): + if pa.types.is_float16(arbitrary.type): raise NotImplementedError( "Type casting from `float16` to `float32` is not " "yet supported in pyarrow, see: " - "https://issues.apache.org/jira/browse/ARROW-3802" + "https://github.com/apache/arrow/issues/20213" + ) + elif (nan_as_null is None or nan_as_null) and pa.types.is_floating( + arbitrary.type + ): + arbitrary = pc.if_else( + pc.is_nan(arbitrary), + pa.nulls(len(arbitrary), type=arbitrary.type), + arbitrary, ) col = ColumnBase.from_arrow(arbitrary) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 248ac201e12..8898bf70047 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -2572,6 +2572,16 @@ def test_series_arrow_list_types_roundtrip(): cudf.from_pandas(pdf) +@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series]) +@pytest.mark.parametrize( + "data", [pa.array([float("nan")]), pa.chunked_array([[float("nan")]])] +) +def test_nan_as_null_from_arrow_objects(klass, data): + result = klass(data, nan_as_null=True) + expected = klass(pa.array([None], type=pa.float64())) + assert_eq(result, expected) + + @pytest.mark.parametrize("reso", ["M", "ps"]) @pytest.mark.parametrize("typ", ["M", "m"]) def test_series_invalid_reso_dtype(reso, typ): From 
85acdc640701940e47b3969b14a811f33e7faf5b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 09:58:15 -1000 Subject: [PATCH 10/23] Fix constructing Series/Index from arrow array and dtype (#14686) Previously, passing a `dtype=` argument to the constructors was ignored when passing arrow array objects Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14686 --- python/cudf/cudf/core/column/column.py | 21 +++++++++++++-------- python/cudf/cudf/tests/test_series.py | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 5b638b1f4ad..19e76d4a95b 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2036,6 +2036,8 @@ def as_column( new_dtype = "str" col = col.astype(new_dtype) + elif dtype is not None: + col = col.astype(dtype) return col @@ -2112,6 +2114,15 @@ def as_column( arbitrary, nan_as_null=nan_as_null, dtype=dtype, length=length ) elif arbitrary.dtype.kind == "O": + if len(arbitrary) == 0: + # TODO: Can remove once empty constructor default becomes + # object instead of float. 
+ return as_column( + pa.array([], type=pa.string()), + nan_as_null=nan_as_null, + dtype=dtype, + length=length, + ) if isinstance(arbitrary, pd.arrays.PandasArray): # infer_dtype does not handle PandasArray arbitrary = np.array(arbitrary, dtype=object) @@ -2140,15 +2151,9 @@ def as_column( arbitrary, from_pandas=True, ) - if isinstance(pyarrow_array.type, pa.Decimal128Type): - pyarrow_type = cudf.Decimal128Dtype.from_arrow( - pyarrow_array.type - ) - else: - pyarrow_type = arbitrary.dtype data = as_column( pyarrow_array, - dtype=pyarrow_type, + dtype=dtype, nan_as_null=nan_as_null, length=length, ) @@ -2280,7 +2285,7 @@ def as_column( if dtype is not None: data = data.astype(dtype) elif arb_dtype.kind in ("O", "U"): - data = as_column(pa.array(arbitrary), dtype=arbitrary.dtype) + data = as_column(pa.array(arbitrary), dtype=dtype) # There is no cast operation available for pa.Array from int to # str, Hence instead of handling in pa.Array block, we # will have to type-cast here. diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 8898bf70047..36033a72479 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -2650,6 +2650,26 @@ def test_astype_pandas_nullable_pandas_compat(dtype, klass, kind): ser.astype(kind(dtype)) +@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) +@pytest.mark.parametrize( + "data", + [ + pa.array([1, None], type=pa.int64()), + pa.chunked_array([[1, None]], type=pa.int64()), + ], +) +def test_from_arrow_array_dtype(klass, data): + obj = klass(data, dtype="int8") + assert obj.dtype == np.dtype("int8") + + +@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) +def test_from_pandas_object_dtype_passed_dtype(klass): + result = klass(pd.Series([True, False], dtype=object), dtype="int8") + expected = klass(pa.array([1, 0], type=pa.int8())) + assert_eq(result, expected) + + def test_series_where_mixed_bool_dtype(): s = cudf.Series([True, False, 
True]) with pytest.raises(TypeError): From 0d87bb7e6edbb108be04773afe7d0eee82258131 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 11 Jan 2024 16:04:59 -0600 Subject: [PATCH 11/23] refactor CUDA versions in dependencies.yaml (#14733) Follow-up to #14644. Contributes to https://github.com/rapidsai/build-planning/issues/7. Similar to https://github.com/rapidsai/rmm/pull/1422, this proposes splitting the `cuda-version` dependency in `dependencies.yaml` out to its own thing, separate from the bits of the CUDA Toolkit `cudf` needs. Some other simplifications: * removes the notebook-specific stuff added in #14722 (which I think were added specifically because `cuda-version` and CTK stuff was coupled) * consolidates two sections with selectors only based on CUDA `{major}.{minor}` Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Ray Douglass (https://github.com/raydouglass) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) --- dependencies.yaml | 69 +++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 94f31240797..28b3afd7bbc 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -11,7 +11,8 @@ files: - build_wheels - build_python_common - build_python_cudf - - cudatoolkit + - cuda + - cuda_version - develop - docs - libarrow_build @@ -29,31 +30,32 @@ files: test_cpp: output: none includes: - - cudatoolkit - - test_cpp + - cuda_version - libarrow_run + - test_cpp test_python: output: none includes: - - cudatoolkit + - cuda_version - py_version + - pyarrow_run - test_python_common - test_python_cudf - test_python_dask_cudf - - pyarrow_run test_java: output: none includes: - build_all + - cuda + - cuda_version - libarrow_run - - cudatoolkit - test_java test_notebooks: output: none includes: + - cuda_version - notebooks - py_version - - notebook_cuda_version checks: output: none includes: @@ -62,7 +64,8 @@ 
files: docs: output: none includes: - - cudatoolkit + - cuda + - cuda_version - docs - libarrow_run - py_version @@ -333,37 +336,51 @@ dependencies: # Allow runtime version to float up to minor version # Disallow pyarrow 14.0.0 due to a CVE - pyarrow>=14.0.1,<15.0.0a0 - cudatoolkit: + cuda_version: specific: - output_types: conda matrices: - matrix: - cuda: "12.*" + cuda: "11.2" packages: - - cuda-cudart-dev - - cuda-nvrtc-dev - - cuda-nvtx-dev - - libcurand-dev - - matrix: # Fallback for CUDA 11 or no matrix + - cuda-version=11.2 + - matrix: + cuda: "11.4" packages: - - cudatoolkit - - output_types: conda - matrices: + - cuda-version=11.4 + - matrix: + cuda: "11.5" + packages: + - cuda-version=11.5 + - matrix: + cuda: "11.8" + packages: + - cuda-version=11.8 - matrix: cuda: "12.0" packages: - cuda-version=12.0 + cuda: + specific: + - output_types: conda + matrices: + - matrix: + cuda: "12.*" + packages: + - cuda-cudart-dev + - cuda-nvrtc-dev + - cuda-nvtx-dev + - libcurand-dev - matrix: cuda: "11.8" packages: - - cuda-version=11.8 + - cudatoolkit - cuda-nvtx=11.8 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 - matrix: cuda: "11.5" packages: - - cuda-version=11.5 - cudatoolkit - cuda-nvtx=11.5 # Can't hard pin the version since 11.x is missing many @@ -373,7 +390,6 @@ dependencies: - matrix: cuda: "11.4" packages: - - cuda-version=11.4 - cudatoolkit - &cudanvtx114 cuda-nvtx=11.4 - &libcurand_dev114 libcurand-dev>=10.2.5.43,<=10.2.5.120 @@ -381,7 +397,6 @@ dependencies: - matrix: cuda: "11.2" packages: - - cuda-version=11.2 - cudatoolkit # The NVIDIA channel doesn't publish pkgs older than 11.4 for # these libs, so 11.2 uses 11.4 packages (the oldest @@ -753,13 +768,3 @@ dependencies: packages: - ipython - openpyxl - notebook_cuda_version: - specific: - - output_types: conda - matrices: - - matrix: {cuda: "12.0"} - packages: - - cuda-version=12.0 - - matrix: {cuda: "11.8"} - packages: - - cuda-version=11.8 From e50fa00aed685395a16d252787a834d308a548bc Mon Sep 17 
00:00:00 2001 From: Shruti Shivakumar Date: Thu, 11 Jan 2024 14:58:50 -0800 Subject: [PATCH 12/23] Expose streams in Parquet reader and writer APIs (#14359) This PR contributes to https://github.com/rapidsai/cudf/issues/13744. -Added stream parameters to public APIs ``` cudf::io::read_parquet cudf::io::write_parquet cudf::io::parquet_chunked_writer cudf::io::chunked_parquet_reader ``` -Added stream gtests Authors: - Shruti Shivakumar (https://github.com/shrshi) Approvers: - Mark Harris (https://github.com/harrism) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/14359 --- cpp/include/cudf/io/parquet.hpp | 16 ++- cpp/src/io/functions.cpp | 27 ++--- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/io/parquet_test.cpp | 138 ++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 16 deletions(-) create mode 100644 cpp/tests/streams/io/parquet_test.cpp diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index ea18da74d5a..dc035db8d39 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -401,6 +401,7 @@ class parquet_reader_options_builder { * @endcode * * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata * @@ -408,6 +409,7 @@ class parquet_reader_options_builder { */ table_with_metadata read_parquet( parquet_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -438,11 +440,13 @@ class chunked_parquet_reader { * @param chunk_read_limit Limit on total number of bytes to be returned per read, * or `0` if there is no limit * @param options The options used to read Parquet file + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ chunked_parquet_reader( std::size_t chunk_read_limit, parquet_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -461,12 +465,14 @@ class chunked_parquet_reader { * @param pass_read_limit Limit on the amount of memory used for reading and decompressing data or * `0` if there is no limit * @param options The options used to read Parquet file + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ chunked_parquet_reader( std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -1163,11 +1169,13 @@ class parquet_writer_options_builder { * @endcode * * @param options Settings for 
controlling writing behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @return A blob that contains the file metadata (parquet FileMetadata thrift message) if * requested in parquet_writer_options (empty blob otherwise). */ -std::unique_ptr> write_parquet(parquet_writer_options const& options); +std::unique_ptr> write_parquet( + parquet_writer_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Merges multiple raw metadata blobs that were previously created by write_parquet @@ -1778,8 +1786,10 @@ class parquet_chunked_writer { * @brief Constructor with chunked writer options * * @param[in] options options used to write table + * @param[in] stream CUDA stream used for device memory operations and kernel launches */ - parquet_chunked_writer(chunked_parquet_writer_options const& options); + parquet_chunked_writer(chunked_parquet_writer_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Writes table to output. 
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index a9049d5640e..e5489963618 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -488,13 +488,14 @@ using namespace cudf::io::parquet::detail; namespace detail_parquet = cudf::io::parquet::detail; table_with_metadata read_parquet(parquet_reader_options const& options, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); auto datasources = make_datasources(options.get_source()); - auto reader = std::make_unique( - std::move(datasources), options, cudf::get_default_stream(), mr); + auto reader = + std::make_unique(std::move(datasources), options, stream, mr); return reader->read(options); } @@ -554,7 +555,8 @@ table_input_metadata::table_input_metadata(table_metadata const& metadata) /** * @copydoc cudf::io::write_parquet */ -std::unique_ptr> write_parquet(parquet_writer_options const& options) +std::unique_ptr> write_parquet(parquet_writer_options const& options, + rmm::cuda_stream_view stream) { namespace io_detail = cudf::io::detail; @@ -562,7 +564,7 @@ std::unique_ptr> write_parquet(parquet_writer_options const auto sinks = make_datasinks(options.get_sink()); auto writer = std::make_unique( - std::move(sinks), options, io_detail::single_write_mode::YES, cudf::get_default_stream()); + std::move(sinks), options, io_detail::single_write_mode::YES, stream); writer->write(options.get_table(), options.get_partitions()); @@ -574,13 +576,10 @@ std::unique_ptr> write_parquet(parquet_writer_options const */ chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit, parquet_reader_options const& options, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : reader{std::make_unique(chunk_read_limit, - 0, - make_datasources(options.get_source()), - options, - cudf::get_default_stream(), - mr)} + : reader{std::make_unique( + chunk_read_limit, 0, make_datasources(options.get_source()), options, stream, mr)} { } @@ 
-590,12 +589,13 @@ chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit, chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const& options, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : reader{std::make_unique(chunk_read_limit, pass_read_limit, make_datasources(options.get_source()), options, - cudf::get_default_stream(), + stream, mr)} { } @@ -628,14 +628,15 @@ table_with_metadata chunked_parquet_reader::read_chunk() const /** * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer */ -parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options const& options) +parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options const& options, + rmm::cuda_stream_view stream) { namespace io_detail = cudf::io::detail; auto sinks = make_datasinks(options.get_sink()); writer = std::make_unique( - std::move(sinks), options, io_detail::single_write_mode::NO, cudf::get_default_stream()); + std::move(sinks), options, io_detail::single_write_mode::NO, stream); } /** diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 48bc4ac6fc1..f7b805b68f5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -655,6 +655,7 @@ ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_PARQUETIO_TEST streams/io/parquet_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_POOL_TEST streams/pool_test.cu STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) diff --git 
a/cpp/tests/streams/io/parquet_test.cpp b/cpp/tests/streams/io/parquet_test.cpp new file mode 100644 index 00000000000..c6d531bc376 --- /dev/null +++ b/cpp/tests/streams/io/parquet_test.cpp @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +// Global environment for temporary files +auto const temp_env = static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + +class ParquetTest : public cudf::test::BaseFixture {}; + +template +std::vector> make_uniqueptrs_vector(UniqPtrs&&... 
uniqptrs) +{ + std::vector> ptrsvec; + (ptrsvec.push_back(std::forward(uniqptrs)), ...); + return ptrsvec; +} + +cudf::table construct_table() +{ + constexpr auto num_rows = 10; + + std::vector zeros(num_rows, 0); + std::vector ones(num_rows, 1); + + cudf::test::fixed_width_column_wrapper col0(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col1(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col2(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col3(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col4(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col5(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col6 = [&ones, num_rows] { + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones[i], numeric::scale_type{12}}; + }); + return cudf::test::fixed_width_column_wrapper(col6_data, + col6_data + num_rows); + }(); + cudf::test::fixed_width_column_wrapper col7 = [&ones, num_rows] { + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones[i], numeric::scale_type{-12}}; + }); + return cudf::test::fixed_width_column_wrapper(col7_data, + col7_data + num_rows); + }(); + + cudf::test::lists_column_wrapper col8{ + {1, 1}, {1, 1, 1}, {}, {1}, {1, 1, 1, 1}, {1, 1, 1, 1, 1}, {}, {1, -1}, {}, {-1, -1}}; + + cudf::test::structs_column_wrapper col9 = [&ones] { + cudf::test::fixed_width_column_wrapper child_col(ones.begin(), ones.end()); + return cudf::test::structs_column_wrapper{child_col}; + }(); + + cudf::test::strings_column_wrapper col10 = [] { + std::vector col10_data(num_rows, "rapids"); + return cudf::test::strings_column_wrapper(col10_data.begin(), col10_data.end()); + }(); + + auto colsptr = make_uniqueptrs_vector(col0.release(), + col1.release(), + col2.release(), + col3.release(), + col4.release(), + col5.release(), + 
col6.release(), + col7.release(), + col8.release(), + col9.release(), + col10.release()); + return cudf::table(std::move(colsptr)); +} + +TEST_F(ParquetTest, ParquetWriter) +{ + auto tab = construct_table(); + auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tab); + cudf::io::write_parquet(out_opts, cudf::test::get_default_stream()); +} + +TEST_F(ParquetTest, ParquetReader) +{ + auto tab = construct_table(); + auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tab); + cudf::io::write_parquet(out_opts, cudf::test::get_default_stream()); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts, cudf::test::get_default_stream()); + auto meta = cudf::io::read_parquet_metadata(cudf::io::source_info{filepath}); +} + +TEST_F(ParquetTest, ChunkedOperations) +{ + auto tab = construct_table(); + auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); + cudf::io::chunked_parquet_writer_options out_opts = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(out_opts, cudf::test::get_default_stream()).write(tab); + + auto reader = cudf::io::chunked_parquet_reader( + 1L << 31, + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}), + cudf::test::get_default_stream()); + while (reader.has_next()) { + auto chunk = reader.read_chunk(); + } +} From 9937c7f742ee4b453aa26198f4821095db40e671 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 14:07:49 -1000 Subject: [PATCH 13/23] Remove unnecessary **kwargs in function signatures (#14635) 
Helps makes function signatures stricter to avoid typo inputs being accepted into signatures Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14635 --- python/cudf/cudf/_lib/groupby.pyx | 4 +- python/cudf/cudf/_lib/string_casting.pyx | 36 +++++------- python/cudf/cudf/core/column/categorical.py | 18 ++++-- python/cudf/cudf/core/column/column.py | 59 +++++++++----------- python/cudf/cudf/core/column/datetime.py | 14 +++-- python/cudf/cudf/core/column/decimal.py | 9 ++- python/cudf/cudf/core/column/interval.py | 4 +- python/cudf/cudf/core/column/lists.py | 6 +- python/cudf/cudf/core/column/numerical.py | 10 ++-- python/cudf/cudf/core/column/string.py | 14 ++--- python/cudf/cudf/core/column/timedelta.py | 10 ++-- python/cudf/cudf/core/dataframe.py | 17 +++--- python/cudf/cudf/core/indexed_frame.py | 4 +- python/cudf/cudf/core/series.py | 2 - python/cudf/cudf/core/single_column_frame.py | 4 +- 15 files changed, 104 insertions(+), 107 deletions(-) diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index b3778e45cde..f332fead8d1 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from pandas.core.groupby.groupby import DataError @@ -104,7 +104,7 @@ cdef class GroupBy: cdef unique_ptr[libcudf_groupby.groupby] c_obj cdef dict __dict__ - def __cinit__(self, list keys, bool dropna=True, *args, **kwargs): + def __cinit__(self, list keys, bool dropna=True): cdef libcudf_types.null_policy c_null_handling cdef table_view keys_view diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx index 4b44ac83a70..3826e71f850 100644 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ b/python/cudf/cudf/_lib/string_casting.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cudf._lib.column cimport Column @@ -95,7 +95,7 @@ def dtos(Column input_col): return floating_to_string(input_col) -def stod(Column input_col, **kwargs): +def stod(Column input_col): """ Converting/Casting input column of type string to double @@ -127,7 +127,7 @@ def ftos(Column input_col): return floating_to_string(input_col) -def stof(Column input_col, **kwargs): +def stof(Column input_col): """ Converting/Casting input column of type string to float @@ -188,7 +188,7 @@ def i8tos(Column input_col): return integer_to_string(input_col) -def stoi8(Column input_col, **kwargs): +def stoi8(Column input_col): """ Converting/Casting input column of type string to int8 @@ -284,7 +284,7 @@ def ltos(Column input_col): return integer_to_string(input_col) -def stol(Column input_col, **kwargs): +def stol(Column input_col): """ Converting/Casting input column of type string to int64 @@ -316,7 +316,7 @@ def ui8tos(Column input_col): return integer_to_string(input_col) -def stoui8(Column input_col, **kwargs): +def stoui8(Column input_col): """ Converting/Casting input column of type string to uint8 @@ -348,7 +348,7 @@ def ui16tos(Column input_col): return integer_to_string(input_col) -def stoui16(Column input_col, **kwargs): +def stoui16(Column input_col): """ Converting/Casting input column of 
type string to uint16 @@ -380,7 +380,7 @@ def uitos(Column input_col): return integer_to_string(input_col) -def stoui(Column input_col, **kwargs): +def stoui(Column input_col): """ Converting/Casting input column of type string to uint32 @@ -412,7 +412,7 @@ def ultos(Column input_col): return integer_to_string(input_col) -def stoul(Column input_col, **kwargs): +def stoul(Column input_col): """ Converting/Casting input column of type string to uint64 @@ -456,7 +456,7 @@ def _to_booleans(Column input_col, object string_true="True"): return Column.from_unique_ptr(move(c_result)) -def to_booleans(Column input_col, **kwargs): +def to_booleans(Column input_col): return _to_booleans(input_col) @@ -631,9 +631,7 @@ def timedelta2int(Column input_col, dtype, format): return Column.from_unique_ptr(move(c_result)) -def int2timedelta( - Column input_col, - **kwargs): +def int2timedelta(Column input_col, str format): """ Converting/Casting input Timedelta column to string column with specified format @@ -649,8 +647,7 @@ def int2timedelta( """ cdef column_view input_column_view = input_col.view() - cdef string c_duration_format = kwargs.get( - 'format', "%D days %H:%M:%S").encode('UTF-8') + cdef string c_duration_format = format.encode('UTF-8') cdef unique_ptr[column] c_result with nogil: c_result = move( @@ -661,7 +658,7 @@ def int2timedelta( return Column.from_unique_ptr(move(c_result)) -def int2ip(Column input_col, **kwargs): +def int2ip(Column input_col): """ Converting/Casting integer column to string column in ipv4 format @@ -684,7 +681,7 @@ def int2ip(Column input_col, **kwargs): return Column.from_unique_ptr(move(c_result)) -def ip2int(Column input_col, **kwargs): +def ip2int(Column input_col): """ Converting string ipv4 column to integer column @@ -732,7 +729,6 @@ def htoi(Column input_col, **kwargs): Parameters ---------- input_col : input column of type string - out_type : The type of integer column expected Returns ------- @@ -742,9 +738,7 @@ def htoi(Column 
input_col, **kwargs): cdef column_view input_column_view = input_col.view() cdef type_id tid = ( ( - SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[ - kwargs.get('dtype', cudf.dtype("int64")) - ] + SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[cudf.dtype("int64")] ) ) cdef data_type c_out_type = data_type(tid) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 59fd4631067..71143fa7a95 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1310,22 +1310,28 @@ def as_categorical_column(self, dtype: Dtype) -> CategoricalColumn: new_categories=dtype.categories, ordered=bool(dtype.ordered) ) - def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn: + def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: return self._get_decategorized_column().as_numerical_column(dtype) - def as_string_column(self, dtype, format=None, **kwargs) -> StringColumn: + def as_string_column( + self, dtype, format: str | None = None + ) -> StringColumn: return self._get_decategorized_column().as_string_column( dtype, format=format ) - def as_datetime_column(self, dtype, **kwargs) -> DatetimeColumn: + def as_datetime_column( + self, dtype, format: str | None = None + ) -> DatetimeColumn: return self._get_decategorized_column().as_datetime_column( - dtype, **kwargs + dtype, format ) - def as_timedelta_column(self, dtype, **kwargs) -> TimeDeltaColumn: + def as_timedelta_column( + self, dtype, format: str | None = None + ) -> TimeDeltaColumn: return self._get_decategorized_column().as_timedelta_column( - dtype, **kwargs + dtype, format ) def _get_decategorized_column(self) -> ColumnBase: diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 19e76d4a95b..81579b53bb7 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -978,11 +978,17 @@ def distinct_count(self, dropna: bool = True) -> int: def 
can_cast_safely(self, to_dtype: Dtype) -> bool: raise NotImplementedError() - def astype(self, dtype: Dtype, **kwargs) -> ColumnBase: + def astype( + self, dtype: Dtype, copy: bool = False, format: str | None = None + ) -> ColumnBase: + if copy: + col = self.copy() + else: + col = self if self.dtype == dtype: - return self + return col if is_categorical_dtype(dtype): - return self.as_categorical_column(dtype) + return col.as_categorical_column(dtype) if ( isinstance(dtype, str) @@ -999,9 +1005,9 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase: else: dtype = pandas_dtypes_to_np_dtypes.get(dtype, dtype) if _is_non_decimal_numeric_dtype(dtype): - return self.as_numerical_column(dtype, **kwargs) + return col.as_numerical_column(dtype) elif is_categorical_dtype(dtype): - return self.as_categorical_column(dtype) + return col.as_categorical_column(dtype) elif cudf.dtype(dtype).type in { np.str_, np.object_, @@ -1014,23 +1020,23 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase: f"Casting to {dtype} is not supported, use " "`.astype('str')` instead." 
) - return self.as_string_column(dtype, **kwargs) + return col.as_string_column(dtype, format=format) elif isinstance(dtype, (ListDtype, StructDtype)): - if not self.dtype == dtype: + if not col.dtype == dtype: raise NotImplementedError( f"Casting {self.dtype} columns not currently supported" ) - return self + return col elif isinstance(dtype, IntervalDtype): - return self.as_interval_column(dtype, **kwargs) + return col.as_interval_column(dtype) elif isinstance(dtype, cudf.core.dtypes.DecimalDtype): - return self.as_decimal_column(dtype, **kwargs) + return col.as_decimal_column(dtype) elif np.issubdtype(cast(Any, dtype), np.datetime64): - return self.as_datetime_column(dtype, **kwargs) + return col.as_datetime_column(dtype, format=format) elif np.issubdtype(cast(Any, dtype), np.timedelta64): - return self.as_timedelta_column(dtype, **kwargs) + return col.as_timedelta_column(dtype, format=format) else: - return self.as_numerical_column(dtype, **kwargs) + return col.as_numerical_column(dtype) def as_categorical_column(self, dtype) -> ColumnBase: if isinstance(dtype, (cudf.CategoricalDtype, pd.CategoricalDtype)): @@ -1076,50 +1082,35 @@ def as_categorical_column(self, dtype) -> ColumnBase: ) def as_numerical_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": raise NotImplementedError def as_datetime_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.DatetimeColumn": raise NotImplementedError def as_interval_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.IntervalColumn": raise NotImplementedError def as_timedelta_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.TimeDeltaColumn": raise NotImplementedError def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.StringColumn": raise 
NotImplementedError def as_decimal_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> Union["cudf.core.column.decimal.DecimalBaseColumn"]: raise NotImplementedError - def as_decimal128_column( - self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal128Column": - raise NotImplementedError - - def as_decimal64_column( - self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal64Column": - raise NotImplementedError - - def as_decimal32_column( - self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal32Column": - raise NotImplementedError - def apply_boolean_mask(self, mask) -> ColumnBase: mask = as_column(mask) if not is_bool_dtype(mask.dtype): diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 2b44b46bb9e..2ab2dd46c53 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -422,21 +422,23 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: ) return output - def as_datetime_column(self, dtype: Dtype, **kwargs) -> DatetimeColumn: + def as_datetime_column( + self, dtype: Dtype, format: str | None = None + ) -> DatetimeColumn: dtype = cudf.dtype(dtype) if dtype == self.dtype: return self return libcudf.unary.cast(self, dtype=dtype) def as_timedelta_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.TimeDeltaColumn": raise TypeError( f"cannot astype a datetimelike from {self.dtype} to {dtype}" ) def as_numerical_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": col = column.build_column( data=self.base_data, @@ -448,7 +450,7 @@ def as_numerical_column( return cast("cudf.core.column.NumericalColumn", col.astype(dtype)) def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.StringColumn": if format is None: format = _dtype_to_format_conversion.get( 
@@ -725,9 +727,9 @@ def _local_time(self): return utc_to_local(self, str(self.dtype.tz)) def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.StringColumn": - return self._local_time.as_string_column(dtype, format, **kwargs) + return self._local_time.as_string_column(dtype, format) def get_dt_field(self, field: str) -> ColumnBase: return libcudf.datetime.extract_datetime_component( diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 299875f0091..0e90b522f2c 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -1,5 +1,7 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. +from __future__ import annotations + import warnings from decimal import Decimal from typing import Any, Optional, Sequence, Union, cast @@ -37,7 +39,8 @@ class DecimalBaseColumn(NumericalBaseColumn): _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS def as_decimal_column( - self, dtype: Dtype, **kwargs + self, + dtype: Dtype, ) -> Union["DecimalBaseColumn"]: if ( isinstance(dtype, cudf.core.dtypes.DecimalDtype) @@ -53,7 +56,7 @@ def as_decimal_column( return libcudf.unary.cast(self, dtype) def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.StringColumn": if len(self) > 0: return cpp_from_decimal(self) @@ -201,7 +204,7 @@ def _decimal_quantile( return result._with_type_metadata(self.dtype) def as_numerical_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": return libcudf.unary.cast(self, dtype) diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index eed7bba3628..81059717b20 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA 
CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. from typing import Optional import pandas as pd @@ -99,7 +99,7 @@ def copy(self, deep=True): closed=closed, ) - def as_interval_column(self, dtype, **kwargs): + def as_interval_column(self, dtype): if isinstance(dtype, IntervalDtype): if isinstance(self.dtype, CategoricalDtype): new_struct = self._get_decategorized_column() diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index a5653e66513..0cccec6f28a 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -1,4 +1,6 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. + +from __future__ import annotations from functools import cached_property from typing import List, Optional, Sequence, Tuple, Union @@ -243,7 +245,7 @@ def from_sequences( return res def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.StringColumn": """ Create a strings column from a list column diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 148fa252fad..5461d1b13b5 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -340,7 +340,7 @@ def int2ip(self) -> "cudf.core.column.StringColumn": return libcudf.string_casting.int2ip(self) def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.StringColumn": if len(self) > 0: return string._numeric_to_str_typecast_functions[ @@ -353,7 +353,7 @@ def as_string_column( ) def as_datetime_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.DatetimeColumn": return cast( "cudf.core.column.DatetimeColumn", @@ -367,7 +367,7 @@ def as_datetime_column( ) def as_timedelta_column( - self, dtype: Dtype, 
**kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.TimeDeltaColumn": return cast( "cudf.core.column.TimeDeltaColumn", @@ -381,11 +381,11 @@ def as_timedelta_column( ) def as_decimal_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.DecimalBaseColumn": return libcudf.unary.cast(self, dtype) - def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn: + def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: dtype = cudf.dtype(dtype) if dtype == self.dtype: return self diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 06b5ac31ca6..84333fc205a 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5633,7 +5633,7 @@ def __contains__(self, item: ScalarLike) -> bool: ) def as_numerical_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": out_dtype = cudf.api.types.dtype(dtype) string_col = self @@ -5696,14 +5696,13 @@ def _as_datetime_or_timedelta_column(self, dtype, format): return result_col def as_datetime_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.DatetimeColumn": out_dtype = cudf.api.types.dtype(dtype) # infer on host from the first not na element # or return all null column if all values # are null in current column - format = kwargs.get("format", None) if format is None: if self.null_count == len(self): return cast( @@ -5720,19 +5719,20 @@ def as_datetime_column( return self._as_datetime_or_timedelta_column(out_dtype, format) def as_timedelta_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.TimeDeltaColumn": out_dtype = cudf.api.types.dtype(dtype) - format = "%D days %H:%M:%S" + if format is None: + format = "%D days %H:%M:%S" return self._as_datetime_or_timedelta_column(out_dtype, format) def as_decimal_column( - 
self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.DecimalBaseColumn": return libstrings.to_decimal(self, dtype) def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> StringColumn: return self diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 2f842130f48..6038a1a1e97 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -288,7 +288,7 @@ def fillna( return super().fillna(fill_value, method) def as_numerical_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": col = column.build_column( data=self.base_data, @@ -300,14 +300,14 @@ def as_numerical_column( return cast("cudf.core.column.NumericalColumn", col.astype(dtype)) def as_datetime_column( - self, dtype: Dtype, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.DatetimeColumn": raise TypeError( f"cannot astype a timedelta from {self.dtype} to {dtype}" ) def as_string_column( - self, dtype: Dtype, format=None, **kwargs + self, dtype: Dtype, format: str | None = None ) -> "cudf.core.column.StringColumn": if format is None: format = _dtype_to_format_conversion.get( @@ -323,7 +323,9 @@ def as_string_column( column.column_empty(0, dtype="object", masked=False), ) - def as_timedelta_column(self, dtype: Dtype, **kwargs) -> TimeDeltaColumn: + def as_timedelta_column( + self, dtype: Dtype, format: str | None = None + ) -> TimeDeltaColumn: dtype = cudf.dtype(dtype) if dtype == self.dtype: return self diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 813ecc32069..51b661593fc 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -24,6 +24,7 @@ Set, Tuple, Union, + cast, ) import cupy @@ -1986,8 +1987,6 @@ def _make_operands_and_index_for_binop( fill_value: Any = None, reflect: bool 
= False, can_reindex: bool = False, - *args, - **kwargs, ) -> Tuple[ Union[ Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], @@ -2338,7 +2337,7 @@ def to_dict( @_cudf_nvtx_annotate def scatter_by_map( - self, map_index, map_size=None, keep_index=True, **kwargs + self, map_index, map_size=None, keep_index=True, debug: bool = False ): """Scatter to a list of dataframes. @@ -2379,7 +2378,11 @@ def scatter_by_map( # Convert string or categorical to integer if isinstance(map_index, cudf.core.column.StringColumn): - map_index = map_index.as_categorical_column("category").codes + cat_index = cast( + cudf.core.column.CategoricalColumn, + map_index.as_categorical_column("category"), + ) + map_index = cat_index.codes warnings.warn( "Using StringColumn for map_index in scatter_by_map. " "Use an integer array/column for better performance." @@ -2391,7 +2394,7 @@ def scatter_by_map( "Use an integer array/column for better performance." ) - if kwargs.get("debug", False) == 1 and map_size is not None: + if debug and map_size is not None: count = map_index.distinct_count() if map_size < count: raise ValueError( @@ -2406,7 +2409,7 @@ def scatter_by_map( partitioned = self._from_columns_like_self( partitioned_columns, column_names=self._column_names, - index_names=self._index_names if keep_index else None, + index_names=list(self._index_names) if keep_index else None, ) # due to the split limitation mentioned @@ -2537,7 +2540,7 @@ def items(self): yield (k, self[k]) @_cudf_nvtx_annotate - def equals(self, other, **kwargs): + def equals(self, other): ret = super().equals(other) # If all other checks matched, validate names. if ret: diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index ab089ceb103..5955e21fea0 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
"""Base class for Frame types that have an index.""" from __future__ import annotations @@ -3612,8 +3612,6 @@ def _make_operands_and_index_for_binop( fill_value: Any = None, reflect: bool = False, can_reindex: bool = False, - *args, - **kwargs, ) -> Tuple[ Union[ Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 8739a61dd8b..df5a62b384e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1578,8 +1578,6 @@ def _make_operands_and_index_for_binop( fill_value: Any = None, reflect: bool = False, can_reindex: bool = False, - *args, - **kwargs, ) -> Tuple[ Union[ Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 911e7ac905c..b73f756d7dc 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. """Base class for Frame types that only have a single column.""" from __future__ import annotations @@ -310,8 +310,6 @@ def _make_operands_for_binop( other: Any, fill_value: Any = None, reflect: bool = False, - *args, - **kwargs, ) -> Union[ Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], NotImplementedType, From 9e6400b7d1f9d525b7f45e7b56874dc830c02d1f Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Thu, 11 Jan 2024 18:38:07 -0600 Subject: [PATCH 14/23] Describe unpickling expectations when cudf.pandas is enabled (#14693) Adds to the docs the unpickling expectations that were noted in #14692. 
Authors: - Ashwin Srinath (https://github.com/shwina) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/14693 --- docs/cudf/source/cudf_pandas/faq.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/cudf/source/cudf_pandas/faq.md b/docs/cudf/source/cudf_pandas/faq.md index bf9c2b98c2d..bbeaf0a5f00 100644 --- a/docs/cudf/source/cudf_pandas/faq.md +++ b/docs/cudf/source/cudf_pandas/faq.md @@ -113,6 +113,9 @@ There are a few known limitations that you should be aware of: pandas - `cudf.pandas` isn't compatible with directly using `import cudf` and is intended to be used with pandas-based workflows. +- Unpickling objects that were pickled with "regular" pandas will not + work: you must have pickled an object with `cudf.pandas` enabled for + it to be unpickled when `cudf.pandas` is enabled. - Global variables can be accessed but can't be modified during CPU-fallback ```python From 3c55a6e82e1a53581e1efd1b29d9bc0802bb054b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 11 Jan 2024 16:57:26 -0800 Subject: [PATCH 15/23] Fix CMake args (#14746) This was an oversight in #13531 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/14746 --- ci/build_wheel_cudf.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index e79b9a35aa2..cde22bb70d1 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -1,11 +1,11 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
set -euo pipefail package_dir="python/cudf" -export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON" +export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON" ./ci/build_wheel.sh cudf ${package_dir} From 2003ea2e8d2b03fb1b3a3c2f2046893395328fd2 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 11 Jan 2024 20:16:21 -0500 Subject: [PATCH 16/23] Remove usages of rapids-env-update (#14748) Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Bradley Dice (https://github.com/bdice) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/14748 --- ci/build_cpp.sh | 8 ++++++-- ci/build_python.sh | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index f1ad8ee7778..740a6409ccd 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -1,9 +1,13 @@ #!/bin/bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. set -euo pipefail -source rapids-env-update +rapids-configure-conda-channels + +source rapids-configure-sccache + +source rapids-date-string export CMAKE_GENERATOR=Ninja diff --git a/ci/build_python.sh b/ci/build_python.sh index 32fe7b6b3ce..3c2a7761e1a 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -1,9 +1,13 @@ #!/bin/bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
set -euo pipefail -source rapids-env-update +rapids-configure-conda-channels + +source rapids-configure-sccache + +source rapids-date-string export CMAKE_GENERATOR=Ninja From c0a3cd14eabd18ba8cedd3b7dd87cba8b6706719 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 16:13:59 -1000 Subject: [PATCH 17/23] Clean up base column methods (#14725) * Removed the need for a `drop_nan` argument in `Column.dropna` * Removed the need for `Column.as_frame` * Removed the need for `Column.force_deep_copy` Authors: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Michael Wang (https://github.com/isVoid) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14725 --- .../cudf/benchmarks/internal/bench_column.py | 7 ++--- python/cudf/cudf/core/column/categorical.py | 11 ++++---- python/cudf/cudf/core/column/column.py | 28 ++++--------------- python/cudf/cudf/core/column/interval.py | 5 +--- python/cudf/cudf/core/column/numerical.py | 5 ---- python/cudf/cudf/io/dlpack.py | 4 +-- 6 files changed, 18 insertions(+), 42 deletions(-) diff --git a/python/cudf/benchmarks/internal/bench_column.py b/python/cudf/benchmarks/internal/bench_column.py index d4969b39f7f..8da769b7858 100644 --- a/python/cudf/benchmarks/internal/bench_column.py +++ b/python/cudf/benchmarks/internal/bench_column.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
"""Benchmarks of Column methods.""" @@ -18,9 +18,8 @@ def bench_apply_boolean_mask(benchmark, column): @benchmark_with_object(cls="column", dtype="float") -@pytest.mark.parametrize("dropnan", [True, False]) -def bench_dropna(benchmark, column, dropnan): - benchmark(column.dropna, drop_nan=dropnan) +def bench_dropna(benchmark, column): + benchmark(column.dropna) @benchmark_with_object(cls="column", dtype="float") diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 71143fa7a95..eb4220c5895 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -987,15 +987,16 @@ def to_pandas( .fillna(_DEFAULT_CATEGORICAL_VALUE) .values_host ) - if isinstance(col.categories.dtype, IntervalDtype): + cats = col.categories + if cats.dtype.kind in "biuf": + cats = cats.nans_to_nulls().dropna() # type: ignore[attr-defined] + elif not isinstance(cats.dtype, IntervalDtype): # leaving out dropna because it temporarily changes an interval # index into a struct and throws off results. 
# TODO: work on interval index dropna - categories = col.categories.to_pandas() - else: - categories = col.categories.dropna(drop_nan=True).to_pandas() + cats = cats.dropna() data = pd.Categorical.from_codes( - codes, categories=categories, ordered=col.ordered + codes, categories=cats.to_pandas(), ordered=col.ordered ) return pd.Series(data, index=index) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 81579b53bb7..3cf686da7b0 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -109,16 +109,8 @@ class ColumnBase(Column, Serializable, BinaryOperand, Reducible): "min", } - def as_frame(self) -> "cudf.core.frame.Frame": - """ - Converts a Column to Frame - """ - return cudf.core.single_column_frame.SingleColumnFrame( - {None: self.copy(deep=False)} - ) - def data_array_view( - self, *, mode="write" + self, *, mode: Literal["write", "read"] = "write" ) -> "cuda.devicearray.DeviceNDArray": """ View the data as a device array object @@ -155,7 +147,7 @@ def data_array_view( return cuda.as_cuda_array(obj).view(self.dtype) def mask_array_view( - self, *, mode="write" + self, *, mode: Literal["write", "read"] = "write" ) -> "cuda.devicearray.DeviceNDArray": """ View the mask as a device array @@ -291,8 +283,7 @@ def any(self, skipna: bool = True) -> bool: return libcudf.reduce.reduce("any", self, dtype=np.bool_) - def dropna(self, drop_nan: bool = False) -> ColumnBase: - # The drop_nan argument is only used for numerical columns. + def dropna(self) -> ColumnBase: return drop_nulls([self])[0]._with_type_metadata(self.dtype) def to_arrow(self) -> pa.Array: @@ -437,14 +428,6 @@ def nullmask(self) -> Buffer: raise ValueError("Column has no null mask") return self.mask_array_view(mode="read") - def force_deep_copy(self) -> Self: - """ - A method to create deep copy irrespective of whether - `copy-on-write` is enabled. 
- """ - result = libcudf.copying.copy_column(self) - return result._with_type_metadata(self.dtype) - def copy(self, deep: bool = True) -> Self: """ Makes a copy of the Column. @@ -464,7 +447,8 @@ def copy(self, deep: bool = True) -> Self: them. """ if deep: - return self.force_deep_copy() + result = libcudf.copying.copy_column(self) + return result._with_type_metadata(self.dtype) else: return cast( Self, @@ -1069,7 +1053,7 @@ def as_categorical_column(self, dtype) -> ColumnBase: ) # columns include null index in factorization; remove: if self.has_nulls(): - cats = cats.dropna(drop_nan=False) + cats = cats.dropna() min_type = min_unsigned_type(len(cats), 8) if cudf.dtype(min_type).itemsize < labels.dtype.itemsize: labels = labels.astype(min_type) diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 81059717b20..6a7e7729123 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -142,7 +142,4 @@ def element_indexing(self, index: int): result = super().element_indexing(index) if cudf.get_option("mode.pandas_compatible"): return pd.Interval(**result, closed=self._closed) - return { - field: value - for field, value in zip(self.dtype.fields, result.values()) - } + return result diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 5461d1b13b5..0577e0f37ed 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -20,7 +20,6 @@ import cudf from cudf import _lib as libcudf -from cudf._lib.stream_compaction import drop_nulls from cudf._lib.types import size_type_dtype from cudf._typing import ( ColumnBinaryOperand, @@ -421,10 +420,6 @@ def nan_count(self) -> int: self._nan_count = nan_col.sum() return self._nan_count - def dropna(self, drop_nan: bool = False) -> NumericalColumn: - col = self.nans_to_nulls() if drop_nan else self - return drop_nulls([col])[0] - def 
_process_values_for_isin( self, values: Sequence ) -> Tuple[ColumnBase, ColumnBase]: diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py index e1950c9f250..bed376e4a79 100644 --- a/python/cudf/cudf/io/dlpack.py +++ b/python/cudf/cudf/io/dlpack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import cudf @@ -71,7 +71,7 @@ def to_dlpack(cudf_obj): if isinstance(cudf_obj, (cudf.DataFrame, cudf.Series, cudf.BaseIndex)): gdf = cudf_obj elif isinstance(cudf_obj, ColumnBase): - gdf = cudf_obj.as_frame() + gdf = cudf.Series._from_data({None: cudf_obj}) else: raise TypeError( f"Input of type {type(cudf_obj)} cannot be converted " From 7a42b8b57923b9515391cfe2c4668380b15ed118 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 16:14:30 -1000 Subject: [PATCH 18/23] Use as_column instead of arange for range like inputs (#14689) 1. Allows range-like inputs in `as_column` to short circuit and not materialize when creating columns 2. 
Avoids diverging column construction logic between `column.arange` and `column.as_column` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14689 --- python/cudf/cudf/core/column/__init__.py | 3 +- python/cudf/cudf/core/column/categorical.py | 12 ++- python/cudf/cudf/core/column/column.py | 99 +++++---------------- python/cudf/cudf/core/dataframe.py | 10 ++- python/cudf/cudf/core/groupby/groupby.py | 10 ++- python/cudf/cudf/core/index.py | 4 +- python/cudf/cudf/core/indexed_frame.py | 18 ++-- python/cudf/cudf/core/join/join.py | 8 +- python/cudf/cudf/core/multiindex.py | 16 ++-- python/cudf/cudf/core/series.py | 9 +- python/cudf/cudf/core/window/rolling.py | 6 +- python/cudf/cudf/tests/test_column.py | 10 +-- 12 files changed, 79 insertions(+), 126 deletions(-) diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index aba4ded4f9d..3dddcae85dc 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
""" isort: skip_file @@ -8,7 +8,6 @@ from cudf.core.column.categorical import CategoricalColumn from cudf.core.column.column import ( ColumnBase, - arange, as_column, build_categorical_column, build_column, diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index eb4220c5895..f52621dc444 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1159,7 +1159,7 @@ def find_and_replace( new_cats_col = new_cats_col.apply_boolean_mask(bmask) new_cats = cudf.DataFrame._from_data( { - "index": cudf.core.column.arange(len(new_cats_col)), + "index": column.as_column(range(len(new_cats_col))), "cats": new_cats_col, } ) @@ -1531,9 +1531,13 @@ def _set_categories( ) out_code_dtype = min_unsigned_type(max_cat_size) - cur_order = column.arange(len(cur_codes)) - old_codes = column.arange(len(cur_cats), dtype=out_code_dtype) - new_codes = column.arange(len(new_cats), dtype=out_code_dtype) + cur_order = column.as_column(range(len(cur_codes))) + old_codes = column.as_column( + range(len(cur_cats)), dtype=out_code_dtype + ) + new_codes = column.as_column( + range(len(new_cats)), dtype=out_code_dtype + ) new_df = cudf.DataFrame._from_data( data={"new_codes": new_codes, "cats": new_cats} diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 3cf686da7b0..c13ec33c51c 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -554,10 +554,8 @@ def slice( ]._with_type_metadata(self.dtype) else: # Need to create a gather map for given slice with stride - gather_map = arange( - start=start, - stop=stop, - step=stride, + gather_map = as_column( + range(start, stop, stride), dtype=cudf.dtype(np.int32), ) return self.take(gather_map) @@ -626,10 +624,8 @@ def _scatter_by_slice( ) # step != 1, create a scatter map with arange - scatter_map = arange( - start=start, - stop=stop, - step=step, + scatter_map = 
as_column( + range(start, stop, step), dtype=cudf.dtype(np.int32), ) @@ -745,7 +741,7 @@ def indices_of( assert len(value) == 1 mask = libcudf.search.contains(value, self) return apply_boolean_mask( - [arange(0, len(self), dtype=size_type_dtype)], mask + [as_column(range(0, len(self)), dtype=size_type_dtype)], mask )[0] def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]: @@ -1379,7 +1375,9 @@ def _return_sentinel_column(): [self], [cats], how="left" ) codes = libcudf.copying.gather( - [arange(len(cats), dtype=dtype)], right_gather_map, nullify=True + [as_column(range(len(cats)), dtype=dtype)], + right_gather_map, + nullify=True, ) del right_gather_map # reorder `codes` so that its values correspond to the @@ -1905,13 +1903,26 @@ def as_column( * Objects exposing ``__array_interface__``(e.g., numpy arrays) * pyarrow array * pandas.Categorical objects + * range objects """ - if isinstance(arbitrary, ColumnBase): + if isinstance(arbitrary, (range, pd.RangeIndex, cudf.RangeIndex)): + column = libcudf.filling.sequence( + len(arbitrary), + as_device_scalar(arbitrary.start, dtype=cudf.dtype("int64")), + as_device_scalar(arbitrary.step, dtype=cudf.dtype("int64")), + ) + if cudf.get_option("default_integer_bitwidth") and dtype is None: + dtype = cudf.dtype( + f'i{cudf.get_option("default_integer_bitwidth")//8}' + ) + if dtype is not None: + column = column.astype(dtype) + return column + elif isinstance(arbitrary, ColumnBase): if dtype is not None: return arbitrary.astype(dtype) else: return arbitrary - elif isinstance(arbitrary, cudf.Series): data = arbitrary._column if dtype is not None: @@ -2614,70 +2625,6 @@ def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: return columns -def arange( - start: Union[int, float], - stop: Optional[Union[int, float]] = None, - step: Union[int, float] = 1, - dtype=None, -) -> cudf.core.column.NumericalColumn: - """ - Returns a column with evenly spaced values within a given interval. 
- - Values are generated within the half-open interval [start, stop). - The first three arguments are mapped like the range built-in function, - i.e. start and step are optional. - - Parameters - ---------- - start : int/float - Start of the interval. - stop : int/float, default is None - Stop of the interval. - step : int/float, default 1 - Step width between each pair of consecutive values. - dtype : default None - Data type specifier. It is inferred from other arguments by default. - - Returns - ------- - cudf.core.column.NumericalColumn - - Examples - -------- - >>> import cudf - >>> col = cudf.core.column.arange(2, 7, 1, dtype='int16') - >>> col - - >>> cudf.Series(col) - 0 2 - 1 3 - 2 4 - 3 5 - 4 6 - dtype: int16 - """ - if stop is None: - stop = start - start = 0 - - if step is None: - step = 1 - - size = len(range(int(start), int(stop), int(step))) - if size == 0: - if dtype is None: - dtype = cudf.dtype("int64") - return cast( - cudf.core.column.NumericalColumn, column_empty(0, dtype=dtype) - ) - - return libcudf.filling.sequence( - size, - as_device_scalar(start, dtype=dtype), - as_device_scalar(step, dtype=dtype), - ) - - def full( size: int, fill_value: ScalarLike, dtype: Optional[Dtype] = None ) -> ColumnBase: diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 51b661593fc..f9cf180ff44 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -342,10 +342,16 @@ def _getitem_tuple_arg(self, arg): tmp_col_name = (tmp_col_name, *extra) cantor_name = (cantor_name, *extra) other_df = DataFrame( - {tmp_col_name: column.arange(len(tmp_arg[0]))}, + { + tmp_col_name: column.as_column( + range(len(tmp_arg[0])) + ) + }, index=as_index(tmp_arg[0]), ) - columns_df[cantor_name] = column.arange(len(columns_df)) + columns_df[cantor_name] = column.as_column( + range(len(columns_df)) + ) df = other_df.join(columns_df, how="inner") # as join is not assigning any names to index, # update it over here 
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 73e6774f5ce..fbd85fd9876 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import copy import itertools @@ -23,7 +23,7 @@ from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType from cudf.api.types import is_bool_dtype, is_float_dtype, is_list_like from cudf.core.abc import Serializable -from cudf.core.column.column import ColumnBase, arange, as_column +from cudf.core.column.column import ColumnBase, as_column from cudf.core.column_accessor import ColumnAccessor from cudf.core.join._join_helpers import _match_join_keys from cudf.core.mixins import Reducible, Scannable @@ -761,7 +761,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool): # subsample the gather map from the full input ordering, # rather than permuting the gather map of the output. _, (ordering,), _ = self._groupby.groups( - [arange(0, len(self.obj))] + [as_column(range(0, len(self.obj)))] ) # Invert permutation from original order to groups on the # subset of entries we want. @@ -2543,7 +2543,9 @@ def _mimic_pandas_order( # result coming back from libcudf has null_count few rows than # the input, so we must produce an ordering from the full # input range. 
- _, (ordering,), _ = self._groupby.groups([arange(0, len(self.obj))]) + _, (ordering,), _ = self._groupby.groups( + [as_column(range(0, len(self.obj)))] + ) if self._dropna and any( c.has_nulls(include_nan=True) > 0 for c in self.grouping._key_columns diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5c33cd09ad1..e012d8e7140 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -286,9 +286,7 @@ def _num_rows(self): @_cudf_nvtx_annotate def _values(self): if len(self) > 0: - return column.arange( - self._start, self._stop, self._step, dtype=self.dtype - ) + return column.as_column(self._range, dtype=self.dtype) else: return column.column_empty(0, masked=False, dtype=self.dtype) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 5955e21fea0..2a35ac0f959 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -182,12 +182,8 @@ def _indices_from_labels(obj, labels): # join is not guaranteed to maintain the index ordering # so we will sort it with its initial ordering which is stored # in column "__" - lhs = cudf.DataFrame( - {"__": cudf.core.column.arange(len(labels))}, index=labels - ) - rhs = cudf.DataFrame( - {"_": cudf.core.column.arange(len(obj))}, index=obj.index - ) + lhs = cudf.DataFrame({"__": as_column(range(len(labels)))}, index=labels) + rhs = cudf.DataFrame({"_": as_column(range(len(obj)))}, index=obj.index) return lhs.join(rhs).sort_values(by=["__", "_"])["_"] @@ -1897,10 +1893,8 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self: if stride != 1: return self._gather( GatherMap.from_column_unchecked( - cudf.core.column.arange( - start, - stop=stop, - step=stride, + as_column( + range(start, stop, stride), dtype=libcudf.types.size_type_dtype, ), len(self), @@ -2541,9 +2535,9 @@ def _align_to_index( # to recover ordering after index alignment. 
sort_col_id = str(uuid4()) if how == "left": - lhs[sort_col_id] = cudf.core.column.arange(len(lhs)) + lhs[sort_col_id] = as_column(range(len(lhs))) elif how == "right": - rhs[sort_col_id] = cudf.core.column.arange(len(rhs)) + rhs[sort_col_id] = as_column(range(len(rhs))) result = lhs.join(rhs, how=how, sort=sort) if how in ("left", "right"): diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 20f5b7989eb..86f0c8465ba 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from __future__ import annotations import itertools @@ -232,7 +232,11 @@ def _gather_maps(self, left_cols, right_cols): key_order = list( itertools.chain.from_iterable( libcudf.copying.gather( - [cudf.core.column.arange(n, dtype=size_type_dtype)], + [ + cudf.core.column.as_column( + range(n), dtype=size_type_dtype + ) + ], map_, nullify=null, ) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 489f0e74dd6..0f323dd5540 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -501,9 +501,9 @@ def __repr__(self): # TODO: Update the following two arange calls to # a single arange call once arange has support for # a vector start/end points. 
- indices = column.arange(start=0, stop=n, step=1) + indices = column.as_column(range(n)) indices = indices.append( - column.arange(start=len(self) - n, stop=len(self), step=1) + column.as_column(range(len(self) - n, len(self), 1)) ) preprocess = self.take(indices) else: @@ -795,7 +795,7 @@ def _compute_validity_mask(self, index, row_tuple, max_length): [ frame, cudf.DataFrame( - {"idx": cudf.Series(column.arange(len(frame)))} + {"idx": cudf.Series(column.as_column(range(len(frame))))} ), ], axis=1, @@ -807,7 +807,7 @@ def _compute_validity_mask(self, index, row_tuple, max_length): # obtain deterministic ordering. if cudf.get_option("mode.pandas_compatible"): lookup_order = "_" + "_".join(map(str, lookup._data.names)) - lookup[lookup_order] = column.arange(len(lookup)) + lookup[lookup_order] = column.as_column(range(len(lookup))) postprocess = operator.methodcaller( "sort_values", by=[lookup_order, "idx"] ) @@ -840,14 +840,16 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): ): stop = row_tuple.stop or max_length start, stop, step = row_tuple.indices(stop) - return column.arange(start, stop, step) + return column.as_column(range(start, stop, step)) start_values = self._compute_validity_mask( index, row_tuple.start, max_length ) stop_values = self._compute_validity_mask( index, row_tuple.stop, max_length ) - return column.arange(start_values.min(), stop_values.max() + 1) + return column.as_column( + range(start_values.min(), stop_values.max() + 1) + ) elif isinstance(row_tuple, numbers.Number): return row_tuple return self._compute_validity_mask(index, row_tuple, max_length) @@ -1024,7 +1026,7 @@ def __getitem__(self, index): index = np.array(index) elif isinstance(index, slice): start, stop, step = index.indices(len(self)) - index = column.arange(start, stop, step) + index = column.as_column(range(start, stop, step)) result = MultiIndex.from_frame( self.to_frame(index=False, name=range(0, self.nlevels)).take( index diff --git 
a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index df5a62b384e..bc1eaef86db 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -55,7 +55,6 @@ DatetimeColumn, IntervalColumn, TimeDeltaColumn, - arange, as_column, full, ) @@ -1366,7 +1365,9 @@ def map(self, arg, na_action=None) -> "Series": raise NotImplementedError( "default values in dicts are currently not supported." ) - lhs = cudf.DataFrame({"x": self, "orig_order": arange(len(self))}) + lhs = cudf.DataFrame( + {"x": self, "orig_order": as_column(range(len(self)))} + ) rhs = cudf.DataFrame( { "x": arg.keys(), @@ -1386,7 +1387,9 @@ def map(self, arg, na_action=None) -> "Series": "Reindexing only valid with" " uniquely valued Index objects" ) - lhs = cudf.DataFrame({"x": self, "orig_order": arange(len(self))}) + lhs = cudf.DataFrame( + {"x": self, "orig_order": as_column(range(len(self)))} + ) rhs = cudf.DataFrame( { "x": arg.keys(), diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 8a92ea86d57..207fb469990 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION +# Copyright (c) 2020-2024, NVIDIA CORPORATION import itertools @@ -235,7 +235,7 @@ def _apply_agg_column(self, source_column, agg_name): start = as_column(start, dtype="int32") end = as_column(end, dtype="int32") - idx = cudf.core.column.arange(len(start)) + idx = as_column(range(len(start))) preceding_window = (idx - start + cudf.Scalar(1, "int32")).astype( "int32" ) @@ -531,7 +531,7 @@ def __init__(self, groupby, window, min_periods=None, center=False): def _window_to_window_sizes(self, window): if is_integer(window): return cudautils.grouped_window_sizes_from_offset( - column.arange(len(self.obj)).data_array_view(mode="read"), + as_column(range(len(self.obj))).data_array_view(mode="read"), self._group_starts, window, ) diff --git 
a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index a4b27ae19ac..3d21994a8d5 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import cupy as cp import numpy as np @@ -8,7 +8,7 @@ import cudf from cudf._lib.transform import mask_to_bools -from cudf.core.column.column import arange, as_column +from cudf.core.column.column import as_column from cudf.testing._utils import assert_eq, assert_exceptions_equal from cudf.utils import dtypes as dtypeutils @@ -552,9 +552,3 @@ def test_astype_with_aliases(alias, expect_dtype, data): gd_data = cudf.Series.from_pandas(pd_data) assert_eq(pd_data.astype(expect_dtype), gd_data.astype(alias)) - - -def test_arange_empty(): - result = arange(0) - assert len(result) == 0 - assert result.dtype == np.dtype(np.int64) From 27b106f832999afa5b3353aaa2adcdb695fb4a47 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 11 Jan 2024 18:32:19 -0800 Subject: [PATCH 19/23] [Java] Choose The Correct RoundingMode For Checking Decimal OutOfBounds (#14731) This PR fixes an error in the `outOfBounds` method in which the `RoundingMode` was selected based on positive values only. 
The RHS should be rounded towards positive infinity (ROUND_CEILING) for the lower bound and towards negative infinity (ROUND_FLOOR) for the upper bound closes #14732 Authors: - Raza Jafri (https://github.com/razajafri) Approvers: - Jason Lowe (https://github.com/jlowe) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/14731 --- .../java/ai/rapids/cudf/DecimalUtils.java | 30 +++++++------- .../java/ai/rapids/cudf/DecimalUtilsTest.java | 40 +++++++++++++++++++ 2 files changed, 55 insertions(+), 15 deletions(-) create mode 100644 java/src/test/java/ai/rapids/cudf/DecimalUtilsTest.java diff --git a/java/src/main/java/ai/rapids/cudf/DecimalUtils.java b/java/src/main/java/ai/rapids/cudf/DecimalUtils.java index 1979bd1bd5b..7a5be9b08b9 100644 --- a/java/src/main/java/ai/rapids/cudf/DecimalUtils.java +++ b/java/src/main/java/ai/rapids/cudf/DecimalUtils.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -82,13 +82,13 @@ public static ColumnVector lessThan(ColumnView lhs, BigDecimal rhs) { int leftScale = lhs.getType().getScale(); int leftPrecision = lhs.getType().getDecimalMaxPrecision(); - // First we have to round the scalar (rhs) to the same scale as lhs. Because this is a - // less than and it is rhs that we are rounding, we will round away from 0 (UP) - // to make sure we always return the correct value. - // For example: - // 100.1 < 100.19 - // If we rounded down the rhs 100.19 would become 100.1, and now 100.1 is not < 100.1 - BigDecimal roundedRhs = rhs.setScale(-leftScale, BigDecimal.ROUND_UP); + // First we have to round the scalar (rhs) to the same scale as lhs. + // For comparing the two values they should be the same scale, we round the value to positive infinity to maintain + // the relation. 
Ex: + // 10.2 < 10.29 = true, after rounding rhs to ceiling ===> 10.2 < 10.3 = true, relation is maintained + // 10.3 < 10.29 = false, after rounding rhs to ceiling ===> 10.3 < 10.3 = false, relation is maintained + // 10.1 < 10.10 = false, after rounding rhs to ceiling ===> 10.1 < 10.1 = false, relation is maintained + BigDecimal roundedRhs = rhs.setScale(-leftScale, BigDecimal.ROUND_CEILING); if (roundedRhs.precision() > leftPrecision) { // converting rhs to the same precision as lhs would result in an overflow/error, but @@ -136,13 +136,13 @@ public static ColumnVector greaterThan(ColumnView lhs, BigDecimal rhs) { int cvScale = lhs.getType().getScale(); int maxPrecision = lhs.getType().getDecimalMaxPrecision(); - // First we have to round the scalar (rhs) to the same scale as lhs. Because this is a - // greater than and it is rhs that we are rounding, we will round towards 0 (DOWN) - // to make sure we always return the correct value. - // For example: - // 100.2 > 100.19 - // If we rounded up the rhs 100.19 would become 100.2, and now 100.2 is not > 100.2 - BigDecimal roundedRhs = rhs.setScale(-cvScale, BigDecimal.ROUND_DOWN); + // First we have to round the scalar (rhs) to the same scale as lhs. + // For comparing the two values they should be the same scale, we round the value to negative infinity to maintain + // the relation. 
Ex: + // 10.3 > 10.29 = true, after rounding rhs to floor ===> 10.3 > 10.2 = true, relation is maintained + // 10.2 > 10.29 = false, after rounding rhs to floor ===> 10.2 > 10.2 = false, relation is maintained + // 10.1 > 10.10 = false, after rounding rhs to floor ===> 10.1 > 10.1 = false, relation is maintained + BigDecimal roundedRhs = rhs.setScale(-cvScale, BigDecimal.ROUND_FLOOR); if (roundedRhs.precision() > maxPrecision) { // converting rhs to the same precision as lhs would result in an overflow/error, but diff --git a/java/src/test/java/ai/rapids/cudf/DecimalUtilsTest.java b/java/src/test/java/ai/rapids/cudf/DecimalUtilsTest.java new file mode 100644 index 00000000000..a96eeda5dd7 --- /dev/null +++ b/java/src/test/java/ai/rapids/cudf/DecimalUtilsTest.java @@ -0,0 +1,40 @@ +/* + * + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package ai.rapids.cudf; + +import org.junit.jupiter.api.Test; + +import java.math.BigDecimal; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; + +public class DecimalUtilsTest extends CudfTestBase { + @Test + public void testOutOfBounds() { + try (ColumnView cv = ColumnVector.fromDecimals( + new BigDecimal("-1E+3"), + new BigDecimal("1E+3"), + new BigDecimal("9E+1"), + new BigDecimal("-9E+1"), + new BigDecimal("-91")); + ColumnView expected = ColumnVector.fromBooleans(true, true, false, false, true); + ColumnView result = DecimalUtils.outOfBounds(cv, 1, -1)) { + assertColumnsAreEqual(expected, result); + } + } +} From 5c78b7ea6b75f503d5df4abc828d80a0b470a284 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 12 Jan 2024 08:49:20 +0000 Subject: [PATCH 20/23] Fix logic bug introduced in #14730 (#14742) The removal of `valid_count` on columns in #14730 had one logic bug, fixed here. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14742 --- python/cudf/cudf/core/column/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index f52621dc444..6b3ee0ba852 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1380,7 +1380,7 @@ def _concat( # Find the first non-null column: head = next( - (obj for obj in objs if not obj.null_count != len(obj)), objs[0] + (obj for obj in objs if obj.null_count != len(obj)), objs[0] ) # Combine and de-dupe the categories From 7ca988f207730a3ae936e90d0104c4e6a14749ff Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Fri, 12 Jan 2024 12:22:58 -0600 Subject: [PATCH 21/23] Fix ``Groupby.get_group`` (#14728) Closes https://github.com/rapidsai/cudf/issues/14727 Authors: - 
Richard (Rick) Zamora (https://github.com/rjzamora) - Charles Blackmon-Luca (https://github.com/charlesbluca) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: https://github.com/rapidsai/cudf/pull/14728 --- python/cudf/cudf/core/groupby/groupby.py | 2 +- python/cudf/cudf/tests/groupby/test_indexing.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index fbd85fd9876..4e8947652ff 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -377,7 +377,7 @@ def get_group(self, name, obj=None): if obj is None: obj = self.obj - return obj.loc[self.groups[name]] + return obj.loc[self.groups[name].drop_duplicates()] @_cudf_nvtx_annotate def size(self): diff --git a/python/cudf/cudf/tests/groupby/test_indexing.py b/python/cudf/cudf/tests/groupby/test_indexing.py index 06777c8e6af..57e8bc1c2d8 100644 --- a/python/cudf/cudf/tests/groupby/test_indexing.py +++ b/python/cudf/cudf/tests/groupby/test_indexing.py @@ -1 +1,12 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +import cudf +from cudf.testing._utils import assert_eq + + +def test_rank_return_type_compatible_mode(): + # in compatible mode, rank() always returns floats + df = cudf.DataFrame({"a": range(10), "b": [0] * 10}, index=[0] * 10) + pdf = df.to_pandas() + expect = pdf.groupby("b").get_group(0) + result = df.groupby("b").get_group(0) + assert_eq(expect, result) From 07103355fea0fb3fd0e1115019bbac7d65bb132f Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 15 Jan 2024 09:33:11 +0100 Subject: [PATCH 22/23] Refactoring of Buffers (last step towards unifying COW and Spilling) (#13801) This PR de-couples buffer slices/views from owning buffers. 
As it is now, all buffer classes (`ExposureTrackedBuffer`, `BufferSlice`, `SpillableBuffer`, `SpillableBufferSlice`) inherent from `Buffer`, however they are not Liskov substitutable as pointed by @wence- and @vyasr ([here](https://github.com/rapidsai/cudf/pull/13307#discussion_r1189748484) and [here](https://github.com/rapidsai/cudf/pull/13307#discussion_r1239014158)). To fix this, we now have a `Buffer` and a `BufferOwner` class. We still use the `Buffer` throughout cuDF but it now points to an `BufferOwner`. We have the following class hierarchy: ``` ExposureTrackedBufferOwner -> BufferOwner SpillableBufferOwner -> BufferOwner ExposureTrackedBuffer -> Buffer SpillableBuffer -> Buffer ``` With the following relationship: ``` Buffer -> BufferOwner ExposureTrackedBuffer -> ExposureTrackedBufferOwner SpillableBuffer -> SpillableBufferOwner ``` #### Unify COW and Spilling In a follow-up PR, the spilling buffer classes will inherent from the exposure tracked buffer classes so we get the following hierarchy: ``` SpillableBufferOwner -> ExposureTrackedBufferOwner -> BufferOwner SpillableBuffer -> ExposureTrackedBuffer -> Buffer ``` Authors: - Mads R. B. 
Kristensen (https://github.com/madsbk) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/13801 --- .../source/developer_guide/library_design.md | 14 +- python/cudf/cudf/core/abc.py | 10 +- python/cudf/cudf/core/buffer/__init__.py | 8 +- python/cudf/cudf/core/buffer/buffer.py | 285 ++++++++++----- .../core/buffer/exposure_tracked_buffer.py | 261 ++------------ python/cudf/cudf/core/buffer/spill_manager.py | 14 +- .../cudf/cudf/core/buffer/spillable_buffer.py | 325 +++++------------- python/cudf/cudf/core/buffer/utils.py | 105 +++++- python/cudf/cudf/tests/test_buffer.py | 13 +- python/cudf/cudf/tests/test_copying.py | 15 +- python/cudf/cudf/tests/test_spilling.py | 31 +- 11 files changed, 483 insertions(+), 598 deletions(-) diff --git a/docs/cudf/source/developer_guide/library_design.md b/docs/cudf/source/developer_guide/library_design.md index 016c2c1d281..0b37de00f6b 100644 --- a/docs/cudf/source/developer_guide/library_design.md +++ b/docs/cudf/source/developer_guide/library_design.md @@ -325,26 +325,26 @@ This section describes the internal implementation details of the copy-on-write It is recommended that developers familiarize themselves with [the user-facing documentation](copy-on-write-user-doc) of this functionality before reading through the internals below. -The core copy-on-write implementation relies on the factory function `as_exposure_tracked_buffer` and the two classes `ExposureTrackedBuffer` and `BufferSlice`. +The core copy-on-write implementation relies on `ExposureTrackedBuffer` and the tracking features of `BufferOwner`. -An `ExposureTrackedBuffer` is a subclass of the regular `Buffer` that tracks internal and external references to its underlying memory. Internal references are tracked by maintaining [weak references](https://docs.python.org/3/library/weakref.html) to every `BufferSlice` of the underlying memory. 
External references are tracked through "exposure" status of the underlying memory. A buffer is considered exposed if the device pointer (integer or void*) has been handed out to a library outside of cudf. In this case, we have no way of knowing if the data are being modified by a third party. +`BufferOwner` tracks internal and external references to its underlying memory. Internal references are tracked by maintaining [weak references](https://docs.python.org/3/library/weakref.html) to every `ExposureTrackedBuffer` of the underlying memory. External references are tracked through "exposure" status of the underlying memory. A buffer is considered exposed if the device pointer (integer or void*) has been handed out to a library outside of cudf. In this case, we have no way of knowing if the data are being modified by a third party. -`BufferSlice` is a subclass of `ExposureTrackedBuffer` that represents a _slice_ of the memory underlying a exposure tracked buffer. +`ExposureTrackedBuffer` is a subclass of `Buffer` that represents a _slice_ of the memory underlying an exposure tracked buffer. -When the cudf option `"copy_on_write"` is `True`, `as_buffer` calls `as_exposure_tracked_buffer`, which always returns a `BufferSlice`. It is then the slices that determine whether or not to make a copy when a write operation is performed on a `Column` (see below). If multiple slices point to the same underlying memory, then a copy must be made whenever a modification is attempted. +When the cudf option `"copy_on_write"` is `True`, `as_buffer` returns a `ExposureTrackedBuffer`. It is this class that determines whether or not to make a copy when a write operation is performed on a `Column` (see below). If multiple slices point to the same underlying memory, then a copy must be made whenever a modification is attempted. 
### Eager copies when exposing to third-party libraries -If a `Column`/`BufferSlice` is exposed to a third-party library via `__cuda_array_interface__`, we are no longer able to track whether or not modification of the buffer has occurred. Hence whenever +If a `Column`/`ExposureTrackedBuffer` is exposed to a third-party library via `__cuda_array_interface__`, we are no longer able to track whether or not modification of the buffer has occurred. Hence whenever someone accesses data through the `__cuda_array_interface__`, we eagerly trigger the copy by calling -`.make_single_owner_inplace` which ensures a true copy of underlying data is made and that the slice is the sole owner. Any future copy requests must also trigger a true physical copy (since we cannot track the lifetime of the third-party object). To handle this we also mark the `Column`/`BufferSlice` as exposed thus indicating that any future shallow-copy requests will trigger a true physical copy rather than a copy-on-write shallow copy. +`.make_single_owner_inplace` which ensures a true copy of underlying data is made and that the slice is the sole owner. Any future copy requests must also trigger a true physical copy (since we cannot track the lifetime of the third-party object). To handle this we also mark the `Column`/`ExposureTrackedBuffer` as exposed thus indicating that any future shallow-copy requests will trigger a true physical copy rather than a copy-on-write shallow copy. ### Obtaining a read-only object A read-only object can be quite useful for operations that will not mutate the data. This can be achieved by calling `.get_ptr(mode="read")`, and using `cuda_array_interface_wrapper` to wrap a `__cuda_array_interface__` object around it. -This will not trigger a deep copy even if multiple `BufferSlice` points to the same `ExposureTrackedBuffer`. This API should only be used when the lifetime of the proxy object is restricted to cudf's internal code execution. 
Handing this out to external libraries or user-facing APIs will lead to untracked references and undefined copy-on-write behavior. We currently use this API for device to host +This will not trigger a deep copy even if multiple `ExposureTrackedBuffer`s point to the same `ExposureTrackedBufferOwner`. This API should only be used when the lifetime of the proxy object is restricted to cudf's internal code execution. Handing this out to external libraries or user-facing APIs will lead to untracked references and undefined copy-on-write behavior. We currently use this API for device to host copies like in `ColumnBase.data_array_view(mode="read")` which is used for `Column.values_host`. diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py index adf9fe39e4f..ce6bb83bc77 100644 --- a/python/cudf/cudf/core/abc.py +++ b/python/cudf/cudf/core/abc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. """Common abstract base classes for cudf.""" import pickle @@ -89,7 +89,13 @@ def device_serialize(self): """ header, frames = self.serialize() assert all( - isinstance(f, (cudf.core.buffer.Buffer, memoryview)) + isinstance( + f, + ( + cudf.core.buffer.Buffer, + memoryview, + ), + ) for f in frames ) header["type-serialized"] = pickle.dumps(type(self)) diff --git a/python/cudf/cudf/core/buffer/__init__.py b/python/cudf/cudf/core/buffer/__init__.py index d8883bd97e5..9b9774c12be 100644 --- a/python/cudf/cudf/core/buffer/__init__.py +++ b/python/cudf/cudf/core/buffer/__init__.py @@ -1,6 +1,10 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
-from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper +from cudf.core.buffer.buffer import ( + Buffer, + BufferOwner, + cuda_array_interface_wrapper, +) from cudf.core.buffer.exposure_tracked_buffer import ExposureTrackedBuffer from cudf.core.buffer.spillable_buffer import SpillableBuffer, SpillLock from cudf.core.buffer.utils import ( diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index 59d20a2784d..8d278c9c065 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -1,9 +1,10 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from __future__ import annotations import math import pickle +import weakref from types import SimpleNamespace from typing import Any, Dict, Literal, Mapping, Optional, Sequence, Tuple @@ -90,15 +91,31 @@ def cuda_array_interface_wrapper( ) -class Buffer(Serializable): - """A Buffer represents device memory. +class BufferOwner(Serializable): + """An owning buffer that represents device memory. - Use the factory function `as_buffer` to create a Buffer instance. + This class isn't meant to be used throughout cuDF. Instead, it + standardizes data owning by wrapping any data object that + represents device memory. Multiple `Buffer` instances, which are + the ones used throughout cuDF, can then refer to the same + `BufferOwner` instance. + + In order to implement copy-on-write and spillable buffers, we need the + ability to detect external access to the underlying memory. We say that + the buffer has been exposed if the device pointer (integer or void*) has + been accessed outside of BufferOwner. In this case, we have no control + over knowing if the data is being modified by a third party. + + Use `_from_device_memory` and `_from_host_memory` to create + a new instance from either device or host memory respectively. 
""" _ptr: int _size: int _owner: object + _exposed: bool + # The set of buffers that point to this owner. + _slices: weakref.WeakSet[Buffer] def __init__(self): raise ValueError( @@ -107,8 +124,8 @@ def __init__(self): ) @classmethod - def _from_device_memory(cls, data: Any) -> Self: - """Create a Buffer from an object exposing `__cuda_array_interface__`. + def _from_device_memory(cls, data: Any, exposed: bool) -> Self: + """Create from an object providing a `__cuda_array_interface__`. No data is being copied. @@ -116,16 +133,29 @@ def _from_device_memory(cls, data: Any) -> Self: ---------- data : device-buffer-like An object implementing the CUDA Array Interface. + exposed : bool + Mark the buffer as permanently exposed. This is used by + ExposureTrackedBuffer to determine when a deep copy is required + and by SpillableBuffer to mark the buffer unspillable. Returns ------- - Buffer - Buffer representing the same device memory as `data` + BufferOwner + BufferOwner wrapping `data` + + Raises + ------ + AttributeError + If data does not support the cuda array interface + ValueError + If the resulting buffer has negative size """ # Bypass `__init__` and initialize attributes manually ret = cls.__new__(cls) ret._owner = data + ret._exposed = exposed + ret._slices = weakref.WeakSet() if isinstance(data, rmm.DeviceBuffer): # Common case shortcut ret._ptr = data.ptr ret._size = data.size @@ -139,7 +169,7 @@ def _from_device_memory(cls, data: Any) -> Self: @classmethod def _from_host_memory(cls, data: Any) -> Self: - """Create a Buffer from a buffer or array like object + """Create an owner from a buffer or array like object Data must implement `__array_interface__`, the buffer protocol, and/or be convertible to a buffer object using `numpy.array()` @@ -155,8 +185,8 @@ def _from_host_memory(cls, data: Any) -> Self: Returns ------- - Buffer - Buffer representing a copy of `data`. + BufferOwner + BufferOwner wrapping a device copy of `data`. 
""" # Convert to numpy array, this will not copy data in most cases. @@ -166,54 +196,7 @@ def _from_host_memory(cls, data: Any) -> Self: # Copy to device memory buf = rmm.DeviceBuffer(ptr=ptr, size=size) # Create from device memory - return cls._from_device_memory(buf) - - def _getitem(self, offset: int, size: int) -> Self: - """ - Sub-classes can overwrite this to implement __getitem__ - without having to handle non-slice inputs. - """ - return self._from_device_memory( - cuda_array_interface_wrapper( - ptr=self.get_ptr(mode="read") + offset, - size=size, - owner=self.owner, - ) - ) - - def __getitem__(self, key: slice) -> Self: - """Create a new slice of the buffer.""" - if not isinstance(key, slice): - raise TypeError( - "Argument 'key' has incorrect type " - f"(expected slice, got {key.__class__.__name__})" - ) - start, stop, step = key.indices(self.size) - if step != 1: - raise ValueError("slice must be C-contiguous") - return self._getitem(offset=start, size=stop - start) - - def copy(self, deep: bool = True) -> Self: - """ - Return a copy of Buffer. - - Parameters - ---------- - deep : bool, default True - If True, returns a deep copy of the underlying Buffer data. - If False, returns a shallow copy of the Buffer pointing to - the same underlying data. 
- - Returns - ------- - Buffer - """ - if deep: - return self._from_device_memory( - rmm.DeviceBuffer(ptr=self.get_ptr(mode="read"), size=self.size) - ) - else: - return self[:] + return cls._from_device_memory(buf, exposed=False) @property def size(self) -> int: @@ -226,20 +209,29 @@ def nbytes(self) -> int: return self._size @property - def owner(self) -> Any: + def owner(self) -> object: """Object owning the memory of the buffer.""" return self._owner @property - def __cuda_array_interface__(self) -> Mapping: - """Implementation of the CUDA Array Interface.""" - return { - "data": (self.get_ptr(mode="write"), False), - "shape": (self.size,), - "strides": None, - "typestr": "|u1", - "version": 0, - } + def exposed(self) -> bool: + """The current exposure status of the buffer + + This is used by ExposureTrackedBuffer to determine when a deep copy + is required and by SpillableBuffer to mark the buffer unspillable. + """ + return self._exposed + + def mark_exposed(self) -> None: + """Mark the buffer as "exposed" permanently + + This is used by ExposureTrackedBuffer to determine when a deep copy + is required and by SpillableBuffer to mark the buffer unspillable. + + Notice, once the exposure status becomes True, it will never change + back. + """ + self._exposed = True def get_ptr(self, *, mode: Literal["read", "write"]) -> int: """Device pointer to the start of the buffer. @@ -277,20 +269,148 @@ def memoryview( ) return memoryview(host_buf).toreadonly() + def __str__(self) -> str: + return ( + f"<{self.__class__.__name__} size={format_bytes(self._size)} " + f"ptr={hex(self._ptr)} owner={self._owner!r}>" + ) + + +class Buffer(Serializable): + """A buffer that represents a slice or view of a `BufferOwner`. + + Use the factory function `as_buffer` to create a Buffer instance. + + Note + ---- + This buffer is untyped, so all indexing and sizes are in bytes. + + Parameters + ---------- + owner + The owning exposure buffer this refers to. 
+ offset + The offset relative to the start memory of owner (in bytes). + size + The size of the buffer (in bytes). If None, use the size of owner. + """ + + def __init__( + self, + *, + owner: BufferOwner, + offset: int = 0, + size: Optional[int] = None, + ) -> None: + size = owner.size if size is None else size + if size < 0: + raise ValueError("size cannot be negative") + if offset < 0: + raise ValueError("offset cannot be negative") + if offset + size > owner.size: + raise ValueError( + "offset+size cannot be greater than the size of owner" + ) + self._owner = owner + self._offset = offset + self._size = size + + @property + def size(self) -> int: + """Size of the buffer in bytes.""" + return self._size + + @property + def nbytes(self) -> int: + """Size of the buffer in bytes.""" + return self._size + + @property + def owner(self) -> BufferOwner: + """Object owning the memory of the buffer.""" + return self._owner + + def __getitem__(self, key: slice) -> Self: + """Create a new slice of the buffer.""" + if not isinstance(key, slice): + raise TypeError( + "Argument 'key' has incorrect type " + f"(expected slice, got {key.__class__.__name__})" + ) + start, stop, step = key.indices(self.size) + if step != 1: + raise ValueError("slice must be C-contiguous") + return self.__class__( + owner=self._owner, offset=self._offset + start, size=stop - start + ) + + def get_ptr(self, *, mode: Literal["read", "write"]) -> int: + return self._owner.get_ptr(mode=mode) + self._offset + + def memoryview(self) -> memoryview: + return self._owner.memoryview(offset=self._offset, size=self._size) + + def copy(self, deep: bool = True) -> Self: + """Return a copy of Buffer. + + Parameters + ---------- + deep : bool, default True + - If deep=True, returns a deep copy of the underlying data. + - If deep=False, returns a new `Buffer` instance that refers + to the same `BufferOwner` as this one. Thus, no device + data are being copied. 
+ + Returns + ------- + Buffer + A new buffer that either refers to either a new or an existing + `BufferOwner` depending on the `deep` argument (see above). + """ + + # When doing a shallow copy, we just return a new slice + if not deep: + return self.__class__( + owner=self._owner, offset=self._offset, size=self._size + ) + + # Otherwise, we create a new copy of the memory + owner = self._owner._from_device_memory( + rmm.DeviceBuffer( + ptr=self._owner.get_ptr(mode="read") + self._offset, + size=self.size, + ), + exposed=False, + ) + return self.__class__(owner=owner, offset=0, size=owner.size) + + @property + def __cuda_array_interface__(self) -> Mapping: + """Implementation of the CUDA Array Interface.""" + return { + "data": (self.get_ptr(mode="write"), False), + "shape": (self.size,), + "strides": None, + "typestr": "|u1", + "version": 0, + } + def serialize(self) -> Tuple[dict, list]: """Serialize the buffer into header and frames. - The frames can be a mixture of memoryview and Buffer objects. + The frames can be a mixture of memoryview, Buffer, and BufferOwner + objects. Returns ------- Tuple[dict, List] The first element of the returned tuple is a dict containing any serializable metadata required to reconstruct the object. The - second element is a list containing Buffers and memoryviews. + second element is a list containing single frame. 
""" header: Dict[str, Any] = {} header["type-serialized"] = pickle.dumps(type(self)) + header["owner-type-serialized"] = pickle.dumps(type(self._owner)) header["frame_count"] = 1 frames = [self] return header, frames @@ -317,16 +437,27 @@ def deserialize(cls, header: dict, frames: list) -> Self: if isinstance(frame, cls): return frame # The frame is already deserialized + owner_type: BufferOwner = pickle.loads(header["owner-type-serialized"]) if hasattr(frame, "__cuda_array_interface__"): - return cls._from_device_memory(frame) - return cls._from_host_memory(frame) + owner = owner_type._from_device_memory(frame, exposed=False) + else: + owner = owner_type._from_host_memory(frame) + return cls( + owner=owner, + offset=0, + size=owner.size, + ) def __repr__(self) -> str: - klass = self.__class__ - name = f"{klass.__module__}.{klass.__qualname__}" return ( - f"<{name} size={format_bytes(self._size)} " - f"ptr={hex(self._ptr)} owner={repr(self._owner)}>" + f"{self.__class__.__name__}(owner={self._owner!r}, " + f"offset={self._offset!r}, size={self._size!r})" + ) + + def __str__(self) -> str: + return ( + f"<{self.__class__.__name__} size={format_bytes(self._size)} " + f"offset={format_bytes(self._offset)} of {self._owner}>" ) diff --git a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py index f2ac6301944..4c08016adbb 100644 --- a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py +++ b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py @@ -1,241 +1,47 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from __future__ import annotations -import weakref -from typing import ( - Any, - Container, - Literal, - Mapping, - Optional, - Type, - TypeVar, - cast, -) +from typing import Literal, Mapping, Optional from typing_extensions import Self import cudf -from cudf.core.buffer.buffer import Buffer, get_ptr_and_size -from cudf.utils.string import format_bytes - -T = TypeVar("T", bound="ExposureTrackedBuffer") - - -def get_owner(data, klass: Type[T]) -> Optional[T]: - """Get the owner of `data`, if any exist - - Search through the stack of data owners in order to find an - owner of type `klass` (not subclasses). - - Parameters - ---------- - data - The data object - - Return - ------ - klass or None - The owner of `data` if `klass` or None. - """ - - if type(data) is klass: - return data - if hasattr(data, "owner"): - return get_owner(data.owner, klass) - return None - - -def as_exposure_tracked_buffer( - data, exposed: bool, subclass: Optional[Type[T]] = None -) -> BufferSlice: - """Factory function to wrap `data` in a slice of an exposure tracked buffer - - If `subclass` is None, a new ExposureTrackedBuffer that points to the - memory of `data` is created and a BufferSlice that points to all of the - new ExposureTrackedBuffer is returned. - - If `subclass` is not None, a new `subclass` is created instead. Still, - a BufferSlice that points to all of the new `subclass` is returned - - It is illegal for an exposure tracked buffer to own another exposure - tracked buffer. When representing the same memory, we should have a single - exposure tracked buffer and multiple buffer slices. - - Developer Notes - --------------- - This function always returns slices thus all buffers in cudf will use - `BufferSlice` when copy-on-write is enabled. The slices implement - copy-on-write by trigging deep copies when write access is detected - and multiple slices points to the same exposure tracked buffer. 
- - Parameters - ---------- - data : buffer-like or array-like - A buffer-like or array-like object that represents C-contiguous memory. - exposed - Mark the buffer as permanently exposed. - subclass - If not None, a subclass of ExposureTrackedBuffer to wrap `data`. - - Return - ------ - BufferSlice - A buffer slice that points to a ExposureTrackedBuffer (or `subclass`), - which in turn wraps `data`. - """ - - if not hasattr(data, "__cuda_array_interface__"): - if exposed: - raise ValueError("cannot created exposed host memory") - return cast( - BufferSlice, ExposureTrackedBuffer._from_host_memory(data)[:] - ) - - owner = get_owner(data, subclass or ExposureTrackedBuffer) - if owner is None: - return cast( - BufferSlice, - ExposureTrackedBuffer._from_device_memory(data, exposed=exposed)[ - : - ], - ) - - # At this point, we know that `data` is owned by a exposure tracked buffer - ptr, size = get_ptr_and_size(data.__cuda_array_interface__) - if size > 0 and owner._ptr == 0: - raise ValueError("Cannot create a non-empty slice of a null buffer") - return BufferSlice(base=owner, offset=ptr - owner._ptr, size=size) +from cudf.core.buffer.buffer import Buffer, BufferOwner class ExposureTrackedBuffer(Buffer): - """A Buffer that tracks its "expose" status. - - In order to implement copy-on-write and spillable buffers, we need the - ability to detect external access to the underlying memory. We say that - the buffer has been exposed if the device pointer (integer or void*) has - been accessed outside of ExposureTrackedBuffer. In this case, we have no - control over knowing if the data is being modified by a third-party. - - Attributes - ---------- - _exposed - The current exposure status of the buffer. Notice, once the exposure - status becomes True, it should never change back. - _slices - The set of BufferSlice instances that point to this buffer. 
- """ - - _exposed: bool - _slices: weakref.WeakSet[BufferSlice] - - @property - def exposed(self) -> bool: - return self._exposed - - def mark_exposed(self) -> None: - """Mark the buffer as "exposed" permanently""" - self._exposed = True - - @classmethod - def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: - """Create an exposure tracked buffer from device memory. - - No data is being copied. - - Parameters - ---------- - data : device-buffer-like - An object implementing the CUDA Array Interface. - exposed : bool, optional - Mark the buffer as permanently exposed. - - Returns - ------- - ExposureTrackedBuffer - Buffer representing the same device memory as `data` - """ - ret = super()._from_device_memory(data) - ret._exposed = exposed - ret._slices = weakref.WeakSet() - return ret - - def _getitem(self, offset: int, size: int) -> BufferSlice: - return BufferSlice(base=self, offset=offset, size=size) - - @property - def __cuda_array_interface__(self) -> Mapping: - self.mark_exposed() - return super().__cuda_array_interface__ - - def __repr__(self) -> str: - return ( - f"" - ) - - -class BufferSlice(ExposureTrackedBuffer): - """A slice (aka. a view) of a exposure tracked buffer. + """An exposure tracked buffer. Parameters ---------- - base - The exposure tracked buffer this slice refers to. + owner + The owning exposure tracked buffer this refers to. offset - The offset relative to the start memory of base (in bytes). + The offset relative to the start memory of owner (in bytes). size The size of the slice (in bytes) - passthrough_attributes - Name of attributes that are passed through to the base as-is. 
""" + _owner: BufferOwner + def __init__( self, - base: ExposureTrackedBuffer, - offset: int, - size: int, - *, - passthrough_attributes: Container[str] = ("exposed",), + owner: BufferOwner, + offset: int = 0, + size: Optional[int] = None, ) -> None: - if size < 0: - raise ValueError("size cannot be negative") - if offset < 0: - raise ValueError("offset cannot be negative") - if offset + size > base.size: - raise ValueError( - "offset+size cannot be greater than the size of base" - ) - self._base = base - self._offset = offset - self._size = size - self._owner = base - self._passthrough_attributes = passthrough_attributes - base._slices.add(self) - - def __getattr__(self, name): - if name in self._passthrough_attributes: - return getattr(self._base, name) - raise AttributeError( - f"{self.__class__.__name__} object has no attribute {name}" - ) + super().__init__(owner=owner, offset=offset, size=size) + self._owner._slices.add(self) - def _getitem(self, offset: int, size: int) -> BufferSlice: - return BufferSlice( - base=self._base, offset=offset + self._offset, size=size - ) + @property + def exposed(self) -> bool: + return self._owner.exposed def get_ptr(self, *, mode: Literal["read", "write"]) -> int: if mode == "write" and cudf.get_option("copy_on_write"): self.make_single_owner_inplace() - return self._base.get_ptr(mode=mode) + self._offset - - def memoryview( - self, *, offset: int = 0, size: Optional[int] = None - ) -> memoryview: - return self._base.memoryview(offset=self._offset + offset, size=size) + return super().get_ptr(mode=mode) def copy(self, deep: bool = True) -> Self: """Return a copy of Buffer. 
@@ -260,16 +66,14 @@ def copy(self, deep: bool = True) -> Self: Returns ------- - BufferSlice - A slice pointing to either a new or the existing base buffer - depending on the expose status of the base buffer and the + ExposureTrackedBuffer + A slice pointing to either a new or the existing owner + depending on the expose status of the owner and the copy-on-write option (see above). """ if cudf.get_option("copy_on_write"): - base_copy = self._base.copy(deep=deep or self.exposed) - else: - base_copy = self._base.copy(deep=deep) - return cast(Self, base_copy[self._offset : self._offset + self._size]) + return super().copy(deep=deep or self.exposed) + return super().copy(deep=deep) @property def __cuda_array_interface__(self) -> Mapping: @@ -278,7 +82,7 @@ def __cuda_array_interface__(self) -> Mapping: return super().__cuda_array_interface__ def make_single_owner_inplace(self) -> None: - """Make sure this slice is the only one pointing to the base. + """Make sure this slice is the only one pointing to the owner. This is used by copy-on-write to trigger a deep copy when write access is detected. @@ -294,18 +98,11 @@ def make_single_owner_inplace(self) -> None: Buffer representing the same device memory as `data` """ - if len(self._base._slices) > 1: - # If this is not the only slice pointing to `self._base`, we - # point to a new deep copy of the base. + if len(self._owner._slices) > 1: + # If this is not the only slice pointing to `self._owner`, we + # point to a new deep copy of the owner. 
t = self.copy(deep=True) - self._base = t._base + self._owner = t._owner self._offset = t._offset self._size = t._size - self._owner = t._base - self._base._slices.add(self) - - def __repr__(self) -> str: - return ( - f"" - ) + self._owner._slices.add(self) diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index 91f3b2cd544..3e654e01401 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -16,7 +16,7 @@ import rmm.mr -from cudf.core.buffer.spillable_buffer import SpillableBuffer +from cudf.core.buffer.spillable_buffer import SpillableBufferOwner from cudf.options import get_option from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate from cudf.utils.string import format_bytes @@ -128,7 +128,7 @@ def log_spill(self, src: str, dst: str, nbytes: int, time: float) -> None: total_time + time, ) - def log_expose(self, buf: SpillableBuffer) -> None: + def log_expose(self, buf: SpillableBufferOwner) -> None: """Log an expose event We track logged exposes by grouping them by their traceback such @@ -224,7 +224,7 @@ class SpillManager: SpillStatistics for the different levels. 
""" - _buffers: weakref.WeakValueDictionary[int, SpillableBuffer] + _buffers: weakref.WeakValueDictionary[int, SpillableBufferOwner] statistics: SpillStatistics def __init__( @@ -298,14 +298,14 @@ def _out_of_memory_handle(self, nbytes: int, *, retry_once=True) -> bool: ) return False # Since we didn't find anything to spill, we give up - def add(self, buffer: SpillableBuffer) -> None: + def add(self, buffer: SpillableBufferOwner) -> None: """Add buffer to the set of managed buffers The manager keeps a weak reference to the buffer Parameters ---------- - buffer : SpillableBuffer + buffer : SpillableBufferOwner The buffer to manage """ if buffer.size > 0 and not buffer.exposed: @@ -316,7 +316,7 @@ def add(self, buffer: SpillableBuffer) -> None: def buffers( self, order_by_access_time: bool = False - ) -> Tuple[SpillableBuffer, ...]: + ) -> Tuple[SpillableBufferOwner, ...]: """Get all managed buffers Parameters diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 1856bec1876..aeac4b76e58 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -16,8 +16,8 @@ from cudf.core.buffer.buffer import ( Buffer, + BufferOwner, cuda_array_interface_wrapper, - get_ptr_and_size, host_memory_allocation, ) from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate @@ -27,86 +27,6 @@ from cudf.core.buffer.spill_manager import SpillManager -def get_spillable_owner(data) -> Optional[SpillableBuffer]: - """Get the spillable owner of `data`, if any exist - - Search through the stack of data owners in order to find an - owner of type `SpillableBuffer` (not subclasses). - - Parameters - ---------- - data : buffer-like or array-like - A buffer-like or array-like object that represent C-contiguous memory. 
- - Return - ------ - SpillableBuffer or None - The owner of `data` if spillable or None. - """ - - if type(data) is SpillableBuffer: - return data - if hasattr(data, "owner"): - return get_spillable_owner(data.owner) - return None - - -def as_spillable_buffer(data, exposed: bool) -> SpillableBuffer: - """Factory function to wrap `data` in a SpillableBuffer object. - - If `data` isn't a buffer already, a new buffer that points to the memory of - `data` is created. If `data` represents host memory, it is copied to a new - `rmm.DeviceBuffer` device allocation. Otherwise, the memory of `data` is - **not** copied, instead the new buffer keeps a reference to `data` in order - to retain its lifetime. - - If `data` is owned by a spillable buffer, a "slice" of the buffer is - returned. In this case, the spillable buffer must either be "exposed" or - spilled locked (called within an acquire_spill_lock context). This is to - guarantee that the memory of `data` isn't spilled before this function gets - to calculate the offset of the new slice. - - It is illegal for a spillable buffer to own another spillable buffer. - - Parameters - ---------- - data : buffer-like or array-like - A buffer-like or array-like object that represent C-contiguous memory. - exposed : bool, optional - Mark the buffer as permanently exposed (unspillable). - - Return - ------ - SpillableBuffer - A spillabe buffer instance that represents the device memory of `data`. - """ - - from cudf.core.buffer.utils import get_spill_lock - - if not hasattr(data, "__cuda_array_interface__"): - if exposed: - raise ValueError("cannot created exposed host memory") - return SpillableBuffer._from_host_memory(data) - - spillable_owner = get_spillable_owner(data) - if spillable_owner is None: - return SpillableBuffer._from_device_memory(data, exposed=exposed) - - if not spillable_owner.exposed and get_spill_lock() is None: - raise ValueError( - "A owning spillable buffer must " - "either be exposed or spilled locked." 
- ) - - # At this point, we know that `data` is owned by a spillable buffer, - # which is exposed or spilled locked. - ptr, size = get_ptr_and_size(data.__cuda_array_interface__) - base_ptr = spillable_owner.memory_info()[0] - return SpillableBufferSlice( - spillable_owner, offset=ptr - base_ptr, size=size - ) - - class SpillLock: pass @@ -141,7 +61,7 @@ def __getitem__(self, i): raise IndexError("tuple index out of range") -class SpillableBuffer(Buffer): +class SpillableBufferOwner(BufferOwner): """A Buffer that supports spilling memory off the GPU to avoid OOMs. This buffer supports spilling the represented data to host memory. @@ -150,9 +70,9 @@ class SpillableBuffer(Buffer): device memory usage see `cudf.core.buffer.spill_manager.SpillManager`. Unspill is triggered automatically when accessing the data of the buffer. - The buffer might not be spillable, which is based on the "expose" status - of the buffer. We say that the buffer has been exposed if the device - pointer (integer or void*) has been accessed outside of SpillableBuffer. + The buffer might not be spillable, which is based on the "expose" status of + the buffer. We say that the buffer has been exposed if the device pointer + (integer or void*) has been accessed outside of SpillableBufferOwner. In this case, we cannot invalidate the device pointer by moving the data to host. @@ -160,17 +80,17 @@ class SpillableBuffer(Buffer): property. To avoid this, one can use `.get_ptr()` instead, which support exposing the buffer temporarily. - Use the factory function `as_buffer` to create a SpillableBuffer instance. + Use the factory function `as_buffer` to create a SpillableBufferOwner + instance. 
""" lock: RLock _spill_locks: weakref.WeakSet _last_accessed: float _ptr_desc: Dict[str, Any] - _exposed: bool _manager: SpillManager - def _finalize_init(self, ptr_desc: Dict[str, Any], exposed: bool) -> None: + def _finalize_init(self, ptr_desc: Dict[str, Any]) -> None: """Finish initialization of the spillable buffer This implements the common initialization that `_from_device_memory` @@ -180,8 +100,6 @@ def _finalize_init(self, ptr_desc: Dict[str, Any], exposed: bool) -> None: ---------- ptr_desc : dict Description of the memory. - exposed : bool, optional - Mark the buffer as permanently exposed (unspillable). """ from cudf.core.buffer.spill_manager import get_global_manager @@ -190,7 +108,6 @@ def _finalize_init(self, ptr_desc: Dict[str, Any], exposed: bool) -> None: self._spill_locks = weakref.WeakSet() self._last_accessed = time.monotonic() self._ptr_desc = ptr_desc - self._exposed = exposed manager = get_global_manager() if manager is None: raise ValueError( @@ -202,7 +119,7 @@ def _finalize_init(self, ptr_desc: Dict[str, Any], exposed: bool) -> None: self._manager.add(self) @classmethod - def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: + def _from_device_memory(cls, data: Any, exposed: bool) -> Self: """Create a spillabe buffer from device memory. No data is being copied. @@ -211,16 +128,16 @@ def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: ---------- data : device-buffer-like An object implementing the CUDA Array Interface. - exposed : bool, optional + exposed : bool Mark the buffer as permanently exposed (unspillable). 
Returns ------- - SpillableBuffer + SpillableBufferOwner Buffer representing the same device memory as `data` """ - ret = super()._from_device_memory(data) - ret._finalize_init(ptr_desc={"type": "gpu"}, exposed=exposed) + ret = super()._from_device_memory(data, exposed=exposed) + ret._finalize_init(ptr_desc={"type": "gpu"}) return ret @classmethod @@ -241,7 +158,7 @@ def _from_host_memory(cls, data: Any) -> Self: Returns ------- - SpillableBuffer + SpillableBufferOwner Buffer representing a copy of `data`. """ @@ -257,20 +174,14 @@ def _from_host_memory(cls, data: Any) -> Self: ret._owner = None ret._ptr = 0 ret._size = data.nbytes - ret._finalize_init( - ptr_desc={"type": "cpu", "memoryview": data}, exposed=False - ) + ret._exposed = False + ret._finalize_init(ptr_desc={"type": "cpu", "memoryview": data}) return ret @property def is_spilled(self) -> bool: return self._ptr_desc["type"] != "gpu" - def copy(self, deep: bool = True) -> Self: - spill_lock = SpillLock() - self.spill_lock(spill_lock=spill_lock) - return super().copy(deep=deep) - def spill(self, target: str = "cpu") -> None: """Spill or un-spill this buffer in-place @@ -343,10 +254,10 @@ def mark_exposed(self) -> None: self._manager.spill_to_device_limit() with self.lock: - if not self._exposed: + if not self.exposed: self._manager.statistics.log_expose(self) self.spill(target="gpu") - self._exposed = True + super().mark_exposed() self._last_accessed = time.monotonic() def spill_lock(self, spill_lock: SpillLock) -> None: @@ -415,25 +326,9 @@ def memory_info(self) -> Tuple[int, int, str]: ).__array_interface__["data"][0] return (ptr, self.nbytes, self._ptr_desc["type"]) - @property - def owner(self) -> Any: - return self._owner - - @property - def exposed(self) -> bool: - return self._exposed - @property def spillable(self) -> bool: - return not self._exposed and len(self._spill_locks) == 0 - - @property - def size(self) -> int: - return self._size - - @property - def nbytes(self) -> int: - return 
self._size + return not self.exposed and len(self._spill_locks) == 0 @property def last_accessed(self) -> float: @@ -465,148 +360,114 @@ def memoryview( ) return ret - def _getitem(self, offset: int, size: int) -> SpillableBufferSlice: - return SpillableBufferSlice(base=self, offset=offset, size=size) - - def serialize(self) -> Tuple[dict, list]: - """Serialize the Buffer - - Normally, we would use `[self]` as the frames. This would work but - also mean that `self` becomes exposed permanently if the frames are - later accessed through `__cuda_array_interface__`, which is exactly - what libraries like Dask+UCX would do when communicating! - - The sound solution is to modify Dask et al. so that they access the - frames through `.get_ptr()` and holds on to the `spill_lock` until - the frame has been transferred. However, until this adaptation we - use a hack where the frame is a `Buffer` with a `spill_lock` as the - owner, which makes `self` unspillable while the frame is alive but - doesn't expose `self` when `__cuda_array_interface__` is accessed. - - Warning, this hack means that the returned frame must be copied before - given to `.deserialize()`, otherwise we would have a `Buffer` pointing - to memory already owned by an existing `SpillableBuffer`. 
- """ - header: Dict[Any, Any] - frames: List[Buffer | memoryview] - with self.lock: - header = {} - header["type-serialized"] = pickle.dumps(self.__class__) - header["frame_count"] = 1 - if self.is_spilled: - frames = [self.memoryview()] - else: - # TODO: Use `frames=[self]` instead of this hack, see doc above - spill_lock = SpillLock() - self.spill_lock(spill_lock) - ptr, size, _ = self.memory_info() - frames = [ - Buffer._from_device_memory( - cuda_array_interface_wrapper( - ptr=ptr, - size=size, - owner=(self._owner, spill_lock), - ) - ) - ] - return header, frames - - def __repr__(self) -> str: + def __str__(self) -> str: if self._ptr_desc["type"] != "gpu": ptr_info = str(self._ptr_desc) else: ptr_info = str(hex(self._ptr)) return ( - f"" ) -class SpillableBufferSlice(SpillableBuffer): +class SpillableBuffer(Buffer): """A slice of a spillable buffer This buffer applies the slicing and then delegates all - operations to its base buffer. + operations to its owning buffer. Parameters ---------- - base : SpillableBuffer - The base of the view + owner : SpillableBufferOwner + The owner of the view offset : int - Memory offset into the base buffer + Memory offset into the owning buffer size : int Size of the view (in bytes) """ - def __init__(self, base: SpillableBuffer, offset: int, size: int) -> None: - if size < 0: - raise ValueError("size cannot be negative") - if offset < 0: - raise ValueError("offset cannot be negative") - if offset + size > base.size: - raise ValueError( - "offset+size cannot be greater than the size of base" - ) - self._base = base - self._offset = offset - self._size = size - self._owner = base - self.lock = base.lock - - def get_ptr(self, *, mode: Literal["read", "write"]) -> int: - """ - A passthrough method to `SpillableBuffer.get_ptr` - with factoring in the `offset`. 
- """ - return self._base.get_ptr(mode=mode) + self._offset - - def _getitem(self, offset: int, size: int) -> SpillableBufferSlice: - return SpillableBufferSlice( - base=self._base, offset=offset + self._offset, size=size - ) + _owner: SpillableBufferOwner - @classmethod - def deserialize(cls, header: dict, frames: list): - # TODO: because of the hack in `SpillableBuffer.serialize()` where - # frames are of type `Buffer`, we always deserialize as if they are - # `SpillableBuffer`. In the future, we should be able to - # deserialize into `SpillableBufferSlice` when the frames hasn't been - # copied. - return SpillableBuffer.deserialize(header, frames) - - def memoryview( - self, *, offset: int = 0, size: Optional[int] = None - ) -> memoryview: - size = self._size if size is None else size - return self._base.memoryview(offset=self._offset + offset, size=size) - - def __repr__(self) -> str: - return ( - f" None: - return self._base.spill(target=target) + return self._owner.spill(target=target) @property def is_spilled(self) -> bool: - return self._base.is_spilled + return self._owner.is_spilled @property def exposed(self) -> bool: - return self._base.exposed + return self._owner.exposed @property def spillable(self) -> bool: - return self._base.spillable + return self._owner.spillable def spill_lock(self, spill_lock: SpillLock) -> None: - self._base.spill_lock(spill_lock=spill_lock) + self._owner.spill_lock(spill_lock=spill_lock) def memory_info(self) -> Tuple[int, int, str]: - (ptr, _, device_type) = self._base.memory_info() + (ptr, _, device_type) = self._owner.memory_info() return (ptr + self._offset, self.nbytes, device_type) + + def mark_exposed(self) -> None: + self._owner.mark_exposed() + + def serialize(self) -> Tuple[dict, list]: + """Serialize the Buffer + + Normally, we would use `[self]` as the frames. 
This would work but + also mean that `self` becomes exposed permanently if the frames are + later accessed through `__cuda_array_interface__`, which is exactly + what libraries like Dask+UCX would do when communicating! + + The sound solution is to modify Dask et al. so that they access the + frames through `.get_ptr()` and holds on to the `spill_lock` until + the frame has been transferred. However, until this adaptation we + use a hack where the frame is a `Buffer` with a `spill_lock` as the + owner, which makes `self` unspillable while the frame is alive but + doesn't expose `self` when `__cuda_array_interface__` is accessed. + + Warning, this hack means that the returned frame must be copied before + given to `.deserialize()`, otherwise we would have a `Buffer` pointing + to memory already owned by an existing `SpillableBufferOwner`. + """ + header: Dict[str, Any] = {} + frames: List[Buffer | memoryview] + with self._owner.lock: + header["type-serialized"] = pickle.dumps(self.__class__) + header["owner-type-serialized"] = pickle.dumps(type(self._owner)) + header["frame_count"] = 1 + if self.is_spilled: + frames = [self.memoryview()] + else: + # TODO: Use `frames=[self]` instead of this hack, see doc above + spill_lock = SpillLock() + self.spill_lock(spill_lock) + ptr, size, _ = self.memory_info() + frames = [ + Buffer( + owner=BufferOwner._from_device_memory( + cuda_array_interface_wrapper( + ptr=ptr, + size=size, + owner=(self._owner, spill_lock), + ), + exposed=False, + ) + ) + ] + return header, frames + + @property + def __cuda_array_interface__(self) -> dict: + return { + "data": DelayedPointerTuple(self), + "shape": (self.size,), + "strides": None, + "typestr": "|u1", + "version": 0, + } diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py index 373be99ec96..c2ec7effd13 100644 --- a/python/cudf/cudf/core/buffer/utils.py +++ b/python/cudf/cudf/core/buffer/utils.py @@ -1,18 +1,51 @@ -# Copyright (c) 2022-2023, NVIDIA 
CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. from __future__ import annotations import threading from contextlib import ContextDecorator -from typing import Any, Dict, Optional, Tuple, Union - -from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper -from cudf.core.buffer.exposure_tracked_buffer import as_exposure_tracked_buffer +from typing import Any, Dict, Optional, Tuple, Type, Union + +from cudf.core.buffer.buffer import ( + Buffer, + BufferOwner, + cuda_array_interface_wrapper, + get_ptr_and_size, +) +from cudf.core.buffer.exposure_tracked_buffer import ExposureTrackedBuffer from cudf.core.buffer.spill_manager import get_global_manager -from cudf.core.buffer.spillable_buffer import SpillLock, as_spillable_buffer +from cudf.core.buffer.spillable_buffer import ( + SpillableBuffer, + SpillableBufferOwner, + SpillLock, +) from cudf.options import get_option +def get_buffer_owner(data: Any) -> Optional[BufferOwner]: + """Get the owner of `data`, if one exists + + Search through the stack of data owners in order to find an + owner BufferOwner (incl. subclasses). + + Parameters + ---------- + data + The data object to search for a BufferOwner instance + + Return + ------ + BufferOwner or None + The owner of `data` if found otherwise None. + """ + + if isinstance(data, BufferOwner): + return data + if hasattr(data, "owner"): + return get_buffer_owner(data.owner) + return None + + def as_buffer( data: Union[int, Any], *, @@ -30,7 +63,17 @@ def as_buffer( If `data` is an integer, it is assumed to point to device memory. - Raises ValueError if data isn't C-contiguous. + Raises ValueError if `data` isn't C-contiguous. + + If copy-on-write is enabled, an ExposureTrackedBuffer is returned. + + If spilling is enabled, a SpillableBuffer that refers to a + SpillableBufferOwner is returned. If `data` is owned by a spillable buffer, + it must either be "exposed" or spill locked (called within an + acquire_spill_lock context). 
This is to guarantee that the memory of `data` + isn't spilled before this function gets to calculate the offset of the new + SpillableBuffer. + Parameters ---------- @@ -73,13 +116,49 @@ def as_buffer( "`data` is a buffer-like or array-like object" ) - if get_option("copy_on_write"): - return as_exposure_tracked_buffer(data, exposed=exposed) + # Find the buffer types to return based on the current config + owner_class: Type[BufferOwner] + buffer_class: Type[Buffer] if get_global_manager() is not None: - return as_spillable_buffer(data, exposed=exposed) - if hasattr(data, "__cuda_array_interface__"): - return Buffer._from_device_memory(data) - return Buffer._from_host_memory(data) + owner_class = SpillableBufferOwner + buffer_class = SpillableBuffer + elif get_option("copy_on_write"): + owner_class = BufferOwner + buffer_class = ExposureTrackedBuffer + else: + owner_class = BufferOwner + buffer_class = Buffer + + # Handle host memory, + if not hasattr(data, "__cuda_array_interface__"): + if exposed: + raise ValueError("cannot created exposed host memory") + return buffer_class(owner=owner_class._from_host_memory(data)) + + # Check if `data` is owned by a known class + owner = get_buffer_owner(data) + if owner is None: # `data` is new device memory + return buffer_class( + owner=owner_class._from_device_memory(data, exposed=exposed) + ) + + # At this point, we know that `data` is owned by a known class, which + # should be the same class as specified by the current config (see above) + assert owner.__class__ is owner_class + if ( + isinstance(owner, SpillableBufferOwner) + and not owner.exposed + and get_spill_lock() is None + ): + raise ValueError( + "An owning spillable buffer must " + "either be exposed or spill locked." 
+ ) + ptr, size = get_ptr_and_size(data.__cuda_array_interface__) + base_ptr = owner.get_ptr(mode="read") + if size > 0 and base_ptr == 0: + raise ValueError("Cannot create a non-empty slice of a null buffer") + return buffer_class(owner=owner, offset=ptr - base_ptr, size=size) _thread_spill_locks: Dict[int, Tuple[Optional[SpillLock], int]] = {} diff --git a/python/cudf/cudf/tests/test_buffer.py b/python/cudf/cudf/tests/test_buffer.py index 1c9e7475080..03637e05eae 100644 --- a/python/cudf/cudf/tests/test_buffer.py +++ b/python/cudf/cudf/tests/test_buffer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import cupy as cp import pytest @@ -64,7 +64,14 @@ def test_buffer_creation_from_any(): assert isinstance(b, Buffer) assert ary.data.ptr == b.get_ptr(mode="read") assert ary.nbytes == b.size - assert b.owner.owner is ary + assert b.owner.owner.owner is ary + + +@pytest.mark.parametrize("size", [10, 2**10 + 500, 2**20]) +def test_buffer_str(size): + ary = cp.arange(size, dtype="uint8") + buf = as_buffer(ary) + assert f"size={size}" in repr(buf) @pytest.mark.parametrize( @@ -73,7 +80,7 @@ def test_buffer_creation_from_any(): def test_buffer_repr(size, expect): ary = cp.arange(size, dtype="uint8") buf = as_buffer(ary) - assert f"size={expect}" in repr(buf) + assert f"size={expect}" in str(buf) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py index 085774e9dbc..e737a73e86b 100644 --- a/python/cudf/cudf/tests/test_copying.py +++ b/python/cudf/cudf/tests/test_copying.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
import cupy as cp import numpy as np @@ -113,11 +113,8 @@ def test_series_setitem_partial_slice_cow_on(): assert_eq(new_copy, cudf.Series([1, 2, 300, 300, 5])) new_slice = actual[2:] - # TODO: when COW and spilling has been unified, find a clean way to - # test this without accessing the internal attributes _base and _ptr assert ( - new_slice._column.base_data._base._ptr - == actual._column.base_data._base._ptr + new_slice._column.base_data.owner == actual._column.base_data.owner ) new_slice[0:2] = 10 assert_eq(new_slice, cudf.Series([10, 10, 5], index=[2, 3, 4])) @@ -134,9 +131,11 @@ def test_series_setitem_partial_slice_cow_off(): assert_eq(new_copy, cudf.Series([1, 2, 300, 300, 5])) new_slice = actual[2:] - assert ( - new_slice._column.base_data._ptr == actual._column.base_data._ptr - ) + # Since COW is off, a slice should point to the same memory + ptr1 = new_slice._column.base_data.get_ptr(mode="read") + ptr2 = actual._column.base_data.get_ptr(mode="read") + assert ptr1 == ptr2 + new_slice[0:2] = 10 assert_eq(new_slice, cudf.Series([10, 10, 5], index=[2, 3, 4])) assert_eq(actual, cudf.Series([1, 2, 10, 10, 5])) diff --git a/python/cudf/cudf/tests/test_spilling.py b/python/cudf/cudf/tests/test_spilling.py index 88ce908aa5f..7e66a7ab4ba 100644 --- a/python/cudf/cudf/tests/test_spilling.py +++ b/python/cudf/cudf/tests/test_spilling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
import importlib import random @@ -34,7 +34,7 @@ ) from cudf.core.buffer.spillable_buffer import ( SpillableBuffer, - SpillableBufferSlice, + SpillableBufferOwner, SpillLock, ) from cudf.testing._utils import assert_eq @@ -196,10 +196,10 @@ def test_creations(manager: SpillManager): def test_spillable_df_groupby(manager: SpillManager): df = cudf.DataFrame({"a": [1, 1, 1]}) gb = df.groupby("a") - assert len(single_column_df_base_data(df)._spill_locks) == 0 + assert len(single_column_df_base_data(df).owner._spill_locks) == 0 gb._groupby # `gb._groupby`, which is cached on `gb`, holds a spill lock - assert len(single_column_df_base_data(df)._spill_locks) == 1 + assert len(single_column_df_base_data(df).owner._spill_locks) == 1 assert not single_column_df_data(df).spillable del gb assert single_column_df_data(df).spillable @@ -375,7 +375,7 @@ def test_get_ptr(manager: SpillManager, target): mem = np.empty(10, dtype="u1") buf = as_buffer(data=mem, exposed=False) assert buf.spillable - assert len(buf._spill_locks) == 0 + assert len(buf.owner._spill_locks) == 0 with acquire_spill_lock(): buf.get_ptr(mode="read") assert not buf.spillable @@ -496,8 +496,8 @@ def test_serialize_cuda_dataframe(manager: SpillManager): header, frames = protocol.serialize( df1, serializers=("cuda",), on_error="raise" ) - buf: SpillableBufferSlice = single_column_df_data(df1) - assert len(buf._base._spill_locks) == 1 + buf: SpillableBuffer = single_column_df_data(df1) + assert len(buf.owner._spill_locks) == 1 assert len(frames) == 1 assert isinstance(frames[0], Buffer) assert frames[0].get_ptr(mode="read") == buf.get_ptr(mode="read") @@ -543,13 +543,14 @@ def test_as_buffer_of_spillable_buffer(manager: SpillManager): data = cupy.arange(10, dtype="u1") b1 = as_buffer(data, exposed=False) assert isinstance(b1, SpillableBuffer) - assert b1.owner is data + assert isinstance(b1.owner, SpillableBufferOwner) + assert b1.owner.owner is data b2 = as_buffer(b1) assert b1 is b2 with pytest.raises( 
ValueError, - match="buffer must either be exposed or spilled locked", + match="owning spillable buffer must either be exposed or spill locked", ): # Use `memory_info` to access device point _without_ making # the buffer unspillable. @@ -557,21 +558,21 @@ def test_as_buffer_of_spillable_buffer(manager: SpillManager): with acquire_spill_lock(): b3 = as_buffer(b1.get_ptr(mode="read"), size=b1.size, owner=b1) - assert isinstance(b3, SpillableBufferSlice) - assert b3.owner is b1 + assert isinstance(b3, SpillableBuffer) + assert b3.owner is b1.owner b4 = as_buffer( b1.get_ptr(mode="write") + data.itemsize, size=b1.size - data.itemsize, owner=b3, ) - assert isinstance(b4, SpillableBufferSlice) - assert b4.owner is b1 + assert isinstance(b4, SpillableBuffer) + assert b4.owner is b1.owner assert all(cupy.array(b4.memoryview()) == data[1:]) b5 = as_buffer(b4.get_ptr(mode="write"), size=b4.size - 1, owner=b4) - assert isinstance(b5, SpillableBufferSlice) - assert b5.owner is b1 + assert isinstance(b5, SpillableBuffer) + assert b5.owner is b1.owner assert all(cupy.array(b5.memoryview()) == data[1:-1]) From 726a7f30757d1a06d74d86bb82cf311cb159f7fd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 16 Jan 2024 03:32:13 -1000 Subject: [PATCH 23/23] Pin pytest-cases<3.8.2 (#14756) Appears in the pytest-cases 3.8.2, there's a requirement that automatically finding `cases` must be in a file named `test_*`, when historically looks like we use `bench_*` https://github.com/smarie/python-pytest-cases/pull/320/files Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/14756 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-120_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- python/cudf/pyproject.toml 
| 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 21b540e24ab..a5e3ea4c531 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -74,7 +74,7 @@ dependencies: - pydata-sphinx-theme!=0.14.2 - pytest - pytest-benchmark -- pytest-cases +- pytest-cases<3.8.2 - pytest-cov - pytest-xdist - python-confluent-kafka>=1.9.0,<1.10.0a0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index c109dcca625..579bbb6d52d 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -71,7 +71,7 @@ dependencies: - pydata-sphinx-theme!=0.14.2 - pytest - pytest-benchmark -- pytest-cases +- pytest-cases<3.8.2 - pytest-cov - pytest-xdist - python-confluent-kafka>=1.9.0,<1.10.0a0 diff --git a/dependencies.yaml b/dependencies.yaml index 28b3afd7bbc..20998847a75 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -619,7 +619,7 @@ dependencies: - fastavro>=0.22.9 - hypothesis - pytest-benchmark - - pytest-cases + - pytest-cases<3.8.2 - python-snappy>=0.6.0 - scipy - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 7c3f4a97a5e..18771804f61 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -59,7 +59,7 @@ test = [ "msgpack", "pytest", "pytest-benchmark", - "pytest-cases", + "pytest-cases<3.8.2", "pytest-cov", "pytest-xdist", "python-snappy>=0.6.0",