From 92652be87839e4a4e49216c49bd36860674bff6a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:17:28 -0800 Subject: [PATCH 1/4] Remove cudf._lib.parquet in favor of inlining pylibcudf (#17562) Contributes to https://github.com/rapidsai/cudf/issues/17317 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17562 --- python/cudf/cudf/_lib/CMakeLists.txt | 5 +- python/cudf/cudf/_lib/__init__.py | 1 - python/cudf/cudf/_lib/io/CMakeLists.txt | 21 - python/cudf/cudf/_lib/io/__init__.pxd | 0 python/cudf/cudf/_lib/io/__init__.py | 0 python/cudf/cudf/_lib/io/utils.pxd | 31 - python/cudf/cudf/_lib/io/utils.pyx | 74 -- python/cudf/cudf/_lib/parquet.pyx | 817 ------------------- python/cudf/cudf/io/parquet.py | 992 +++++++++++++++++++++--- python/cudf/cudf/tests/test_parquet.py | 72 +- python/cudf/cudf/utils/ioutils.py | 1 - 11 files changed, 941 insertions(+), 1073 deletions(-) delete mode 100644 python/cudf/cudf/_lib/io/CMakeLists.txt delete mode 100644 python/cudf/cudf/_lib/io/__init__.pxd delete mode 100644 python/cudf/cudf/_lib/io/__init__.py delete mode 100644 python/cudf/cudf/_lib/io/utils.pxd delete mode 100644 python/cudf/cudf/_lib/io/utils.pyx delete mode 100644 python/cudf/cudf/_lib/parquet.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index efe96ff6c3e..f422635d22a 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -13,8 +13,8 @@ # ============================================================================= set(cython_sources - column.pyx copying.pyx csv.pyx groupby.pyx interop.pyx parquet.pyx reduce.pyx scalar.pyx - sort.pyx stream_compaction.pyx string_casting.pyx strings_udf.pyx types.pyx utils.pyx + column.pyx copying.pyx csv.pyx groupby.pyx interop.pyx reduce.pyx scalar.pyx sort.pyx + stream_compaction.pyx string_casting.pyx strings_udf.pyx types.pyx utils.pyx ) set(linked_libraries cudf::cudf) @@ -31,5 +31,4 @@ include(${rapids-cmake-dir}/export/find_package_root.cmake) include(../../../../cpp/cmake/thirdparty/get_nanoarrow.cmake) target_link_libraries(interop PUBLIC nanoarrow) -add_subdirectory(io) add_subdirectory(nvtext) diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 52e9b89da7b..cfdcec4cd3b 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -7,7 +7,6 @@ groupby, interop, nvtext, - parquet, reduce, sort, stream_compaction, diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt deleted file mode 100644 index e7408cf2852..00000000000 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -set(cython_sources utils.pyx) -set(linked_libraries cudf::cudf) -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ ASSOCIATED_TARGETS cudf -) diff --git a/python/cudf/cudf/_lib/io/__init__.pxd b/python/cudf/cudf/_lib/io/__init__.pxd deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/io/__init__.py b/python/cudf/cudf/_lib/io/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/io/utils.pxd b/python/cudf/cudf/_lib/io/utils.pxd deleted file mode 100644 index 9b8bab012e2..00000000000 --- a/python/cudf/cudf/_lib/io/utils.pxd +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from libcpp.memory cimport unique_ptr -from libcpp.vector cimport vector - -from pylibcudf.libcudf.io.data_sink cimport data_sink -from pylibcudf.libcudf.io.types cimport ( - column_name_info, - sink_info, - source_info, -) - -from cudf._lib.column cimport Column - - -cdef add_df_col_struct_names( - df, - child_names_dict -) -cdef update_col_struct_field_names( - Column col, - child_names -) -cdef update_struct_field_names( - table, - vector[column_name_info]& schema_info -) -cdef Column update_column_struct_field_names( - Column col, - column_name_info& info -) diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx deleted file mode 100644 index df4675be599..00000000000 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - - -from libcpp.string cimport string - -from libcpp.vector cimport vector - -from pylibcudf.libcudf.io.types cimport column_name_info - -from cudf._lib.column cimport Column - -from cudf.core.dtypes import StructDtype - -cdef add_df_col_struct_names(df, child_names_dict): - for name, child_names in child_names_dict.items(): - col = df._data[name] - - df._data[name] = update_col_struct_field_names(col, child_names) - - -cdef update_col_struct_field_names(Column col, child_names): - if col.children: - children = list(col.children) - for i, (child, names) in enumerate(zip(children, child_names.values())): - children[i] = update_col_struct_field_names( - child, - names - ) - col.set_base_children(tuple(children)) - - if isinstance(col.dtype, StructDtype): - col = col._rename_fields( - child_names.keys() - ) - - return col - - -cdef update_struct_field_names( - table, - vector[column_name_info]& schema_info -): - # Deprecated, remove in favor of add_col_struct_names - # when a reader is ported to pylibcudf - for i, (name, col) in enumerate(table._column_labels_and_values): - table._data[name] = update_column_struct_field_names( - col, schema_info[i] - ) - - -cdef Column update_column_struct_field_names( - Column col, - column_name_info& info -): - cdef vector[string] field_names - - if col.children: - children = list(col.children) - for i, child in enumerate(children): - children[i] = update_column_struct_field_names( - child, - info.children[i] - ) - col.set_base_children(tuple(children)) - - if isinstance(col.dtype, StructDtype): - field_names.reserve(len(col.base_children)) - for i in range(info.children.size()): - field_names.push_back(info.children[i].name) - col = col._rename_fields( - field_names - ) - - return col diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx deleted file mode 100644 index 
00c434ae374..00000000000 --- a/python/cudf/cudf/_lib/parquet.pyx +++ /dev/null @@ -1,817 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. - -import io - -import pyarrow as pa -import itertools -import cudf -from cudf.core.buffer import acquire_spill_lock - -try: - import ujson as json -except ImportError: - import json - -import numpy as np - -from cudf.api.types import is_list_like - -from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io - -from cudf._lib.utils import _index_level_name, generate_pandas_metadata - -from libc.stdint cimport int64_t -from libcpp cimport bool - -from pylibcudf.expressions cimport Expression -from pylibcudf.io.parquet cimport ChunkedParquetReader -from pylibcudf.libcudf.io.types cimport ( - statistics_freq, - compression_type, - dictionary_policy, -) -from pylibcudf.libcudf.types cimport size_type - -from cudf._lib.column cimport Column -from cudf._lib.io.utils cimport ( - add_df_col_struct_names, -) - -import pylibcudf as plc - -from pylibcudf cimport Table - -from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT -from pylibcudf.io.types cimport TableInputMetadata, SinkInfo, ColumnInMetadata -from pylibcudf.io.parquet cimport ParquetChunkedWriter - - -def _parse_metadata(meta): - file_is_range_index = False - file_index_cols = None - file_column_dtype = None - - if 'index_columns' in meta and len(meta['index_columns']) > 0: - file_index_cols = meta['index_columns'] - - if isinstance(file_index_cols[0], dict) and \ - file_index_cols[0]['kind'] == 'range': - file_is_range_index = True - if 'column_indexes' in meta and len(meta['column_indexes']) == 1: - file_column_dtype = meta['column_indexes'][0]["numpy_type"] - return file_is_range_index, file_index_cols, file_column_dtype - - -cdef object _process_metadata(object df, - list names, - dict child_names, - list per_file_user_data, - object row_groups, - object filepaths_or_buffers, - bool allow_range_index, - bool use_pandas_metadata, - size_type nrows=-1, - int64_t skip_rows=0, - ): - - add_df_col_struct_names(df, child_names) - index_col = None - is_range_index = True - column_index_type = None - index_col_names = None - meta = None - for single_file in per_file_user_data: - if b'pandas' not in single_file: - continue - json_str = single_file[b'pandas'].decode('utf-8') - meta = json.loads(json_str) - file_is_range_index, index_col, column_index_type = _parse_metadata(meta) - is_range_index &= file_is_range_index - - if not file_is_range_index and index_col is not None \ - and index_col_names is None: - index_col_names = {} - for idx_col in index_col: - for c in meta['columns']: - if c['field_name'] == idx_col: - index_col_names[idx_col] = c['name'] - - if meta is not None: - # Book keep each column metadata as the order - # of `meta["columns"]` and `column_names` are not - # guaranteed to be deterministic and same always. 
- meta_data_per_column = { - col_meta['name']: col_meta for col_meta in meta["columns"] - } - - # update the decimal precision of each column - for col in names: - if isinstance(df._data[col].dtype, cudf.core.dtypes.DecimalDtype): - df._data[col].dtype.precision = ( - meta_data_per_column[col]["metadata"]["precision"] - ) - - # Set the index column - if index_col is not None and len(index_col) > 0: - if is_range_index: - if not allow_range_index: - return df - - if len(per_file_user_data) > 1: - range_index_meta = { - "kind": "range", - "name": None, - "start": 0, - "stop": len(df), - "step": 1 - } - else: - range_index_meta = index_col[0] - - if row_groups is not None: - per_file_metadata = [ - pa.parquet.read_metadata( - # Pyarrow cannot read directly from bytes - io.BytesIO(s) if isinstance(s, bytes) else s - ) for s in filepaths_or_buffers - ] - - filtered_idx = [] - for i, file_meta in enumerate(per_file_metadata): - row_groups_i = [] - start = 0 - for row_group in range(file_meta.num_row_groups): - stop = start + file_meta.row_group(row_group).num_rows - row_groups_i.append((start, stop)) - start = stop - - for rg in row_groups[i]: - filtered_idx.append( - cudf.RangeIndex( - start=row_groups_i[rg][0], - stop=row_groups_i[rg][1], - step=range_index_meta['step'] - ) - ) - - if len(filtered_idx) > 0: - idx = cudf.concat(filtered_idx) - else: - idx = cudf.Index._from_column(cudf.core.column.column_empty(0)) - else: - start = range_index_meta["start"] + skip_rows - stop = range_index_meta["stop"] - if nrows > -1: - stop = start + nrows - idx = cudf.RangeIndex( - start=start, - stop=stop, - step=range_index_meta['step'], - name=range_index_meta['name'] - ) - - df._index = idx - elif set(index_col).issubset(names): - index_data = df[index_col] - actual_index_names = iter(index_col_names.values()) - if index_data._num_columns == 1: - idx = cudf.Index._from_column( - index_data._columns[0], - name=next(actual_index_names) - ) - else: - idx = cudf.MultiIndex.from_frame( - index_data, - names=list(actual_index_names) - ) - df.drop(columns=index_col, inplace=True) - df._index = idx - else: - if use_pandas_metadata: - df.index.names = index_col - - if df._num_columns == 0 and column_index_type is not None: - df._data.label_dtype = cudf.dtype(column_index_type) - - return df - - -def read_parquet_chunked( - filepaths_or_buffers, - columns=None, - row_groups=None, - use_pandas_metadata=True, - size_t chunk_read_limit=0, - size_t pass_read_limit=1024000000, - size_type nrows=-1, - int64_t skip_rows=0, - allow_mismatched_pq_schemas=False -): - # Note: If this function ever takes accepts filters - # allow_range_index needs to be False when a filter is passed - # (see read_parquet) - allow_range_index = columns is not None and len(columns) != 0 - - options = ( - plc.io.parquet.ParquetReaderOptions.builder( - plc.io.SourceInfo(filepaths_or_buffers) - ) - .use_pandas_metadata(use_pandas_metadata) - .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) - .build() - ) - if row_groups is not None: - options.set_row_groups(row_groups) - if nrows > -1: - options.set_num_rows(nrows) - if skip_rows != 0: - options.set_skip_rows(skip_rows) - if columns is not None: - options.set_columns(columns) - - reader = ChunkedParquetReader( - options, - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - ) - - tbl_w_meta = reader.read_chunk() - column_names = tbl_w_meta.column_names(include_children=False) - child_names = tbl_w_meta.child_names - per_file_user_data = tbl_w_meta.per_file_user_data - 
concatenated_columns = tbl_w_meta.tbl.columns() - - # save memory - del tbl_w_meta - - cdef Table tbl - while reader.has_next(): - tbl = reader.read_chunk().tbl - - for i in range(tbl.num_columns()): - concatenated_columns[i] = plc.concatenate.concatenate( - [concatenated_columns[i], tbl._columns[i]] - ) - # Drop residual columns to save memory - tbl._columns[i] = None - - df = cudf.DataFrame._from_data( - *_data_from_columns( - columns=[Column.from_pylibcudf(plc) for plc in concatenated_columns], - column_names=column_names, - index_names=None - ) - ) - df = _process_metadata(df, column_names, child_names, - per_file_user_data, row_groups, - filepaths_or_buffers, - allow_range_index, use_pandas_metadata, - nrows=nrows, skip_rows=skip_rows) - return df - - -cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None, - use_pandas_metadata=True, - Expression filters=None, - size_type nrows=-1, - int64_t skip_rows=0, - allow_mismatched_pq_schemas=False): - """ - Cython function to call into libcudf API, see `read_parquet`. - - filters, if not None, should be an Expression that evaluates to a - boolean predicate as a function of columns being read. - - See Also - -------- - cudf.io.parquet.read_parquet - cudf.io.parquet.to_parquet - """ - - allow_range_index = True - if columns is not None and len(columns) == 0 or filters: - allow_range_index = False - - options = ( - plc.io.parquet.ParquetReaderOptions.builder( - plc.io.SourceInfo(filepaths_or_buffers) - ) - .use_pandas_metadata(use_pandas_metadata) - .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) - .build() - ) - if row_groups is not None: - options.set_row_groups(row_groups) - if nrows > -1: - options.set_num_rows(nrows) - if skip_rows != 0: - options.set_skip_rows(skip_rows) - if columns is not None: - options.set_columns(columns) - if filters is not None: - options.set_filter(filters) - - tbl_w_meta = plc.io.parquet.read_parquet(options) - - df = cudf.DataFrame._from_data( - *data_from_pylibcudf_io(tbl_w_meta) - ) - - df = _process_metadata(df, tbl_w_meta.column_names(include_children=False), - tbl_w_meta.child_names, tbl_w_meta.per_file_user_data, - row_groups, filepaths_or_buffers, - allow_range_index, use_pandas_metadata, - nrows=nrows, skip_rows=skip_rows) - return df - -cpdef read_parquet_metadata(list filepaths_or_buffers): - """ - Cython function to call into libcudf API, see `read_parquet_metadata`. 
- - See Also - -------- - cudf.io.parquet.read_parquet - cudf.io.parquet.to_parquet - """ - parquet_metadata = plc.io.parquet_metadata.read_parquet_metadata( - plc.io.SourceInfo(filepaths_or_buffers) - ) - - # read all column names including index column, if any - col_names = [info.name() for info in parquet_metadata.schema().root().children()] - - index_col_names = set() - json_str = parquet_metadata.metadata()['pandas'] - if json_str != "": - meta = json.loads(json_str) - file_is_range_index, index_col, _ = _parse_metadata(meta) - if ( - not file_is_range_index - and index_col is not None - ): - columns = meta['columns'] - for idx_col in index_col: - for c in columns: - if c['field_name'] == idx_col: - index_col_names.add(idx_col) - - # remove the index column from the list of column names - # only if index_col_names is not None - if len(index_col_names) >= 0: - col_names = [name for name in col_names if name not in index_col_names] - - return ( - parquet_metadata.num_rows(), - parquet_metadata.num_rowgroups(), - col_names, - len(col_names), - parquet_metadata.rowgroup_metadata() - ) - - -@acquire_spill_lock() -def write_parquet( - table, - object filepaths_or_buffers, - object index=None, - object compression="snappy", - object statistics="ROWGROUP", - object metadata_file_path=None, - object int96_timestamps=False, - object row_group_size_bytes=None, - object row_group_size_rows=None, - object max_page_size_bytes=None, - object max_page_size_rows=None, - object max_dictionary_size=None, - object partitions_info=None, - object force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - object skip_compression=None, - object column_encoding=None, - object column_type_length=None, - object output_as_binary=None, - write_arrow_schema=False, -): - """ - Cython function to call into libcudf API, see `write_parquet`. - - See Also - -------- - cudf.io.parquet.write_parquet - """ - if index is True or ( - index is None and not isinstance(table._index, cudf.RangeIndex) - ): - columns = [*table.index._columns, *table._columns] - plc_table = plc.Table([col.to_pylibcudf(mode="read") for col in columns]) - tbl_meta = TableInputMetadata(plc_table) - for level, idx_name in enumerate(table._index.names): - tbl_meta.column_metadata[level].set_name( - _index_level_name(idx_name, level, table._column_names) - ) - num_index_cols_meta = len(table._index.names) - else: - plc_table = plc.Table( - [col.to_pylibcudf(mode="read") for col in table._columns] - ) - tbl_meta = TableInputMetadata(plc_table) - num_index_cols_meta = 0 - - for i, name in enumerate(table._column_names, num_index_cols_meta): - if not isinstance(name, str): - if cudf.get_option("mode.pandas_compatible"): - tbl_meta.column_metadata[i].set_name(str(name)) - else: - raise ValueError( - "Writing a Parquet file requires string column names" - ) - else: - tbl_meta.column_metadata[i].set_name(name) - - _set_col_metadata( - table[name]._column, - tbl_meta.column_metadata[i], - force_nullable_schema, - None, - skip_compression, - column_encoding, - column_type_length, - output_as_binary - ) - if partitions_info is not None: - user_data = [ - {"pandas": generate_pandas_metadata( - table.iloc[start_row:start_row + num_row].copy(deep=False), - index - )} - for start_row, num_row in partitions_info - ] - else: - user_data = [{"pandas": generate_pandas_metadata(table, index)}] - - if header_version not in ("1.0", "2.0"): - raise ValueError( - f"Invalid parquet header version: {header_version}. 
" - "Valid values are '1.0' and '2.0'" - ) - - dict_policy = ( - plc.io.types.DictionaryPolicy.ADAPTIVE - if use_dictionary - else plc.io.types.DictionaryPolicy.NEVER - ) - - comp_type = _get_comp_type(compression) - stat_freq = _get_stat_freq(statistics) - options = ( - plc.io.parquet.ParquetWriterOptions.builder( - plc.io.SinkInfo(filepaths_or_buffers), plc_table - ) - .metadata(tbl_meta) - .key_value_metadata(user_data) - .compression(comp_type) - .stats_level(stat_freq) - .int96_timestamps(int96_timestamps) - .write_v2_headers(header_version == "2.0") - .dictionary_policy(dict_policy) - .utc_timestamps(False) - .write_arrow_schema(write_arrow_schema) - .build() - ) - if partitions_info is not None: - options.set_partitions( - [plc.io.types.PartitionInfo(part[0], part[1]) for part in partitions_info] - ) - if metadata_file_path is not None: - if is_list_like(metadata_file_path): - options.set_column_chunks_file_paths(metadata_file_path) - else: - options.set_column_chunks_file_paths([metadata_file_path]) - if row_group_size_bytes is not None: - options.set_row_group_size_bytes(row_group_size_bytes) - if row_group_size_rows is not None: - options.set_row_group_size_rows(row_group_size_rows) - if max_page_size_bytes is not None: - options.set_max_page_size_bytes(max_page_size_bytes) - if max_page_size_rows is not None: - options.set_max_page_size_rows(max_page_size_rows) - if max_dictionary_size is not None: - options.set_max_dictionary_size(max_dictionary_size) - blob = plc.io.parquet.write_parquet(options) - if metadata_file_path is not None: - return np.asarray(blob.obj) - else: - return None - - -cdef class ParquetWriter: - """ - ParquetWriter lets you incrementally write out a Parquet file from a series - of cudf tables - - Parameters - ---------- - filepath_or_buffer : str, io.IOBase, os.PathLike, or list - File path or buffer to write to. The argument may also correspond - to a list of file paths or buffers. - index : bool or None, default None - If ``True``, include a dataframe's index(es) in the file output. - If ``False``, they will not be written to the file. If ``None``, - index(es) other than RangeIndex will be saved as columns. - compression : {'snappy', None}, default 'snappy' - Name of the compression to use. Use ``None`` for no compression. - statistics : {'ROWGROUP', 'PAGE', 'COLUMN', 'NONE'}, default 'ROWGROUP' - Level at which column statistics should be included in file. - row_group_size_bytes: int, default ``uint64 max`` - Maximum size of each stripe of the output. - By default, a virtually infinite size equal to ``uint64 max`` will be used. - row_group_size_rows: int, default 1000000 - Maximum number of rows of each stripe of the output. - By default, 1000000 (10^6 rows) will be used. - max_page_size_bytes: int, default 524288 - Maximum uncompressed size of each page of the output. - By default, 524288 (512KB) will be used. - max_page_size_rows: int, default 20000 - Maximum number of rows of each page of the output. - By default, 20000 will be used. - max_dictionary_size: int, default 1048576 - Maximum size of the dictionary page for each output column chunk. Dictionary - encoding for column chunks that exceeds this limit will be disabled. - By default, 1048576 (1MB) will be used. - use_dictionary : bool, default True - If ``True``, enable dictionary encoding for Parquet page data - subject to ``max_dictionary_size`` constraints. - If ``False``, disable dictionary encoding for Parquet page data. 
- store_schema : bool, default False - If ``True``, enable computing and writing arrow schema to Parquet - file footer's key-value metadata section for faithful round-tripping. - See Also - -------- - cudf.io.parquet.write_parquet - """ - cdef bool initialized - cdef ParquetChunkedWriter writer - cdef SinkInfo sink - cdef TableInputMetadata tbl_meta - cdef str statistics - cdef object compression - cdef object index - cdef size_t row_group_size_bytes - cdef size_type row_group_size_rows - cdef size_t max_page_size_bytes - cdef size_type max_page_size_rows - cdef size_t max_dictionary_size - cdef bool use_dictionary - cdef bool write_arrow_schema - - def __cinit__(self, object filepath_or_buffer, object index=None, - object compression="snappy", str statistics="ROWGROUP", - size_t row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT, - size_type row_group_size_rows=1000000, - size_t max_page_size_bytes=524288, - size_type max_page_size_rows=20000, - size_t max_dictionary_size=1048576, - bool use_dictionary=True, - bool store_schema=False): - filepaths_or_buffers = ( - list(filepath_or_buffer) - if is_list_like(filepath_or_buffer) - else [filepath_or_buffer] - ) - self.sink = plc.io.SinkInfo(filepaths_or_buffers) - self.statistics = statistics - self.compression = compression - self.index = index - self.initialized = False - self.row_group_size_bytes = row_group_size_bytes - self.row_group_size_rows = row_group_size_rows - self.max_page_size_bytes = max_page_size_bytes - self.max_page_size_rows = max_page_size_rows - self.max_dictionary_size = max_dictionary_size - self.use_dictionary = use_dictionary - self.write_arrow_schema = store_schema - - def write_table(self, table, object partitions_info=None): - """ Writes a single table to the file """ - if not self.initialized: - self._initialize_chunked_state( - table, - num_partitions=len(partitions_info) if partitions_info else 1 - ) - if self.index is not False and ( - table._index.name is not None or - isinstance(table._index, cudf.core.multiindex.MultiIndex)): - columns = [*table.index._columns, *table._columns] - plc_table = plc.Table([col.to_pylibcudf(mode="read") for col in columns]) - else: - plc_table = plc.Table( - [col.to_pylibcudf(mode="read") for col in table._columns] - ) - self.writer.write(plc_table, partitions_info) - - def close(self, object metadata_file_path=None): - if not self.initialized: - return None - column_chunks_file_paths=[] - if metadata_file_path is not None: - if is_list_like(metadata_file_path): - column_chunks_file_paths = list(metadata_file_path) - else: - column_chunks_file_paths = [metadata_file_path] - blob = self.writer.close(column_chunks_file_paths) - if metadata_file_path is not None: - return np.asarray(blob.obj) - return None - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def _initialize_chunked_state(self, table, num_partitions=1): - """ Prepares all the values required to build the - chunked_parquet_writer_options and creates a writer""" - - # Set the table_metadata - num_index_cols_meta = 0 - plc_table = plc.Table( - [ - col.to_pylibcudf(mode="read") - for col in table._columns - ] - ) - self.tbl_meta = TableInputMetadata(plc_table) - if self.index is not False: - if isinstance(table._index, cudf.core.multiindex.MultiIndex): - plc_table = plc.Table( - [ - col.to_pylibcudf(mode="read") - for col in itertools.chain(table.index._columns, table._columns) - ] - ) - self.tbl_meta = TableInputMetadata(plc_table) - for level, idx_name in 
enumerate(table._index.names): - self.tbl_meta.column_metadata[level].set_name(idx_name) - num_index_cols_meta = len(table._index.names) - else: - if table._index.name is not None: - plc_table = plc.Table( - [ - col.to_pylibcudf(mode="read") - for col in itertools.chain( - table.index._columns, table._columns - ) - ] - ) - self.tbl_meta = TableInputMetadata(plc_table) - self.tbl_meta.column_metadata[0].set_name(table._index.name) - num_index_cols_meta = 1 - - for i, name in enumerate(table._column_names, num_index_cols_meta): - self.tbl_meta.column_metadata[i].set_name(name) - _set_col_metadata( - table[name]._column, - self.tbl_meta.column_metadata[i], - ) - - index = ( - False if isinstance(table._index, cudf.RangeIndex) else self.index - ) - user_data = [{"pandas" : generate_pandas_metadata(table, index)}]*num_partitions - cdef compression_type comp_type = _get_comp_type(self.compression) - cdef statistics_freq stat_freq = _get_stat_freq(self.statistics) - cdef dictionary_policy dict_policy = ( - plc.io.types.DictionaryPolicy.ADAPTIVE - if self.use_dictionary - else plc.io.types.DictionaryPolicy.NEVER - ) - options = ( - plc.io.parquet.ChunkedParquetWriterOptions.builder(self.sink) - .metadata(self.tbl_meta) - .key_value_metadata(user_data) - .compression(comp_type) - .stats_level(stat_freq) - .row_group_size_bytes(self.row_group_size_bytes) - .row_group_size_rows(self.row_group_size_rows) - .max_page_size_bytes(self.max_page_size_bytes) - .max_page_size_rows(self.max_page_size_rows) - .max_dictionary_size(self.max_dictionary_size) - .write_arrow_schema(self.write_arrow_schema) - .build() - ) - options.set_dictionary_policy(dict_policy) - self.writer = plc.io.parquet.ParquetChunkedWriter.from_options(options) - self.initialized = True - - -cpdef merge_filemetadata(object filemetadata_list): - """ - Cython function to call into libcudf API, see `merge_row_group_metadata`. - - See Also - -------- - cudf.io.parquet.merge_row_group_metadata - """ - return np.asarray( - plc.io.parquet.merge_row_group_metadata(filemetadata_list).obj - ) - - -cdef statistics_freq _get_stat_freq(str statistics): - result = getattr( - plc.io.types.StatisticsFreq, - f"STATISTICS_{statistics.upper()}", - None - ) - if result is None: - raise ValueError("Unsupported `statistics_freq` type") - return result - - -cdef compression_type _get_comp_type(object compression): - if compression is None: - return plc.io.types.CompressionType.NONE - result = getattr( - plc.io.types.CompressionType, - str(compression).upper(), - None - ) - if result is None: - raise ValueError("Unsupported `compression` type") - return result - - -cdef _set_col_metadata( - Column col, - ColumnInMetadata col_meta, - bool force_nullable_schema=False, - str path=None, - object skip_compression=None, - object column_encoding=None, - object column_type_length=None, - object output_as_binary=None, -): - need_path = (skip_compression is not None or column_encoding is not None or - column_type_length is not None or output_as_binary is not None) - name = col_meta.get_name() if need_path else None - full_path = path + "." + name if path is not None else name - - if force_nullable_schema: - # Only set nullability if `force_nullable_schema` - # is true. 
- col_meta.set_nullability(True) - - if skip_compression is not None and full_path in skip_compression: - col_meta.set_skip_compression(True) - - if column_encoding is not None and full_path in column_encoding: - encoding = column_encoding[full_path] - if encoding is None: - c_encoding = plc.io.types.ColumnEncoding.USE_DEFAULT - else: - enc = str(encoding).upper() - c_encoding = getattr(plc.io.types.ColumnEncoding, enc, None) - if c_encoding is None: - raise ValueError("Unsupported `column_encoding` type") - col_meta.set_encoding(c_encoding) - - if column_type_length is not None and full_path in column_type_length: - col_meta.set_output_as_binary(True) - col_meta.set_type_length(column_type_length[full_path]) - - if output_as_binary is not None and full_path in output_as_binary: - col_meta.set_output_as_binary(True) - - if isinstance(col.dtype, cudf.StructDtype): - for i, (child_col, name) in enumerate( - zip(col.children, list(col.dtype.fields)) - ): - col_meta.child(i).set_name(name) - _set_col_metadata( - child_col, - col_meta.child(i), - force_nullable_schema, - full_path, - skip_compression, - column_encoding, - column_type_length, - output_as_binary - ) - elif isinstance(col.dtype, cudf.ListDtype): - if full_path is not None: - full_path = full_path + ".list" - col_meta.child(1).set_name("element") - _set_col_metadata( - col.children[1], - col_meta.child(1), - force_nullable_schema, - full_path, - skip_compression, - column_encoding, - column_type_length, - output_as_binary - ) - elif isinstance(col.dtype, cudf.core.dtypes.DecimalDtype): - col_meta.set_decimal_precision(col.dtype.precision) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 2382e9f12ed..66095d4a155 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -1,6 +1,7 @@ # Copyright (c) 2019-2024, NVIDIA CORPORATION. 
from __future__ import annotations +import io import itertools import math import operator @@ -10,23 +11,42 @@ from collections import defaultdict from contextlib import ExitStack from functools import partial, reduce -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal from uuid import uuid4 import numpy as np import pandas as pd +import pyarrow as pa from pyarrow import dataset as ds +import pylibcudf as plc + import cudf -from cudf._lib import parquet as libparquet +from cudf._lib.column import Column +from cudf._lib.utils import ( + _data_from_columns, + _index_level_name, + data_from_pylibcudf_io, + generate_pandas_metadata, +) from cudf.api.types import is_list_like +from cudf.core.buffer import acquire_spill_lock from cudf.core.column import as_column, column_empty from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes from cudf.utils import ioutils from cudf.utils.performance_tracking import _performance_tracking +try: + import ujson as json # type: ignore[import-untyped] +except ImportError: + import json + if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Callable, Hashable + + from typing_extensions import Self + + from cudf.core.column import ColumnBase BYTE_SIZES = { @@ -55,31 +75,200 @@ } +@acquire_spill_lock() +def _plc_write_parquet( + table, + filepaths_or_buffers, + index: bool | None = None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + metadata_file_path: str | None = None, + int96_timestamps: bool = False, + row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, + max_dictionary_size: int | None = None, + partitions_info=None, + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, + write_arrow_schema: bool = False, +) -> np.ndarray | None: + """ + Cython function to call into libcudf API, see `write_parquet`. 
+ + See Also + -------- + cudf.io.parquet.write_parquet + """ + if index is True or ( + index is None and not isinstance(table.index, cudf.RangeIndex) + ): + columns = itertools.chain(table.index._columns, table._columns) + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in columns] + ) + tbl_meta = plc.io.types.TableInputMetadata(plc_table) + for level, idx_name in enumerate(table.index.names): + tbl_meta.column_metadata[level].set_name( + _index_level_name(idx_name, level, table._column_names) + ) + num_index_cols_meta = len(table.index.names) + else: + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in table._columns] + ) + tbl_meta = plc.io.types.TableInputMetadata(plc_table) + num_index_cols_meta = 0 + + for i, name in enumerate(table._column_names, num_index_cols_meta): + if not isinstance(name, str): + if cudf.get_option("mode.pandas_compatible"): + tbl_meta.column_metadata[i].set_name(str(name)) + else: + raise ValueError( + "Writing a Parquet file requires string column names" + ) + else: + tbl_meta.column_metadata[i].set_name(name) + + _set_col_metadata( + table[name]._column, + tbl_meta.column_metadata[i], + force_nullable_schema, + None, + skip_compression, + column_encoding, + column_type_length, + output_as_binary, + ) + if partitions_info is not None: + user_data = [ + { + "pandas": generate_pandas_metadata( + table.iloc[start_row : start_row + num_row].copy( + deep=False + ), + index, + ) + } + for start_row, num_row in partitions_info + ] + else: + user_data = [{"pandas": generate_pandas_metadata(table, index)}] + + if header_version not in ("1.0", "2.0"): + raise ValueError( + f"Invalid parquet header version: {header_version}. " + "Valid values are '1.0' and '2.0'" + ) + + dict_policy = ( + plc.io.types.DictionaryPolicy.ADAPTIVE + if use_dictionary + else plc.io.types.DictionaryPolicy.NEVER + ) + + comp_type = _get_comp_type(compression) + stat_freq = _get_stat_freq(statistics) + options = ( + plc.io.parquet.ParquetWriterOptions.builder( + plc.io.SinkInfo(filepaths_or_buffers), plc_table + ) + .metadata(tbl_meta) + .key_value_metadata(user_data) + .compression(comp_type) + .stats_level(stat_freq) + .int96_timestamps(int96_timestamps) + .write_v2_headers(header_version == "2.0") + .dictionary_policy(dict_policy) + .utc_timestamps(False) + .write_arrow_schema(write_arrow_schema) + .build() + ) + if partitions_info is not None: + options.set_partitions( + [ + plc.io.types.PartitionInfo(part[0], part[1]) + for part in partitions_info + ] + ) + if metadata_file_path is not None: + if is_list_like(metadata_file_path): + options.set_column_chunks_file_paths(metadata_file_path) + else: + options.set_column_chunks_file_paths([metadata_file_path]) + if row_group_size_bytes is not None: + options.set_row_group_size_bytes(row_group_size_bytes) + if row_group_size_rows is not None: + options.set_row_group_size_rows(row_group_size_rows) + if max_page_size_bytes is not None: + options.set_max_page_size_bytes(max_page_size_bytes) + if max_page_size_rows is not None: + options.set_max_page_size_rows(max_page_size_rows) + if max_dictionary_size is not None: + options.set_max_dictionary_size(max_dictionary_size) + blob = plc.io.parquet.write_parquet(options) + if metadata_file_path is not None: + return np.asarray(blob.obj) + else: + return None + + @_performance_tracking def _write_parquet( df, paths, - compression="snappy", - index=None, - statistics="ROWGROUP", - metadata_file_path=None, - int96_timestamps=False, - row_group_size_bytes=None, - 
row_group_size_rows=None, - max_page_size_bytes=None, - max_page_size_rows=None, - max_dictionary_size=None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + index: bool | None = None, + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + metadata_file_path: str | None = None, + int96_timestamps: bool = False, + row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, + max_dictionary_size: int | None = None, partitions_info=None, storage_options=None, - force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - skip_compression=None, - column_encoding=None, - column_type_length=None, - output_as_binary=None, - write_arrow_schema=True, -): + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, + write_arrow_schema: bool = True, +) -> np.ndarray | None: if is_list_like(paths) and len(paths) > 1: if partitions_info is None: ValueError("partition info is required for multiple paths") @@ -124,11 +313,11 @@ def _write_parquet( file_objs = [ ioutils.get_IOBase_writer(file_obj) for file_obj in fsspec_objs ] - write_parquet_res = libparquet.write_parquet( + write_parquet_res = _plc_write_parquet( df, filepaths_or_buffers=file_objs, **common_args ) else: - write_parquet_res = libparquet.write_parquet( + write_parquet_res = _plc_write_parquet( df, filepaths_or_buffers=paths_or_bufs, **common_args ) @@ -141,26 +330,38 @@ def _write_parquet( def write_to_dataset( df, root_path, - compression="snappy", + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", filename=None, partition_cols=None, fs=None, - preserve_index=False, - return_metadata=False, - statistics="ROWGROUP", - int96_timestamps=False, - row_group_size_bytes=None, - row_group_size_rows=None, - max_page_size_bytes=None, - max_page_size_rows=None, + preserve_index: bool = False, + return_metadata: bool = False, + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + int96_timestamps: bool = False, + row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, storage_options=None, - force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - skip_compression=None, - column_encoding=None, - column_type_length=None, - output_as_binary=None, + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, store_schema=False, ): """Wraps `to_parquet` to write partitioned Parquet datasets. 
@@ -330,9 +531,29 @@ def write_to_dataset( return metadata +def _parse_metadata(meta) -> tuple[bool, Any, Any]: + file_is_range_index = False + file_index_cols = None + file_column_dtype = None + + if "index_columns" in meta and len(meta["index_columns"]) > 0: + file_index_cols = meta["index_columns"] + + if ( + isinstance(file_index_cols[0], dict) + and file_index_cols[0]["kind"] == "range" + ): + file_is_range_index = True + if "column_indexes" in meta and len(meta["column_indexes"]) == 1: + file_column_dtype = meta["column_indexes"][0]["numpy_type"] + return file_is_range_index, file_index_cols, file_column_dtype + + @ioutils.doc_read_parquet_metadata() @_performance_tracking -def read_parquet_metadata(filepath_or_buffer): +def read_parquet_metadata( + filepath_or_buffer, +) -> tuple[int, int, list[Hashable], int, list[dict[str, int]]]: """{docstring}""" # List of filepaths or buffers @@ -341,7 +562,39 @@ def read_parquet_metadata(filepath_or_buffer): bytes_per_thread=None, ) - return libparquet.read_parquet_metadata(filepaths_or_buffers) + parquet_metadata = plc.io.parquet_metadata.read_parquet_metadata( + plc.io.SourceInfo(filepaths_or_buffers) + ) + + # read all column names including index column, if any + col_names = [ + info.name() for info in parquet_metadata.schema().root().children() + ] + + index_col_names = set() + json_str = parquet_metadata.metadata()["pandas"] + if json_str != "": + meta = json.loads(json_str) + file_is_range_index, index_col, _ = _parse_metadata(meta) + if not file_is_range_index and index_col is not None: + columns = meta["columns"] + for idx_col in index_col: + for c in columns: + if c["field_name"] == idx_col: + index_col_names.add(idx_col) + + # remove the index column from the list of column names + # only if index_col_names is not None + if len(index_col_names) >= 0: + col_names = [name for name in col_names if name not in index_col_names] + + return ( + parquet_metadata.num_rows(), + parquet_metadata.num_rowgroups(), + col_names, + len(col_names), + parquet_metadata.rowgroup_metadata(), + ) @_performance_tracking @@ -913,16 +1166,18 @@ def _read_parquet( columns=None, row_groups=None, use_pandas_metadata=None, - nrows=None, - skip_rows=None, - allow_mismatched_pq_schemas=False, + nrows: int | None = None, + skip_rows: int | None = None, + allow_mismatched_pq_schemas: bool = False, *args, **kwargs, -): +) -> cudf.DataFrame: # Simple helper function to dispatch between # cudf and pyarrow to read parquet data if engine == "cudf": - if kwargs: + if set(kwargs.keys()).difference( + set(("_chunk_read_limit", "_pass_read_limit")) + ): raise ValueError( "cudf engine doesn't support the " f"following keyword arguments: {list(kwargs.keys())}" @@ -932,30 +1187,123 @@ def _read_parquet( "cudf engine doesn't support the " f"following positional arguments: {list(args)}" ) + if nrows is None: + nrows = -1 + if skip_rows is None: + skip_rows = 0 if cudf.get_option("io.parquet.low_memory"): - return libparquet.read_parquet_chunked( + # Note: If this function ever takes accepts filters + # allow_range_index needs to be False when a filter is passed + # (see read_parquet) + allow_range_index = columns is not None and len(columns) != 0 + + options = ( + plc.io.parquet.ParquetReaderOptions.builder( + plc.io.SourceInfo(filepaths_or_buffers) + ) + .use_pandas_metadata(use_pandas_metadata) + .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) + .build() + ) + if row_groups is not None: + options.set_row_groups(row_groups) + if nrows > -1: + 
options.set_num_rows(nrows) + if skip_rows != 0: + options.set_skip_rows(skip_rows) + if columns is not None: + options.set_columns(columns) + + reader = plc.io.parquet.ChunkedParquetReader( + options, + chunk_read_limit=kwargs.get("_chunk_read_limit", 0), + pass_read_limit=kwargs.get("_pass_read_limit", 1024000000), + ) + + tbl_w_meta = reader.read_chunk() + column_names = tbl_w_meta.column_names(include_children=False) + child_names = tbl_w_meta.child_names + per_file_user_data = tbl_w_meta.per_file_user_data + concatenated_columns = tbl_w_meta.tbl.columns() + + # save memory + del tbl_w_meta + + while reader.has_next(): + tbl = reader.read_chunk().tbl + + for i in range(tbl.num_columns()): + concatenated_columns[i] = plc.concatenate.concatenate( + [concatenated_columns[i], tbl._columns[i]] + ) + # Drop residual columns to save memory + tbl._columns[i] = None + + df = cudf.DataFrame._from_data( + *_data_from_columns( + columns=[ + Column.from_pylibcudf(plc) + for plc in concatenated_columns + ], + column_names=column_names, + index_names=None, + ) + ) + df = _process_metadata( + df, + column_names, + child_names, + per_file_user_data, + row_groups, filepaths_or_buffers, - columns=columns, - row_groups=row_groups, - use_pandas_metadata=use_pandas_metadata, - nrows=nrows if nrows is not None else -1, - skip_rows=skip_rows if skip_rows is not None else 0, - allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, + allow_range_index, + use_pandas_metadata, + nrows=nrows, + skip_rows=skip_rows, ) + return df else: - if nrows is None: - nrows = -1 - if skip_rows is None: - skip_rows = 0 - return libparquet.read_parquet( + allow_range_index = True + filters = kwargs.get("filters", None) + if columns is not None and len(columns) == 0 or filters: + allow_range_index = False + + options = ( + plc.io.parquet.ParquetReaderOptions.builder( + plc.io.SourceInfo(filepaths_or_buffers) + ) + .use_pandas_metadata(use_pandas_metadata) + .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) + .build() + ) + if row_groups is not None: + options.set_row_groups(row_groups) + if nrows > -1: + options.set_num_rows(nrows) + if skip_rows != 0: + options.set_skip_rows(skip_rows) + if columns is not None: + options.set_columns(columns) + if filters is not None: + options.set_filter(filters) + + tbl_w_meta = plc.io.parquet.read_parquet(options) + + df = cudf.DataFrame._from_data(*data_from_pylibcudf_io(tbl_w_meta)) + + df = _process_metadata( + df, + tbl_w_meta.column_names(include_children=False), + tbl_w_meta.child_names, + tbl_w_meta.per_file_user_data, + row_groups, filepaths_or_buffers, - columns=columns, - row_groups=row_groups, - use_pandas_metadata=use_pandas_metadata, + allow_range_index, + use_pandas_metadata, nrows=nrows, skip_rows=skip_rows, - allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, ) + return df else: if ( isinstance(filepaths_or_buffers, list) @@ -980,28 +1328,40 @@ def to_parquet( df, path, engine="cudf", - compression="snappy", - index=None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + index: bool | None = None, partition_cols=None, partition_file_name=None, partition_offsets=None, - statistics="ROWGROUP", - metadata_file_path=None, - int96_timestamps=False, - row_group_size_bytes=None, - row_group_size_rows=None, - max_page_size_bytes=None, - max_page_size_rows=None, - max_dictionary_size=None, + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + metadata_file_path: str | None = None, + int96_timestamps: bool = False, + 
row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, + max_dictionary_size: int | None = None, storage_options=None, - return_metadata=False, - force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - skip_compression=None, - column_encoding=None, - column_type_length=None, - output_as_binary=None, + return_metadata: bool = False, + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, store_schema=False, *args, **kwargs, @@ -1114,10 +1474,11 @@ def to_parquet( @ioutils.doc_merge_parquet_filemetadata() -def merge_parquet_filemetadata(filemetadata_list): +def merge_parquet_filemetadata(filemetadata_list: list) -> np.ndarray: """{docstring}""" - - return libparquet.merge_filemetadata(filemetadata_list) + return np.asarray( + plc.io.parquet.merge_row_group_metadata(filemetadata_list).obj + ) def _generate_filename(): @@ -1205,10 +1566,207 @@ def _get_groups_and_offsets( return part_names, grouped_df, part_offsets -ParquetWriter = libparquet.ParquetWriter +class ParquetWriter: + """ + ParquetWriter lets you incrementally write out a Parquet file from a series + of cudf tables + + Parameters + ---------- + filepath_or_buffer : str, io.IOBase, os.PathLike, or list + File path or buffer to write to. The argument may also correspond + to a list of file paths or buffers. + index : bool or None, default None + If ``True``, include a dataframe's index(es) in the file output. + If ``False``, they will not be written to the file. If ``None``, + index(es) other than RangeIndex will be saved as columns. + compression : {'snappy', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + statistics : {'ROWGROUP', 'PAGE', 'COLUMN', 'NONE'}, default 'ROWGROUP' + Level at which column statistics should be included in file. + row_group_size_bytes: int, default ``uint64 max`` + Maximum size of each stripe of the output. + By default, a virtually infinite size equal to ``uint64 max`` will be used. + row_group_size_rows: int, default 1000000 + Maximum number of rows of each stripe of the output. + By default, 1000000 (10^6 rows) will be used. + max_page_size_bytes: int, default 524288 + Maximum uncompressed size of each page of the output. + By default, 524288 (512KB) will be used. + max_page_size_rows: int, default 20000 + Maximum number of rows of each page of the output. + By default, 20000 will be used. + max_dictionary_size: int, default 1048576 + Maximum size of the dictionary page for each output column chunk. Dictionary + encoding for column chunks that exceeds this limit will be disabled. + By default, 1048576 (1MB) will be used. + use_dictionary : bool, default True + If ``True``, enable dictionary encoding for Parquet page data + subject to ``max_dictionary_size`` constraints. + If ``False``, disable dictionary encoding for Parquet page data. + store_schema : bool, default False + If ``True``, enable computing and writing arrow schema to Parquet + file footer's key-value metadata section for faithful round-tripping. 
+ + See Also + -------- + cudf.io.parquet.write_parquet + """ + + def __init__( + self, + filepath_or_buffer, + index: bool | None = None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + row_group_size_bytes: int = int(np.iinfo(np.uint64).max), + row_group_size_rows: int = 1000000, + max_page_size_bytes: int = 524288, + max_page_size_rows: int = 20000, + max_dictionary_size: int = 1048576, + use_dictionary: bool = True, + store_schema: bool = False, + ): + filepaths_or_buffers = ( + list(filepath_or_buffer) + if is_list_like(filepath_or_buffer) + else [filepath_or_buffer] + ) + self.sink = plc.io.SinkInfo(filepaths_or_buffers) + self.statistics = statistics + self.compression = compression + self.index = index + self.initialized = False + self.row_group_size_bytes = row_group_size_bytes + self.row_group_size_rows = row_group_size_rows + self.max_page_size_bytes = max_page_size_bytes + self.max_page_size_rows = max_page_size_rows + self.max_dictionary_size = max_dictionary_size + self.use_dictionary = use_dictionary + self.write_arrow_schema = store_schema + + def write_table(self, table, partitions_info=None) -> None: + """Writes a single table to the file""" + if not self.initialized: + self._initialize_chunked_state( + table, + num_partitions=len(partitions_info) if partitions_info else 1, + ) + if self.index is not False and ( + table.index.name is not None + or isinstance(table.index, cudf.MultiIndex) + ): + columns = itertools.chain(table.index._columns, table._columns) + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in columns] + ) + else: + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in table._columns] + ) + self.writer.write(plc_table, partitions_info) + + def close(self, metadata_file_path=None) -> np.ndarray | None: + if not self.initialized: + return None + column_chunks_file_paths = [] + if metadata_file_path is not None: + if is_list_like(metadata_file_path): + column_chunks_file_paths = list(metadata_file_path) + else: + column_chunks_file_paths = [metadata_file_path] + blob = self.writer.close(column_chunks_file_paths) + if metadata_file_path is not None: + return np.asarray(blob.obj) + return None + + def __enter__(self) -> Self: + return self + + def __exit__(self, *args) -> None: + self.close() + + def _initialize_chunked_state( + self, table, num_partitions: int = 1 + ) -> None: + """Prepares all the values required to build the + chunked_parquet_writer_options and creates a writer + """ + # Set the table_metadata + num_index_cols_meta = 0 + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in table._columns] + ) + self.tbl_meta = plc.io.types.TableInputMetadata(plc_table) + if self.index is not False: + if isinstance(table.index, cudf.MultiIndex): + plc_table = plc.Table( + [ + col.to_pylibcudf(mode="read") + for col in itertools.chain( + table.index._columns, table._columns + ) + ] + ) + self.tbl_meta = plc.io.types.TableInputMetadata(plc_table) + for level, idx_name in enumerate(table.index.names): + self.tbl_meta.column_metadata[level].set_name(idx_name) + num_index_cols_meta = len(table.index.names) + else: + if table.index.name is not None: + plc_table = plc.Table( + [ + col.to_pylibcudf(mode="read") + for col in itertools.chain( + table.index._columns, table._columns + ) + ] + ) + self.tbl_meta = plc.io.types.TableInputMetadata(plc_table) + self.tbl_meta.column_metadata[0].set_name(table.index.name) + 
num_index_cols_meta = 1 + + for i, name in enumerate(table._column_names, num_index_cols_meta): + self.tbl_meta.column_metadata[i].set_name(name) + _set_col_metadata( + table[name]._column, + self.tbl_meta.column_metadata[i], + ) -def _parse_bytes(s): + index = ( + False if isinstance(table.index, cudf.RangeIndex) else self.index + ) + user_data = [ + {"pandas": generate_pandas_metadata(table, index)} + ] * num_partitions + comp_type = _get_comp_type(self.compression) + stat_freq = _get_stat_freq(self.statistics) + dict_policy = ( + plc.io.types.DictionaryPolicy.ADAPTIVE + if self.use_dictionary + else plc.io.types.DictionaryPolicy.NEVER + ) + options = ( + plc.io.parquet.ChunkedParquetWriterOptions.builder(self.sink) + .metadata(self.tbl_meta) + .key_value_metadata(user_data) + .compression(comp_type) + .stats_level(stat_freq) + .row_group_size_bytes(self.row_group_size_bytes) + .row_group_size_rows(self.row_group_size_rows) + .max_page_size_bytes(self.max_page_size_bytes) + .max_page_size_rows(self.max_page_size_rows) + .max_dictionary_size(self.max_dictionary_size) + .write_arrow_schema(self.write_arrow_schema) + .build() + ) + options.set_dictionary_policy(dict_policy) + self.writer = plc.io.parquet.ParquetChunkedWriter.from_options(options) + self.initialized = True + + +def _parse_bytes(s: str) -> int: """Parse byte string to numbers Utility function vendored from Dask. @@ -1345,8 +1903,8 @@ def __init__( path, partition_cols, index=None, - compression="snappy", - statistics="ROWGROUP", + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", max_file_size=None, file_name_prefix=None, storage_options=None, @@ -1370,9 +1928,7 @@ def __init__( self.partition_cols = partition_cols # Collection of `ParquetWriter`s, and the corresponding # partition_col values they're responsible for - self._chunked_writers: list[ - tuple[libparquet.ParquetWriter, list[str], str] - ] = [] + self._chunked_writers: list[tuple[ParquetWriter, list[str], str]] = [] # Map of partition_col values to their ParquetWriter's index # in self._chunked_writers for reverse lookup self.path_cw_map: dict[str, int] = {} @@ -1563,3 +2119,257 @@ def _hive_dirname(name, val): if pd.isna(val): val = "__HIVE_DEFAULT_PARTITION__" return f"{name}={val}" + + +def _set_col_metadata( + col: ColumnBase, + col_meta: plc.io.types.ColumnInMetadata, + force_nullable_schema: bool = False, + path: str | None = None, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, +) -> None: + need_path = ( + skip_compression is not None + or column_encoding is not None + or column_type_length is not None + or output_as_binary is not None + ) + name = col_meta.get_name() if need_path else None + full_path = ( + path + "." + name if (path is not None and name is not None) else name + ) + + if force_nullable_schema: + # Only set nullability if `force_nullable_schema` + # is true. 
+ col_meta.set_nullability(True) + + if skip_compression is not None and full_path in skip_compression: + col_meta.set_skip_compression(True) + + if column_encoding is not None and full_path in column_encoding: + encoding = column_encoding[full_path] + if encoding is None: + c_encoding = plc.io.types.ColumnEncoding.USE_DEFAULT + else: + enc = str(encoding).upper() + c_encoding = getattr(plc.io.types.ColumnEncoding, enc, None) + if c_encoding is None: + raise ValueError("Unsupported `column_encoding` type") + col_meta.set_encoding(c_encoding) + + if column_type_length is not None and full_path in column_type_length: + col_meta.set_output_as_binary(True) + col_meta.set_type_length(column_type_length[full_path]) + + if output_as_binary is not None and full_path in output_as_binary: + col_meta.set_output_as_binary(True) + + if isinstance(col.dtype, cudf.StructDtype): + for i, (child_col, name) in enumerate( + zip(col.children, list(col.dtype.fields)) + ): + col_meta.child(i).set_name(name) + _set_col_metadata( + child_col, + col_meta.child(i), + force_nullable_schema, + full_path, + skip_compression, + column_encoding, + column_type_length, + output_as_binary, + ) + elif isinstance(col.dtype, cudf.ListDtype): + if full_path is not None: + full_path = full_path + ".list" + col_meta.child(1).set_name("element") + _set_col_metadata( + col.children[1], + col_meta.child(1), + force_nullable_schema, + full_path, + skip_compression, + column_encoding, + column_type_length, + output_as_binary, + ) + elif isinstance(col.dtype, cudf.core.dtypes.DecimalDtype): + col_meta.set_decimal_precision(col.dtype.precision) + + +def _get_comp_type( + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None], +) -> plc.io.types.CompressionType: + if compression is None: + return plc.io.types.CompressionType.NONE + result = getattr(plc.io.types.CompressionType, compression.upper(), None) + if result is None: + raise ValueError("Unsupported `compression` type") + return result + + +def _get_stat_freq( + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"], +) -> plc.io.types.StatisticsFreq: + result = getattr( + plc.io.types.StatisticsFreq, f"STATISTICS_{statistics.upper()}", None + ) + if result is None: + raise ValueError("Unsupported `statistics_freq` type") + return result + + +def _process_metadata( + df: cudf.DataFrame, + names: list[Hashable], + child_names: dict, + per_file_user_data: list, + row_groups, + filepaths_or_buffers, + allow_range_index: bool, + use_pandas_metadata: bool, + nrows: int = -1, + skip_rows: int = 0, +) -> cudf.DataFrame: + ioutils._add_df_col_struct_names(df, child_names) + index_col = None + is_range_index = True + column_index_type = None + index_col_names = None + meta = None + for single_file in per_file_user_data: + if b"pandas" not in single_file: + continue + json_str = single_file[b"pandas"].decode("utf-8") + meta = json.loads(json_str) + file_is_range_index, index_col, column_index_type = _parse_metadata( + meta + ) + is_range_index &= file_is_range_index + + if ( + not file_is_range_index + and index_col is not None + and index_col_names is None + ): + index_col_names = {} + for idx_col in index_col: + for c in meta["columns"]: + if c["field_name"] == idx_col: + index_col_names[idx_col] = c["name"] + + if meta is not None: + # Book keep each column metadata as the order + # of `meta["columns"]` and `column_names` are not + # guaranteed to be deterministic and same always. 
+ meta_data_per_column = { + col_meta["name"]: col_meta for col_meta in meta["columns"] + } + + # update the decimal precision of each column + for col in names: + if isinstance(df._data[col].dtype, cudf.core.dtypes.DecimalDtype): + df._data[col].dtype.precision = meta_data_per_column[col][ + "metadata" + ]["precision"] + + # Set the index column + if index_col is not None and len(index_col) > 0: + if is_range_index: + if not allow_range_index: + return df + + if len(per_file_user_data) > 1: + range_index_meta = { + "kind": "range", + "name": None, + "start": 0, + "stop": len(df), + "step": 1, + } + else: + range_index_meta = index_col[0] + + if row_groups is not None: + per_file_metadata = [ + pa.parquet.read_metadata( + # Pyarrow cannot read directly from bytes + io.BytesIO(s) if isinstance(s, bytes) else s + ) + for s in filepaths_or_buffers + ] + + filtered_idx = [] + for i, file_meta in enumerate(per_file_metadata): + row_groups_i = [] + start = 0 + for row_group in range(file_meta.num_row_groups): + stop = start + file_meta.row_group(row_group).num_rows + row_groups_i.append((start, stop)) + start = stop + + for rg in row_groups[i]: + filtered_idx.append( + cudf.RangeIndex( + start=row_groups_i[rg][0], + stop=row_groups_i[rg][1], + step=range_index_meta["step"], + ) + ) + + if len(filtered_idx) > 0: + idx = cudf.concat(filtered_idx) + else: + idx = cudf.Index._from_column( + cudf.core.column.column_empty(0) + ) + else: + start = range_index_meta["start"] + skip_rows # type: ignore[operator] + stop = range_index_meta["stop"] + if nrows > -1: + stop = start + nrows + idx = cudf.RangeIndex( + start=start, + stop=stop, + step=range_index_meta["step"], + name=range_index_meta["name"], + ) + + df.index = idx + elif set(index_col).issubset(names): + index_data = df[index_col] + actual_index_names = iter(index_col_names.values()) + if index_data._num_columns == 1: + idx = cudf.Index._from_column( + index_data._columns[0], name=next(actual_index_names) + ) + else: + idx = cudf.MultiIndex.from_frame( + index_data, names=list(actual_index_names) + ) + df.drop(columns=index_col, inplace=True) + df.index = idx + else: + if use_pandas_metadata: + df.index.names = index_col + + if df._num_columns == 0 and column_index_type is not None: + df._data.label_dtype = cudf.dtype(column_index_type) + + return df diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 13efa71ebae..77d1f77d30b 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -22,7 +22,6 @@ from pyarrow import parquet as pq import cudf -from cudf._lib.parquet import read_parquet_chunked from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.io.parquet import ( ParquetDatasetWriter, @@ -3775,13 +3774,14 @@ def test_parquet_chunked_reader( ) buffer = BytesIO() df.to_parquet(buffer, row_group_size=10000) - actual = read_parquet_chunked( - [buffer], - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - use_pandas_metadata=use_pandas_metadata, - row_groups=row_groups, - ) + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + use_pandas_metadata=use_pandas_metadata, + row_groups=row_groups, + ) expected = cudf.read_parquet( buffer, use_pandas_metadata=use_pandas_metadata, row_groups=row_groups ) @@ -3825,12 +3825,13 @@ def test_parquet_chunked_reader_structs( # Number of rows to read 
nrows = num_rows if num_rows is not None else len(df) - actual = read_parquet_chunked( - [buffer], - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - nrows=nrows, - ) + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + nrows=nrows, + ) expected = cudf.read_parquet( buffer, nrows=nrows, @@ -3877,12 +3878,13 @@ def test_parquet_chunked_reader_string_decoders( nrows = num_rows if num_rows is not None else len(df) # Check with num_rows specified - actual = read_parquet_chunked( - [buffer], - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - nrows=nrows, - ) + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + nrows=nrows, + ) expected = cudf.read_parquet( buffer, nrows=nrows, @@ -3982,13 +3984,14 @@ def test_parquet_reader_with_mismatched_tables(store_schema): ).reset_index(drop=True) # Read with chunked reader (filter columns not supported) - got_chunked = read_parquet_chunked( - [buf1, buf2], - columns=["list", "d_list", "str"], - chunk_read_limit=240, - pass_read_limit=240, - allow_mismatched_pq_schemas=True, - ) + with cudf.option_context("io.parquet.low_memory", True): + got_chunked = cudf.read_parquet( + [buf1, buf2], + columns=["list", "d_list", "str"], + _chunk_read_limit=240, + _pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) # Construct the expected table without filter columns expected_chunked = cudf.concat( @@ -4054,13 +4057,14 @@ def test_parquet_reader_with_mismatched_structs(): ) # Read with chunked reader - got_chunked = read_parquet_chunked( - [buf1, buf2], - columns=["struct.b.b_b.b_b_a"], - chunk_read_limit=240, - pass_read_limit=240, - allow_mismatched_pq_schemas=True, - ) + with cudf.option_context("io.parquet.low_memory", True): + got_chunked = cudf.read_parquet( + [buf1, buf2], + columns=["struct.b.b_b.b_b_a"], + _chunk_read_limit=240, + _pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) got_chunked = ( cudf.Series(got_chunked["struct"]) .struct.field("b") diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index d9a3da6666d..a04fcb8df7a 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -43,7 +43,6 @@ } _BYTES_PER_THREAD_DEFAULT = 256 * 1024 * 1024 -_ROW_GROUP_SIZE_BYTES_DEFAULT = np.iinfo(np.uint64).max _docstring_remote_sources = """ - cuDF supports local and remote data stores. See configuration details for From f811c383b46d7a8acc8496593e3d0caff83d6c8f Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:56:03 -0500 Subject: [PATCH 2/4] Allow large strings in nvbench strings benchmarks (#17571) Removes the 2GB limit check from the strings benchmarks and adjusts the parameters to be consistent across the benchmarks. The default parameters will still not exceed 2GB for automation purposes. 
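For reference, the adjusted benchmarks share roughly the shape sketched below. This is an illustrative skeleton only, not part of the patch: the `bench_example` name, the include lines, and the elided body are placeholders, while the helpers (`data_profile_builder`, `create_random_column`, `row_count`) and the axis values mirror the updated sources in the diffs that follow.

    #include <benchmarks/common/generate_input.hpp>  // assumed location of the benchmark data helpers
    #include <cudf/types.hpp>
    #include <nvbench/nvbench.cuh>

    // Sketch: a fixed min_width axis plus reduced max_width/num_rows axes,
    // with the former size_type-limit skip check removed.
    static void bench_example(nvbench::state& state)
    {
      auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
      auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
      auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));

      // Random strings whose lengths are drawn between min_width and max_width
      data_profile const profile = data_profile_builder().distribution(
        cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
      auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);

      // ... per-benchmark measurement body (unchanged by this patch) ...
    }

    NVBENCH_BENCH(bench_example)
      .set_name("example")
      .add_int64_axis("min_width", {0})
      .add_int64_axis("max_width", {32, 64, 128, 256})
      .add_int64_axis("num_rows", {32768, 262144, 2097152});

The default axis values above stay well under the 2GB size_type limit, which is why the explicit skip check can be dropped; larger widths or row counts can now be passed on the command line when large-string runs are wanted.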
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Paul Mattione (https://github.com/pmattione-nvidia) URL: https://github.com/rapidsai/cudf/pull/17571 --- cpp/benchmarks/string/case.cpp | 19 +++---- cpp/benchmarks/string/char_types.cpp | 15 +++--- cpp/benchmarks/string/contains.cpp | 13 ++--- cpp/benchmarks/string/copy_if_else.cpp | 15 +++--- cpp/benchmarks/string/copy_range.cpp | 15 +++--- cpp/benchmarks/string/count.cpp | 15 +++--- cpp/benchmarks/string/extract.cpp | 9 +--- cpp/benchmarks/string/join_strings.cpp | 15 +++--- cpp/benchmarks/string/lengths.cpp | 15 +++--- cpp/benchmarks/string/like.cpp | 9 +--- cpp/benchmarks/string/replace_re.cpp | 19 +++---- cpp/benchmarks/string/reverse.cpp | 15 +++--- cpp/benchmarks/string/slice.cpp | 9 +--- cpp/benchmarks/string/split.cpp | 15 +++--- cpp/benchmarks/string/split_re.cpp | 15 +++--- cpp/benchmarks/string/string_bench_args.hpp | 56 --------------------- 16 files changed, 80 insertions(+), 189 deletions(-) delete mode 100644 cpp/benchmarks/string/string_bench_args.hpp diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp index cd4d3ca964b..9750475a079 100644 --- a/cpp/benchmarks/string/case.cpp +++ b/cpp/benchmarks/string/case.cpp @@ -24,18 +24,14 @@ void bench_case(nvbench::state& state) { - auto const n_rows = static_cast(state.get_int64("num_rows")); - auto const max_width = static_cast(state.get_int64("row_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const encoding = state.get_string("encoding"); - if (static_cast(n_rows) * static_cast(max_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); auto col_view = column->view(); @@ -74,6 +70,7 @@ void bench_case(nvbench::state& state) NVBENCH_BENCH(bench_case) .set_name("case") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("encoding", {"ascii", "utf8"}); diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp index eec9a5f54d7..abc5254392e 100644 --- a/cpp/benchmarks/string/char_types.cpp +++ b/cpp/benchmarks/string/char_types.cpp @@ -25,16 +25,12 @@ static void bench_char_types(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const api_type = state.get_string("api"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const 
table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -61,6 +57,7 @@ static void bench_char_types(nvbench::state& state) NVBENCH_BENCH(bench_char_types) .set_name("char_types") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("api", {"all", "filter"}); diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index a73017dda18..e3940cbc0c7 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -29,17 +29,12 @@ std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$", "5W43" static void bench_contains(nvbench::state& state) { - auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); auto const pattern_index = static_cast(state.get_int64("pattern")); auto const hit_rate = static_cast(state.get_int64("hit_rate")); - if (static_cast(n_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - - auto col = create_string_column(n_rows, row_width, hit_rate); + auto col = create_string_column(num_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); auto pattern = patterns[pattern_index]; @@ -56,7 +51,7 @@ static void bench_contains(nvbench::state& state) NVBENCH_BENCH(bench_contains) .set_name("contains") - .add_int64_axis("row_width", {32, 64, 128, 256, 512}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("hit_rate", {50, 100}) // percentage .add_int64_axis("pattern", {0, 1, 2}); diff --git a/cpp/benchmarks/string/copy_if_else.cpp b/cpp/benchmarks/string/copy_if_else.cpp index e06cca497c2..5a5743dfddf 100644 --- a/cpp/benchmarks/string/copy_if_else.cpp +++ b/cpp/benchmarks/string/copy_if_else.cpp @@ -25,15 +25,11 @@ static void bench_copy(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const str_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const source_table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, str_profile); auto const target_table = @@ -58,5 +54,6 @@ static void bench_copy(nvbench::state& state) NVBENCH_BENCH(bench_copy) .set_name("copy_if_else") - .add_int64_axis("row_width", {32, 64, 128, 
256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/copy_range.cpp b/cpp/benchmarks/string/copy_range.cpp index af217a49195..7e7353a0e78 100644 --- a/cpp/benchmarks/string/copy_range.cpp +++ b/cpp/benchmarks/string/copy_range.cpp @@ -25,16 +25,12 @@ static void bench_copy_range(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder() - .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width) .no_validity(); auto const source_tables = create_random_table( {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, table_profile); @@ -56,5 +52,6 @@ static void bench_copy_range(nvbench::state& state) NVBENCH_BENCH(bench_copy_range) .set_name("copy_range") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/count.cpp b/cpp/benchmarks/string/count.cpp index f964bc5d224..cf90e316f71 100644 --- a/cpp/benchmarks/string/count.cpp +++ b/cpp/benchmarks/string/count.cpp @@ -30,16 +30,12 @@ static std::string patterns[] = {"\\d+", "a"}; static void bench_count(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const pattern_index = static_cast(state.get_int64("pattern")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -61,6 +57,7 @@ static void bench_count(nvbench::state& state) NVBENCH_BENCH(bench_count) .set_name("count") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("pattern", {0, 1}); diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp index af4fedb5799..d6866598ff4 100644 --- a/cpp/benchmarks/string/extract.cpp +++ 
b/cpp/benchmarks/string/extract.cpp @@ -32,11 +32,6 @@ static void bench_extract(nvbench::state& state) auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - auto groups = static_cast(state.get_int64("groups")); std::default_random_engine generator; @@ -79,6 +74,6 @@ static void bench_extract(nvbench::state& state) NVBENCH_BENCH(bench_extract) .set_name("extract") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("groups", {1, 2, 4}); diff --git a/cpp/benchmarks/string/join_strings.cpp b/cpp/benchmarks/string/join_strings.cpp index 6dcf731ad3c..27652193b7b 100644 --- a/cpp/benchmarks/string/join_strings.cpp +++ b/cpp/benchmarks/string/join_strings.cpp @@ -25,15 +25,11 @@ static void bench_join(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -54,5 +50,6 @@ static void bench_join(nvbench::state& state) NVBENCH_BENCH(bench_join) .set_name("strings_join") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/lengths.cpp b/cpp/benchmarks/string/lengths.cpp index a19060ead3b..8156e19412b 100644 --- a/cpp/benchmarks/string/lengths.cpp +++ b/cpp/benchmarks/string/lengths.cpp @@ -25,15 +25,11 @@ static void bench_lengths(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -51,5 +47,6 @@ static void bench_lengths(nvbench::state& state) 
NVBENCH_BENCH(bench_lengths) .set_name("lengths") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index 105ae65cbe8..f6410aaef30 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -30,11 +30,6 @@ static void bench_like(nvbench::state& state) auto const row_width = static_cast(state.get_int64("row_width")); auto const hit_rate = static_cast(state.get_int64("hit_rate")); - if (static_cast(n_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); @@ -54,6 +49,6 @@ static void bench_like(nvbench::state& state) NVBENCH_BENCH(bench_like) .set_name("strings_like") - .add_int64_axis("row_width", {32, 64, 128, 256, 512}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("hit_rate", {10, 25, 70, 100}); diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index 4dcf1314f83..69426a2d484 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -26,18 +26,14 @@ static void bench_replace(nvbench::state& state) { - auto const n_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const rtype = state.get_string("type"); - if (static_cast(n_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); auto program = cudf::strings::regex_program::create("(\\d+)"); @@ -62,6 +58,7 @@ static void bench_replace(nvbench::state& state) NVBENCH_BENCH(bench_replace) .set_name("replace_re") - .add_int64_axis("row_width", {32, 64, 128, 256, 512}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"replace", "backref"}); diff --git a/cpp/benchmarks/string/reverse.cpp b/cpp/benchmarks/string/reverse.cpp index a2676609a40..e2e914cb350 100644 --- a/cpp/benchmarks/string/reverse.cpp +++ b/cpp/benchmarks/string/reverse.cpp @@ -25,15 +25,11 @@ static void bench_reverse(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = 
static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -51,5 +47,6 @@ static void bench_reverse(nvbench::state& state) NVBENCH_BENCH(bench_reverse) .set_name("reverse") - .add_int64_axis("row_width", {8, 16, 32, 64, 128}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp index 1898f0340b6..c828a8ed0b0 100644 --- a/cpp/benchmarks/string/slice.cpp +++ b/cpp/benchmarks/string/slice.cpp @@ -36,11 +36,6 @@ static void bench_slice(nvbench::state& state) auto const row_width = static_cast(state.get_int64("row_width")); auto const stype = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); @@ -76,6 +71,6 @@ static void bench_slice(nvbench::state& state) NVBENCH_BENCH(bench_slice) .set_name("slice") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"position", "multi"}); diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp index 9ef58daf0fc..9c7c27c4f07 100644 --- a/cpp/benchmarks/string/split.cpp +++ b/cpp/benchmarks/string/split.cpp @@ -28,16 +28,12 @@ static void bench_split(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const stype = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); cudf::string_scalar target("+"); @@ -66,6 +62,7 @@ static void bench_split(nvbench::state& state) NVBENCH_BENCH(bench_split) .set_name("split") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 
1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"split", "split_ws", "record", "record_ws"}); diff --git a/cpp/benchmarks/string/split_re.cpp b/cpp/benchmarks/string/split_re.cpp index 1fdb6e67109..34a7aa96e84 100644 --- a/cpp/benchmarks/string/split_re.cpp +++ b/cpp/benchmarks/string/split_re.cpp @@ -28,17 +28,13 @@ static void bench_split(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto prog = cudf::strings::regex_program::create("\\d+"); data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); @@ -56,5 +52,6 @@ static void bench_split(nvbench::state& state) NVBENCH_BENCH(bench_split) .set_name("split_re") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp deleted file mode 100644 index a34026281e8..00000000000 --- a/cpp/benchmarks/string/string_bench_args.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -#include - -/** - * @brief Generate row count and row length argument ranges for a string benchmark. - * - * Generates a series of row count and row length arguments for string benchmarks. - * Combinations of row count and row length that would exceed the maximum string character - * column data length are not generated. - * - * @param b Benchmark to update with row count and row length arguments. - * @param min_rows Minimum row count argument to generate. - * @param max_rows Maximum row count argument to generate. - * @param rows_mult Row count multiplier to generate intermediate row count arguments. - * @param min_rowlen Minimum row length argument to generate. - * @param max_rowlen Maximum row length argument to generate. - * @param rowlen_mult Row length multiplier to generate intermediate row length arguments. 
- */ -inline void generate_string_bench_args(benchmark::internal::Benchmark* b, - int min_rows, - int max_rows, - int rows_mult, - int min_rowlen, - int max_rowlen, - int rowlen_mult) -{ - for (int row_count = min_rows; row_count <= max_rows; row_count *= rows_mult) { - for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= rowlen_mult) { - // avoid generating combinations that exceed the cudf column limit - size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < static_cast(std::numeric_limits::max())) { - b->Args({row_count, rowlen}); - } - } - } -} From 8a3e5f1a7af6c638397fcabf17bea9192bd799d2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:40:20 -0800 Subject: [PATCH 3/4] Remove cudf._lib.nvtext in favor of inlining pylibcudf (#17535) Contributes to https://github.com/rapidsai/cudf/issues/17317 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17535 --- python/cudf/cudf/_lib/CMakeLists.txt | 2 - python/cudf/cudf/_lib/__init__.py | 1 - python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 24 -- python/cudf/cudf/_lib/nvtext/__init__.pxd | 0 python/cudf/cudf/_lib/nvtext/__init__.py | 0 .../cudf/_lib/nvtext/byte_pair_encode.pyx | 24 -- .../cudf/cudf/_lib/nvtext/edit_distance.pyx | 24 -- .../cudf/cudf/_lib/nvtext/generate_ngrams.pyx | 35 -- python/cudf/cudf/_lib/nvtext/jaccard.pyx | 17 - python/cudf/cudf/_lib/nvtext/minhash.pyx | 35 -- .../cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx | 24 -- python/cudf/cudf/_lib/nvtext/normalize.pyx | 28 -- python/cudf/cudf/_lib/nvtext/replace.pyx | 52 --- python/cudf/cudf/_lib/nvtext/stemmer.pyx | 55 --- .../cudf/_lib/nvtext/subword_tokenize.pyx | 38 -- python/cudf/cudf/_lib/nvtext/tokenize.pyx | 86 ---- python/cudf/cudf/_lib/strings/__init__.pxd | 0 python/cudf/cudf/_lib/strings/__init__.py | 30 -- python/cudf/cudf/core/byte_pair_encoding.py | 13 +- python/cudf/cudf/core/column/string.py | 388 ++++++++++++++---- python/cudf/cudf/core/subword_tokenizer.py | 7 +- python/cudf/cudf/core/tokenize_vocabulary.py | 9 +- 22 files changed, 328 insertions(+), 564 deletions(-) delete mode 100644 python/cudf/cudf/_lib/nvtext/CMakeLists.txt delete mode 100644 python/cudf/cudf/_lib/nvtext/__init__.pxd delete mode 100644 python/cudf/cudf/_lib/nvtext/__init__.py delete mode 100644 python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/edit_distance.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/jaccard.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/minhash.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/normalize.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/replace.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/stemmer.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx delete mode 100644 python/cudf/cudf/_lib/nvtext/tokenize.pyx delete mode 100644 python/cudf/cudf/_lib/strings/__init__.pxd delete mode 100644 python/cudf/cudf/_lib/strings/__init__.py diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index f422635d22a..c2677c6d88d 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -30,5 +30,3 @@ target_include_directories(interop PUBLIC "$ letter_type.CONSONANT - VOWEL = 
letter_type.VOWEL - - -@acquire_spill_lock() -def porter_stemmer_measure(Column strings): - return Column.from_pylibcudf( - nvtext.stemmer.porter_stemmer_measure( - strings.to_pylibcudf(mode="read"), - ) - ) - - -@acquire_spill_lock() -def is_letter(Column strings, - object ltype, - size_type index): - return Column.from_pylibcudf( - nvtext.stemmer.is_letter( - strings.to_pylibcudf(mode="read"), - ltype==LetterType.VOWEL, - index, - ) - ) - - -@acquire_spill_lock() -def is_letter_multi(Column strings, - object ltype, - Column indices): - return Column.from_pylibcudf( - nvtext.stemmer.is_letter( - strings.to_pylibcudf(mode="read"), - ltype==LetterType.VOWEL, - indices.to_pylibcudf(mode="read"), - ) - ) diff --git a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx deleted file mode 100644 index 5e0bfb74705..00000000000 --- a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from libc.stdint cimport uint32_t - -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def subword_tokenize_inmem_hash( - Column strings, - object hashed_vocabulary, - uint32_t max_sequence_length=64, - uint32_t stride=48, - bool do_lower=True, - bool do_truncate=False, -): - """ - Subword tokenizes text series by using the pre-loaded hashed vocabulary - """ - result = nvtext.subword_tokenize.subword_tokenize( - strings.to_pylibcudf(mode="read"), - hashed_vocabulary, - max_sequence_length, - stride, - do_lower, - do_truncate, - ) - # return the 3 tensor components - tokens = Column.from_pylibcudf(result[0]) - masks = Column.from_pylibcudf(result[1]) - metadata = Column.from_pylibcudf(result[2]) - return tokens, masks, metadata diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx deleted file mode 100644 index f473c48e2f7..00000000000 --- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
- -from cudf.core.buffer import acquire_spill_lock - -from pylibcudf.libcudf.types cimport size_type - -from pylibcudf.nvtext.tokenize import TokenizeVocabulary # no-cython-lint - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def _tokenize_scalar(Column strings, object py_delimiter): - return Column.from_pylibcudf( - nvtext.tokenize.tokenize_scalar( - strings.to_pylibcudf(mode="read"), - py_delimiter.device_value.c_value - ) - ) - - -@acquire_spill_lock() -def _tokenize_column(Column strings, Column delimiters): - return Column.from_pylibcudf( - nvtext.tokenize.tokenize_column( - strings.to_pylibcudf(mode="read"), - delimiters.to_pylibcudf(mode="read"), - ) - ) - - -@acquire_spill_lock() -def _count_tokens_scalar(Column strings, object py_delimiter): - return Column.from_pylibcudf( - nvtext.tokenize.count_tokens_scalar( - strings.to_pylibcudf(mode="read"), - py_delimiter.device_value.c_value - ) - ) - - -@acquire_spill_lock() -def _count_tokens_column(Column strings, Column delimiters): - return Column.from_pylibcudf( - nvtext.tokenize.count_tokens_column( - strings.to_pylibcudf(mode="read"), - delimiters.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def character_tokenize(Column strings): - return Column.from_pylibcudf( - nvtext.tokenize.character_tokenize( - strings.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def detokenize(Column strings, Column indices, object py_separator): - return Column.from_pylibcudf( - nvtext.tokenize.detokenize( - strings.to_pylibcudf(mode="read"), - indices.to_pylibcudf(mode="read"), - py_separator.device_value.c_value - ) - ) - - -@acquire_spill_lock() -def tokenize_with_vocabulary(Column strings, - object vocabulary, - object py_delimiter, - size_type default_id): - return Column.from_pylibcudf( - nvtext.tokenize.tokenize_with_vocabulary( - strings.to_pylibcudf(mode="read"), - vocabulary, - py_delimiter.device_value.c_value, - default_id - ) - ) diff --git a/python/cudf/cudf/_lib/strings/__init__.pxd b/python/cudf/cudf/_lib/strings/__init__.pxd deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py deleted file mode 100644 index b9095a22a42..00000000000 --- a/python/cudf/cudf/_lib/strings/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
-from cudf._lib.nvtext.edit_distance import edit_distance, edit_distance_matrix -from cudf._lib.nvtext.generate_ngrams import ( - generate_character_ngrams, - generate_ngrams, - hash_character_ngrams, -) -from cudf._lib.nvtext.jaccard import jaccard_index -from cudf._lib.nvtext.minhash import ( - minhash, - minhash64, -) -from cudf._lib.nvtext.ngrams_tokenize import ngrams_tokenize -from cudf._lib.nvtext.normalize import normalize_characters, normalize_spaces -from cudf._lib.nvtext.replace import filter_tokens, replace_tokens -from cudf._lib.nvtext.stemmer import ( - LetterType, - is_letter, - is_letter_multi, - porter_stemmer_measure, -) -from cudf._lib.nvtext.tokenize import ( - _count_tokens_column, - _count_tokens_scalar, - _tokenize_column, - _tokenize_scalar, - character_tokenize, - detokenize, - tokenize_with_vocabulary, -) diff --git a/python/cudf/cudf/core/byte_pair_encoding.py b/python/cudf/cudf/core/byte_pair_encoding.py index 8d38a5f2272..b49f5154697 100644 --- a/python/cudf/cudf/core/byte_pair_encoding.py +++ b/python/cudf/cudf/core/byte_pair_encoding.py @@ -5,9 +5,6 @@ import pylibcudf as plc import cudf -from cudf._lib.nvtext.byte_pair_encode import ( - byte_pair_encoding as cpp_byte_pair_encoding, -) class BytePairEncoder: @@ -25,12 +22,12 @@ class BytePairEncoder: BytePairEncoder """ - def __init__(self, merges_pair: "cudf.Series"): + def __init__(self, merges_pair: cudf.Series) -> None: self.merge_pairs = plc.nvtext.byte_pair_encode.BPEMergePairs( merges_pair._column.to_pylibcudf(mode="read") ) - def __call__(self, text, separator: str = " ") -> cudf.Series: + def __call__(self, text: cudf.Series, separator: str = " ") -> cudf.Series: """ Parameters @@ -57,6 +54,6 @@ def __call__(self, text, separator: str = " ") -> cudf.Series: dtype: object """ sep = cudf.Scalar(separator, dtype="str") - result = cpp_byte_pair_encoding(text._column, self.merge_pairs, sep) - - return cudf.Series._from_column(result) + return cudf.Series._from_column( + text._column.byte_pair_encoding(self.merge_pairs, sep) + ) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 06196717ce3..c021554f3bd 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -20,7 +20,7 @@ import cudf.core.column.column as column import cudf.core.column.datetime as datetime from cudf import _lib as libcudf -from cudf._lib import string_casting as str_cast, strings as libstrings +from cudf._lib import string_casting as str_cast from cudf._lib.column import Column from cudf._lib.types import size_type_dtype from cudf.api.types import is_integer, is_scalar, is_string_dtype @@ -45,6 +45,7 @@ SeriesOrIndex, ) from cudf.core.buffer import Buffer + from cudf.core.column.lists import ListColumn from cudf.core.column.numerical import NumericalColumn @@ -624,7 +625,7 @@ def join( def _split_by_character(self): col = self._column.fillna("") # sanitize nulls - result_col = libstrings.character_tokenize(col) + result_col = col.character_tokenize() offset_col = col.children[0] @@ -4693,9 +4694,7 @@ def normalize_spaces(self) -> SeriesOrIndex: 1 test string dtype: object """ - return self._return_or_inplace( - libstrings.normalize_spaces(self._column) - ) + return self._return_or_inplace(self._column.normalize_spaces()) def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: r""" @@ -4743,7 +4742,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: dtype: object """ return self._return_or_inplace( - 
libstrings.normalize_characters(self._column, do_lower) + self._column.normalize_characters(do_lower) ) def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: @@ -4775,16 +4774,16 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: 2 goodbye dtype: object """ - delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True) + delim = _massage_string_arg(delimiter, "delimiter", allow_col=True) - if isinstance(delimiter, Column): + if isinstance(delim, Column): result = self._return_or_inplace( - libstrings._tokenize_column(self._column, delimiter), + self._column.tokenize_column(delim), retain_index=False, ) - elif isinstance(delimiter, cudf.Scalar): + elif isinstance(delim, cudf.Scalar): result = self._return_or_inplace( - libstrings._tokenize_scalar(self._column, delimiter), + self._column.tokenize_scalar(delim), retain_index=False, ) else: @@ -4799,7 +4798,7 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: return result def detokenize( - self, indices: "cudf.Series", separator: str = " " + self, indices: cudf.Series, separator: str = " " ) -> SeriesOrIndex: """ Combines tokens into strings by concatenating them in the order @@ -4829,9 +4828,9 @@ def detokenize( 2 three dtype: object """ - separator = _massage_string_arg(separator, "separator") + sep = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.detokenize(self._column, indices._column, separator), + self._column.detokenize(indices._column, sep), # type: ignore[arg-type] retain_index=False, ) @@ -4882,17 +4881,15 @@ def character_tokenize(self) -> SeriesOrIndex: 2 . dtype: object """ - result_col = libstrings.character_tokenize(self._column) + result_col = self._column.character_tokenize() if isinstance(self._parent, cudf.Series): lengths = self.len().fillna(0) index = self._parent.index.repeat(lengths) - return cudf.Series._from_column( + return type(self._parent)._from_column( result_col, name=self._parent.name, index=index ) - elif isinstance(self._parent, cudf.BaseIndex): - return cudf.Index._from_column(result_col, name=self._parent.name) else: - return result_col + return self._return_or_inplace(result_col) def token_count(self, delimiter: str = " ") -> SeriesOrIndex: """ @@ -4919,15 +4916,15 @@ def token_count(self, delimiter: str = " ") -> SeriesOrIndex: 2 0 dtype: int32 """ - delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True) - if isinstance(delimiter, Column): + delim = _massage_string_arg(delimiter, "delimiter", allow_col=True) + if isinstance(delim, Column): return self._return_or_inplace( - libstrings._count_tokens_column(self._column, delimiter) + self._column.count_tokens_column(delim) ) - elif isinstance(delimiter, cudf.Scalar): + elif isinstance(delim, cudf.Scalar): return self._return_or_inplace( - libstrings._count_tokens_scalar(self._column, delimiter) + self._column.count_tokens_scalar(delim) # type: ignore[arg-type] ) else: raise TypeError( @@ -4966,9 +4963,9 @@ def ngrams(self, n: int = 2, separator: str = "_") -> SeriesOrIndex: 2 xyz_hhh dtype: object """ - separator = _massage_string_arg(separator, "separator") + sep = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.generate_ngrams(self._column, n, separator), + self._column.generate_ngrams(n, sep), # type: ignore[arg-type] retain_index=False, ) @@ -5015,7 +5012,7 @@ def character_ngrams( dtype: list """ result = self._return_or_inplace( - libstrings.generate_character_ngrams(self._column, n), + 
self._column.generate_character_ngrams(n), retain_index=True, ) if isinstance(result, cudf.Series) and not as_list: @@ -5060,7 +5057,7 @@ def hash_character_ngrams( """ result = self._return_or_inplace( - libstrings.hash_character_ngrams(self._column, n), + self._column.hash_character_ngrams(n), retain_index=True, ) if isinstance(result, cudf.Series) and not as_list: @@ -5098,10 +5095,10 @@ def ngrams_tokenize( 2 best_book dtype: object """ - delimiter = _massage_string_arg(delimiter, "delimiter") - separator = _massage_string_arg(separator, "separator") + delim = _massage_string_arg(delimiter, "delimiter") + sep = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.ngrams_tokenize(self._column, n, delimiter, separator), + self._column.ngrams_tokenize(n, delim, sep), # type: ignore[arg-type] retain_index=False, ) @@ -5180,10 +5177,9 @@ def replace_tokens( ) return self._return_or_inplace( - libstrings.replace_tokens( - self._column, - targets_column, - replacements_column, + self._column.replace_tokens( + targets_column, # type: ignore[arg-type] + replacements_column, # type: ignore[arg-type] cudf.Scalar(delimiter, dtype="str"), ), ) @@ -5251,8 +5247,7 @@ def filter_tokens( ) return self._return_or_inplace( - libstrings.filter_tokens( - self._column, + self._column.filter_tokens( min_token_length, cudf.Scalar(replacement, dtype="str"), cudf.Scalar(delimiter, dtype="str"), @@ -5278,9 +5273,7 @@ def porter_stemmer_measure(self) -> SeriesOrIndex: 1 2 dtype: int32 """ - return self._return_or_inplace( - libstrings.porter_stemmer_measure(self._column) - ) + return self._return_or_inplace(self._column.porter_stemmer_measure()) def is_consonant(self, position) -> SeriesOrIndex: """ @@ -5313,17 +5306,10 @@ def is_consonant(self, position) -> SeriesOrIndex: 1 False dtype: bool """ - ltype = libstrings.LetterType.CONSONANT - if can_convert_to_column(position): - return self._return_or_inplace( - libstrings.is_letter_multi( - self._column, ltype, column.as_column(position) - ), - ) - + position = column.as_column(position) return self._return_or_inplace( - libstrings.is_letter(self._column, ltype, position) + self._column.is_letter(False, position) # type: ignore[arg-type] ) def is_vowel(self, position) -> SeriesOrIndex: @@ -5357,17 +5343,10 @@ def is_vowel(self, position) -> SeriesOrIndex: 1 True dtype: bool """ - ltype = libstrings.LetterType.VOWEL - if can_convert_to_column(position): - return self._return_or_inplace( - libstrings.is_letter_multi( - self._column, ltype, column.as_column(position) - ), - ) - + position = column.as_column(position) return self._return_or_inplace( - libstrings.is_letter(self._column, ltype, position) + self._column.is_letter(True, position) # type: ignore[arg-type] ) def edit_distance(self, targets) -> SeriesOrIndex: @@ -5416,7 +5395,7 @@ def edit_distance(self, targets) -> SeriesOrIndex: ) return self._return_or_inplace( - libstrings.edit_distance(self._column, targets_column) + self._column.edit_distance(targets_column) # type: ignore[arg-type] ) def edit_distance_matrix(self) -> SeriesOrIndex: @@ -5456,9 +5435,7 @@ def edit_distance_matrix(self) -> SeriesOrIndex: "Cannot compute edit distance between null strings. " "Consider removing them using `dropna` or fill with `fillna`." 
) - return self._return_or_inplace( - libstrings.edit_distance_matrix(self._column) - ) + return self._return_or_inplace(self._column.edit_distance_matrix()) def minhash( self, seed: np.uint32, a: ColumnLike, b: ColumnLike, width: int @@ -5508,7 +5485,7 @@ def minhash( f"Expecting a Series with dtype uint32, got {type(b)}" ) return self._return_or_inplace( - libstrings.minhash(self._column, seed, a_column, b_column, width) + self._column.minhash(seed, a_column, b_column, width) # type: ignore[arg-type] ) def minhash64( @@ -5559,7 +5536,7 @@ def minhash64( f"Expecting a Series with dtype uint64, got {type(b)}" ) return self._return_or_inplace( - libstrings.minhash64(self._column, seed, a_column, b_column, width) + self._column.minhash64(seed, a_column, b_column, width) # type: ignore[arg-type] ) def jaccard_index(self, input: cudf.Series, width: int) -> SeriesOrIndex: @@ -5585,13 +5562,14 @@ def jaccard_index(self, input: cudf.Series, width: int) -> SeriesOrIndex: 1 0.307692 dtype: float32 """ - return self._return_or_inplace( - libstrings.jaccard_index(self._column, input._column, width), + self._column.jaccard_index(input._column, width) ) -def _massage_string_arg(value, name, allow_col=False): +def _massage_string_arg( + value, name, allow_col: bool = False +) -> StringColumn | cudf.Scalar: if isinstance(value, cudf.Scalar): return value @@ -5602,9 +5580,9 @@ def _massage_string_arg(value, name, allow_col=False): if allow_col: if isinstance(value, list): - return column.as_column(value, dtype="str") + return column.as_column(value, dtype="str") # type: ignore[return-value] - if isinstance(value, Column) and is_string_dtype(value.dtype): + if isinstance(value, StringColumn): return value allowed_types.append("Column") @@ -6148,6 +6126,278 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase": return to_view.view(dtype) + @acquire_spill_lock() + def minhash( + self, + seed: np.uint32, + a: NumericalColumn, + b: NumericalColumn, + width: int, + ) -> ListColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.minhash.minhash( + self.to_pylibcudf(mode="read"), + seed, + a.to_pylibcudf(mode="read"), + b.to_pylibcudf(mode="read"), + width, + ) + ) + + @acquire_spill_lock() + def minhash64( + self, + seed: np.uint64, + a: NumericalColumn, + b: NumericalColumn, + width: int, + ) -> ListColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.minhash.minhash64( + self.to_pylibcudf(mode="read"), + seed, + a.to_pylibcudf(mode="read"), + b.to_pylibcudf(mode="read"), + width, + ) + ) + + @acquire_spill_lock() + def jaccard_index(self, other: Self, width: int) -> NumericalColumn: + result = plc.nvtext.jaccard.jaccard_index( + self.to_pylibcudf(mode="read"), + other.to_pylibcudf(mode="read"), + width, + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def generate_ngrams(self, ngrams: int, separator: cudf.Scalar) -> Self: + result = plc.nvtext.generate_ngrams.generate_ngrams( + self.to_pylibcudf(mode="read"), + ngrams, + separator.device_value.c_value, + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def generate_character_ngrams(self, ngrams: int) -> ListColumn: + result = plc.nvtext.generate_ngrams.generate_character_ngrams( + self.to_pylibcudf(mode="read"), ngrams + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def hash_character_ngrams(self, ngrams: int) -> ListColumn: + 
result = plc.nvtext.generate_ngrams.hash_character_ngrams( + self.to_pylibcudf(mode="read"), ngrams + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def edit_distance(self, targets: Self) -> NumericalColumn: + result = plc.nvtext.edit_distance.edit_distance( + self.to_pylibcudf(mode="read"), targets.to_pylibcudf(mode="read") + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def edit_distance_matrix(self) -> ListColumn: + result = plc.nvtext.edit_distance.edit_distance_matrix( + self.to_pylibcudf(mode="read") + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def byte_pair_encoding( + self, + merge_pairs: plc.nvtext.byte_pair_encode.BPEMergePairs, + separator: cudf.Scalar, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.byte_pair_encode.byte_pair_encoding( + self.to_pylibcudf(mode="read"), + merge_pairs, + separator.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def ngrams_tokenize( + self, + ngrams: int, + delimiter: cudf.Scalar, + separator: cudf.Scalar, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.ngrams_tokenize.ngrams_tokenize( + self.to_pylibcudf(mode="read"), + ngrams, + delimiter.device_value.c_value, + separator.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def normalize_spaces(self) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.normalize.normalize_spaces( + self.to_pylibcudf(mode="read") + ) + ) + + @acquire_spill_lock() + def normalize_characters(self, do_lower: bool = True) -> Self: + return Column.from_pylibcudf( # type: ignore[return-value] + plc.nvtext.normalize.normalize_characters( + self.to_pylibcudf(mode="read"), + do_lower, + ) + ) + + @acquire_spill_lock() + def replace_tokens( + self, targets: Self, replacements: Self, delimiter: cudf.Scalar + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.replace.replace_tokens( + self.to_pylibcudf(mode="read"), + targets.to_pylibcudf(mode="read"), + replacements.to_pylibcudf(mode="read"), + delimiter.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def filter_tokens( + self, + min_token_length: int, + replacement: cudf.Scalar, + delimiter: cudf.Scalar, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.replace.filter_tokens( + self.to_pylibcudf(mode="read"), + min_token_length, + replacement.device_value.c_value, + delimiter.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def porter_stemmer_measure(self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.stemmer.porter_stemmer_measure( + self.to_pylibcudf(mode="read") + ) + ) + + @acquire_spill_lock() + def is_letter(self, is_vowel: bool, index: int | NumericalColumn) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.stemmer.is_letter( + self.to_pylibcudf(mode="read"), + is_vowel, + index + if isinstance(index, int) + else index.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def subword_tokenize( + self, + hashed_vocabulary: plc.nvtext.subword_tokenize.HashedVocabulary, + max_sequence_length: int = 64, + stride: int = 48, + do_lower: bool = True, + do_truncate: bool = False, + ) -> tuple[ColumnBase, ColumnBase, ColumnBase]: + """ + Subword tokenizes text series by using the 
pre-loaded hashed vocabulary + """ + result = plc.nvtext.subword_tokenize.subword_tokenize( + self.to_pylibcudf(mode="read"), + hashed_vocabulary, + max_sequence_length, + stride, + do_lower, + do_truncate, + ) + # return the 3 tensor components + tokens = type(self).from_pylibcudf(result[0]) + masks = type(self).from_pylibcudf(result[1]) + metadata = type(self).from_pylibcudf(result[2]) + return tokens, masks, metadata + + @acquire_spill_lock() + def tokenize_scalar(self, delimiter: cudf.Scalar) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.tokenize_scalar( + self.to_pylibcudf(mode="read"), delimiter.device_value.c_value + ) + ) + + @acquire_spill_lock() + def tokenize_column(self, delimiters: Self) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.tokenize_column( + self.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def count_tokens_scalar(self, delimiter: cudf.Scalar) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.count_tokens_scalar( + self.to_pylibcudf(mode="read"), delimiter.device_value.c_value + ) + ) + + @acquire_spill_lock() + def count_tokens_column(self, delimiters: Self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.count_tokens_column( + self.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def character_tokenize(self) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.character_tokenize( + self.to_pylibcudf(mode="read") + ) + ) + + @acquire_spill_lock() + def tokenize_with_vocabulary( + self, + vocabulary: plc.nvtext.tokenize.TokenizeVocabulary, + delimiter: cudf.Scalar, + default_id: int, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.tokenize_with_vocabulary( + self.to_pylibcudf(mode="read"), + vocabulary, + delimiter.device_value.c_value, + default_id, + ) + ) + + @acquire_spill_lock() + def detokenize(self, indices: ColumnBase, separator: cudf.Scalar) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.detokenize( + self.to_pylibcudf(mode="read"), + indices.to_pylibcudf(mode="read"), + separator.device_value.c_value, + ) + ) + def _modify_characters( self, method: Callable[[plc.Column], plc.Column] ) -> Self: diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index dda1f199078..479838ef2a8 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -8,10 +8,6 @@ import pylibcudf as plc -from cudf._lib.nvtext.subword_tokenize import ( - subword_tokenize_inmem_hash as cpp_subword_tokenize, -) - def _cast_to_appropriate_type(ar, cast_type): if cast_type == "cp": @@ -210,8 +206,7 @@ def __call__( stride = max_length - stride # behavior varies from subword_tokenize but maps with huggingface - input_ids, attention_mask, metadata = cpp_subword_tokenize( - text._column, + input_ids, attention_mask, metadata = text._column.subword_tokenize( self.vocab_file, max_sequence_length=max_length, stride=stride, diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py index 1e31376cce8..fb8b9b3131c 100644 --- a/python/cudf/cudf/core/tokenize_vocabulary.py +++ 
b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -5,9 +5,6 @@ import pylibcudf as plc import cudf -from cudf._lib.nvtext.tokenize import ( - tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, -) class TokenizeVocabulary: @@ -20,7 +17,7 @@ class TokenizeVocabulary: Strings column of vocabulary terms """ - def __init__(self, vocabulary: "cudf.Series"): + def __init__(self, vocabulary: cudf.Series) -> None: self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary( vocabulary._column.to_pylibcudf(mode="read") ) @@ -46,8 +43,8 @@ def tokenize( if delimiter is None: delimiter = "" delim = cudf.Scalar(delimiter, dtype="str") - result = cpp_tokenize_with_vocabulary( - text._column, self.vocabulary, delim, default_id + result = text._column.tokenize_with_vocabulary( + self.vocabulary, delim, default_id ) return cudf.Series._from_column(result) From 774970283bfa6ca5ac4bc0619fc8595f01b7362b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 12 Dec 2024 18:06:44 -0800 Subject: [PATCH 4/4] Remove cudf._lib.csv in favor in inlining pylibcudf (#17485) Contributes to https://github.com/rapidsai/cudf/issues/17317 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/17485 --- python/cudf/cudf/_lib/CMakeLists.txt | 5 +- python/cudf/cudf/_lib/__init__.py | 1 - python/cudf/cudf/_lib/csv.pyx | 414 ------------------------ python/cudf/cudf/io/csv.py | 466 ++++++++++++++++++++++----- 4 files changed, 385 insertions(+), 501 deletions(-) delete mode 100644 python/cudf/cudf/_lib/csv.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index c2677c6d88d..b402db0443d 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -12,9 +12,8 @@ # the License. # ============================================================================= -set(cython_sources - column.pyx copying.pyx csv.pyx groupby.pyx interop.pyx reduce.pyx scalar.pyx sort.pyx - stream_compaction.pyx string_casting.pyx strings_udf.pyx types.pyx utils.pyx +set(cython_sources column.pyx copying.pyx groupby.pyx interop.pyx reduce.pyx scalar.pyx sort.pyx + stream_compaction.pyx string_casting.pyx strings_udf.pyx types.pyx utils.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index f86a15b932b..0299b264189 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -3,7 +3,6 @@ from . import ( copying, - csv, groupby, interop, reduce, diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx deleted file mode 100644 index 641fc18c203..00000000000 --- a/python/cudf/cudf/_lib/csv.pyx +++ /dev/null @@ -1,414 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -from libcpp cimport bool - -cimport pylibcudf.libcudf.types as libcudf_types - -from cudf._lib.types cimport dtype_to_pylibcudf_type - -import errno -import os -from collections import abc -from io import BytesIO, StringIO - -import numpy as np -import pandas as pd - -import cudf -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from cudf._lib.utils cimport data_from_pylibcudf_io - -import pylibcudf as plc - -from cudf.api.types import is_hashable - -from pylibcudf.types cimport DataType - -CSV_HEX_TYPE_MAP = { - "hex": np.dtype("int64"), - "hex64": np.dtype("int64"), - "hex32": np.dtype("int32") -} - - -def validate_args( - object delimiter, - object sep, - bool delim_whitespace, - object decimal, - object thousands, - object nrows, - int skipfooter, - object byte_range, - int skiprows -): - if delim_whitespace: - if delimiter is not None: - raise ValueError("cannot set both delimiter and delim_whitespace") - if sep != ',': - raise ValueError("cannot set both sep and delim_whitespace") - - # Alias sep -> delimiter. - actual_delimiter = delimiter if delimiter else sep - - if decimal == actual_delimiter: - raise ValueError("decimal cannot be the same as delimiter") - - if thousands == actual_delimiter: - raise ValueError("thousands cannot be the same as delimiter") - - if nrows is not None and skipfooter != 0: - raise ValueError("cannot use both nrows and skipfooter parameters") - - if byte_range is not None: - if skipfooter != 0 or skiprows != 0 or nrows is not None: - raise ValueError("""cannot manually limit rows to be read when - using the byte range parameter""") - - -def read_csv( - object datasource, - object lineterminator="\n", - object quotechar='"', - int quoting=0, - bool doublequote=True, - object header="infer", - bool mangle_dupe_cols=True, - object usecols=None, - object sep=",", - object delimiter=None, - bool delim_whitespace=False, - bool skipinitialspace=False, - object names=None, - object dtype=None, - int skipfooter=0, - int skiprows=0, - bool dayfirst=False, - object compression="infer", - object thousands=None, - object decimal=".", - object true_values=None, - object false_values=None, - object nrows=None, - object byte_range=None, - bool skip_blank_lines=True, - object parse_dates=None, - object comment=None, - object na_values=None, - bool keep_default_na=True, - bool na_filter=True, - object prefix=None, - object index_col=None, -): - """ - Cython function to call into libcudf API, see `read_csv`. - - See Also - -------- - cudf.read_csv - """ - - if not isinstance(datasource, (BytesIO, StringIO, bytes)): - if not os.path.isfile(datasource): - raise FileNotFoundError( - errno.ENOENT, os.strerror(errno.ENOENT), datasource - ) - - if isinstance(datasource, StringIO): - datasource = datasource.read().encode() - elif isinstance(datasource, str) and not os.path.isfile(datasource): - datasource = datasource.encode() - - validate_args(delimiter, sep, delim_whitespace, decimal, thousands, - nrows, skipfooter, byte_range, skiprows) - - # Alias sep -> delimiter. 
- if delimiter is None: - delimiter = sep - - delimiter = str(delimiter) - - if byte_range is None: - byte_range = (0, 0) - - if compression is None: - c_compression = plc.io.types.CompressionType.NONE - else: - compression_map = { - "infer": plc.io.types.CompressionType.AUTO, - "gzip": plc.io.types.CompressionType.GZIP, - "bz2": plc.io.types.CompressionType.BZIP2, - "zip": plc.io.types.CompressionType.ZIP, - } - c_compression = compression_map[compression] - - # We need this later when setting index cols - orig_header = header - - if names is not None: - # explicitly mentioned name, so don't check header - if header is None or header == 'infer': - header = -1 - else: - header = header - names = list(names) - else: - if header is None: - header = -1 - elif header == 'infer': - header = 0 - - hex_cols = [] - - new_dtypes = [] - if dtype is not None: - if isinstance(dtype, abc.Mapping): - new_dtypes = dict() - for k, v in dtype.items(): - col_type = v - if is_hashable(v) and v in CSV_HEX_TYPE_MAP: - col_type = CSV_HEX_TYPE_MAP[v] - hex_cols.append(str(k)) - - new_dtypes[k] = _get_plc_data_type_from_dtype( - cudf.dtype(col_type) - ) - elif ( - cudf.api.types.is_scalar(dtype) or - isinstance(dtype, ( - np.dtype, pd.api.extensions.ExtensionDtype, type - )) - ): - if is_hashable(dtype) and dtype in CSV_HEX_TYPE_MAP: - dtype = CSV_HEX_TYPE_MAP[dtype] - hex_cols.append(0) - - new_dtypes.append( - _get_plc_data_type_from_dtype(dtype) - ) - elif isinstance(dtype, abc.Collection): - for index, col_dtype in enumerate(dtype): - if is_hashable(col_dtype) and col_dtype in CSV_HEX_TYPE_MAP: - col_dtype = CSV_HEX_TYPE_MAP[col_dtype] - hex_cols.append(index) - - new_dtypes.append( - _get_plc_data_type_from_dtype(col_dtype) - ) - else: - raise ValueError( - "dtype should be a scalar/str/list-like/dict-like" - ) - options = ( - plc.io.csv.CsvReaderOptions.builder(plc.io.SourceInfo([datasource])) - .compression(c_compression) - .mangle_dupe_cols(mangle_dupe_cols) - .byte_range_offset(byte_range[0]) - .byte_range_size(byte_range[1]) - .nrows(nrows if nrows is not None else -1) - .skiprows(skiprows) - .skipfooter(skipfooter) - .quoting(quoting) - .lineterminator(str(lineterminator)) - .quotechar(quotechar) - .decimal(decimal) - .delim_whitespace(delim_whitespace) - .skipinitialspace(skipinitialspace) - .skip_blank_lines(skip_blank_lines) - .doublequote(doublequote) - .keep_default_na(keep_default_na) - .na_filter(na_filter) - .dayfirst(dayfirst) - .build() - ) - - options.set_header(header) - - if names is not None: - options.set_names([str(name) for name in names]) - - if prefix is not None: - options.set_prefix(prefix) - - if usecols is not None: - if all(isinstance(col, int) for col in usecols): - options.set_use_cols_indexes(list(usecols)) - else: - options.set_use_cols_names([str(name) for name in usecols]) - - if delimiter is not None: - options.set_delimiter(delimiter) - - if thousands is not None: - options.set_thousands(thousands) - - if comment is not None: - options.set_comment(comment) - - if parse_dates is not None: - options.set_parse_dates(list(parse_dates)) - - if hex_cols is not None: - options.set_parse_hex(list(hex_cols)) - - options.set_dtypes(new_dtypes) - - if true_values is not None: - options.set_true_values([str(val) for val in true_values]) - - if false_values is not None: - options.set_false_values([str(val) for val in false_values]) - - if na_values is not None: - options.set_na_values([str(val) for val in na_values]) - - df = cudf.DataFrame._from_data( - 
*data_from_pylibcudf_io(plc.io.csv.read_csv(options)) - ) - - if dtype is not None: - if isinstance(dtype, abc.Mapping): - for k, v in dtype.items(): - if isinstance(cudf.dtype(v), cudf.CategoricalDtype): - df._data[str(k)] = df._data[str(k)].astype(v) - elif ( - cudf.api.types.is_scalar(dtype) or - isinstance(dtype, ( - np.dtype, pd.api.extensions.ExtensionDtype, type - )) - ): - if isinstance(cudf.dtype(dtype), cudf.CategoricalDtype): - df = df.astype(dtype) - elif isinstance(dtype, abc.Collection): - for index, col_dtype in enumerate(dtype): - if isinstance(cudf.dtype(col_dtype), cudf.CategoricalDtype): - col_name = df._column_names[index] - df._data[col_name] = df._data[col_name].astype(col_dtype) - - if names is not None and len(names) and isinstance(names[0], int): - df.columns = [int(x) for x in df._data] - elif names is None and header == -1 and cudf.get_option("mode.pandas_compatible"): - df.columns = [int(x) for x in df._column_names] - - # Set index if the index_col parameter is passed - if index_col is not None and index_col is not False: - if isinstance(index_col, int): - index_col_name = df._data.get_labels_by_index(index_col)[0] - df = df.set_index(index_col_name) - if isinstance(index_col_name, str) and \ - names is None and orig_header == "infer": - if index_col_name.startswith("Unnamed:"): - # TODO: Try to upstream it to libcudf - # csv reader in future - df._index.name = None - elif names is None: - df._index.name = index_col - else: - df = df.set_index(index_col) - - return df - - -@acquire_spill_lock() -def write_csv( - table, - object path_or_buf=None, - object sep=",", - object na_rep="", - bool header=True, - object lineterminator="\n", - int rows_per_chunk=8, - bool index=True, -): - """ - Cython function to call into libcudf API, see `write_csv`. - - See Also - -------- - cudf.to_csv - """ - index_and_not_empty = index is True and table.index is not None - columns = [ - col.to_pylibcudf(mode="read") for col in table.index._columns - ] if index_and_not_empty else [] - columns.extend(col.to_pylibcudf(mode="read") for col in table._columns) - col_names = [] - if header: - all_names = list(table.index.names) if index_and_not_empty else [] - all_names.extend( - na_rep if name is None or pd.isnull(name) - else name for name in table._column_names - ) - col_names = [ - '""' if (name in (None, '') and len(all_names) == 1) - else (str(name) if name not in (None, '') else '') - for name in all_names - ] - try: - plc.io.csv.write_csv( - ( - plc.io.csv.CsvWriterOptions.builder( - plc.io.SinkInfo([path_or_buf]), plc.Table(columns) - ) - .names(col_names) - .na_rep(na_rep) - .include_header(header) - .rows_per_chunk(rows_per_chunk) - .line_terminator(str(lineterminator)) - .inter_column_delimiter(str(sep)) - .true_value("True") - .false_value("False") - .build() - ) - ) - except OverflowError: - raise OverflowError( - f"Writing CSV file with chunksize={rows_per_chunk} failed. " - "Consider providing a smaller chunksize argument." 
- ) - - -cdef DataType _get_plc_data_type_from_dtype(object dtype) except *: - # TODO: Remove this work-around Dictionary types - # in libcudf are fully mapped to categorical columns: - # https://github.com/rapidsai/cudf/issues/3960 - if isinstance(dtype, cudf.CategoricalDtype): - dtype = dtype.categories.dtype - elif dtype == "category": - dtype = "str" - - if isinstance(dtype, str): - if str(dtype) == "date32": - return DataType( - libcudf_types.type_id.TIMESTAMP_DAYS - ) - elif str(dtype) in ("date", "date64"): - return DataType( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - elif str(dtype) == "timestamp": - return DataType( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - elif str(dtype) == "timestamp[us]": - return DataType( - libcudf_types.type_id.TIMESTAMP_MICROSECONDS - ) - elif str(dtype) == "timestamp[s]": - return DataType( - libcudf_types.type_id.TIMESTAMP_SECONDS - ) - elif str(dtype) == "timestamp[ms]": - return DataType( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - elif str(dtype) == "timestamp[ns]": - return DataType( - libcudf_types.type_id.TIMESTAMP_NANOSECONDS - ) - - dtype = cudf.dtype(dtype) - return dtype_to_pylibcudf_type(dtype) diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index 3dc8915bfd1..da9a66f3874 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -1,57 +1,73 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. +from __future__ import annotations +import errno +import itertools +import os import warnings from collections import abc from io import BytesIO, StringIO +from typing import cast import numpy as np +import pandas as pd + +import pylibcudf as plc import cudf -from cudf import _lib as libcudf -from cudf.api.types import is_scalar +from cudf._lib.types import dtype_to_pylibcudf_type +from cudf._lib.utils import data_from_pylibcudf_io +from cudf.api.types import is_hashable, is_scalar +from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type from cudf.utils.performance_tracking import _performance_tracking +_CSV_HEX_TYPE_MAP = { + "hex": np.dtype("int64"), + "hex64": np.dtype("int64"), + "hex32": np.dtype("int32"), +} + @_performance_tracking @ioutils.doc_read_csv() def read_csv( filepath_or_buffer, - sep=",", - delimiter=None, + sep: str = ",", + delimiter: str | None = None, header="infer", names=None, index_col=None, usecols=None, prefix=None, - mangle_dupe_cols=True, + mangle_dupe_cols: bool = True, dtype=None, true_values=None, false_values=None, - skipinitialspace=False, - skiprows=0, - skipfooter=0, - nrows=None, + skipinitialspace: bool = False, + skiprows: int = 0, + skipfooter: int = 0, + nrows: int | None = None, na_values=None, - keep_default_na=True, - na_filter=True, - skip_blank_lines=True, + keep_default_na: bool = True, + na_filter: bool = True, + skip_blank_lines: bool = True, parse_dates=None, - dayfirst=False, + dayfirst: bool = False, compression="infer", - thousands=None, - decimal=".", - lineterminator="\n", - quotechar='"', - quoting=0, - doublequote=True, - comment=None, - delim_whitespace=False, - byte_range=None, + thousands: str | None = None, + decimal: str = ".", + lineterminator: str = "\n", + quotechar: str = '"', + quoting: int = 0, + doublequote: bool = True, + comment: str | None = None, + delim_whitespace: bool = False, + byte_range: list[int] | tuple[int, int] | None = None, storage_options=None, - bytes_per_thread=None, -): + bytes_per_thread: int | None = None, +) -> 
cudf.DataFrame: """{docstring}""" if delim_whitespace is not False: @@ -77,60 +93,225 @@ def read_csv( if na_values is not None and is_scalar(na_values): na_values = [na_values] - df = libcudf.csv.read_csv( - filepath_or_buffer, - lineterminator=lineterminator, - quotechar=quotechar, - quoting=quoting, - doublequote=doublequote, - header=header, - mangle_dupe_cols=mangle_dupe_cols, - usecols=usecols, - sep=sep, - delimiter=delimiter, - delim_whitespace=delim_whitespace, - skipinitialspace=skipinitialspace, - names=names, - dtype=dtype, - skipfooter=skipfooter, - skiprows=skiprows, - dayfirst=dayfirst, - compression=compression, - thousands=thousands, - decimal=decimal, - true_values=true_values, - false_values=false_values, - nrows=nrows, - byte_range=byte_range, - skip_blank_lines=skip_blank_lines, - parse_dates=parse_dates, - comment=comment, - na_values=na_values, - keep_default_na=keep_default_na, - na_filter=na_filter, - prefix=prefix, - index_col=index_col, + if not isinstance(filepath_or_buffer, (BytesIO, StringIO, bytes)): + if not os.path.isfile(filepath_or_buffer): + raise FileNotFoundError( + errno.ENOENT, os.strerror(errno.ENOENT), filepath_or_buffer + ) + + if isinstance(filepath_or_buffer, StringIO): + filepath_or_buffer = filepath_or_buffer.read().encode() + elif isinstance(filepath_or_buffer, str) and not os.path.isfile( + filepath_or_buffer + ): + filepath_or_buffer = filepath_or_buffer.encode() + + _validate_args( + delimiter, + sep, + delim_whitespace, + decimal, + thousands, + nrows, + skipfooter, + byte_range, + skiprows, + ) + + # Alias sep -> delimiter. + if delimiter is None: + delimiter = sep + + delimiter = str(delimiter) + + if byte_range is None: + byte_range = (0, 0) + + if compression is None: + c_compression = plc.io.types.CompressionType.NONE + else: + compression_map = { + "infer": plc.io.types.CompressionType.AUTO, + "gzip": plc.io.types.CompressionType.GZIP, + "bz2": plc.io.types.CompressionType.BZIP2, + "zip": plc.io.types.CompressionType.ZIP, + } + c_compression = compression_map[compression] + + # We need this later when setting index cols + orig_header = header + + if names is not None: + # explicitly mentioned name, so don't check header + if header is None or header == "infer": + header = -1 + else: + header = header + names = list(names) + else: + if header is None: + header = -1 + elif header == "infer": + header = 0 + + hex_cols: list[abc.Hashable] = [] + new_dtypes: list[plc.DataType] | dict[abc.Hashable, plc.DataType] = [] + if dtype is not None: + if isinstance(dtype, abc.Mapping): + new_dtypes = {} + for k, col_type in dtype.items(): + if is_hashable(col_type) and col_type in _CSV_HEX_TYPE_MAP: + col_type = _CSV_HEX_TYPE_MAP[col_type] + hex_cols.append(str(k)) + + new_dtypes[k] = _get_plc_data_type_from_dtype( + cudf.dtype(col_type) + ) + elif cudf.api.types.is_scalar(dtype) or isinstance( + dtype, (np.dtype, pd.api.extensions.ExtensionDtype, type) + ): + if is_hashable(dtype) and dtype in _CSV_HEX_TYPE_MAP: + dtype = _CSV_HEX_TYPE_MAP[dtype] + hex_cols.append(0) + + cast(list, new_dtypes).append(_get_plc_data_type_from_dtype(dtype)) + elif isinstance(dtype, abc.Collection): + for index, col_dtype in enumerate(dtype): + if is_hashable(col_dtype) and col_dtype in _CSV_HEX_TYPE_MAP: + col_dtype = _CSV_HEX_TYPE_MAP[col_dtype] + hex_cols.append(index) + + new_dtypes.append(_get_plc_data_type_from_dtype(col_dtype)) + else: + raise ValueError( + "dtype should be a scalar/str/list-like/dict-like" + ) + options = ( + 
plc.io.csv.CsvReaderOptions.builder( + plc.io.SourceInfo([filepath_or_buffer]) + ) + .compression(c_compression) + .mangle_dupe_cols(mangle_dupe_cols) + .byte_range_offset(byte_range[0]) + .byte_range_size(byte_range[1]) + .nrows(nrows if nrows is not None else -1) + .skiprows(skiprows) + .skipfooter(skipfooter) + .quoting(quoting) + .lineterminator(str(lineterminator)) + .quotechar(quotechar) + .decimal(decimal) + .delim_whitespace(delim_whitespace) + .skipinitialspace(skipinitialspace) + .skip_blank_lines(skip_blank_lines) + .doublequote(doublequote) + .keep_default_na(keep_default_na) + .na_filter(na_filter) + .dayfirst(dayfirst) + .build() + ) + + options.set_header(header) + + if names is not None: + options.set_names([str(name) for name in names]) + + if prefix is not None: + options.set_prefix(prefix) + + if usecols is not None: + if all(isinstance(col, int) for col in usecols): + options.set_use_cols_indexes(list(usecols)) + else: + options.set_use_cols_names([str(name) for name in usecols]) + + if delimiter is not None: + options.set_delimiter(delimiter) + + if thousands is not None: + options.set_thousands(thousands) + + if comment is not None: + options.set_comment(comment) + + if parse_dates is not None: + options.set_parse_dates(list(parse_dates)) + + if hex_cols is not None: + options.set_parse_hex(list(hex_cols)) + + options.set_dtypes(new_dtypes) + + if true_values is not None: + options.set_true_values([str(val) for val in true_values]) + + if false_values is not None: + options.set_false_values([str(val) for val in false_values]) + + if na_values is not None: + options.set_na_values([str(val) for val in na_values]) + + df = cudf.DataFrame._from_data( + *data_from_pylibcudf_io(plc.io.csv.read_csv(options)) ) + if isinstance(dtype, abc.Mapping): + for k, v in dtype.items(): + if isinstance(cudf.dtype(v), cudf.CategoricalDtype): + df._data[str(k)] = df._data[str(k)].astype(v) + elif dtype == "category" or isinstance(dtype, cudf.CategoricalDtype): + df = df.astype(dtype) + elif isinstance(dtype, abc.Collection) and not is_scalar(dtype): + for index, col_dtype in enumerate(dtype): + if isinstance(cudf.dtype(col_dtype), cudf.CategoricalDtype): + col_name = df._column_names[index] + df._data[col_name] = df._data[col_name].astype(col_dtype) + + if names is not None and len(names) and isinstance(names[0], int): + df.columns = [int(x) for x in df._data] + elif ( + names is None + and header == -1 + and cudf.get_option("mode.pandas_compatible") + ): + df.columns = [int(x) for x in df._column_names] + + # Set index if the index_col parameter is passed + if index_col is not None and index_col is not False: + if isinstance(index_col, int): + index_col_name = df._data.get_labels_by_index(index_col)[0] + df = df.set_index(index_col_name) + if ( + isinstance(index_col_name, str) + and names is None + and orig_header == "infer" + ): + if index_col_name.startswith("Unnamed:"): + # TODO: Try to upstream it to libcudf + # csv reader in future + df.index.name = None + elif names is None: + df.index.name = index_col + else: + df = df.set_index(index_col) + if dtype is None or isinstance(dtype, abc.Mapping): # There exists some dtypes in the result columns that is inferred. # Find them and map them to the default dtypes. 
specified_dtypes = {} if dtype is None else dtype - unspecified_dtypes = { - name: dtype - for name, dtype in df._dtypes - if name not in specified_dtypes - } default_dtypes = {} - - for name, dt in unspecified_dtypes.items(): - if dt == np.dtype("i1"): + for name, dt in df._dtypes: + if name in specified_dtypes: + continue + elif dt == np.dtype("i1"): # csv reader reads all null column as int8. # The dtype should remain int8. default_dtypes[name] = dt else: default_dtypes[name] = _maybe_convert_to_default_type(dt) - df = df.astype(default_dtypes) + + if default_dtypes: + df = df.astype(default_dtypes) return df @@ -138,17 +319,17 @@ def read_csv( @_performance_tracking @ioutils.doc_to_csv() def to_csv( - df, + df: cudf.DataFrame, path_or_buf=None, - sep=",", - na_rep="", + sep: str = ",", + na_rep: str = "", columns=None, - header=True, - index=True, + header: bool = True, + index: bool = True, encoding=None, compression=None, - lineterminator="\n", - chunksize=None, + lineterminator: str = "\n", + chunksize: int | None = None, storage_options=None, ): """{docstring}""" @@ -187,15 +368,10 @@ def to_csv( ) for _, dtype in df._dtypes: - if isinstance(dtype, cudf.ListDtype): - raise NotImplementedError( - "Writing to csv format is not yet supported with " - "list columns." - ) - elif isinstance(dtype, cudf.StructDtype): + if isinstance(dtype, (cudf.ListDtype, cudf.StructDtype)): raise NotImplementedError( "Writing to csv format is not yet supported with " - "Struct columns." + f"{dtype} columns." ) # TODO: Need to typecast categorical columns to the underlying @@ -208,7 +384,7 @@ def to_csv( df = df.copy(deep=False) for col_name, col in df._column_labels_and_values: if isinstance(col.dtype, cudf.CategoricalDtype): - df._data[col_name] = col.astype(col.categories.dtype) + df._data[col_name] = col.astype(col.dtype.categories.dtype) if isinstance(df.index, cudf.CategoricalIndex): df.index = df.index.astype(df.index.categories.dtype) @@ -218,7 +394,7 @@ def to_csv( if ioutils.is_fsspec_open_file(path_or_buf): with path_or_buf as file_obj: file_obj = ioutils.get_IOBase_writer(file_obj) - libcudf.csv.write_csv( + _plc_write_csv( df, path_or_buf=file_obj, sep=sep, @@ -229,7 +405,7 @@ def to_csv( index=index, ) else: - libcudf.csv.write_csv( + _plc_write_csv( df, path_or_buf=path_or_buf, sep=sep, @@ -243,3 +419,127 @@ def to_csv( if return_as_string: path_or_buf.seek(0) return path_or_buf.read() + + +@acquire_spill_lock() +def _plc_write_csv( + table: cudf.DataFrame, + path_or_buf=None, + sep: str = ",", + na_rep: str = "", + header: bool = True, + lineterminator: str = "\n", + rows_per_chunk: int = 8, + index: bool = True, +) -> None: + iter_columns = ( + itertools.chain(table.index._columns, table._columns) + if index + else table._columns + ) + columns = [col.to_pylibcudf(mode="read") for col in iter_columns] + col_names = [] + if header: + table_names = ( + na_rep if name is None or pd.isnull(name) else name + for name in table._column_names + ) + iter_names = ( + itertools.chain(table.index.names, table_names) + if index + else table_names + ) + all_names = list(iter_names) + col_names = [ + '""' + if (name in (None, "") and len(all_names) == 1) + else (str(name) if name not in (None, "") else "") + for name in all_names + ] + try: + plc.io.csv.write_csv( + ( + plc.io.csv.CsvWriterOptions.builder( + plc.io.SinkInfo([path_or_buf]), plc.Table(columns) + ) + .names(col_names) + .na_rep(na_rep) + .include_header(header) + .rows_per_chunk(rows_per_chunk) + .line_terminator(str(lineterminator)) + 
.inter_column_delimiter(str(sep)) + .true_value("True") + .false_value("False") + .build() + ) + ) + except OverflowError as err: + raise OverflowError( + f"Writing CSV file with chunksize={rows_per_chunk} failed. " + "Consider providing a smaller chunksize argument." + ) from err + + +def _validate_args( + delimiter: str | None, + sep: str, + delim_whitespace: bool, + decimal: str, + thousands: str | None, + nrows: int | None, + skipfooter: int, + byte_range: list[int] | tuple[int, int] | None, + skiprows: int, +) -> None: + if delim_whitespace: + if delimiter is not None: + raise ValueError("cannot set both delimiter and delim_whitespace") + if sep != ",": + raise ValueError("cannot set both sep and delim_whitespace") + + # Alias sep -> delimiter. + actual_delimiter = delimiter if delimiter else sep + + if decimal == actual_delimiter: + raise ValueError("decimal cannot be the same as delimiter") + + if thousands == actual_delimiter: + raise ValueError("thousands cannot be the same as delimiter") + + if nrows is not None and skipfooter != 0: + raise ValueError("cannot use both nrows and skipfooter parameters") + + if byte_range is not None: + if skipfooter != 0 or skiprows != 0 or nrows is not None: + raise ValueError( + "cannot manually limit rows to be read when using the byte range parameter" + ) + + +def _get_plc_data_type_from_dtype(dtype) -> plc.DataType: + # TODO: Remove this work-around Dictionary types + # in libcudf are fully mapped to categorical columns: + # https://github.com/rapidsai/cudf/issues/3960 + if isinstance(dtype, cudf.CategoricalDtype): + dtype = dtype.categories.dtype + elif dtype == "category": + dtype = "str" + + if isinstance(dtype, str): + if dtype == "date32": + return plc.DataType(plc.types.TypeId.TIMESTAMP_DAYS) + elif dtype in ("date", "date64"): + return plc.DataType(plc.types.TypeId.TIMESTAMP_MILLISECONDS) + elif dtype == "timestamp": + return plc.DataType(plc.types.TypeId.TIMESTAMP_MILLISECONDS) + elif dtype == "timestamp[us]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_MICROSECONDS) + elif dtype == "timestamp[s]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_SECONDS) + elif dtype == "timestamp[ms]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_MILLISECONDS) + elif dtype == "timestamp[ns]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_NANOSECONDS) + + dtype = cudf.dtype(dtype) + return dtype_to_pylibcudf_type(dtype)
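Editor's note: for readers skimming the new python/cudf/cudf/io/csv.py above, the short standalone sketch below shows the pylibcudf builder pattern that this patch inlines into cudf. It is illustrative only, not part of the patch: it uses only calls that appear in the diff (CsvReaderOptions.builder, SourceInfo, set_header, read_csv, data_from_pylibcudf_io), while the in-memory buffer, its column names, and the assumption that the builder's defaults are acceptable for options left unset are all hypothetical.

    # Minimal sketch: read CSV bytes through pylibcudf, mirroring the
    # inlined path in cudf/io/csv.py (illustrative, not from the patch).
    from io import BytesIO

    import pylibcudf as plc

    import cudf
    from cudf._lib.utils import data_from_pylibcudf_io

    source = BytesIO(b"a,b\n1,x\n2,y\n")  # hypothetical in-memory CSV

    options = (
        plc.io.csv.CsvReaderOptions.builder(plc.io.SourceInfo([source]))
        .compression(plc.io.types.CompressionType.NONE)
        .nrows(-1)      # -1 means "read all rows", as in the patch
        .skiprows(0)
        .build()
    )
    options.set_header(0)  # row 0 supplies the column names

    # plc.io.csv.read_csv returns a table with metadata; cudf converts it
    # into a DataFrame the same way the new read_csv in the diff does.
    df = cudf.DataFrame._from_data(
        *data_from_pylibcudf_io(plc.io.csv.read_csv(options))
    )
    print(df)

The public cudf.read_csv signature is unchanged by this patch; the sketch only makes explicit the lower-level options-building step that previously lived in the deleted cudf/_lib/csv.pyx.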