diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 71ec11e75af..a0cf38c6f51 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1193,7 +1193,7 @@ def _concat( f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - codes_col = column.column_empty(0, head.codes.dtype, masked=True) + codes_col = column.column_empty(0, head.codes.dtype) else: codes_col = column.concat_columns(codes) # type: ignore[arg-type] diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 42b4fda8be2..624a3ac95ed 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -551,7 +551,7 @@ def slice(self, start: int, stop: int, stride: int | None = None) -> Self: if stop < 0 and not (stride < 0 and stop == -1): stop = stop + len(self) if (stride > 0 and start >= stop) or (stride < 0 and start <= stop): - return cast(Self, column_empty(0, self.dtype, masked=True)) + return cast(Self, column_empty(0, self.dtype)) # compute mask slice if stride == 1: return libcudf.copying.column_slice(self, [start, stop])[ @@ -1054,7 +1054,7 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase: if self.dtype == dtype: result = self else: - result = column_empty(0, dtype=dtype, masked=self.nullable) + result = column_empty(0, dtype=dtype) elif dtype == "category": # TODO: Figure out why `cudf.dtype("category")` # astype's different than just the string @@ -1625,7 +1625,6 @@ def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool: def column_empty( row_count: int, dtype: Dtype = "object", - masked: bool = False, for_numba: bool = False, ) -> ColumnBase: """ @@ -1642,9 +1641,6 @@ def column_empty( dtype : Dtype Type of the column. - masked : bool - Unused. - for_numba : bool, default False If True, don't allocate a mask as it's not supported by numba. """ @@ -2420,7 +2416,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: dtype = cudf.dtype(None) - return column_empty(0, dtype=dtype, masked=True) + return column_empty(0, dtype=dtype) # If all columns are `NumericalColumn` with different dtypes, # we cast them to a common dtype. @@ -2467,7 +2463,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - return column_empty(0, head.dtype, masked=True) + return column_empty(0, head.dtype) # Filter out inputs that have 0 length, then concatenate. objs_with_len = [o for o in objs if len(o)] diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b526a6efa51..81b82040b8d 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -598,14 +598,12 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: if len(self) == 0: return cast( cudf.core.column.StringColumn, - column.column_empty(0, dtype="object", masked=False), + column.column_empty(0, dtype="object"), ) if format in _DATETIME_SPECIAL_FORMATS: names = as_column(_DATETIME_NAMES) else: - names = cudf.core.column.column_empty( - 0, dtype="object", masked=False - ) + names = column.column_empty(0, dtype="object") return string._datetime_to_str_typecast_functions[self.dtype]( self, format, names ) diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index e06a0447f5c..7a39355dd50 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -139,7 +139,7 @@ def quantile( result = cast( NumericalBaseColumn, cudf.core.column.column_empty( - row_count=len(q), dtype=self.dtype, masked=True + row_count=len(q), dtype=self.dtype ), ) else: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index c021554f3bd..d76caa5c3b8 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5855,7 +5855,7 @@ def strptime( f"dtype must be datetime or timedelta type, not {dtype}" ) elif self.null_count == len(self): - return column.column_empty(len(self), dtype=dtype, masked=True) # type: ignore[return-value] + return column.column_empty(len(self), dtype=dtype) # type: ignore[return-value] elif (self == "None").any(): raise ValueError( "Cannot convert `None` value to datetime or timedelta." diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index f3a7916aa35..8b1515acae2 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -294,7 +294,7 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: if len(self) == 0: return cast( cudf.core.column.StringColumn, - column.column_empty(0, dtype="object", masked=False), + column.column_empty(0, dtype="object"), ) else: return string._timedelta_to_str_typecast_functions[self.dtype]( diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8cdc45e12da..fce361e18ea 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -774,9 +774,7 @@ def __init__( label_dtype = getattr(columns, "dtype", None) self._data = ColumnAccessor( { - k: column.column_empty( - len(self), dtype="object", masked=True - ) + k: column_empty(len(self), dtype="object") for k in columns }, level_names=tuple(columns.names) @@ -979,8 +977,8 @@ def _init_from_series_list(self, data, columns, index): if columns is not None: for col_name in columns: if col_name not in self._data: - self._data[col_name] = column.column_empty( - row_count=len(self), dtype=None, masked=True + self._data[col_name] = column_empty( + row_count=len(self), dtype=None ) self._data._level_names = ( tuple(columns.names) @@ -1031,11 +1029,7 @@ def _init_from_list_like(self, data, index=None, columns=None): data = list(itertools.zip_longest(*data)) if columns is not None and len(data) == 0: - data = [ - cudf.core.column.column_empty(row_count=0, dtype=None) - for _ in columns - ] - + data = [column_empty(row_count=0, dtype=None) for _ in columns] for col_name, col in enumerate(data): self._data[col_name] = column.as_column(col) self._data.rangeindex = True @@ -1074,9 +1068,8 @@ def _init_from_dict_like( # the provided index, so we need to return a masked # array of nulls if an index is given. empty_column = functools.partial( - cudf.core.column.column_empty, - row_count=(0 if index is None else len(index)), - masked=index is not None, + column_empty, + row_count=0 if index is None else len(index), ) data = { @@ -1421,7 +1414,7 @@ def __setitem__(self, arg, value): new_columns = ( value if key == arg - else column.column_empty( + else column_empty( row_count=length, dtype=col.dtype ) for key, col in self._column_labels_and_values @@ -3373,7 +3366,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): if num_cols != 0: ca = self._data._from_columns_like_self( ( - column.column_empty(row_count=length, dtype=dtype) + column_empty(row_count=length, dtype=dtype) for _, dtype in self._dtypes ), verify=False, @@ -3479,7 +3472,7 @@ def diff(self, periods=1, axis=0): if abs(periods) > len(self): df = cudf.DataFrame._from_data( { - name: column_empty(len(self), dtype=dtype, masked=True) + name: column_empty(len(self), dtype=dtype) for name, dtype in zip(self._column_names, self.dtypes) } ) @@ -3859,9 +3852,7 @@ def agg(self, aggs, axis=None): result = DataFrame(index=idxs, columns=cols) for key in aggs.keys(): col = self[key] - col_empty = column_empty( - len(idxs), dtype=col.dtype, masked=True - ) + col_empty = column_empty(len(idxs), dtype=col.dtype) ans = cudf.Series._from_column( col_empty, index=cudf.Index(idxs) ) @@ -6177,9 +6168,7 @@ def quantile( quant_index=False, )._column if len(res) == 0: - res = column.column_empty( - row_count=len(qs), dtype=ser.dtype - ) + res = column_empty(row_count=len(qs), dtype=ser.dtype) result[k] = res result = DataFrame._from_data(result) @@ -7333,9 +7322,7 @@ def unnamed_group_generator(): ) all_nulls = functools.cache( - functools.partial( - column_empty, self.shape[0], common_type, masked=True - ) + functools.partial(column_empty, self.shape[0], common_type) ) # homogenize the dtypes of the columns @@ -8582,7 +8569,7 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories): # If column not in this df, fill with an all-null column if idx >= len(cols) or cols[idx] is None: n = len(next(x for x in cols if x is not None)) - cols[idx] = column_empty(row_count=n, dtype=dtype, masked=True) + cols[idx] = column_empty(row_count=n, dtype=dtype) else: # If column is categorical, rebase the codes with the # combined categories, and cast the new codes to the diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 9bb29f1920a..971f0be77f8 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -189,9 +189,7 @@ def categories(self) -> cudf.Index: Index(['b', 'a'], dtype='object') """ if self._categories is None: - col = cudf.core.column.column_empty( - 0, dtype="object", masked=False - ) + col = cudf.core.column.column_empty(0, dtype="object") else: col = self._categories return cudf.Index._from_column(col) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index d4f3394833a..a8d82f977d5 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -493,9 +493,7 @@ def size(self): """ Return the size of each group. """ - col = cudf.core.column.column_empty( - len(self.obj), "int8", masked=False - ) + col = cudf.core.column.column_empty(len(self.obj), "int8") result = ( cudf.Series._from_column(col, name=getattr(self.obj, "name", None)) .groupby(self.grouping, sort=self._sort, dropna=self._dropna) @@ -523,7 +521,8 @@ def cumcount(self, ascending: bool = True): return ( cudf.Series._from_column( cudf.core.column.column_empty( - len(self.obj), "int8", masked=False + len(self.obj), + "int8", ), index=self.obj.index, ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index eeb6e3bd547..8d3ef1036d1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -336,7 +336,7 @@ def _values(self) -> ColumnBase: if len(self) > 0: return column.as_column(self._range, dtype=self.dtype) else: - return column.column_empty(0, masked=False, dtype=self.dtype) + return column.column_empty(0, dtype=self.dtype) def _clean_nulls_from_index(self) -> Self: return self diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 0e6a5e03ea6..81d954960e2 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3851,7 +3851,6 @@ def _reindex( if name in df._data else cudf.core.column.column.column_empty( dtype=dtypes.get(name, np.float64), - masked=True, row_count=len(index), ) ) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 66095d4a155..153ee0fa01a 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -1139,7 +1139,6 @@ def _parquet_to_frame( dfs[-1][name] = column_empty( row_count=_len, dtype=_dtype, - masked=True, ) else: dfs[-1][name] = as_column(