Skip to content

Commit

Permalink
PERF-#6668: Use copy=False for internal usage of set_axis (#6667)
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev authored Oct 31, 2023
1 parent dcd750c commit 521eb60
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
7 changes: 6 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,12 @@ def _propagate_index_objs(self, axis=None):
if axis is None:

def apply_idx_objs(df, idx, cols):
- return df.set_axis(idx, axis="index").set_axis(cols, axis="columns")
+ # We should make at least one copy to avoid the data modification problem
+ # that may arise when sharing buffers from distributed storage
+ # (zero-copy pickling).
+ return df.set_axis(idx, axis="index").set_axis(
+ cols, axis="columns", copy=False
+ )

self._partitions = np.array(
[
Expand Down
4 changes: 3 additions & 1 deletion modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1990,7 +1990,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
# because there is no need to identify which original column's aggregation
# the new column represents. alternatively we could give the query compiler
# a hint that it's for a series, not a dataframe.
- return result.set_axis(labels=self._try_get_str_func(func), axis=1)
+ return result.set_axis(
+ labels=self._try_get_str_func(func), axis=1, copy=False
+ )
else:
return super().aggregate(
func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
Expand Down

0 comments on commit 521eb60

Please sign in to comment.