Skip to content

Commit

Permalink
cleanup
Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed May 13, 2024
1 parent 9c6ce78 commit a04b0a2
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 23 deletions.
8 changes: 1 addition & 7 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import abc
import warnings
from functools import cached_property
from typing import TYPE_CHECKING, Hashable, List, Optional
from typing import Hashable, List, Optional

import numpy as np
import pandas
Expand Down Expand Up @@ -53,10 +53,6 @@

from . import doc_utils

if TYPE_CHECKING:
# TODO: should be ModinDataframe
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe


def _get_axis(axis):
"""
Expand Down Expand Up @@ -131,8 +127,6 @@ class BaseQueryCompiler(
for a list of requirements for subclassing this object.
"""

_modin_frame: PandasDataframe

def __wrap_in_qc(self, obj):
"""
Wrap `obj` in query compiler.
Expand Down
10 changes: 5 additions & 5 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,7 @@ def compute_dtypes_fn(dtypes, axis, **kwargs):
):
return np.object_
# how to take into account backend here?
return "float64"
return np.float64

return TreeReduce.register(
map_fn,
Expand Down Expand Up @@ -2141,7 +2141,7 @@ def searchsorted(df):
dt_month_name = Map.register(_dt_func_map("month_name"), dtypes=np.object_)
dt_day_name = Map.register(_dt_func_map("day_name"), dtypes=np.object_)
dt_to_pytimedelta = Map.register(_dt_func_map("to_pytimedelta"), dtypes=np.object_)
dt_total_seconds = Map.register(_dt_func_map("total_seconds"), dtypes="float64")
dt_total_seconds = Map.register(_dt_func_map("total_seconds"), dtypes=np.float64)
dt_seconds = Map.register(_dt_prop_map("seconds"), dtypes=np.int64)
dt_days = Map.register(_dt_prop_map("days"), dtypes=np.int64)
dt_microseconds = Map.register(_dt_prop_map("microseconds"), dtypes=np.int64)
Expand Down Expand Up @@ -2323,7 +2323,7 @@ def map_func(df): # pragma: no cover
# Does it work with pyarrow backend?
df_mask = np.isfinite(df)

result = np.empty((n_rows, n_cols), dtype="float64")
result = np.empty((n_rows, n_cols), dtype=np.float64)

for i in range(n_rows):
df_ith_row = df[i]
Expand Down Expand Up @@ -2679,7 +2679,7 @@ def quantile_builder(df, **kwargs):
lambda df: quantile_builder(df, **kwargs),
new_index=q_index,
new_columns=new_columns,
dtypes="float64",
dtypes=np.float64,
)
result = self.__constructor__(new_modin_frame)
return result.transpose() if axis == 1 else result
Expand All @@ -2696,7 +2696,7 @@ def rank(self, **kwargs):
if not numeric_only
else None
),
dtypes="float64",
dtypes=np.float64,
sync_labels=False,
)
return self.__constructor__(new_modin_frame)
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1623,7 +1623,7 @@ def prod(
and numeric_only is False
and min_count > len(axis_to_apply)
# Type inference is not so simple for pyarrow
and self._query_compiler.get_backend() == "default"
and self._query_compiler.get_backend() is not None
):
new_index = self.columns if not axis else self.index
# >>> pd.DataFrame([1,2,3,4], dtype="int64[pyarrow]").prod(min_count=10)
Expand Down Expand Up @@ -2153,7 +2153,7 @@ def sum(
and numeric_only is False
and min_count > len(axis_to_apply)
# Type inference is not so simple for pyarrow
and self._query_compiler.get_backend() == "default"
and self._query_compiler.get_backend() is not None
):
new_index = self.columns if not axis else self.index
return Series(
Expand Down
10 changes: 1 addition & 9 deletions modin/tests/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,11 +1088,7 @@ def eval_io_from_str(csv_str: str, unique_filename: str, **kwargs):


def create_test_dfs(*args, **kwargs) -> tuple[pd.DataFrame, pandas.DataFrame]:
post_fn = kwargs.pop("post_fn", None)

if post_fn is None:
# TODO: REVERT ME
post_fn = lambda df: df.convert_dtypes(dtype_backend="pyarrow") # noqa: E731
post_fn = kwargs.pop("post_fn", lambda df: df)
return tuple(
map(post_fn, [pd.DataFrame(*args, **kwargs), pandas.DataFrame(*args, **kwargs)])
)
Expand All @@ -1108,10 +1104,6 @@ def create_test_series(vals, sort=False, **kwargs) -> tuple[pd.Series, pandas.Se
if sort:
modin_series = modin_series.sort_values().reset_index(drop=True)
pandas_series = pandas_series.sort_values().reset_index(drop=True)

# TODO: REVERT ME
modin_series = modin_series.convert_dtypes(dtype_backend="pyarrow")
pandas_series = pandas_series.convert_dtypes(dtype_backend="pyarrow")
return modin_series, pandas_series


Expand Down

0 comments on commit a04b0a2

Please sign in to comment.