Skip to content

Commit

Permalink
new approach
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed May 12, 2024
1 parent 8b93500 commit ae861e3
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 94 deletions.
19 changes: 2 additions & 17 deletions modin/core/dataframe/algebra/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def maybe_compute_dtypes_common_cast(
# belong to the intersection, these will be NaN columns in the result
mismatch_columns = columns_first ^ columns_second
elif isinstance(second, dict):
# TODO: pyarrow backend
dtypes_second = {
key: pandas.api.types.pandas_dtype(type(value))
for key, value in second.items()
Expand All @@ -98,7 +97,6 @@ def maybe_compute_dtypes_common_cast(
mismatch_columns = columns_first.difference(columns_second)
else:
if isinstance(second, (list, tuple)):
# TODO: pyarrow backend
second_dtypes_list = (
[pandas.api.types.pandas_dtype(type(value)) for value in second]
if axis == 1
Expand All @@ -107,7 +105,6 @@ def maybe_compute_dtypes_common_cast(
else [np.array(second).dtype] * len(dtypes_first)
)
elif is_scalar(second) or isinstance(second, np.ndarray):
# TODO: pyarrow backend
try:
dtype = getattr(second, "dtype", None) or pandas.api.types.pandas_dtype(
type(second)
Expand All @@ -133,7 +130,6 @@ def maybe_compute_dtypes_common_cast(
mismatch_columns = []

# If at least one column doesn't match, the result for each non-matching column will be NaN.
# TODO: pyarrow backend
nan_dtype = pandas.api.types.pandas_dtype(type(np.nan))
dtypes = None
if func is not None:
Expand Down Expand Up @@ -249,7 +245,7 @@ def try_compute_new_dtypes(
infer_dtypes : {"common_cast", "try_sample", "bool", None}, default: None
How dtypes should be inferred (see ``Binary.register`` doc for more info).
result_dtype : np.dtype, optional
NumPy dtype of the result. If not specified it will be inferred from the `infer_dtypes` parameter. Only NumPy?
NumPy dtype of the result. If not specified it will be inferred from the `infer_dtypes` parameter.
axis : int, default: 0
Axis to perform the binary operation along.
func : callable(pandas.DataFrame, pandas.DataFrame) -> pandas.DataFrame, optional
Expand All @@ -264,19 +260,8 @@ def try_compute_new_dtypes(

try:
if infer_dtypes == "bool" or is_bool_dtype(result_dtype):
# dataframe can contain types of different backends at the same time, for example:
# (Pdb) (pandas.DataFrame([[1,2,3], [4,5,6]]).astype({0: "int64[pyarrow]"}) > 4).dtypes
# 0 bool[pyarrow]
# 1 bool
# 2 bool
# dtype: object
backend = ""
if any("pyarrow" in str(x) for x in first.dtypes) or any(
"pyarrow" in str(x) for x in second.dtypes
):
backend = "[pyarrow]"
dtypes = maybe_build_dtypes_series(
first, second, dtype=pandas.api.types.pandas_dtype(f"bool{backend}")
first, second, dtype=pandas.api.types.pandas_dtype(bool)
)
elif infer_dtypes == "common_cast":
dtypes = maybe_compute_dtypes_common_cast(
Expand Down
13 changes: 2 additions & 11 deletions modin/core/dataframe/algebra/fold.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,14 @@

"""Module houses builder class for Fold operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable

from .operator import Operator

if TYPE_CHECKING:
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class Fold(Operator):
"""Builder class for Fold functions."""

@classmethod
def register(cls, fold_function) -> Callable[..., PandasQueryCompiler]:
def register(cls, fold_function):
"""
Build Fold operator that will be performed across rows/columns.
Expand All @@ -42,9 +35,7 @@ def register(cls, fold_function) -> Callable[..., PandasQueryCompiler]:
Function that takes query compiler and executes Fold function.
"""

def caller(
query_compiler: PandasQueryCompiler, fold_axis=None, *args, **kwargs
) -> PandasQueryCompiler:
def caller(query_compiler, fold_axis=None, *args, **kwargs):
"""
Execute Fold function against passed query compiler.
Expand Down
9 changes: 1 addition & 8 deletions modin/core/dataframe/algebra/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,8 @@

"""Module houses builder class for Map operator."""

from __future__ import annotations

from typing import TYPE_CHECKING

from .operator import Operator

if TYPE_CHECKING:
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class Map(Operator):
"""Builder class for Map operator."""
Expand All @@ -48,7 +41,7 @@ def register(cls, function, *call_args, **call_kwds):
Function that takes query compiler and executes map function.
"""

def caller(query_compiler: PandasQueryCompiler, *args, **kwargs):
def caller(query_compiler, *args, **kwargs):
"""Execute Map function against passed query compiler."""
shape_hint = call_kwds.pop("shape_hint", None) or query_compiler._shape_hint
return query_compiler.__constructor__(
Expand Down
2 changes: 1 addition & 1 deletion modin/core/dataframe/algebra/tree_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def register(
axis : int, optional
Specifies axis to apply function along.
compute_dtypes : callable(pandas.Series, *func_args, **func_kwargs) -> np.dtype, optional
Callable for computing dtypes. Only NumPy?
Callable for computing dtypes.
Returns
-------
Expand Down
Loading

0 comments on commit ae861e3

Please sign in to comment.