cleanup

Signed-off-by: Anatoly Myachev <[email protected]>
anmyachev · May 13, 2024 · c25a419
1 parent 07f9927
commit c25a419
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 18 deletions.
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1950,7 +1950,7 @@ def convert_dtypes(
     str_capitalize = Map.register(_str_map("capitalize"), dtypes="copy")
     str_center = Map.register(_str_map("center"), dtypes="copy")
     str_contains = Map.register(_str_map("contains"), dtypes=np.bool_)
-    str_count = Map.register(_str_map("count"), dtypes=np.int64)
+    str_count = Map.register(_str_map("count"), dtypes=int)
     str_endswith = Map.register(_str_map("endswith"), dtypes=np.bool_)
     str_find = Map.register(_str_map("find"), dtypes=np.int64)
     str_findall = Map.register(_str_map("findall"), dtypes="copy")
@@ -1966,7 +1966,7 @@ def convert_dtypes(
     str_istitle = Map.register(_str_map("istitle"), dtypes=np.bool_)
     str_isupper = Map.register(_str_map("isupper"), dtypes=np.bool_)
     str_join = Map.register(_str_map("join"), dtypes="copy")
-    str_len = Map.register(_str_map("len"), dtypes=np.int64)
+    str_len = Map.register(_str_map("len"), dtypes=int)
     str_ljust = Map.register(_str_map("ljust"), dtypes="copy")
     str_lower = Map.register(_str_map("lower"), dtypes="copy")
     str_lstrip = Map.register(_str_map("lstrip"), dtypes="copy")
@@ -2105,9 +2105,9 @@ def searchsorted(df):
     dt_date = Map.register(_dt_prop_map("date"), dtypes=np.object_)
     dt_time = Map.register(_dt_prop_map("time"), dtypes=np.object_)
     dt_timetz = Map.register(_dt_prop_map("timetz"), dtypes=np.object_)
-    dt_year = Map.register(_dt_prop_map("year"), dtypes="int32")
-    dt_month = Map.register(_dt_prop_map("month"), dtypes="int32")
-    dt_day = Map.register(_dt_prop_map("day"), dtypes="int32")
+    dt_year = Map.register(_dt_prop_map("year"), dtypes=np.int32)
+    dt_month = Map.register(_dt_prop_map("month"), dtypes=np.int32)
+    dt_day = Map.register(_dt_prop_map("day"), dtypes=np.int32)
     dt_hour = Map.register(_dt_prop_map("hour"), dtypes=np.int64)
     dt_minute = Map.register(_dt_prop_map("minute"), dtypes=np.int64)
     dt_second = Map.register(_dt_prop_map("second"), dtypes=np.int64)
@@ -2158,7 +2158,6 @@ def astype(self, col_dtypes, errors: str = "raise"):
         # other query compilers may not take care of error handling at the API
         # layer. This query compiler assumes there won't be any errors due to
         # invalid type keys.
-        # Function that can change the backend
         return self.__constructor__(
             self._modin_frame.astype(col_dtypes, errors=errors),
             shape_hint=self._shape_hint,
@@ -2320,7 +2319,6 @@ def map_func(df):  # pragma: no cover
             """Compute covariance or correlation matrix for the passed frame."""
             df = df.to_numpy()
             n_rows = df.shape[0]
-            # Does it work with pyarrow backend?
             df_mask = np.isfinite(df)
 
             result = np.empty((n_rows, n_cols), dtype=np.float64)
@@ -3191,8 +3189,7 @@ def _compute_duplicated(df):  # pragma: no cover
             hashed_modin_frame = self._modin_frame.reduce(
                 axis=1,
                 function=_compute_hash,
-                # TODO: pyarrow backend
-                dtypes=np.object_,
+                dtypes=pandas.api.types.pandas_dtype("O"),
             )
         else:
             hashed_modin_frame = self._modin_frame
@@ -3628,7 +3625,7 @@ def groupby_mean(self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=Fals
                 )
 
         qc_with_converted_datetime_cols = (
-            self.astype({col: np.int64 for col in datetime_cols.keys()})
+            self.astype({col: "int64" for col in datetime_cols.keys()})
             if len(datetime_cols) > 0
             else self
         )
@@ -4480,15 +4477,14 @@ def map_fn(df):  # pragma: no cover
         # efficient if we are mapping over all of the data to do it this way
         # than it would be to reuse the code for specific columns.
         if len(columns) == len(self.columns):
-            # TODO: pyarrow backend
             new_modin_frame = self._modin_frame.apply_full_axis(
-                0, map_fn, new_index=self.index, dtypes=np.bool_
+                0, map_fn, new_index=self.index, dtypes=bool
             )
             untouched_frame = None
         else:
             new_modin_frame = self._modin_frame.take_2d_labels_or_positional(
                 col_labels=columns
-            ).apply_full_axis(0, map_fn, new_index=self.index, dtypes=np.bool_)
+            ).apply_full_axis(0, map_fn, new_index=self.index, dtypes=bool)
             untouched_frame = self.drop(columns=columns)
         # If we mapped over all the data we are done. If not, we need to
         # prepend the `new_modin_frame` with the raw data from the columns that were

diff --git a/modin/tests/pandas/dataframe/test_binary.py b/modin/tests/pandas/dataframe/test_binary.py
@@ -472,8 +472,6 @@ def test_non_commutative_multiply():
     eval_general(modin_df, pandas_df, lambda s: s * integer)
 
 
-# TODO: just for developing purpose; remove `skip` mark
-@pytest.mark.skip
 @pytest.mark.parametrize(
     "op",
     [

diff --git a/modin/tests/pandas/dataframe/test_default.py b/modin/tests/pandas/dataframe/test_default.py
@@ -528,8 +528,6 @@ def test_info(data, verbose, max_cols, memory_usage, show_counts):
         assert modin_info[1:] == pandas_info[1:]
 
 
-# TODO: just for developing purpose; remove `xfail` mark
-@pytest.mark.xfail
 @pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
 @pytest.mark.parametrize("skipna", [False, True])
 @pytest.mark.parametrize("numeric_only", [False, True])
@@ -718,7 +716,7 @@ def test_pivot_table_data(data, index, columns, values, aggfunc, request):
         "callable_tree_reduce_func" in request.node.callspec.id
         and "int_data" in request.node.callspec.id
     ):
-        expected_exception = TypeError("'float' object is not callable")
+        expected_exception = TypeError("'numpy.float64' object is not callable")
 
     eval_general(
         md_df,