From f5f9ae993ba5ed26461d3c9d26fbefecab88ee69 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Mon, 22 Jul 2024 05:42:51 -0700 Subject: [PATCH 01/20] DOCS-#0000: Update RunLLM Ask AI widget script path (#7345) Signed-off-by: Wei Chen --- docs/_static/custom.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/_static/custom.js b/docs/_static/custom.js index 6b867194374..552612baaa5 100644 --- a/docs/_static/custom.js +++ b/docs/_static/custom.js @@ -3,9 +3,8 @@ document.addEventListener("DOMContentLoaded", function () { script.type = "module"; script.id = "runllm-widget-script" - script.src = "https://cdn.jsdelivr.net/npm/@runllm/search-widget@stable/dist/run-llm-search-widget.es.js"; + script.src = "https://widget.runllm.com"; - script.setAttribute("version", "stable"); script.setAttribute("runllm-keyboard-shortcut", "Mod+j"); // cmd-j or ctrl-j to open the widget. script.setAttribute("runllm-name", "Modin"); script.setAttribute("runllm-position", "BOTTOM_RIGHT"); From 7c1dde071632abb8c54eff7da0ab9d6448cef863 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Wed, 24 Jul 2024 15:39:20 -0500 Subject: [PATCH 02/20] FEAT-#7331: Initial Polars API (#7332) * FEAT-#7331: Initial Polars API This commit adds a polars namespace to Modin, and the DataFrame and Series objects and their respective APIs. This doesn't include error handling and is still missing several polars features: * LazyFrame * Expressions * String, Temporal, Struct, and other Series accessors * Several parameters * Operators that we don't have query compiler methods for * e.g. sin, cos, tan, etc. Those will be handled in a future PR. Signed-off-by: Devin Petersohn * Lint Signed-off-by: Devin Petersohn * flake8 Signed-off-by: Devin Petersohn * isort Signed-off-by: Devin Petersohn * headers Signed-off-by: Devin Petersohn * forgot one Signed-off-by: Devin Petersohn * Add test Signed-off-by: Devin Petersohn * header Signed-off-by: Devin Petersohn * isort Signed-off-by: Devin Petersohn * Add to CI Signed-off-by: Devin Petersohn * fix name Signed-off-by: Devin Petersohn * Update modin/polars/base.py Co-authored-by: Mahesh Vashishtha * address comments Signed-off-by: Devin Petersohn * polars 1 Signed-off-by: Devin Petersohn * Update for polars 1.x and fix some hacks Signed-off-by: Devin Petersohn * Remove hax Signed-off-by: Devin Petersohn * Black Signed-off-by: Devin Petersohn * Address comments Signed-off-by: Devin Petersohn * Lint Signed-off-by: Devin Petersohn * Address comment Signed-off-by: Devin Petersohn --------- Signed-off-by: Devin Petersohn Co-authored-by: Devin Petersohn Co-authored-by: Mahesh Vashishtha --- .github/workflows/ci.yml | 1 + environment-dev.yml | 1 + modin/polars/__init__.py | 17 + modin/polars/base.py | 668 ++++++++ modin/polars/dataframe.py | 1439 +++++++++++++++++ modin/polars/groupby.py | 247 +++ modin/polars/lazyframe.py | 22 + modin/polars/series.py | 2159 ++++++++++++++++++++++++++ modin/tests/polars/test_dataframe.py | 25 + 9 files changed, 4579 insertions(+) create mode 100644 modin/polars/__init__.py create mode 100644 modin/polars/base.py create mode 100644 modin/polars/dataframe.py create mode 100644 modin/polars/groupby.py create mode 100644 modin/polars/lazyframe.py create mode 100644 modin/polars/series.py create mode 100644 modin/tests/polars/test_dataframe.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ec1ca9d22d..5f82d4ca7f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -462,6 +462,7 @@ jobs: if: matrix.engine == 
'python' || matrix.test_task == 'group_4' - run: python -m pytest modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py if: matrix.engine == 'python' || matrix.test_task == 'group_4' + - run: python -m pytest modin/tests/polars/test_dataframe.py - run: | python -m pip install lazy_import python -m pytest modin/tests/pandas/integrations/ diff --git a/environment-dev.yml b/environment-dev.yml index 3ea51032bde..049b3e39830 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -70,3 +70,4 @@ dependencies: - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5 # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI. - numpydoc==1.6.0 + - polars diff --git a/modin/polars/__init__.py b/modin/polars/__init__.py new file mode 100644 index 00000000000..3407698eb64 --- /dev/null +++ b/modin/polars/__init__.py @@ -0,0 +1,17 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +from modin.polars.dataframe import DataFrame +from modin.polars.series import Series + +__all__ = ["DataFrame", "Series"] diff --git a/modin/polars/base.py b/modin/polars/base.py new file mode 100644 index 00000000000..010ee9e946c --- /dev/null +++ b/modin/polars/base.py @@ -0,0 +1,668 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +"""Implement DataFrame/Series public API as polars does.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Sequence + +import polars + +from modin.core.storage_formats import BaseQueryCompiler + +if TYPE_CHECKING: + import numpy as np + + from modin.polars import DataFrame, Series + + +class BasePolarsDataset: + + _query_compiler: BaseQueryCompiler + + @property + def __constructor__(self): + """ + DataFrame constructor. 
+ + Returns: + Constructor of the DataFrame + """ + return type(self) + + def __eq__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.eq( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __ne__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.ne( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __add__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.add( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __sub__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.sub( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __mul__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.mul( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __truediv__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.truediv( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __floordiv__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.floordiv( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __mod__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.mod( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __pow__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.pow( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __and__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.__and__( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __or__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.__or__( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __xor__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.__xor__( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __lt__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.lt( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __le__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.le( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __gt__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.gt( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __ge__(self, other) -> "BasePolarsDataset": + return self.__constructor__( + _query_compiler=self._query_compiler.ge( + other._query_compiler if isinstance(other, BasePolarsDataset) else other + ) + ) + + def __invert__(self) -> "BasePolarsDataset": + return 
self.__constructor__(_query_compiler=self._query_compiler.invert()) + + def __neg__(self) -> "BasePolarsDataset": + return self.__constructor__(_query_compiler=self._query_compiler.negative()) + + def __abs__(self) -> "BasePolarsDataset": + return self.__constructor__(_query_compiler=self._query_compiler.abs()) + + def is_duplicated(self): + """ + Determine whether each row is a duplicate in the DataFrame. + + Returns: + DataFrame with True for each duplicate row, and False for unique rows. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.duplicated(keep=False) + ) + + def is_empty(self) -> bool: + """ + Determine whether the DataFrame is empty. + + Returns: + True if the DataFrame is empty, False otherwise + """ + return self.height == 0 + + def is_unique(self): + """ + Determine whether each row is unique in the DataFrame. + + Returns: + DataFrame with True for each unique row, and False for duplicate rows. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.duplicated(keep=False).invert() + ) + + def n_chunks(self, strategy: str = "first") -> int | list[int]: + raise NotImplementedError("not yet") + + def to_arrow(self): + """ + Convert the DataFrame to Arrow format. + + Returns: + Arrow representation of the DataFrame. + """ + return polars.from_pandas(self._query_compiler.to_pandas()).to_arrow() + + def to_jax(self, device=None): + """ + Convert the DataFrame to JAX format. + + Args: + device: The device to use. + + Returns: + JAX representation of the DataFrame. + """ + return polars.from_pandas(self._query_compiler.to_pandas()).to_jax( + device=device + ) + + def to_numpy( + self, + *, + writable: bool = False, + allow_copy: bool = True, + use_pyarrow: bool | None = None, + zero_copy_only: bool | None = None, + ) -> "np.ndarray": + """ + Convert the DataFrame to a NumPy representation. + + Args: + writable: Whether the NumPy array should be writable. + allow_copy: Whether to allow copying the data. + use_pyarrow: Whether to use PyArrow for conversion. + zero_copy_only: Whether to use zero-copy conversion only. + + Returns: + NumPy representation of the DataFrame. + """ + return polars.from_pandas(self._query_compiler.to_pandas()).to_numpy( + writable=writable, + allow_copy=allow_copy, + use_pyarrow=use_pyarrow, + zero_copy_only=zero_copy_only, + ) + + def to_torch(self): + """ + Convert the DataFrame to PyTorch format. + + Returns: + PyTorch representation of the DataFrame. + """ + return polars.from_pandas(self._query_compiler.to_pandas()).to_torch() + + def bottom_k( + self, + k: int, + *, + by, + descending: bool | Sequence[bool] = False, + nulls_last: bool | Sequence[bool] | None = None, + maintain_order: bool | None = None, + ) -> "BasePolarsDataset": + raise NotImplementedError("not yet") + + def cast(self, dtypes, *, strict: bool = True) -> "BasePolarsDataset": + """ + Cast the DataFrame to the given dtypes. + + Args: + dtypes: Dtypes to cast the DataFrame to. + strict: Whether to enforce strict casting. + + Returns: + DataFrame with the new dtypes. + """ + # TODO: support strict + return self.__constructor__(_query_compiler=self._query_compiler.astype(dtypes)) + + def clone(self) -> "BasePolarsDataset": + """ + Clone the DataFrame. + + Returns: + Cloned DataFrame. + """ + return self.copy() + + def drop_nulls(self, subset=None): + """ + Drop the rows with null values. + + Args: + subset: Columns to consider for null values. + + Returns: + DataFrame with the rows with null values dropped. 
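+
+        Example (a minimal sketch; the column name ``a`` is hypothetical):
+            >>> from modin.polars import DataFrame
+            >>> df = DataFrame({"a": [1.0, None, 3.0]})
+            >>> df.drop_nulls()  # keeps only the rows with no null values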
+ """ + return self.__constructor__( + _query_compiler=self._query_compiler.dropna(subset=subset, how="any") + ) + + def explode(self, columns: str, *more_columns: str) -> "BasePolarsDataset": + """ + Explode the given columns to long format. + + Args: + columns: Columns to explode. + more_columns: Additional columns to explode. + + Returns: + DataFrame with the columns exploded. + """ + if len(more_columns) > 0: + columns = [columns, *more_columns] + return self.__constructor__( + _query_compiler=self._query_compiler.explode(columns) + ) + + def extend(self, other: "BasePolarsDataset") -> "BasePolarsDataset": + """ + Extend the DataFrame with another DataFrame. + + Args: + other: DataFrame to extend with. + + Returns: + Extended DataFrame for convenience. DataFrame is modified in place. + """ + self._query_compiler = self._query_compiler.concat( + axis=0, other=other._query_compiler + ) + return self + + def fill_nan(self, value): + """ + Fill NaN values with the given value. + + Args: + value: Value to fill NaN values with. + + Returns: + DataFrame with NaN values filled. + """ + # TODO: Handle null values differently than nan. + return self.__constructor__(_query_compiler=self._query_compiler.fillna(value)) + + def fill_null( + self, + value: Any | None = None, + strategy: str | None = None, + limit: int | None = None, + *, + matches_supertype: bool = True, + ) -> "BasePolarsDataset": + """ + Fill null values with the given value or strategy. + + Args: + value: Value to fill null values with. + strategy: Strategy to fill null values with. + limit: Maximum number of null values to fill. + matches_supertype: Whether the value matches the supertype. + + Returns: + DataFrame with null values filled. + """ + if strategy == "forward": + strategy = "ffill" + elif strategy == "backward": + strategy = "bfill" + elif strategy in ["min", "max", "mean"]: + value = getattr(self, strategy)()._query_compiler + strategy = None + elif strategy == "zero": + strategy = None + value = 0 + elif strategy == "one": + strategy = None + value = 1 + else: + raise ValueError(f"Unknown strategy: {strategy}") + return self.__constructor__( + _query_compiler=self._query_compiler.fillna( + value=value, method=strategy, limit=limit + ) + ) + + def filter(self, *predicates, **constraints: Any) -> "BasePolarsDataset": + predicates = predicates[0] + for p in predicates[1:]: + predicates = predicates & p + if constraints: + raise NotImplementedError("Named constraints are not supported") + return self.__constructor__( + _query_compiler=self._query_compiler.getitem_array( + predicates._query_compiler + ) + ) + + def gather_every(self, n: int, offset: int = 0) -> "BasePolarsDataset": + """ + Gather every nth row of the DataFrame. + + Args: + n: Number of rows to gather. + offset: Offset to start gathering from. + + Returns: + DataFrame with every nth row gathered. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.getitem_row_array( + slice(offset, None, n) + ) + ) + + def head(self, n: int = 5) -> "BasePolarsDataset": + """ + Get the first n rows of the DataFrame. + + Args: + n: Number of rows to get. + + Returns: + DataFrame with the first n rows. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.getitem_row_array(slice(0, n)) + ) + + def limit(self, n: int = 10) -> "BasePolarsDataset": + """ + Limit the DataFrame to the first n rows. + + Args: + n: Number of rows to limit to. + + Returns: + DataFrame with the first n rows. 
+ """ + return self.head(n) + + def interpolate(self) -> "BasePolarsDataset": + """ + Interpolate values the DataFrame using a linear method. + + Returns: + DataFrame with the interpolated values. + """ + return self.__constructor__(_query_compiler=self._query_compiler.interpolate()) + + def sample( + self, + n: int | "Series" | None = None, + *, + fraction: float | "Series" | None = None, + with_replacement: bool = False, + shuffle: bool = False, + seed: int | None = None, + ) -> "BasePolarsDataset": + """ + Sample the DataFrame. + + Args: + n: Number of rows to sample. + fraction: Fraction of rows to sample. + with_replacement: Whether to sample with replacement. + shuffle: Whether to shuffle the rows. + seed: Seed for the random number generator. + + Returns: + Sampled DataFrame. + """ + return self.__constructor__( + _query_compiler=self.to_pandas() + .sample(n=n, frac=fraction, replace=with_replacement, random_state=seed) + ._query_compiler + ) + + def shift(self, n: int = 1, *, fill_value=None) -> "DataFrame": + raise NotImplementedError("not yet") + + def shrink_to_fit(self) -> "DataFrame": + """ + Shrink the DataFrame to fit in memory. + + Returns: + A copy of the DataFrame. + """ + return self.copy() + + def slice(self, offset: int, length: int) -> "DataFrame": + """ + Slice the DataFrame. + + Args: + offset: Offset to start the slice from. + length: Length of the slice. + + Returns: + Sliced DataFrame. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.getitem_row_array( + slice(offset, offset + length) + ) + ) + + def sort( + self, + by, + *more_by, + descending: bool | Sequence[bool] = False, + nulls_last: bool | Sequence[bool] | None = None, + multithreaded: bool = True, + maintain_order: bool = False, + ) -> "DataFrame": + """ + Sort the DataFrame. + + Args: + by: Column to sort by. + more_by: Additional columns to sort by. + descending: Whether to sort in descending order. + nulls_last: Whether to sort null values last. + multithreaded: Whether to use multiple threads. + maintain_order: Whether to maintain the order of the DataFrame. + + Returns: + Sorted DataFrame. + """ + # TODO: support expressions in by + if len(more_by) > 0: + by = [by, *more_by] + return self.__constructor__( + _query_compiler=self._query_compiler.sort_rows_by_column_values( + by=by, + reverse=descending, + nulls_first=None if nulls_last is None else not nulls_last, + ) + ) + + def tail(self, n: int = 5) -> "DataFrame": + """ + Get the last n rows of the DataFrame. + + Args: + n: Number of rows to get. + + Returns: + DataFrame with the last n rows. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.getitem_row_array(slice(-n, None)) + ) + + def to_dummies( + self, + columns: str | Sequence[str] | None = None, + *, + separator: str = "_", + drop_first: bool = False, + ) -> "DataFrame": + """ + Convert the columns to dummy variables. + + Args: + columns: Columns to convert to dummy variables. + separator: Separator for the dummy variables. + drop_first: Whether to drop the first dummy variable. + + Returns: + DataFrame with the columns converted to dummy variables. 
+ """ + if columns is not None: + if isinstance(columns, str): + columns = [columns] + else: + columns = self.columns + result = self.__constructor__( + _query_compiler=self._query_compiler.get_dummies(columns) + ) + if separator != "_": + result.columns = [ + c.replace(separator, "_") if separator in c else c + for c in result.columns + ] + if drop_first: + columns_to_drop = [ + next( + result_col + for result_col in result.columns + if result_col.startswith(c) + ) + for c in columns + ] + return result.drop(columns_to_drop) + else: + return result + + def top_k( + self, + k: int, + *, + by, + descending: bool | Sequence[bool] = False, + nulls_last: bool | Sequence[bool] | None = None, + maintain_order: bool | None = None, + ) -> "DataFrame": + raise NotImplementedError("not yet") + + def unique(self, subset=None, *, keep="any", maintain_order: bool = False): + """ + Get the unique values in each column. + + Args: + subset: Columns to consider for unique values. + keep: Strategy to keep unique values. + maintain_order: Whether to maintain the order of the unique values. + + Returns: + DataFrame with the unique values in each column. + """ + if keep == "none" or keep == "last": + # TODO: support keep="none" + raise NotImplementedError("not yet") + return self.__constructor__( + _query_compiler=self._query_compiler.unique(subset=subset) + ) + + def equals(self, other: "BasePolarsDataset", *, null_equal: bool = True) -> bool: + """ + Determine whether the DataFrame is equal to another DataFrame. + + Args: + other: DataFrame to compare with. + + Returns: + True if the DataFrames are equal, False otherwise. + """ + return ( + isinstance(other, type(self)) + and self._query_compiler.equals(other._query_compiler) + and ( + null_equal + or ( + not self.to_pandas().isna().any(axis=None) + and not other.to_pandas().isna().any(axis=None) + ) + ) + ) + + @property + def plot(self): + return polars.from_pandas(self._query_compiler.to_pandas()).plot + + def count(self): + """ + Get the number of non-null values in each column. + + Returns: + DataFrame with the counts. + """ + return self.__constructor__(_query_compiler=self._query_compiler.count(axis=0)) diff --git a/modin/polars/dataframe.py b/modin/polars/dataframe.py new file mode 100644 index 00000000000..d4408ff39f0 --- /dev/null +++ b/modin/polars/dataframe.py @@ -0,0 +1,1439 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
+ +"""Module houses ``DataFrame`` class, that is distributed version of ``polars.DataFrame``.""" + +from __future__ import annotations + +from collections import OrderedDict +from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Sequence + +import numpy as np +import pandas +import polars +from pandas.core.dtypes.common import is_list_like + +from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler +from modin.pandas import DataFrame as ModinPandasDataFrame +from modin.pandas import Series as ModinPandasSeries +from modin.pandas.io import from_pandas +from modin.polars.base import BasePolarsDataset + +if TYPE_CHECKING: + from modin.polars import Series + from modin.polars.groupby import GroupBy + from modin.polars.lazyframe import LazyFrame + + +class DataFrame(BasePolarsDataset): + + def __init__( + self, + data=None, + schema=None, + *, + schema_overrides=None, + strict=True, + orient=None, + infer_schema_length=100, + nan_to_null=False, + _query_compiler=None, + ) -> None: + """ + Constructor for DataFrame object. + + Args: + data: Data to be converted to DataFrame. + schema: Schema of the data. + schema_overrides: Schema overrides. + strict: Whether to enforce strict schema. + orient: Orientation of the data. + infer_schema_length: Length of the data to infer schema. + nan_to_null: Whether to convert NaNs to nulls. + _query_compiler: Query compiler to use. + """ + if _query_compiler is None: + if isinstance(data, (ModinPandasDataFrame, ModinPandasSeries)): + self._query_compiler: BaseQueryCompiler = data._query_compiler.copy() + else: + self._query_compiler: BaseQueryCompiler = from_pandas( + polars.DataFrame( + data=data, + schema=schema, + schema_overrides=schema_overrides, + strict=strict, + orient=orient, + infer_schema_length=infer_schema_length, + nan_to_null=nan_to_null, + ).to_pandas() + )._query_compiler + else: + self._query_compiler: BaseQueryCompiler = _query_compiler + + def __getitem__(self, item): + """ + Get item from DataFrame. + + Args: + item: Column to get. + + Returns: + Series or DataFrame with the column. + """ + if is_list_like(item): + missing = [i for i in item if i not in self.columns] + if len(missing) > 0: + raise polars.exceptions.ColumnNotFoundError(missing[0]) + return self.__constructor__( + _query_compiler=self._query_compiler.getitem_array(item) + ) + else: + if item not in self.columns: + raise polars.exceptions.ColumnNotFoundError(item) + from .series import Series + + return Series(_query_compiler=self._query_compiler.getitem_array([item])) + + def _to_polars(self) -> polars.DataFrame: + """ + Convert the DataFrame to Polars format. + + Returns: + Polars representation of the DataFrame. + """ + return polars.from_pandas(self._query_compiler.to_pandas()) + + def _get_columns(self): + """ + Get columns of the DataFrame. + + Returns: + List of columns. + """ + return list(self._query_compiler.columns) + + def _set_columns(self, new_columns): + """ + Set columns of the DataFrame. + + Args: + new_columns: New columns to set. 
+ """ + new_query_compiler = self._query_compiler.copy() + new_query_compiler.columns = new_columns + self._query_compiler = new_query_compiler + + columns = property(_get_columns, _set_columns) + + _sorted_columns_cache = None + + def _get_sorted_columns(self): + if self._sorted_columns_cache is None: + self._sorted_columns_cache = [False] * len(self.columns) + return self._sorted_columns_cache + + def _set_sorted_columns(self, value): + self._sorted_columns_cache = value + + _sorted_columns = property(_get_sorted_columns, _set_sorted_columns) + + @property + def dtypes(self): + """ + Get dtypes of the DataFrame. + + Returns: + List of dtypes. + """ + return polars.from_pandas( + pandas.DataFrame(columns=self.columns).astype(self._query_compiler.dtypes) + ).dtypes + + @property + def flags(self): + """ + Get flags of the DataFrame. + + Returns: + List of flags. + """ + # TODO: Add flags support + return [] + + @property + def height(self): + """ + Get height of the DataFrame. + + Returns: + Number of rows in the DataFrame. + """ + return len(self._query_compiler.index) + + @property + def schema(self): + """ + Get schema of the DataFrame. + + Returns: + OrderedDict of column names and dtypes. + """ + return OrderedDict(zip(self.columns, self.dtypes, strict=True)) + + @property + def shape(self): + """ + Get shape of the DataFrame. + + Returns: + Tuple of (height, width + """ + return self.height, self.width + + @property + def width(self): + """ + Get width of the DataFrame. + + Returns: + Number of columns in the DataFrame. + """ + return len(self.columns) + + def __repr__(self): + """ + Get string representation of the DataFrame. + + Returns: + String representation of the DataFrame. + """ + return repr(polars.from_pandas(self._query_compiler.to_pandas())) + + def max(self, axis=None): + """ + Get the maximum value in each column. + + Args: + axis: Axis to get the maximum value on. + + Returns: + DataFrame with the maximum values. + """ + if axis is None or axis == 0: + return self.__constructor__( + _query_compiler=self._query_compiler.max(axis=0) + ) + else: + return self.max_horizontal() + + def max_horizontal(self): + """ + Get the maximum value in each row. + + Returns: + DataFrame with the maximum values. + """ + return self.__constructor__(_query_compiler=self._query_compiler.max(axis=1)) + + def _convert_non_numeric_to_null(self): + """ + Convert non-numeric columns to null. + + Returns: + DataFrame with non-numeric columns converted to null. + """ + non_numeric_cols = [ + c + for c, t in zip(self.columns, self.dtypes, strict=True) + if not t.is_numeric() + ] + if len(non_numeric_cols) > 0: + return self.__constructor__( + _query_compiler=self._query_compiler.write_items( + slice(None), + [self.columns.index(c) for c in non_numeric_cols], + pandas.NA, + need_columns_reindex=False, + ).astype({c: self._query_compiler.dtypes[c] for c in non_numeric_cols}) + ) + return self.copy() + + def mean(self, *, axis=None, null_strategy="ignore"): + """ + Get the mean of each column. + + Args: + axis: Axis to get the mean on. + null_strategy: Strategy to handle null values. + + Returns: + DataFrame with the mean of each column or row. 
+ """ + # TODO: this converts non numeric columns to numeric + obj = self._convert_non_numeric_to_null() + if axis is None or axis == 0: + return self.__constructor__( + _query_compiler=obj._query_compiler.mean( + axis=0, + skipna=True if null_strategy == "ignore" else False, + ) + ) + else: + return obj.mean_horizontal( + ignore_nulls=True if null_strategy == "ignore" else False + ) + + def median(self) -> "DataFrame": + """ + Get the median of each column. + + Returns: + DataFrame with the median of each column. + """ + return self.__constructor__( + _query_compiler=self._convert_non_numeric_to_null()._query_compiler.median( + 0 + ) + ) + + def mean_horizontal(self, *, ignore_nulls: bool = True): + """ + Get the mean of each row. + + Args: + ignore_nulls: Whether to ignore null values. + + Returns: + DataFrame with the mean of each row. + """ + obj = self._convert_non_numeric_to_null() + return self.__constructor__( + _query_compiler=obj._query_compiler.mean(axis=1, skipna=ignore_nulls) + ) + + def min(self, axis=None): + """ + Get the minimum value in each column. + + Args: + axis: Axis to get the minimum value on. + + Returns: + DataFrame with the minimum values of each row or column. + """ + if axis is None or axis == 0: + return self.__constructor__( + _query_compiler=self._query_compiler.min(axis=0) + ) + else: + return self.max_horizontal() + + def min_horizontal(self): + """ + Get the minimum value in each row. + + Returns: + DataFrame with the minimum values of each row. + """ + return self.__constructor__(_query_compiler=self._query_compiler.min(axis=1)) + + def product(self): + """ + Get the product of each column. + + Returns: + DataFrame with the product of each column. + """ + obj = self._convert_non_numeric_to_null() + return self.__constructor__(_query_compiler=obj._query_compiler.prod(axis=0)) + + def quantile(self, quantile: float, interpolation="nearest"): + """ + Get the quantile of each column. + + Args: + quantile: Quantile to get. + interpolation: Interpolation method. + + Returns: + DataFrame with the quantile of each column. + """ + obj = self._convert_non_numeric_to_null() + # TODO: interpolation support + return self.__constructor__( + _query_compiler=obj._query_compiler.quantile_for_single_value(quantile) + ) + + def std(self, ddof: int = 1): + """ + Get the standard deviation of each column. + + Args: + ddof: Delta degrees of freedom. + + Returns: + DataFrame with the standard deviation of each column + """ + obj = self._convert_non_numeric_to_null() + return self.__constructor__(_query_compiler=obj._query_compiler.std(ddof=ddof)) + + def sum(self, axis: int | None = None, null_strategy="ignore"): + """ + Get the sum of each column. + + Args: + axis: Axis to get the sum on. + null_strategy: Strategy to handle null values. + + Returns: + DataFrame with the sum of each column or row. + """ + obj = self._convert_non_numeric_to_null() + if axis is None or axis == 0: + return self.__constructor__( + _query_compiler=obj._query_compiler.sum( + axis=0, + skipna=True if null_strategy == "ignore" else False, + ) + ) + else: + return obj.sum_horizontal( + ignore_nulls=True if null_strategy == "ignore" else False + ) + + def sum_horizontal(self, *, ignore_nulls: bool = True): + """ + Get the sum of each row. + + Args: + ignore_nulls: Whether to ignore null values. + + Returns: + DataFrame with the sum of each row. 
+ """ + # TODO: if there are strings in the row, polars will append numeric values + # this behavior may not be intended so doing this instead (for now) + obj = self._convert_non_numeric_to_null() + return self.__constructor__( + _query_compiler=obj._query_compiler.sum(axis=1, skipna=ignore_nulls) + ) + + def var(self, ddof: int = 1): + """ + Get the variance of each column. + + Args: + ddof: Delta degrees of freedom. + + Returns: + DataFrame with the variance of each column. + """ + obj = self._convert_non_numeric_to_null() + return self.__constructor__(_query_compiler=obj._query_compiler.var(ddof=ddof)) + + def approx_n_unique(self): + """ + Get the approximate number of unique values in each column. + + Returns: + DataFrame with the approximate number of unique values in each column. + """ + return self.__constructor__(_query_compiler=self._query_compiler.nunique()) + + def describe(self, percentiles: Sequence[float] | float = (0.25, 0.5, 0.75)): + """ + Get the descriptive statistics of each column. + + Args: + percentiles: Percentiles to get. + + Returns: + DataFrame with the descriptive statistics of each column. + """ + return self.__constructor__( + self.__constructor__( + _query_compiler=self._query_compiler.describe( + percentiles=np.array(percentiles) + ).astype( + { + k: str + for k, v in zip(self.columns, self.dtypes, strict=True) + if v == polars.String + } + ) + ) + .to_pandas() + .loc[ + [ + "count", + # "null_count", TODO: support null_count in describe + "mean", + "std", + "min", + "25%", + "50%", + "75%", + "max", + ] + ] + .reset_index() + .rename({"index": "statistic"}) + ) + + def estimated_size(self, unit="b"): + """ + Get the estimated amount of memory used by the DataFrame. + + Args: + unit: Unit of the memory size. + + Returns: + DataFrame with the extimated memory usage. + """ + return self.__constructor__(_query_compiler=self._query_compiler.memory_usage()) + + def glimpse( + self, + *, + max_items_per_column: int = 10, + max_colname_length: int = 50, + return_as_string: bool = False, + ) -> str | None: + raise NotImplementedError("not yet") + + def n_unique(self, subset=None) -> int: + """ + Get the number of unique values in each column. + + Args: + subset: Columns to get the number of unique values for. + + Returns: + Number of unique values in each column. + """ + if subset is not None: + raise NotImplementedError("not yet") + return ( + self.is_unique()._query_compiler.sum(axis=0).to_pandas().squeeze(axis=None) + ) + + def null_count(self) -> "DataFrame": + """ + Get the number of null values in each column. + + Returns: + DataFrame with the number of null values in each column. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.isna().sum(axis=0) + ) + + def to_pandas(self): + """ + Convert the DataFrame to Pandas format. + + Returns: + modin.pandas representation of the DataFrame. + """ + return ModinPandasDataFrame(query_compiler=self._query_compiler.copy()) + + def group_by( + self, + *by, + maintain_order: bool = False, + **named_by, + ) -> "GroupBy": + """ + Group the DataFrame by the given columns. + + Args: + by: Columns to group by. + maintain_order: Whether to maintain the order of the groups. + named_by: Named columns to group by. + + Returns: + GroupBy object. + """ + from modin.polars.groupby import GroupBy + + return GroupBy(self, *by, maintain_order=maintain_order, **named_by) + + def drop(self, *columns, strict: bool = True) -> "DataFrame": + """ + Drop the given columns. + + Args: + columns: Columns to drop. 
+ strict: Whether to raise an error if a column is not found. + + Returns: + DataFrame with the columns dropped. + """ + if strict: + for c in columns: + if c not in self.columns: + raise KeyError(c) + columns = list(columns) if not isinstance(columns[0], list) else columns[0] + return self.__constructor__(_query_compiler=self._query_compiler.drop(columns)) + + def drop_in_place(self, name: str) -> "DataFrame": + """ + Drop the given column in place and return the dropped column. + + Args: + name: Column to drop. + + Returns: + The column that was dropped from the DataFrame. + """ + col_to_return = self[name] + self._query_compiler = self._query_compiler.drop([name]) + return col_to_return + + def get_column(self, name: str) -> "Series": + """ + Get the column by name. + + Args: + name: Name of the column to get. + + Returns: + Series with the column. + """ + return self[name] + + def get_column_index(self, name: str) -> int: + """ + Find the index of the column by name. + + Args: + name: Name of the column to find. + + Returns: + Index of the column. + """ + return self.columns.index(name) + + def get_columns(self) -> list["Series"]: + """ + Get the columns of the DataFrame. + + Returns: + List of Series with the columns. + """ + return [self[name] for name in self.columns] + + def group_by_dynamic( + self, + index_column, + *, + every, + period, + offset, + truncate, + include_boundaries, + closed, + label, + group_by, + start_by, + check_sorted, + ): + raise NotImplementedError("not yet") + + def hstack(self, columns, *, inplace: bool = False) -> "DataFrame": + """ + Stack the given columns horizontally. + + Args: + columns: Columns to stack. + inplace: Whether to stack the columns in place. + + Returns: + DataFrame with the columns stacked horizontally. + """ + if isinstance(columns, DataFrame): + columns = columns.get_columns() + result_query_compiler = self._query_compiler.concat( + axis=1, other=[c._query_compiler for c in columns] + ) + if inplace: + self._query_compiler = result_query_compiler + return self + return self.__constructor__(_query_compiler=result_query_compiler) + + def insert_column(self, index: int, column: "Series") -> "DataFrame": + """ + Insert the given column at the given index. + + Args: + index: Index to insert the column at. + column: Column to insert. + name: Name of the column to insert. + + Returns: + DataFrame with the column inserted. + """ + return self.__constructor__( + self._query_compiler.insert(index, column.name, column._query_compiler) + ) + + def item(self, row: int | None = None, column: str | int | None = None) -> Any: + """ + Get the value at the given row and column. + + Args: + row: Row to get the value from. + column: Column to get the value from. + + Returns: + Value at the given row and column. + """ + if row is None: + row = 0 + if column is None: + column = 0 + if isinstance(column, str): + column = self.columns.index(column) + return ( + self._query_compiler.take_2d_labels(row, column) + .to_pandas() + .squeeze(axis=None) + ) + + def iter_columns(self) -> Iterator["Series"]: + """ + Iterate over the columns of the DataFrame. + + Returns: + Iterator over the columns. + """ + return iter(self.get_columns()) + + def iter_rows( + self, + *, + named: bool = False, + buffer_size: int = 512, + ) -> Iterator[tuple[Any]] | Iterator[dict[str, Any]]: + """ + Iterate over the rows of the DataFrame. + + Returns: + Iterator over the rows. 
+ """ + raise NotImplementedError("not yet") + + def iter_slices( + self, + n_rows: int = 10000, + ) -> Iterator["DataFrame"]: + """ + Iterate over the slices of the DataFrame. + + Args: + n_rows: Number of rows in each slice. + + Returns: + Iterator over the slices. + """ + raise NotImplementedError("not yet") + + def join( + self, + other: "DataFrame", + on: str | list[str] | None = None, + how: str = "inner", + *, + left_on: str | list[str] | None = None, + right_on: str | list[str] | None = None, + suffix: str = "_right", + validate="m:m", + join_nulls: bool = False, + coalesce: bool | None = None, + ) -> "DataFrame": + """ + Join the DataFrame with another DataFrame. + + Args: + other: DataFrame to join with. + on: Column to join on. + how: How to join the DataFrames. + + Returns: + Joined DataFrame. + """ + if how == "full": + how = "outer" + elif how == "cross": + raise NotImplementedError("not yet") + elif how == "semi": + how = "right" + elif how == "anti": + raise NotImplementedError("not yet") + return self.__constructor__( + _query_compiler=self._query_compiler.merge( + other._query_compiler, + on=on, + how=how, + suffixes=("", suffix), + left_on=left_on, + right_on=right_on, + ) + ) + + def join_asof( + self, + other: "DataFrame", + *, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, + by_left: str | Sequence[str] | None = None, + by_right: str | Sequence[str] | None = None, + by: str | Sequence[str] | None = None, + strategy: str = "backward", + suffix: str = "_right", + tolerance: str, + ) -> "DataFrame": + """ + Join the DataFrame with another DataFrame using asof logic. + + Args: + other: DataFrame to join with. + left_on: Column to join on in the left DataFrame. + right_on: Column to join on in the right DataFrame. + on: Column to join on in both DataFrames. + by_left: Columns to join on in the left DataFrame. + by_right: Columns to join on in the right DataFrame. + by: Columns to join on in both DataFrames. + strategy: Strategy to use for the join. + suffix: Suffix to add to the columns. + tolerance: Tolerance for the join. + + Returns: + Joined DataFrame. + """ + if on is not None and left_on is None and right_on is None: + left_on = right_on = on + if by is not None and by_left is None and by_right is None: + by_left = by_right = by + return self.__constructor__( + _query_compiler=self._query_compiler.merge_asof( + other._query_compiler, + left_on=left_on, + right_on=right_on, + left_by=by_left, + right_by=by_right, + direction=strategy, + suffixes=("", suffix), + tolerance=tolerance, + ) + ) + + def melt( + self, + id_vars=None, + value_vars=None, + variable_name: str | None = None, + value_name: str | None = None, + ) -> "DataFrame": + """ + Melt the DataFrame. + + Args: + id_vars: Columns to keep. + value_vars: Columns to melt. + variable_name: Name of the variable column. + value_name: Name of the value column. + + Returns: + Melted DataFrame. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.melt( + id_vars=id_vars, + value_vars=value_vars, + var_name=variable_name, + value_name=value_name, + ) + ) + + def merge_sorted(self, other: "DataFrame", on: str | list[str]) -> "DataFrame": + # TODO: support natural join + sort + raise NotImplementedError("not yet") + + def partition_by( + self, + by, + *more_by, + maintain_order: bool = True, + include_key: bool = True, + as_dict: bool = False, + ) -> list["DataFrame"] | dict[Any, "DataFrame"]: + """ + Partition the DataFrame by the given columns. 
+ + Args: + by: Columns to partition by. + more_by: Additional columns to partition by. + maintain_order: Whether to maintain the order of the partitions. + include_key: Whether to include the partition key. + as_dict: Whether to return the partitions as a dictionary. + + Returns: + List of DataFrames or dictionary of DataFrames. + """ + if isinstance(by, str): + by = [by, *more_by] + elif isinstance(by, list): + by = [*by, *more_by] + if as_dict: + return { + k: self.__constructor__(v) + for k, v in self.to_pandas() + .groupby(by, as_index=not include_key) + .groups + } + else: + return [ + self.__constructor__(g) + for g in self.to_pandas().groupby(by, as_index=not include_key) + ] + + def pipe(self, function, *args, **kwargs) -> Any: + return function(self, *args, **kwargs) + + def pivot( + self, + *, + values, + index, + columns, + aggregate_function=None, + maintain_order: bool = True, + sort_columns: bool = False, + separator: str = "_", + ) -> "DataFrame": + """ + Pivot the DataFrame. + + Args: + values: Values to pivot. + index: Index columns. + columns: Columns to pivot. + aggregate_function: Function to aggregate the values. + maintain_order: Whether to maintain the order of the pivot. + sort_columns: Whether to sort the columns. + separator: Separator for the columns. + + Returns: + Pivoted DataFrame. + """ + # TODO: handle maintain_order, sort_columns, separator + return self.__constructor__( + _query_compiler=self._query_compiler.pivot( + values=values, + index=index, + columns=columns, + agg=aggregate_function, + ) + ) + + def rechunk(self) -> "DataFrame": + """ + Rechunk the DataFrame into the given number of partitions. + + Returns: + Rechunked DataFrame. + """ + return self.copy() + + def rename(self, mapping: dict[str, str] | callable) -> "DataFrame": + """ + Rename the columns of the DataFrame. + + Args: + mapping: Mapping of old names to new names. + + Returns: + DataFrame with the columns renamed. + """ + if callable(mapping): + mapping = {c: mapping(c) for c in self.columns} + # TODO: add a query compiler method for `rename` + new_columns = {c: mapping.get(c, c) for c in self.columns} + new_obj = self.copy() + new_obj.columns = new_columns + return new_obj + + def replace_column(self, index: int, column: "Series") -> "DataFrame": + """ + Replace the column at the given index with the new column. + + Args: + index: Index of the column to replace. + column: New column to replace with. + + Returns: + DataFrame with the column replaced. + """ + self._query_compiler = self._query_compiler.drop([self.columns[index]]).insert( + index, + column.name, + column._query_compiler, + ) + return self + + def reverse(self) -> "DataFrame": + """ + Reverse the DataFrame. + + Returns: + Reversed DataFrame. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.getitem_row_array( + slice(None, None, -1) + ) + ) + + def rolling(self, index_column, *, period, offset, closed, group_by, check_sorted): + raise NotImplementedError("not yet") + + def row( + self, index: int | None = None, *, by_predicate=None, named: bool = False + ) -> tuple[Any] | dict[str, Any]: + """ + Get the row at the given index. + + Args: + index: Index of the row to get. + by_predicate: Predicate to get the row by. + named: Whether to return the row as a dictionary. + + Returns: + Row at the given index. 
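+
+        Example (illustrative; assumes ``df`` has columns ``a`` and ``b``):
+            >>> df.row(0)              # a tuple such as (1, 10)
+            >>> df.row(0, named=True)  # a dict such as {"a": 1, "b": 10}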
+ """ + if index is not None: + if named: + return dict(self.to_pandas().iloc[index]) + else: + return tuple(self.to_pandas().iloc[index]) + else: + # TODO: support expressions + raise NotImplementedError("not yet") + + def rows(self, *, named: bool = False) -> list[tuple[Any]] | list[dict[str, Any]]: + raise NotImplementedError("not yet") + + def rows_by_key( + self, + key: Any, + *, + named: bool = False, + include_key: bool = False, + unique: bool = False, + ) -> dict[Any, Iterable[Any]]: + raise NotImplementedError("not yet") + + def select(self, *exprs, **named_exprs) -> "DataFrame": + # TODO: support expressions + raise NotImplementedError("not yet") + + def select_seq(self, *exprs, **named_exprs) -> "DataFrame": + # TODO: support expressions + raise NotImplementedError("not yet") + + def set_sorted( + self, column: str | Iterable[str], *more_columns: str, descending: bool = False + ) -> "DataFrame": + """ + Set the columns to be sorted. + + Args: + column: Column to sort by. + more_columns: Additional columns to sort by. + descending: Whether to sort in descending order. + + Returns: + DataFrame with the columns sorted. + """ + if len(more_columns) > 0: + if isinstance(column, Iterable): + column = [*column, *more_columns] + else: + column = [column, *more_columns] + if isinstance(column, str): + column = [column] + new_sorted_columns = [c in column for c in self.columns] + obj = self.copy() + obj._sorted_columns = new_sorted_columns + return obj + + def sql(self, query: str, *, table_name: str = "self") -> "DataFrame": + raise NotImplementedError("not yet") + + def to_series(self, index: int = 0) -> "Series": + """ + Convert the DataFrame at index provided to a Series. + + Args: + index: Index of the column to convert to a Series. + + Returns: + Series representation of the DataFrame at index provided. + """ + return self[self.columns[index]] + + def transpose( + self, + *, + include_header: bool = False, + header_name: str = "column", + column_names: str | Sequence[str] | None = None, + ) -> "DataFrame": + """ + Transpose the DataFrame. + + Args: + include_header: Whether to include a header. + header_name: Name of the header. + column_names: Names of the columns. + + Returns: + Transposed DataFrame. + """ + result = self.__constructor__(_query_compiler=self._query_compiler.transpose()) + if column_names is not None: + result.columns = column_names + elif include_header: + result.columns = [f"{header_name}_{i}" for i in range(result.width)] + return result + + def unnest(self, columns, *more_columns) -> "DataFrame": + """ + Unnest the given columns. + + Args: + columns: Columns to unnest. + more_columns: Additional columns to unnest. + + Returns: + DataFrame with the columns unnested. + """ + raise NotImplementedError("not yet") + + def unstack( + self, + step: int, + how: str = "vertical", + columns=None, + fill_values: list[Any] | None = None, + ): + """ + Unstack the DataFrame. + + Args: + step: Step to unstack by. + how: How to unstack the DataFrame. + columns: Columns to unstack. + fill_values: Values to fill the unstacked DataFrame with. + + Returns: + Unstacked DataFrame. + """ + raise NotImplementedError("not yet") + + def update( + self, + other: "DataFrame", + on: str | Sequence[str] | None = None, + how: Literal["left", "inner", "full"] = "left", + *, + left_on: str | Sequence[str] | None = None, + right_on: str | Sequence[str] | None = None, + include_nulls: bool = False, + ) -> "DataFrame": + """ + Update the DataFrame with another DataFrame. 
+ + Args: + other: DataFrame to update with. + on: Column to update on. + how: How to update the DataFrame. + + Returns: + Updated DataFrame. + """ + raise NotImplementedError("not yet") + + def upsample( + self, + time_column: str, + *, + every: str, + offset: str | None = None, + group_by: str | Sequence[str] | None = None, + maintain_order: bool = False, + ) -> "DataFrame": + raise NotImplementedError("not yet") + + def vstack(self, other: "DataFrame", *, in_place: bool = False) -> "DataFrame": + """ + Stack the given DataFrame vertically. + + Args: + other: DataFrame to stack. + in_place: Whether to stack the DataFrames in place. + + Returns: + Stacked DataFrame. + """ + if in_place: + self._query_compiler = self._query_compiler.concat( + axis=0, other=other._query_compiler + ) + return self + else: + return self.__constructor__( + _query_compiler=self._query_compiler.concat( + axis=0, other=other._query_compiler + ) + ) + + def with_columns(self, *exprs, **named_exprs) -> "DataFrame": + # TODO: support expressions + raise NotImplementedError("not yet") + + def with_columns_seq(self, *exprs, **named_exprs) -> "DataFrame": + # TODO: support expressions + raise NotImplementedError("not yet") + + def with_row_index(self, name: str = "index", offset: int = 0) -> "DataFrame": + """ + Add a row index to the DataFrame. + + Args: + name: Name of the row index. + offset: Offset for the row index. + + Returns: + DataFrame with the row index added. + """ + if offset != 0: + obj = self.copy() + obj.index = obj.index + offset + result = self.__constructor__( + _query_compiler=self._query_compiler.reset_index(drop=False) + ) + result.columns = [name, *self.columns] + return result + + with_row_count = with_row_index + + def map_rows( + self, function: callable, return_dtype=None, *, inference_size: int = 256 + ) -> "DataFrame": + """ + Apply the given function to the DataFrame. + + Args: + function: Function to apply. + return_dtype: Return type of the function. + inference_size: Size of the inference. + + Returns: + DataFrame with the function applied. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.apply(function, axis=1) + ) + + def corr(self, **kwargs: Any) -> "DataFrame": + """ + Compute the correlation of the DataFrame. + + Returns: + DataFrame with the correlation. + """ + return self.__constructor__(_query_compiler=self._query_compiler.corr(**kwargs)) + + def lazy(self) -> "LazyFrame": + """ + Convert the DataFrame to a lazy DataFrame. + + Returns: + Lazy DataFrame. + """ + raise NotImplementedError("not yet") + + @classmethod + def deserialize(cls, source) -> "DataFrame": + """ + Deserialize the DataFrame. + + Args: + source: Source to deserialize. + + Returns: + Deserialized DataFrame. + """ + return cls(polars.DataFrame.deserialize(source)) + + def serialize(self, file=None) -> str | None: + """ + Serialize the DataFrame. + + Args: + file: File to serialize to. + + Returns: + Serialized DataFrame. + """ + return polars.from_pandas(self._query_compiler.to_pandas()).serialize(file) + + @property + def style(self): + """ + Create a Great Table for styling. + + Returns: + GreatTable object. + """ + return self._to_polars().style + + def to_dict( + self, *, as_series: bool = True + ) -> dict[str, "Series"] | dict[str, list[Any]]: + """ + Convert the DataFrame to a dictionary representation. + + Args: + as_series: Whether to convert the columns to Series. + + Returns: + Dictionary representation of the DataFrame. 
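+
+        Example (a minimal sketch with a hypothetical column ``a``):
+            >>> from modin.polars import DataFrame
+            >>> df = DataFrame({"a": [1, 2]})
+            >>> df.to_dict(as_series=False)  # {"a": [1, 2]}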
+ """ + if as_series: + return {name: self[name] for name in self.columns} + else: + return polars.from_pandas(self._query_compiler.to_pandas()).to_dict( + as_series=as_series + ) + + def to_dicts(self) -> list[dict[str, Any]]: + """ + Convert the DataFrame to a list of dictionaries. + + Returns: + List of dictionaries. + """ + return self._to_polars().to_dicts() + + def to_init_repr(self, n: int = 1000) -> str: + """ + Get the string representation of the DataFrame for initialization. + + Returns: + String representation of the DataFrame for initialization. + """ + return self._to_polars().to_init_repr(n) + + def to_struct(self, name: str = "") -> "Series": + """ + Convert the DataFrame to a struct. + + Args: + name: Name of the struct. + + Returns: + Series representation of the DataFrame as a struct. + """ + raise NotImplementedError("not yet") + + def unpivot( + self, + on, + *, + index, + variable_name: str | None = None, + value_name: str | None = None, + ) -> "DataFrame": + """ + Unpivot a DataFrame from wide to long format. + + Args: + on: Columns to unpivot. + index: Columns to keep. + variable_name: Name of the variable column. + value_name: Name of the value column. + + Returns: + Unpivoted DataFrame. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.melt( + on=on, + index=index, + var_name=variable_name, + value_name=value_name, + ) + ) + + write_avro = write_clipboard = write_csv = write_database = write_delta = ( + write_excel + ) = write_ipc = write_ipc_stream = write_json = write_ndjson = write_parquet = ( + write_parquet_partitioned + ) = lambda *args, **kwargs: (_ for _ in ()).throw(NotImplementedError("not yet")) + + def clear(self, n: int = 0) -> "DataFrame": + """ + Create an empty (n=0) or null filled (n>0) DataFrame. + + Args: + n: Number of rows to create. + + Returns: + Empty or null filled DataFrame. + """ + return self.__constructor__(polars.DataFrame(schema=self.schema).clear(n=n)) + + def collect_schema(self) -> dict[str, str]: + """ + Collect the schema of the DataFrame. + + Returns: + Dictionary of the schema. + """ + return self.schema + + def fold(self, operation: callable) -> "Series": + """ + Fold the DataFrame. + + Args: + operation: Operation to fold the DataFrame with. + + Returns: + Series with the folded DataFrame. + """ + raise NotImplementedError("not yet") + + def hash_rows( + self, + seed: int = 0, + seed_1: int | None = None, + seed_2: int | None = None, + seed_3: int | None = None, + ) -> "Series": + raise NotImplementedError("not yet") diff --git a/modin/polars/groupby.py b/modin/polars/groupby.py new file mode 100644 index 00000000000..ec6305a4b2b --- /dev/null +++ b/modin/polars/groupby.py @@ -0,0 +1,247 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
+
+"""Implement GroupBy public API as polars does."""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from modin.polars import DataFrame
+
+
+class GroupBy:
+
+    def __init__(
+        self,
+        df: "DataFrame",
+        *by,
+        maintain_order: bool = False,
+        **named_by,
+    ) -> None:
+        self.df = df
+        if len(by) == 1:
+            self.by = by[0]
+        else:
+            if all(isinstance(b, str) and b in self.df.columns for b in by):
+                self.by = self.df[list(by)]._query_compiler
+            elif all(isinstance(b, type(self.df._query_compiler)) for b in by):
+                self.by = by
+            else:
+                raise NotImplementedError("not yet")
+        self.named_by = named_by
+        self.maintain_order = maintain_order
+
+    def agg(self, *aggs, **named_aggs):
+        raise NotImplementedError("not yet")
+
+    def all(self):
+        raise NotImplementedError("not yet")
+
+    def map_groups(self, function) -> "DataFrame":
+        raise NotImplementedError("not yet")
+
+    apply = map_groups
+
+    def count(self):
+        return self.len(name="count")
+
+    def first(self) -> "DataFrame":
+        return self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_first(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    sort=not self.maintain_order,
+                    as_index=True,
+                ),
+                agg_args=(),
+                agg_kwargs={},
+                drop=False,
+            ).reset_index(drop=False)
+        )
+
+    def head(self, n: int = 5):
+        return self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_head(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    sort=not self.maintain_order,
+                    as_index=False,
+                ),
+                agg_args=(),
+                agg_kwargs=dict(n=n),
+                drop=False,
+            )
+        )
+
+    def last(self) -> "DataFrame":
+        return self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_last(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    sort=not self.maintain_order,
+                    as_index=True,
+                ),
+                agg_args=(),
+                agg_kwargs={},
+                drop=False,
+            ).reset_index(drop=False)
+        )
+
+    def len(self, name: str | None = None) -> "DataFrame":
+        if name is None:
+            name = "len"
+        result = self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_size(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    sort=not self.maintain_order,
+                    as_index=False,
+                ),
+                agg_args=(),
+                agg_kwargs={},
+                drop=False,
+            )
+        )
+        result._query_compiler.columns = [
+            c if c != "size" else name for c in result.columns
+        ]
+        return result
+
+    def max(self) -> "DataFrame":
+        return self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_max(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    sort=not self.maintain_order,
+                    as_index=False,
+                ),
+                agg_args=(),
+                agg_kwargs={},
+                drop=False,
+            )
+        )
+
+    def mean(self) -> "DataFrame":
+        # TODO: Non numeric columns are dropped, but in Polars they are converted to null
+        return self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_mean(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    sort=not self.maintain_order,
+                    as_index=True,
+                ),
+                agg_args=(),
+                agg_kwargs=dict(numeric_only=True),
+                drop=False,
+            ).reset_index(drop=False)
+        )
+
+    def median(self) -> "DataFrame":
+        # TODO: Non numeric columns are dropped, but in Polars they are converted to null
+        return self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_median(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    sort=not self.maintain_order,
+                    as_index=True,
+                ),
+                agg_args=(),
+                agg_kwargs=dict(numeric_only=True),
+                drop=False,
+            ).reset_index(drop=False)
+        )
+
+    def min(self) -> "DataFrame":
+        return self.df.__constructor__(
+            _query_compiler=self.df._query_compiler.groupby_min(
+                self.by,
+                axis=0,
+                groupby_kwargs=dict(
+                    
sort=not self.maintain_order, + as_index=False, + ), + agg_args=(), + agg_kwargs={}, + drop=False, + ) + ) + + def n_unique(self) -> "DataFrame": + return self.df.__constructor__( + _query_compiler=self.df._query_compiler.groupby_nunique( + self.by, + axis=0, + groupby_kwargs=dict( + sort=not self.maintain_order, + as_index=False, + ), + agg_args=(), + agg_kwargs={}, + drop=False, + ) + ) + + def quantile(self, quantile: float, interpolation="nearest") -> "DataFrame": + # TODO: Non numeric columns are dropped, but in Polars they are converted to null + # TODO: interpolation types not yet supported + return self.df.__constructor__( + _query_compiler=self.df._query_compiler.groupby_quantile( + self.by, + axis=0, + groupby_kwargs=dict( + sort=not self.maintain_order, + as_index=True, + ), + agg_args=(), + agg_kwargs=dict(numeric_only=True, q=quantile), + drop=False, + ).reset_index(drop=False) + ) + + def sum(self) -> "DataFrame": + # TODO: Non numeric columns are dropped, but in Polars they are converted to null + return self.df.__constructor__( + _query_compiler=self.df._query_compiler.groupby_sum( + self.by, + axis=0, + groupby_kwargs=dict( + sort=not self.maintain_order, + as_index=True, + ), + agg_args=(), + agg_kwargs=dict(numeric_only=True), + drop=False, + ).reset_index(drop=False) + ) + + def tail(self, n: int = 5): + return self.df.__constructor__( + _query_compiler=self.df._query_compiler.groupby_tail( + self.by, + axis=0, + groupby_kwargs=dict( + sort=not self.maintain_order, + as_index=False, + ), + agg_args=(), + agg_kwargs=dict(n=n), + drop=False, + ) + ) diff --git a/modin/polars/lazyframe.py b/modin/polars/lazyframe.py new file mode 100644 index 00000000000..8616b6ae15c --- /dev/null +++ b/modin/polars/lazyframe.py @@ -0,0 +1,22 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +from modin.polars.base import BasePolarsDataset + + +class LazyFrame(BasePolarsDataset): + """ + Stub for Lazy Frame implementation. + """ + + pass diff --git a/modin/polars/series.py b/modin/polars/series.py new file mode 100644 index 00000000000..8db757908c9 --- /dev/null +++ b/modin/polars/series.py @@ -0,0 +1,2159 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +"""Module houses `Series` class, that is distributed version of `polars.Series`.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Sequence + +import numpy as np +import pandas +import polars +from polars._utils.various import no_default + +from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler +from modin.error_message import ErrorMessage +from modin.pandas import Series as ModinPandasSeries +from modin.pandas.io import from_pandas +from modin.polars.base import BasePolarsDataset + +if TYPE_CHECKING: + from numpy.typing import ArrayLike + from polars import PolarsDataType + + from modin.polars import DataFrame + + +class Series(BasePolarsDataset): + def __init__( + self, + name: str | "ArrayLike" | None = None, + values: "ArrayLike" | None = None, + dtype: "PolarsDataType | None" = None, + *, + strict: "bool" = True, + nan_to_null: "bool" = False, + dtype_if_empty: "PolarsDataType" = polars.Null, + _query_compiler: BaseQueryCompiler | None = None, + ) -> None: + if _query_compiler is None: + if isinstance(values, ModinPandasSeries): + self._query_compiler = values._query_compiler.copy() + else: + self._query_compiler: BaseQueryCompiler = from_pandas( + polars.Series( + name=name, + values=values, + dtype=dtype, + strict=strict, + nan_to_null=nan_to_null, + dtype_if_empty=dtype_if_empty, + ) + .to_pandas() + .to_frame() + )._query_compiler + else: + self._query_compiler: BaseQueryCompiler = _query_compiler + + def __repr__(self): + return repr( + polars.from_pandas(self._query_compiler.to_pandas().squeeze(axis=1)) + ) + + _sorted = False + _descending = None + + def to_pandas(self) -> ModinPandasSeries: + return ModinPandasSeries(query_compiler=self._query_compiler) + + def arg_max(self) -> int: + """ + Get the index of the maximum value. + + Returns: + Index of the maximum value. + """ + return self.to_pandas().argmax() + + def arg_min(self) -> int: + """ + Get the index of the minimum value. + + Returns: + Index of the minimum value. + """ + return self.to_pandas().argmin() + + def implode(self) -> "Series": + """ + Aggregate values into a list. + + Returns: + Imploded Series. + """ + raise NotImplementedError("not yet") + + def max(self) -> Any: + """ + Get the maximum value. + + Returns: + Maximum value. + """ + return self.to_pandas().max() + + def min(self) -> Any: + """ + Get the minimum value. + + Returns: + Minimum value. + """ + return self.to_pandas().min() + + def mean(self) -> Any: + """ + Get the mean value. + + Returns: + Mean value. + """ + return self.to_pandas().mean() + + def median(self) -> Any: + """ + Get the median value. + + Returns: + Median value. + """ + return self.to_pandas().median() + + def mode(self) -> Any: + """ + Get the mode value. + + Returns: + Mode value. + """ + return self.to_pandas().mode() + + def nan_max(self) -> Any: + """ + Get the maximum value, ignoring NaN values. + + Returns: + Maximum value. + """ + return self.to_pandas().max(skipna=True) + + def nan_min(self) -> Any: + """ + Get the minimum value, ignoring NaN values. + + Returns: + Minimum value. 
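+
+        Example (illustrative; delegates to pandas ``min(skipna=True)``):
+
+            Series("a", [1.0, float("nan"), 3.0]).nan_min()
+            # -> 1.0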
+ """ + return self.to_pandas().min(skipna=True) + + def product(self) -> Any: + """ + Get the product of all values. + + Returns: + Product of all values. + """ + return self.to_pandas().product() + + def quantile(self, quantile: float, interpolation: str = "nearest") -> float | None: + """ + Get the quantile value. + + Args: + quantile: Quantile to calculate. + interpolation: Interpolation method. + + Returns: + Quantile value. + """ + return self.to_pandas().quantile(quantile, interpolation=interpolation) + + def std(self, ddof: int = 1) -> float: + """ + Get the standard deviation. + + Args: + ddof: Delta Degrees of Freedom. + + Returns: + Standard deviation. + """ + return self.to_pandas().std(ddof=ddof) + + def sum(self) -> Any: + """ + Get the sum of all values. + + Returns: + Sum of all values. + """ + return self.to_pandas().sum() + + def var(self, ddof: int = 1) -> float: + """ + Get the variance. + + Args: + ddof: Delta Degrees of Freedom. + + Returns: + Variance. + """ + return self.to_pandas().var(ddof=ddof) + + @property + def arr(self) -> polars.series.array.ArrayNameSpace: + """ + Get the underlying array. + + Returns: + Underlying array. + """ + return polars.from_pandas(self._query_compiler.to_pandas().squeeze(axis=1)).arr + + @property + def dtype(self) -> polars.datatypes.DataType: + """ + Get the data type. + + Returns: + Data type. + """ + return polars.from_pandas( + pandas.Series().astype(self._query_compiler.dtypes.iloc[0]) + ).dtype + + @property + def name(self) -> str: + """ + Get the name. + + Returns: + Name. + """ + return self._query_compiler.columns[0] + + @property + def shape(self) -> tuple[int]: + """ + Get the shape. + + Returns: + Shape. + """ + return (len(self._query_compiler.index),) + + flags = [] + + @property + def bin(self): + raise NotImplementedError("not yet") + + def all(self) -> bool: + """ + Check if all values are True. + + Returns: + True if all values are True, False otherwise. + """ + return self.to_pandas().all() + + def any(self) -> bool: + """ + Check if any value is True. + + Returns: + True if any value is True, False otherwise. + """ + return self.to_pandas().any() + + def not_(self) -> "Series": + """ + Negate the values. + + Returns: + Negated Series. + """ + return self.__constructor__(_query_compiler=self._query_compiler.invert()) + + @property + def cat(self): + raise NotImplementedError("not yet") + + def abs(self) -> "Series": + """ + Get the absolute values. + + Returns: + Absolute values Series. + """ + return self.__constructor__(_query_compiler=self._query_compiler.abs()) + + def arccos(self) -> "Series": + """ + Get the arc cosine values. + + Returns: + Arc cosine values Series. + """ + raise NotImplementedError("not yet") + + def arccosh(self) -> "Series": + """ + Get the hyperbolic arc cosine values. + + Returns: + Hyperbolic arc cosine values Series. + """ + raise NotImplementedError("not yet") + + def arcsin(self) -> "Series": + """ + Get the arc sine values. + + Returns: + Arc sine values Series. + """ + raise NotImplementedError("not yet") + + def arcsinh(self) -> "Series": + """ + Get the hyperbolic arc sine values. + + Returns: + Hyperbolic arc sine values Series. + """ + raise NotImplementedError("not yet") + + def arctan(self) -> "Series": + """ + Get the arc tangent values. + + Returns: + Arc tangent values Series. + """ + raise NotImplementedError("not yet") + + def arctanh(self) -> "Series": + """ + Get the hyperbolic arc tangent values. + + Returns: + Hyperbolic arc tangent values Series. 
+        """
+        raise NotImplementedError("not yet")
+
+    def arg_true(self) -> "Series":
+        """
+        Get the indices where the Series is True.
+
+        Returns:
+            Indices of the True values.
+        """
+        return self.__constructor__(
+            _query_compiler=self._query_compiler.reset_index(drop=False)
+            .getitem_array(self._query_compiler)
+            .getitem_column_array(0, numeric=True)
+        ).rename(self.name)
+
+    def arg_unique(self) -> "Series":
+        """
+        Get the index of the first unique value.
+
+        Returns:
+            Index of the first unique value.
+        """
+        raise NotImplementedError("not yet")
+
+    def cbrt(self) -> "Series":
+        """
+        Get the cube root values.
+
+        Returns:
+            Cube root values Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def cos(self) -> "Series":
+        """
+        Get the cosine values.
+
+        Returns:
+            Cosine values Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def cosh(self) -> "Series":
+        """
+        Get the hyperbolic cosine values.
+
+        Returns:
+            Hyperbolic cosine values Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def cot(self) -> "Series":
+        """
+        Get the cotangent values.
+
+        Returns:
+            Cotangent values Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def cum_count(self) -> "Series":
+        """
+        Get the cumulative count of the non-null values.
+
+        Returns:
+            Cumulative count values Series.
+        """
+        return self.__constructor__(
+            _query_compiler=self._query_compiler.notna().cumsum()
+        )
+
+    def cum_max(self) -> "Series":
+        """
+        Get the cumulative maximum values.
+
+        Returns:
+            Cumulative maximum values Series.
+        """
+        return self.__constructor__(_query_compiler=self._query_compiler.cummax())
+
+    def cum_min(self) -> "Series":
+        """
+        Get the cumulative minimum values.
+
+        Returns:
+            Cumulative minimum values Series.
+        """
+        return self.__constructor__(_query_compiler=self._query_compiler.cummin())
+
+    def cum_prod(self) -> "Series":
+        """
+        Get the cumulative product values.
+
+        Returns:
+            Cumulative product values Series.
+        """
+        return self.__constructor__(_query_compiler=self._query_compiler.cumprod())
+
+    def cum_sum(self) -> "Series":
+        """
+        Get the cumulative sum values.
+
+        Returns:
+            Cumulative sum values Series.
+        """
+        return self.__constructor__(_query_compiler=self._query_compiler.cumsum())
+
+    def cumulative_eval(
+        self, expr, min_periods: int = 1, *, parallel: bool = False
+    ) -> "Series":
+        """
+        Get the cumulative evaluation values.
+
+        Args:
+            expr: Expression to evaluate.
+            min_periods: Minimum number of periods.
+
+        Returns:
+            Cumulative evaluation values Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def diff(self, n: int = 1, null_behavior: str = "ignore") -> "Series":
+        """
+        Calculate the first discrete difference between shifted items.
+
+        Args:
+            n: Number of periods to shift.
+            null_behavior: Null behavior.
+
+        Returns:
+            Difference values Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def dot(self, other) -> int | float | None:
+        """
+        Calculate the dot product.
+
+        Args:
+            other: Other Series.
+
+        Returns:
+            Dot product.
+        """
+        if isinstance(other, Series):
+            other = other.to_pandas()
+        return self.to_pandas().dot(other)
+
+    def entropy(
+        self, base: float = 2.718281828459045, *, normalize: bool = False
+    ) -> float:
+        """
+        Calculate the entropy.
+
+        Args:
+            base: Logarithm base.
+            normalize: Normalize the entropy.
+
+        Returns:
+            Entropy.
+ """ + raise NotImplementedError("not yet") + + def ewm_mean( + self, + com: int | None = None, + span: int | None = None, + half_life: int | None = None, + alpha: float | None = None, + *, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool | None = None, + ) -> "Series": + """ + Calculate the exponential weighted mean. + + Args: + com: Center of mass. + span: Span. + + Returns: + Exponential weighted mean Series. + """ + return self.__constructor__( + self.to_pandas() + .ewm( + com=com, + span=span, + halflife=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_na=ignore_nulls, + ) + .mean() + ) + + def ewm_mean_by(self, by, *, half_life: int | None = None) -> "Series": + """ + Calculate the exponential weighted mean by group. + + Args: + by: Grouping Series. + + Returns: + Exponential weighted mean Series. + """ + raise NotImplementedError("not yet") + + def ewm_std( + self, + com: int | None = None, + span: int | None = None, + half_life: int | None = None, + alpha: float | None = None, + *, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool | None = None, + ) -> "Series": + """ + Calculate the exponential weighted standard deviation. + + Args: + com: Center of mass. + span: Span. + + Returns: + Exponential weighted standard deviation Series. + """ + return self.__constructor__( + self.to_pandas() + .ewm( + com=com, + span=span, + halflife=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_na=ignore_nulls, + ) + .std() + ) + + def ewm_var( + self, + com: int | None = None, + span: int | None = None, + half_life: int | None = None, + alpha: float | None = None, + *, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool | None = None, + ) -> "Series": + """ + Calculate the exponential weighted variance. + + Args: + com: Center of mass. + span: Span. + + Returns: + Exponential weighted variance Series. + """ + return self.__constructor__( + self.to_pandas() + .ewm( + com=com, + span=span, + halflife=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_na=ignore_nulls, + ) + .var() + ) + + def exp(self) -> "Series": + """ + Calculate the exponential values. + + Returns: + Exponential values Series. + """ + return self.__constructor__(self.to_pandas().exp()) + + def hash( + self, + seed: int = 0, + seed_1: int | None = None, + seed_2: int | None = None, + seed_3: int | None = None, + ) -> "Series": + """ + Calculate the hash values. + + Args: + seed: Seed. + seed_1: Seed 1. + seed_2: Seed 2. + seed_3: Seed 3. + + Returns: + Hash values Series. + """ + raise NotImplementedError("not yet") + + def hist( + self, + bins: list[float] | None = None, + *, + bin_count: int | None = None, + include_category: bool = True, + include_breakpoint: bool = True, + ) -> "Series": + """ + Calculate the histogram. + + Args: + bins: Bins. + bin_count: Bin count. + + Returns: + Histogram Series. + """ + raise NotImplementedError("not yet") + + def is_between(self, lower_bound, upper_bound, closed: str = "both") -> "Series": + """ + Check if values are between the bounds. + + Args: + lower_bound: Lower bound. + upper_bound: Upper bound. + closed: Closed bounds. + + Returns: + Boolean Series. + """ + raise NotImplementedError("not yet") + + def kurtosis(self, *, fisher: bool = True, bias: bool = True) -> float | None: + """ + Calculate the kurtosis. + + Args: + fisher: Fisher method. + bias: Bias method. + + Returns: + Kurtosis. 
+ """ + return self.to_pandas().kurtosis(fisher=fisher, bias=bias) + + def log(self, base: float = 2.718281828459045) -> "Series": + """ + Calculate the logarithm values. + + Args: + base: Logarithm base. + + Returns: + Logarithm values Series. + """ + raise NotImplementedError("not yet") + + def log10(self) -> "Series": + """ + Calculate the base 10 logarithm values. + + Returns: + Base 10 logarithm values Series. + """ + return self.log(10) + + def log1p(self) -> "Series": + """ + Calculate the natural logarithm of 1 plus the values. + + Returns: + Natural logarithm of 1 plus the values Series. + """ + raise NotImplementedError("not yet") + + def replace( + self, + mapping: dict[Any, Any], + *, + default: Any = None, + return_dtype=None, + ) -> "Series": + """ + Map values to other values. + + Args: + mapping: Mapping. + + Returns: + Mapped Series. + """ + return self.__constructor__( + self.to_pandas().apply(lambda x: mapping.get(x, default)) + ) + + def pct_change(self, n: int = 1) -> "Series": + """ + Calculate the percentage change. + + Args: + n: Number of periods to shift. + + Returns: + Percentage change Series. + """ + return self.__constructor__(self.to_pandas().pct_change(n)) + + def peak_max(self) -> "Series": + """ + Get the peak maximum values. + + Returns: + Peak maximum values Series. + """ + return self.__eq__(self.max()) + + def peak_min(self) -> "Series": + """ + Get the peak minimum values. + + Returns: + Peak minimum values Series. + """ + return self.__eq__(self.min()) + + def rank( + self, + method: str = "average", + *, + descending: bool = False, + seed: int | None = None, + ) -> "Series": + """ + Calculate the rank. + + Args: + method: Rank method. + + Returns: + Rank Series. + """ + # TODO: support seed + if method not in ["average", "min", "max", "first", "dense"]: + raise ValueError(f"method {method} not supported") + return self.__constructor__( + self.to_pandas().rank(method=method, ascending=not descending) + ) + + def rolling_map( + self, + function: callable, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ) -> "Series": + """ + Apply a rolling function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .apply(function) + ) + + def rolling_max( + self, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ) -> "Series": + """ + Apply a rolling maximum function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .max() + ) + + def rolling_mean( + self, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ) -> "Series": + """ + Apply a rolling mean function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. 
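+
+        Example (illustrative; ``min_periods`` defaults to 1 here, so the
+        first window is partial rather than null):
+
+            Series("a", [1.0, 2.0, 3.0, 4.0]).rolling_mean(window_size=2)
+            # -> values [1.0, 1.5, 2.5, 3.5]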
+ """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .mean() + ) + + def rolling_median( + self, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ) -> "Series": + """ + Apply a rolling median function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .median() + ) + + def rolling_min( + self, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ) -> "Series": + """ + Apply a rolling minimum function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .min() + ) + + def rolling_quantile( + self, + window_size: int, + quantile: float, + interpolation: str = "nearest", + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ) -> "Series": + """ + Apply a rolling quantile function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .quantile(quantile, interpolation=interpolation) + ) + + def rolling_skew(self, window_size: int, *, bias: bool = False) -> "Series": + """ + Apply a rolling skewness function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + return self.__constructor__(self.to_pandas().rolling(window=window_size).skew()) + + def rolling_std( + self, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ddof: int = 1, + ) -> "Series": + """ + Apply a rolling standard deviation function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .std(ddof=ddof) + ) + + def rolling_sum( + self, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ) -> "Series": + """ + Apply a rolling sum function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. + """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .sum() + ) + + def rolling_var( + self, + window_size: int, + weights: list[float] | None = None, + min_periods: int = 1, + *, + center: bool = False, + ddof: int = 1, + ) -> "Series": + """ + Apply a rolling variance function. + + Args: + function: Function to apply. + window_size: Window size. + + Returns: + Applied Series. 
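+
+        Example (illustrative; with ``ddof=1`` a single-element window
+        yields a missing value):
+
+            Series("a", [1.0, 2.0, 4.0]).rolling_var(window_size=2)
+            # -> values [NaN, 0.5, 2.0]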
+ """ + if weights is not None: + raise NotImplementedError("not yet") + return self.__constructor__( + self.to_pandas() + .rolling(window=window_size, min_periods=min_periods, center=center) + .var(ddof=ddof) + ) + + def search_sorted(self, element, side: str = "any") -> int | "Series": + """ + Search for the element in the sorted Series. + + Args: + element: Element to search. + side: Side to search. + + Returns: + Index of the element. + """ + if side == "any": + side = "left" + return self.__constructor__(self.to_pandas().searchsorted(element, side=side)) + + def sign(self) -> "Series": + """ + Get the sign values. + + Returns: + Sign values Series. + """ + return self.__lt__(0).__mul__(-1).__add__(self.__gt__(0)) + + def sin(self) -> "Series": + """ + Get the sine values. + + Returns: + Sine values Series. + """ + raise NotImplementedError("not yet") + + def sinh(self) -> "Series": + """ + Get the hyperbolic sine values. + + Returns: + Hyperbolic sine values Series. + """ + raise NotImplementedError("not yet") + + def skew(self, *, bias: bool = True) -> float: + """ + Calculate the skewness. + + Args: + bias: Bias method. + + Returns: + Skewness. + """ + return self.to_pandas().skew() + + def sqrt(self) -> "Series": + """ + Get the square root values. + + Returns: + Square root values Series. + """ + return self.__constructor__(self.to_pandas().sqrt()) + + def tan(self) -> "Series": + """ + Get the tangent values. + + Returns: + Tangent values Series. + """ + raise NotImplementedError("not yet") + + def tanh(self) -> "Series": + """ + Get the hyperbolic tangent values. + + Returns: + Hyperbolic tangent values Series. + """ + raise NotImplementedError("not yet") + + def chunk_lengths(self) -> list[int]: + """ + Get the chunk lengths. + + Returns: + Chunk lengths. + """ + raise NotImplementedError("not yet") + + def describe( + self, + percentiles: Sequence[float] | float | None = (0.25, 0.5, 0.75), + interpolation: str = "nearest", + ): + """ + Generate descriptive statistics. + + Args: + percentiles: Percentiles to calculate. + + Returns: + Descriptive statistics. + """ + return self.to_pandas().describe(percentiles=percentiles) + + def estimated_size(self) -> int: + """ + Get the estimated size. + + Returns: + Estimated size. + """ + return self.to_pandas().memory_usage(index=False) + + def has_nulls(self) -> bool: + """ + Check if there are null values. + + Returns: + True if there are null values, False otherwise. + """ + return self.to_pandas().isnull().any() + + has_validity = has_nulls + + def is_finite(self) -> "Series": + """ + Check if the values are finite. + + Returns: + True if the values are finite, False otherwise. + """ + return self.__ne__(np.inf) + + def is_first_distinct(self) -> "Series": + """ + Check if the values are the first occurrence. + + Returns: + True if the values are the first occurrence, False otherwise. + """ + raise NotImplementedError("not yet") + + def is_in(self, other: "Series" | list[Any]) -> "Series": + """ + Check if the values are in the other Series. + + Args: + other: Other Series. + + Returns: + True if the values are in the other Series, False otherwise. + """ + return self.__constructor__(self.to_pandas().isin(other)) + + def is_infinite(self) -> "Series": + """ + Check if the values are infinite. + + Returns: + True if the values are infinite, False otherwise. + """ + return self.__eq__(np.inf) + + def is_last_distinct(self) -> "Series": + """ + Check if the values are the last occurrence. 
+ + Returns: + True if the values are the last occurrence, False otherwise. + """ + raise NotImplementedError("not yet") + + def is_nan(self) -> "Series": + """ + Check if the values are NaN. + + Returns: + True if the values are NaN, False otherwise. + """ + return self.__constructor__(_query_compiler=self._query_compiler.isna()) + + def is_not_nan(self) -> "Series": + """ + Check if the values are not NaN. + + Returns: + True if the values are not NaN, False otherwise. + """ + return self.__constructor__(_query_compiler=self._query_compiler.notna()) + + def is_not_null(self) -> "Series": + """ + Check if the values are not null. + + Returns: + True if the values are not null, False otherwise. + """ + return self.is_not_nan() + + def is_null(self) -> "Series": + """ + Check if the values are null. + + Returns: + True if the values are null, False otherwise. + """ + return self.is_nan() + + def is_sorted( + self, + *, + descending: bool = False, + nulls_last: bool = False, + ) -> bool: + """ + Check if the values are sorted. + + Args: + descending: Descending order. + + Returns: + True if the values are sorted, False otherwise. + """ + return ( + self.to_pandas().is_monotonic_increasing + if not descending + else self.to_pandas().is_monotonic_decreasing + ) + + def len(self) -> int: + """ + Get the length of the values. + + Returns: + Length of the values Series. + """ + return len(self.to_pandas()) + + def lower_bound(self) -> "Series": + """ + Get the lower bound values. + + Returns: + Lower bound values Series. + """ + raise NotImplementedError("not yet") + + def null_count(self) -> int: + """ + Get the number of null values. + + Returns: + Number of null values. + """ + return self.to_pandas().isnull().sum() + + def unique_counts(self) -> "Series": + """ + Get the unique counts. + + Returns: + Unique counts. + """ + return self.__constructor__(values=self.to_pandas().value_counts()) + + def upper_bound(self) -> "Series": + """ + Get the upper bound values. + + Returns: + Upper bound values Series. + """ + raise NotImplementedError("not yet") + + def value_counts( + self, *, sort: bool = False, parallel: bool = False, name: str = "count" + ) -> "DataFrame": + """ + Get the value counts. + + Returns: + Value counts. + """ + from modin.polars import DataFrame + + return DataFrame( + self.to_pandas().value_counts(sort=sort).reset_index(drop=False, names=name) + ) + + def to_frame(self, name: str | None = None) -> "DataFrame": + """ + Convert the Series to a DataFrame. + + Args: + name: Name of the Series. + + Returns: + DataFrame representation of the Series. + """ + from modin.polars import DataFrame + + return DataFrame(_query_compiler=self._query_compiler).rename({self.name: name}) + + def to_init_repr(self, n: int = 1000) -> str: + """ + Convert Series to instantiatable string representation. + + Args: + n: First n elements. + + Returns: + Instantiatable string representation. + """ + return polars.from_pandas( + self.slice(0, n)._query_compiler.to_pandas() + ).to_init_repr() + + @property + def list(self): + # TODO: implement list object + # https://docs.pola.rs/api/python/stable/reference/series/list.html + raise NotImplementedError("not yet") + + def alias(self, name: str) -> "Series": + """ + Rename the Series. + + Args: + name: New name. + + Returns: + Renamed Series. + """ + return self.to_frame(name).to_series() + + def append(self, other: "Series") -> "Series": + """ + Append another Series. + + Args: + other: Other Series. + + Returns: + Appended Series. 
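+
+        Example (illustrative):
+
+            Series("a", [1, 2]).append(Series("a", [3, 4])).len()
+            # -> 4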
+        """
+        return self.__constructor__(
+            _query_compiler=self._query_compiler.concat(0, other._query_compiler)
+        )
+
+    def arg_sort(
+        self, *, descending: bool = False, nulls_last: bool = False
+    ) -> "Series":
+        """
+        Get the sorted indices.
+
+        Args:
+            descending: Descending order.
+
+        Returns:
+            Sorted indices Series.
+        """
+        # TODO: implement nulls_last
+        result = self.__constructor__(values=self.to_pandas().argsort())
+        if descending:
+            return result.reverse()
+        else:
+            return result
+
+    def ceil(self) -> "Series":
+        """
+        Get the ceiling values.
+
+        Returns:
+            Ceiling values Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def clear(self, n: int = 0) -> "Series":
+        """
+        Create an empty copy of the current Series, with zero to `n` elements.
+
+        Args:
+            n: Number of elements.
+
+        Returns:
+            Series with n nulls.
+        """
+        raise NotImplementedError("not yet")
+
+    def clip(self, lower_bound=None, upper_bound=None) -> "Series":
+        """
+        Clip the values.
+
+        Args:
+            lower_bound: Lower bound.
+            upper_bound: Upper bound.
+
+        Returns:
+            Clipped values Series.
+        """
+        return self.__constructor__(
+            values=self.to_pandas().clip(lower_bound, upper_bound)
+        )
+
+    def cut(
+        self,
+        breaks: Sequence[float],
+        *,
+        labels: list[str] | None = None,
+        break_point_label: str = "breakpoint",
+        left_closed: bool = False,
+        include_breaks: bool = False,
+        as_series: bool = True,
+    ) -> "BasePolarsDataset":
+        raise NotImplementedError("not yet")
+
+    def extend_constant(self, value) -> "Series":
+        """
+        Extend the Series with a constant value.
+
+        Args:
+            value: Constant value.
+
+        Returns:
+            Extended Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def floor(self) -> "BasePolarsDataset":
+        """
+        Get the floor values.
+
+        Returns:
+            Floor values Series.
+        """
+        return self.__floordiv__(1)
+
+    def gather(self, indices) -> "Series":
+        """
+        Gather values by indices.
+
+        Args:
+            indices: Indices.
+
+        Returns:
+            Gathered Series.
+        """
+        return self.__constructor__(
+            values=self.to_pandas().iloc[
+                (
+                    indices._query_compiler
+                    if hasattr(indices, "_query_compiler")
+                    else indices
+                )
+            ]
+        )
+
+    def interpolate_by(self, by) -> "Series":
+        """
+        Interpolate values by group.
+
+        Args:
+            by: Grouping Series.
+
+        Returns:
+            Interpolated Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def item(self, index: int | None = None) -> Any:
+        """
+        Get the item at the index.
+
+        Args:
+            index: Index.
+
+        Returns:
+            Item at the index.
+        """
+        if index is None:
+            # TODO: polars requires a single-element Series when no index is given
+            index = 0
+        return self.to_pandas().iloc[index]
+
+    def new_from_index(self, index: int, length: int) -> "Series":
+        """
+        Create a new Series from the index.
+
+        Args:
+            index: Index.
+            length: Length.
+
+        Returns:
+            New Series.
+        """
+        raise NotImplementedError("not yet")
+
+    def qcut(
+        self,
+        quantiles: Sequence[float] | int,
+        *,
+        labels: Sequence[str] | None = None,
+        left_closed: bool = False,
+        allow_duplicates: bool = False,
+        include_breaks: bool = False,
+        break_point_label: str = "breakpoint",
+        category_labels: str = "category",
+        as_series: bool = True,
+    ) -> "Series" | "DataFrame":
+        """
+        Bin continuous values into discrete categories based on quantiles.
+
+        Args:
+            quantiles: Number of quantiles or sequence of quantiles.
+            labels: Labels for the resulting bins.
+            left_closed: Whether the intervals are left-closed.
+            allow_duplicates: Whether to allow duplicate intervals.
+            include_breaks: Whether to include the breaks in the result.
+            break_point_label: Label for the break points.
+            category_labels: Label for the categories.
+            as_series: Whether to return a Series.
+
+        Returns:
+            Binned Series.
+ """ + raise NotImplementedError("not yet") + + def rechunk(self, *, in_place: bool = False) -> "Series": + """ + Rechunk the Series. + + Args: + in_place: In-place operation. + + Returns: + Rechunked Series. + """ + raise NotImplementedError("not yet") + + rename = alias + + def reshape(self, dimensions, nested_type) -> "Series": + """ + Reshape the Series. + + Args: + dimensions: Dimensions. + nested_type: Nested type. + + Returns: + Reshaped Series. + """ + raise NotImplementedError("not yet") + + def reverse(self) -> "Series": + """ + Reverse the Series. + + Returns: + Reversed Series. + """ + return self.__constructor__(values=self.to_pandas().iloc[::-1]) + + def rle(self) -> "Series": + """ + Run-length encode the Series. + + Returns: + Run-length encoded Series. + """ + raise NotImplementedError("not yet") + + def rle_id(self) -> "Series": + """ + Run-length encode the Series with IDs. + + Returns: + Run-length encoded Series with IDs. + """ + raise NotImplementedError("not yet") + + def round(self, decimals: int = 0) -> "Series": + """ + Round the values. + + Args: + decimals: Number of decimals. + + Returns: + Rounded values Series. + """ + return self.__constructor__(values=self.to_pandas().round(decimals)) + + def round_sig_figs(self, digits: int) -> "Series": + """ + Round the values to significant figures. + + Args: + digits: Number of significant figures. + + Returns: + Rounded values Series. + """ + raise NotImplementedError("not yet") + + def scatter(self, indices, values) -> "Series": + """ + Scatter values by indices. + + Args: + indices: Indices. + values: Values. + + Returns: + Scattered Series. + """ + raise NotImplementedError("not yet") + + def set(self, filter: "Series", value: int | float | str | bool | None) -> "Series": + """ + Set values by filter. + + Args: + filter: Filter. + value: Value. + + Returns: + Set Series. + """ + raise NotImplementedError("not yet") + + def shrink_dtype(self) -> "Series": + """ + Shrink the data type. + + Returns: + Shrunk Series. + """ + raise NotImplementedError("not yet") + + def shuffle(self, seed: int | None = None) -> "Series": + """ + Shuffle the Series. + + Args: + seed: Seed. + + Returns: + Shuffled Series. + """ + raise NotImplementedError("not yet") + + def zip_with(self, mask: "Series", other: "Series") -> "Series": + """ + Zip the Series with another Series. + + Args: + mask: Mask Series. + other: Other Series. + + Returns: + Zipped Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.where( + mask._query_compiler, other._query_compiler + ) + ) + + def map_elements( + self, + function: callable, + return_dtype=None, + *, + skip_nulls: bool = True, + ) -> "Series": + """ + Map the elements. + + Args: + function: Function to apply. + + Returns: + Mapped Series. + """ + if return_dtype is not None or skip_nulls is False: + ErrorMessage.warn( + "`return_dtype` and `skip_nulls=False` are not supported yet" + ) + return self.__constructor__(values=self.to_pandas().apply(function)) + + def reinterpret(self, *, signed: bool = True) -> "Series": + """ + Reinterpret the data type of the series as signed or unsigned. + + Args: + signed: If True, reinterpret as signed, otherwise as unsigned. + + Returns: + Reinterpreted Series. + """ + raise NotImplementedError("not yet") + + def set_sorted(self, *, descending: bool = False) -> "Series": + """ + Set the Series as sorted. + + Args: + descending: Descending order. + + Returns: + Sorted Series. 
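+
+        Example (illustrative; only the sortedness flags are set, the data
+        itself is not rearranged):
+
+            s = Series("a", [1, 2, 3]).set_sorted(descending=False)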
+ """ + self._sorted = True + self._descending = descending + return self + + def to_physical(self) -> "Series": + """ + Convert the Series to physical. + + Returns: + Physical Series. + """ + raise NotImplementedError("not yet") + + def get_chunks(self) -> list["Series"]: + """ + Get the chunks. + + Returns: + Chunks. + """ + raise NotImplementedError("not yet") + + @property + def str(self): + # TODO: implement str object + # https://docs.pola.rs/api/python/stable/reference/series/string.html + raise NotImplementedError("not yet") + + @property + def struct(self): + # TODO: implement struct object + # https://docs.pola.rs/api/python/stable/reference/series/struct.html + raise NotImplementedError("not yet") + + @property + def dt(self): + # TODO: implement dt object + # https://docs.pola.rs/api/python/stable/reference/series/temporal.html + raise NotImplementedError("not yet") + + def __len__(self) -> int: + """ + Get the length of the Series. + """ + return self.len() + + def __matmul__(self, other) -> "Series": + """ + Matrix multiplication. + + Args: + other: Other Series. + + Returns: + Matrix multiplication Series. + """ + raise NotImplementedError("not yet") + + def __radd__(self, other) -> "Series": + """ + Right addition. + + Args: + other: Other Series. + + Returns: + Added Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.radd(other, axis=0) + ) + + def __rand__(self, other) -> "Series": + """ + Right and. + + Args: + other: Other Series. + + Returns: + And Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.__rand__(other, axis=0) + ) + + def __rfloordiv__(self, other) -> "Series": + """ + Right floor division. + + Args: + other: Other Series. + + Returns: + Floored Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.rfloordiv(other, axis=0) + ) + + def __rmatmul__(self, other) -> "Series": + """ + Right matrix multiplication. + + Args: + other: Other Series. + + Returns: + Matrix multiplication Series. + """ + raise NotImplementedError("not yet") + + def __rmod__(self, other) -> "Series": + """ + Right modulo. + + Args: + other: Other Series. + + Returns: + Modulo Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.rmod(other, axis=0) + ) + + def __rmul__(self, other) -> "Series": + """ + Right multiplication. + + Args: + other: Other Series. + + Returns: + Multiplied Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.rmul(other, axis=0) + ) + + def __ror__(self, other) -> "Series": + """ + Right or. + + Args: + other: Other Series. + + Returns: + Or Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.__ror__(other, axis=0) + ) + + def __rpow__(self, other) -> "Series": + """ + Right power. + + Args: + other: Other Series. + + Returns: + Powered Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.rpow(other, axis=0) + ) + + def __rsub__(self, other) -> "Series": + """ + Right subtraction. + + Args: + other: Other Series. + + Returns: + Subtracted Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.rsub(other, axis=0) + ) + + def __rtruediv__(self, other) -> "Series": + """ + Right true division. + + Args: + other: Other Series. + + Returns: + Divided Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.rtruediv(other, axis=0) + ) + + def __rxor__(self, other) -> "Series": + """ + Right xor. 
+ + Args: + other: Other Series. + + Returns: + Xor Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.__rxor__(other, axis=0) + ) + + def eq(self, other) -> "Series": + """ + Check if the values are equal to the other Series. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.eq(other._query_compiler) + ) + + def eq_missing(self, other) -> "Series": + """ + Check if the values are equal to the other Series, including missing values. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + raise NotImplementedError("not yet") + + def ge(self, other) -> "Series": + """ + Check if the values are greater than or equal to the other Series. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.ge(other._query_compiler) + ) + + def gt(self, other) -> "Series": + """ + Check if the values are greater than the other Series. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.gt(other._query_compiler) + ) + + def le(self, other) -> "Series": + """ + Check if the values are less than or equal to the other Series. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.le(other._query_compiler) + ) + + def lt(self, other) -> "Series": + """ + Check if the values are less than the other Series. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.lt(other._query_compiler) + ) + + def n_unique(self) -> int: + """ + Get the number of unique values. + + Returns: + Number of unique values. + """ + return self._query_compiler.nunique().to_pandas().squeeze(axis=None) + + def ne(self, other) -> "Series": + """ + Check if the values are not equal to the other Series. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.ne(other._query_compiler) + ) + + def ne_missing(self, other) -> "Series": + """ + Check if the values are not equal to the other Series, including missing values. + + Args: + other: Other Series. + + Returns: + Boolean Series. + """ + raise NotImplementedError("not yet") + + def pow(self, exponent) -> "Series": + """ + Raise the values to the power of the exponent. + + Args: + exponent: Exponent. + + Returns: + Powered Series. + """ + return self.__constructor__( + _query_compiler=self._query_compiler.pow(exponent, axis=0) + ) + + def replace_strict( + self, old, new=no_default, *, default=no_default, return_dtype=None + ) -> "Series": + """ + Replace values strictly. + + Args: + old: Old values. + new: New values. + default: Default value. + + Returns: + Replaced Series. + """ + raise NotImplementedError("not yet") + + def to_list(self) -> list: + """ + Convert the Series to a list. + + Returns: + List representation of the Series. + """ + return self._to_polars().tolist() + + def drop_nans(self) -> "Series": + """ + Drop NaN values. + + Returns: + Series without NaN values. 
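+
+        Example (illustrative):
+
+            Series("a", [1.0, float("nan"), 3.0]).drop_nans().len()
+            # -> 2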
+ """ + return self.__constructor__( + _query_compiler=self._query_compiler.dropna(how="any") + ) diff --git a/modin/tests/polars/test_dataframe.py b/modin/tests/polars/test_dataframe.py new file mode 100644 index 00000000000..29936c0b0f7 --- /dev/null +++ b/modin/tests/polars/test_dataframe.py @@ -0,0 +1,25 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +import polars +import polars.testing + +import modin.polars as pl + + +def test_init_roundtrip(): + data = {"a": [1, 2, 3], "b": [4, 5, 6]} + df = pl.DataFrame(data) + polars_df = polars.DataFrame(data) + to_polars = polars.from_pandas(df._query_compiler.to_pandas()) + polars.testing.assert_frame_equal(polars_df, to_polars) From a40cef7f54c54571008346a7e7882add12ac9dc1 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Mon, 29 Jul 2024 09:11:16 -0700 Subject: [PATCH 03/20] FEAT-#7340: Add more granular lazy flags to query compiler (#7348) Signed-off-by: Jonathan Shi --- .../storage_formats/base/query_compiler.py | 45 ++++++++++--- .../storage_formats/pandas/query_compiler.py | 64 +++++++++++++++++-- modin/pandas/base.py | 17 +++-- modin/pandas/dataframe.py | 4 +- modin/pandas/general.py | 2 +- modin/pandas/groupby.py | 2 +- modin/tests/pandas/test_groupby.py | 4 +- 7 files changed, 111 insertions(+), 27 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 80e89a577a2..343008d2a3d 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -120,10 +120,21 @@ class BaseQueryCompiler( Attributes ---------- - lazy_execution : bool - Whether underlying execution engine is designed to be executed in a lazy mode only. - If True, such QueryCompiler will be handled differently at the front-end in order - to reduce execution triggering as much as possible. + lazy_row_labels : bool, default False + True if the backend defers computations of the row labels (`df.index` for a frame). + Used by the frontend to avoid unnecessary execution or defer error validation. + lazy_row_count : bool, default False + True if the backend defers computations of the number of rows (`len(df.index)`). + Used by the frontend to avoid unnecessary execution or defer error validation. + lazy_column_types : bool, default False + True if the backend defers computations of the column types (`df.dtypes`). + Used by the frontend to avoid unnecessary execution or defer error validation. + lazy_column_labels : bool, default False + True if the backend defers computations of the column labels (`df.columns`). + Used by the frontend to avoid unnecessary execution or defer error validation. 
+    lazy_column_count : bool, default False
+        True if the backend defers computations of the number of columns (`len(df.columns)`).
+        Used by the frontend to avoid unnecessary execution or defer error validation.
     _shape_hint : {"row", "column", None}, default: None
         Shape hint for frames known to be a column or a row, otherwise None.
 
@@ -197,7 +208,25 @@ def default_to_pandas(self, pandas_op, *args, **kwargs) -> Self:
     # some of these abstract methods, but for the sake of generality they are
     # treated differently.
 
-    lazy_execution = False
+    lazy_row_labels = False
+    lazy_row_count = False
+    lazy_column_types = False
+    lazy_column_labels = False
+    lazy_column_count = False
+
+    @property
+    def lazy_shape(self):
+        """
+        Whether either of the underlying dataframe's dimensions (row count/column count) is computed lazily.
+
+        If True, the frontend should avoid length/shape checks as much as possible.
+
+        Returns
+        -------
+        bool
+        """
+        return self.lazy_row_count or self.lazy_column_count
+
     _shape_hint = None
 
     # Metadata modification abstract methods
@@ -4524,7 +4553,7 @@ def has_multiindex(self, axis=0):
     @property
     def frame_has_materialized_dtypes(self) -> bool:
         """
-        Check if the undelying dataframe has materialized dtypes.
+        Check if the underlying dataframe has materialized dtypes.
 
         Returns
         -------
@@ -4535,7 +4564,7 @@ def frame_has_materialized_dtypes(self) -> bool:
     @property
     def frame_has_materialized_columns(self) -> bool:
         """
-        Check if the undelying dataframe has materialized columns.
+        Check if the underlying dataframe has materialized columns.
 
         Returns
         -------
@@ -4546,7 +4575,7 @@ def frame_has_materialized_columns(self) -> bool:
     @property
     def frame_has_materialized_index(self) -> bool:
         """
-        Check if the undelying dataframe has materialized index.
+        Check if the underlying dataframe has materialized index.
 
         Returns
         -------
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 9d4467c2085..3581516a638 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -276,19 +276,69 @@ def __init__(self, modin_frame: PandasDataframe, shape_hint: Optional[str] = Non
         self._shape_hint = shape_hint
 
     @property
-    def lazy_execution(self):
+    def lazy_row_labels(self):
         """
-        Whether underlying Modin frame should be executed in a lazy mode.
+        Whether the row labels are computed lazily.
 
-        If True, such QueryCompiler will be handled differently at the front-end in order
-        to reduce triggering the computation as much as possible.
+        Equivalent to `not self.frame_has_materialized_index`.
 
         Returns
        -------
         bool
         """
-        frame = self._modin_frame
-        return not frame.has_materialized_index or not frame.has_materialized_columns
+        return not self.frame_has_materialized_index
+
+    @property
+    def lazy_row_count(self):
+        """
+        Whether the row count is computed lazily.
+
+        Equivalent to `not self.frame_has_materialized_index`.
+
+        Returns
+        -------
+        bool
+        """
+        return not self.frame_has_materialized_index
+
+    @property
+    def lazy_column_types(self):
+        """
+        Whether the dtypes are computed lazily.
+
+        Equivalent to `not self.frame_has_materialized_dtypes`.
+
+        Returns
+        -------
+        bool
+        """
+        return not self.frame_has_materialized_dtypes
+
+    @property
+    def lazy_column_labels(self):
+        """
+        Whether the column labels are computed lazily.
+
+        Equivalent to `not self.frame_has_materialized_columns`.
+
+        Returns
+        -------
+        bool
+        """
+        return not self.frame_has_materialized_columns
+
+    @property
+    def lazy_column_count(self):
+        """
+        Whether the column count is computed lazily.
+
+        Equivalent to `not self.frame_has_materialized_columns`.
+
+        Returns
+        -------
+        bool
+        """
+        return not self.frame_has_materialized_columns
 
     def finalize(self):
         self._modin_frame.finalize()
@@ -607,7 +657,7 @@ def reindex(self, axis, labels, **kwargs):
         return self.__constructor__(new_modin_frame)
 
     def reset_index(self, **kwargs) -> PandasQueryCompiler:
-        if self.lazy_execution:
+        if self.lazy_row_labels:
 
             def _reset(df, *axis_lengths, partition_idx):  # pragma: no cover
                 df = df.reset_index(**kwargs)
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index b930f1bc7c0..51c9cd8156f 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1068,7 +1068,7 @@ def astype(
         # will handle errors where dtype dict includes keys that are not
         # in columns.
         if (
-            not self._query_compiler.lazy_execution
+            not self._query_compiler.lazy_column_labels
             and not set(dtype.keys()).issubset(set(self._query_compiler.columns))
             and errors == "raise"
         ):
@@ -1462,7 +1462,9 @@ def drop(
             axes[axis] = [axes[axis]]
         # In case of lazy execution we should bypass these error checking components
         # because they can force the materialization of the row or column labels.
-        if self._query_compiler.lazy_execution:
+        if (axis == "index" and self._query_compiler.lazy_row_labels) or (
+            axis == "columns" and self._query_compiler.lazy_column_labels
+        ):
             continue
         if errors == "raise":
             non_existent = pandas.Index(axes[axis]).difference(
@@ -2657,7 +2659,10 @@ def reset_index(
         # exist.
         if (
             not drop
-            and not self._query_compiler.lazy_execution
+            and not (
+                self._query_compiler.lazy_column_labels
+                or self._query_compiler.lazy_row_labels
+            )
             and not self._query_compiler.has_multiindex()
             and all(n in self.columns for n in ["level_0", "index"])
         ):
@@ -3944,7 +3949,7 @@ def __getitem__(self, key) -> Self:
         BasePandasDataset
             Located dataset.
         """
-        if not self._query_compiler.lazy_execution and len(self) == 0:
+        if not self._query_compiler.lazy_row_count and len(self) == 0:
             return self._default_to_pandas("__getitem__", key)
         # see if we can slice the rows
         # This lets us reuse code in pandas to error check
@@ -4075,7 +4080,7 @@ def _getitem_slice(self, key: slice) -> Self:
         if is_full_grab_slice(
             key,
             # Avoid triggering shape computation for lazy executions
-            sequence_len=(None if self._query_compiler.lazy_execution else len(self)),
+            sequence_len=(None if self._query_compiler.lazy_row_count else len(self)),
         ):
             return self.copy()
         return self.iloc[key]
@@ -4301,7 +4306,7 @@ def __getattribute__(self, item) -> Any:
         Any
         """
         attr = super().__getattribute__(item)
-        if item not in _DEFAULT_BEHAVIOUR and not self._query_compiler.lazy_execution:
+        if item not in _DEFAULT_BEHAVIOUR and not self._query_compiler.lazy_shape:
             # We default to pandas on empty DataFrames. This avoids a large amount of
             # pain in underlying implementation and returns a result immediately rather
             # than dealing with the edge cases that empty DataFrames have.
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 8a80809dd3e..3d97efb4af4 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1084,7 +1084,7 @@ def insert(
                        + f"{len(value.columns)} columns instead."
) value = value.squeeze(axis=1) - if not self._query_compiler.lazy_execution and len(self.index) == 0: + if not self._query_compiler.lazy_row_count and len(self.index) == 0: if not hasattr(value, "index"): try: value = pandas.Series(value) @@ -2783,7 +2783,7 @@ def setitem_unhashable_key(df, value): if not isinstance(value, (Series, Categorical, np.ndarray, list, range)): value = list(value) - if not self._query_compiler.lazy_execution and len(self.index) == 0: + if not self._query_compiler.lazy_row_count and len(self.index) == 0: new_self = self.__constructor__({key: value}, columns=self.columns) self._update_inplace(new_self._query_compiler) else: diff --git a/modin/pandas/general.py b/modin/pandas/general.py index aeff9986f35..92aa195eae4 100644 --- a/modin/pandas/general.py +++ b/modin/pandas/general.py @@ -491,7 +491,7 @@ def concat( for obj in list_of_objs if ( isinstance(obj, (Series, pandas.Series)) - or (isinstance(obj, DataFrame) and obj._query_compiler.lazy_execution) + or (isinstance(obj, DataFrame) and obj._query_compiler.lazy_shape) or sum(obj.shape) > 0 ) ] diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 882ae19f6d7..080424a1761 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -196,7 +196,7 @@ def __getattr__(self, key): def __getattribute__(self, item): attr = super().__getattribute__(item) - if item not in _DEFAULT_BEHAVIOUR and not self._query_compiler.lazy_execution: + if item not in _DEFAULT_BEHAVIOUR and not self._query_compiler.lazy_shape: # We default to pandas on empty DataFrames. This avoids a large amount of # pain in underlying implementation and returns a result immediately rather # than dealing with the edge cases that empty DataFrames have. diff --git a/modin/tests/pandas/test_groupby.py b/modin/tests/pandas/test_groupby.py index cf5d06e10e1..b82473c674b 100644 --- a/modin/tests/pandas/test_groupby.py +++ b/modin/tests/pandas/test_groupby.py @@ -2752,7 +2752,7 @@ def lazy_frame(self): donor_obj = pd.DataFrame()._query_compiler self._mock_obj = mock.patch( - f"{donor_obj.__module__}.{donor_obj.__class__.__name__}.lazy_execution", + f"{donor_obj.__module__}.{donor_obj.__class__.__name__}.lazy_shape", new_callable=mock.PropertyMock, ) patch_obj = self._mock_obj.__enter__() @@ -2760,7 +2760,7 @@ def lazy_frame(self): df = pd.DataFrame(**self._df_kwargs) # The frame is lazy until `self.__exit__()` is called - assert df._query_compiler.lazy_execution + assert df._query_compiler.lazy_shape return df def __enter__(self): From 621f49e0875178d7f099d26431ff60bd5103e5f0 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Wed, 31 Jul 2024 06:14:04 -0500 Subject: [PATCH 04/20] FIX-#7351: Add ipython method calls to non-lookup list (#7352) Signed-off-by: Devin Petersohn --- modin/pandas/base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 51c9cd8156f..04dd845915c 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -99,7 +99,13 @@ # Do not lookup certain attributes in columns or index, as they're used for some # special purposes, like serving remote context -_ATTRS_NO_LOOKUP = {"__name__", "_cache"} +_ATTRS_NO_LOOKUP = { + "__name__", + "_cache", + "_ipython_canary_method_should_not_exist_", + "_ipython_display_", + "_repr_mimebundle_", +} _DEFAULT_BEHAVIOUR = { "__init__", From 24018dbf9c3932e201c4b49869281eff7960bddf Mon Sep 17 00:00:00 2001 From: Mahesh Vashishtha Date: Wed, 31 Jul 2024 04:40:55 -0700 Subject: [PATCH 05/20] FIX-#7134: Use a 
separate docstring class for BasePandasDataset. (#7353) Signed-off-by: sfc-gh-mvashishtha --- modin/tests/config/docs_module/__init__.py | 4 ++-- modin/tests/config/docs_module/classes.py | 8 ++++++++ modin/tests/config/test_envvars.py | 8 ++++++++ modin/utils.py | 13 ++++++++++++- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/modin/tests/config/docs_module/__init__.py b/modin/tests/config/docs_module/__init__.py index aa21549f1bd..5f617d683ba 100644 --- a/modin/tests/config/docs_module/__init__.py +++ b/modin/tests/config/docs_module/__init__.py @@ -11,7 +11,7 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -from .classes import DataFrame, Series +from .classes import BasePandasDataset, DataFrame, Series from .functions import read_csv -__all__ = ["DataFrame", "Series", "read_csv"] +__all__ = ["BasePandasDataset", "DataFrame", "Series", "read_csv"] diff --git a/modin/tests/config/docs_module/classes.py b/modin/tests/config/docs_module/classes.py index 8dc152e23cd..235c99bdf0f 100644 --- a/modin/tests/config/docs_module/classes.py +++ b/modin/tests/config/docs_module/classes.py @@ -22,3 +22,11 @@ class Series: def isna(self): """This is a test of the documentation module for Series.""" return + + +class BasePandasDataset: + """This is a test of the documentation module for BasePandasDataSet.""" + + def apply(): + """This is a test of the documentation module for BasePandasDataSet.apply.""" + return diff --git a/modin/tests/config/test_envvars.py b/modin/tests/config/test_envvars.py index 384bd5f199a..d057ecb0299 100644 --- a/modin/tests/config/test_envvars.py +++ b/modin/tests/config/test_envvars.py @@ -20,6 +20,7 @@ import modin.pandas as pd from modin.config.envvars import _check_vars from modin.config.pubsub import _UNSET, ExactStr +from modin.pandas.base import BasePandasDataset def reset_vars(*vars: tuple[cfg.Parameter]): @@ -89,6 +90,12 @@ def test_overrides(self): cfg.DocModule.put("modin.tests.config.docs_module") # Test for override + assert BasePandasDataset.__doc__ == ( + "This is a test of the documentation module for BasePandasDataSet." + ) + assert BasePandasDataset.apply.__doc__ == ( + "This is a test of the documentation module for BasePandasDataSet.apply." + ) assert ( pd.DataFrame.apply.__doc__ == "This is a test of the documentation module for DataFrame." @@ -96,6 +103,7 @@ def test_overrides(self): # Test for pandas doc when method is not defined on the plugin module assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__ assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__ + assert BasePandasDataset.astype.__doc__ in pd.DataFrame.astype.__doc__ # Test for override assert ( pd.Series.isna.__doc__ diff --git a/modin/utils.py b/modin/utils.py index 34071be132b..8623732671b 100644 --- a/modin/utils.py +++ b/modin/utils.py @@ -462,7 +462,18 @@ def _inherit_docstrings_in_place( if doc_module != DocModule.default and "pandas" in str( getattr(parent, "__module__", "") ): - parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent) + parent_name = ( + # DocModule should use the class BasePandasDataset to override the + # docstrings of BasePandasDataset, even if BasePandasDataset + # normally inherits docstrings from a different `parent`. 
+ "BasePandasDataset" + if getattr(cls_or_func, "__name__", "") == "BasePandasDataset" + # For other classes, override docstrings with the class that has the + # same name as the `parent` class, e.g. DataFrame inherits + # docstrings from doc_module.DataFrame. + else getattr(parent, "__name__", "") + ) + parent = getattr(imported_doc_module, parent_name, parent) if parent != default_parent: # Reset API link in case the docs are overridden. apilink = None From 22ed4d87b7f7b6b9e068f65294aeb6311ec08bd2 Mon Sep 17 00:00:00 2001 From: Mahesh Vashishtha Date: Thu, 1 Aug 2024 07:06:55 -0700 Subject: [PATCH 06/20] FIX-#7113: Fix docstring overrides for subclasses. (#7354) Signed-off-by: sfc-gh-mvashishtha --- modin/tests/config/docs_module/classes.py | 3 +++ .../docs_module_with_just_base/__init__.py | 16 ++++++++++++++++ .../docs_module_with_just_base/classes.py | 17 +++++++++++++++++ modin/tests/config/test_envvars.py | 18 ++++++++++++++++++ modin/utils.py | 15 +++++++++++++-- 5 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 modin/tests/config/docs_module_with_just_base/__init__.py create mode 100644 modin/tests/config/docs_module_with_just_base/classes.py diff --git a/modin/tests/config/docs_module/classes.py b/modin/tests/config/docs_module/classes.py index 235c99bdf0f..9a8eabac61b 100644 --- a/modin/tests/config/docs_module/classes.py +++ b/modin/tests/config/docs_module/classes.py @@ -30,3 +30,6 @@ class BasePandasDataset: def apply(): """This is a test of the documentation module for BasePandasDataSet.apply.""" return + + def astype(): + """This is a test of the documentation module for BasePandasDataSet.astype.""" diff --git a/modin/tests/config/docs_module_with_just_base/__init__.py b/modin/tests/config/docs_module_with_just_base/__init__.py new file mode 100644 index 00000000000..f2da948e26c --- /dev/null +++ b/modin/tests/config/docs_module_with_just_base/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +from .classes import BasePandasDataset + +__all__ = ["BasePandasDataset"] diff --git a/modin/tests/config/docs_module_with_just_base/classes.py b/modin/tests/config/docs_module_with_just_base/classes.py new file mode 100644 index 00000000000..645c7c63df6 --- /dev/null +++ b/modin/tests/config/docs_module_with_just_base/classes.py @@ -0,0 +1,17 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + + +class BasePandasDataset: + def astype(): + """This is a test of the documentation module for BasePandasDataSet.astype.""" diff --git a/modin/tests/config/test_envvars.py b/modin/tests/config/test_envvars.py index d057ecb0299..17ac0b58786 100644 --- a/modin/tests/config/test_envvars.py +++ b/modin/tests/config/test_envvars.py @@ -12,6 +12,7 @@ # governing permissions and limitations under the License. import os +import sys import pandas import pytest @@ -96,6 +97,13 @@ def test_overrides(self): assert BasePandasDataset.apply.__doc__ == ( "This is a test of the documentation module for BasePandasDataSet.apply." ) + # Test scenario 2 from https://github.com/modin-project/modin/issues/7113: + # We can correctly override the docstring for BasePandasDataset.astype, + # which is the same method as Series.astype. + assert pd.Series.astype is BasePandasDataset.astype + assert BasePandasDataset.astype.__doc__ == ( + "This is a test of the documentation module for BasePandasDataSet.astype." + ) assert ( pd.DataFrame.apply.__doc__ == "This is a test of the documentation module for DataFrame." @@ -130,6 +138,16 @@ def test_not_redefining_classes_modin_issue_7138(self): assert pd.DataFrame is original_dataframe_class + def test_base_docstring_override_with_no_dataframe_or_series_class_issue_7113( + self, + ): + # This test case tests scenario 1 from issue 7113. + sys.path.append(f"{os.path.dirname(__file__)}") + cfg.DocModule.put("docs_module_with_just_base") + assert BasePandasDataset.astype.__doc__ == ( + "This is a test of the documentation module for BasePandasDataSet.astype." + ) + @pytest.mark.skipif(cfg.Engine.get() != "Ray", reason="Ray specific test") def test_ray_cluster_resources(): diff --git a/modin/utils.py b/modin/utils.py index 8623732671b..6c17b1b12d3 100644 --- a/modin/utils.py +++ b/modin/utils.py @@ -384,6 +384,13 @@ def _replace_doc( # inherited docstrings. _docstring_inheritance_calls: list[Callable[[str], None]] = [] +# This is a set of (class, attribute_name) pairs whose docstrings we have +# already replaced since we last updated DocModule. Note that we don't store +# the attributes themselves since we replace property attributes instead of +# modifying them in place: +# https://github.com/modin-project/modin/blob/e9dbcc127913db77473a83936e8b6bb94ef84f0d/modin/utils.py#L353 +_attributes_with_docstrings_replaced: set[tuple[type, str]] = set() + def _documentable_obj(obj: object) -> bool: """ @@ -417,6 +424,7 @@ def _update_inherited_docstrings(doc_module: DocModule) -> None: doc_module : DocModule The current DocModule. 
""" + _attributes_with_docstrings_replaced.clear() _doc_module = doc_module.get() for doc_inheritance_call in _docstring_inheritance_calls: doc_inheritance_call(doc_module=_doc_module) # type: ignore[call-arg] @@ -488,7 +496,8 @@ def _inherit_docstrings_in_place( if base is object: continue for attr, obj in base.__dict__.items(): - if attr in seen: + # only replace docstrings once to prevent https://github.com/modin-project/modin/issues/7113 + if attr in seen or (base, attr) in _attributes_with_docstrings_replaced: continue seen.add(attr) # Try to get the attribute from the docs class first, then @@ -507,10 +516,12 @@ def _inherit_docstrings_in_place( obj, overwrite_existing, apilink, - parent_cls=cls_or_func, + parent_cls=base, attr_name=attr, ) + _attributes_with_docstrings_replaced.add((base, attr)) + def _inherit_docstrings( parent: object, From 6dce30e5538b7de3ed291e0741f5a63b7481bd2d Mon Sep 17 00:00:00 2001 From: Arun Jose <40291569+arunjose696@users.noreply.github.com> Date: Thu, 1 Aug 2024 18:33:11 +0200 Subject: [PATCH 07/20] FIX-#7355: Cpu count would be set incorrectly on a cluster (#7356) Signed-off-by: arunjose696 --- modin/config/envvars.py | 18 ++++++++++++++++++ modin/core/execution/dask/common/utils.py | 1 + modin/core/execution/ray/common/utils.py | 1 + modin/core/execution/unidist/common/utils.py | 1 + 4 files changed, 21 insertions(+) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 8654ebe30c1..3635c63d026 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -332,6 +332,24 @@ class CpuCount(EnvironmentVariable, type=int): varname = "MODIN_CPUS" + @classmethod + def _put(cls, value: int) -> None: + """ + Put specific value if CpuCount wasn't set by a user yet. + + Parameters + ---------- + value : int + Config value to set. + + Notes + ----- + This method is used to set CpuCount from cluster resources internally + and should not be called by a user. + """ + if cls.get_value_source() == ValueSource.DEFAULT: + cls.put(value) + @classmethod def _get_default(cls) -> int: """ diff --git a/modin/core/execution/dask/common/utils.py b/modin/core/execution/dask/common/utils.py index 067a94fcdf0..52b4e38f53d 100644 --- a/modin/core/execution/dask/common/utils.py +++ b/modin/core/execution/dask/common/utils.py @@ -74,3 +74,4 @@ def _disable_warnings(): num_cpus = len(client.ncores()) NPartitions._put(num_cpus) + CpuCount._put(num_cpus) diff --git a/modin/core/execution/ray/common/utils.py b/modin/core/execution/ray/common/utils.py index cc2010fc7fb..d419a61a0d2 100644 --- a/modin/core/execution/ray/common/utils.py +++ b/modin/core/execution/ray/common/utils.py @@ -151,6 +151,7 @@ def initialize_ray( num_cpus = int(ray.cluster_resources()["CPU"]) NPartitions._put(num_cpus) + CpuCount._put(num_cpus) # TODO(https://github.com/ray-project/ray/issues/28216): remove this # workaround once Ray gives a better way to suppress task errors. 
diff --git a/modin/core/execution/unidist/common/utils.py b/modin/core/execution/unidist/common/utils.py index 5aa31698b6a..6455d194b25 100644 --- a/modin/core/execution/unidist/common/utils.py +++ b/modin/core/execution/unidist/common/utils.py @@ -42,6 +42,7 @@ def initialize_unidist(): num_cpus = sum(v["CPU"] for v in unidist.cluster_resources().values()) modin_cfg.NPartitions._put(num_cpus) + modin_cfg.CpuCount._put(num_cpus) def deserialize(obj): # pragma: no cover From b236b76ece7bc917485bfc35aa2c89006213f1f1 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Fri, 2 Aug 2024 03:27:12 -0500 Subject: [PATCH 08/20] FIX-#7357: Fix `NoAttributeError` on `DataFrame.copy` (#7358) Signed-off-by: Devin Petersohn --- modin/polars/dataframe.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modin/polars/dataframe.py b/modin/polars/dataframe.py index d4408ff39f0..021daea692a 100644 --- a/modin/polars/dataframe.py +++ b/modin/polars/dataframe.py @@ -104,6 +104,9 @@ def __getitem__(self, item): return Series(_query_compiler=self._query_compiler.getitem_array([item])) + def _copy(self): + return self.__constructor__(_query_compiler=self._query_compiler.copy()) + def _to_polars(self) -> polars.DataFrame: """ Convert the DataFrame to Polars format. @@ -266,7 +269,7 @@ def _convert_non_numeric_to_null(self): need_columns_reindex=False, ).astype({c: self._query_compiler.dtypes[c] for c in non_numeric_cols}) ) - return self.copy() + return self._copy() def mean(self, *, axis=None, null_strategy="ignore"): """ @@ -956,7 +959,7 @@ def rechunk(self) -> "DataFrame": Returns: Rechunked DataFrame. """ - return self.copy() + return self._copy() def rename(self, mapping: dict[str, str] | callable) -> "DataFrame": """ @@ -972,7 +975,7 @@ def rename(self, mapping: dict[str, str] | callable) -> "DataFrame": mapping = {c: mapping(c) for c in self.columns} # TODO: add a query compiler method for `rename` new_columns = {c: mapping.get(c, c) for c in self.columns} - new_obj = self.copy() + new_obj = self._copy() new_obj.columns = new_columns return new_obj @@ -1076,7 +1079,7 @@ def set_sorted( if isinstance(column, str): column = [column] new_sorted_columns = [c in column for c in self.columns] - obj = self.copy() + obj = self._copy() obj._sorted_columns = new_sorted_columns return obj @@ -1231,7 +1234,7 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> "DataFrame": DataFrame with the row index added. 
""" if offset != 0: - obj = self.copy() + obj = self._copy() obj.index = obj.index + offset result = self.__constructor__( _query_compiler=self._query_compiler.reset_index(drop=False) From 05e5c484d476c920bf5ca03633b0fb0bcf2cd0e7 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Mon, 19 Aug 2024 13:56:21 +0200 Subject: [PATCH 09/20] FEAT-#7368: Add a new environment variable for using dynamic partitioning (#7369) Signed-off-by: Kirill Suvorov Co-authored-by: Anatoly Myachev --- docs/usage_guide/optimization_notes/index.rst | 33 +++++++++++++++++++ modin/config/__init__.py | 2 ++ modin/config/envvars.py | 12 +++++++ .../pandas/partitioning/partition_manager.py | 3 +- .../storage_formats/pandas/test_internals.py | 7 ++-- 5 files changed, 53 insertions(+), 4 deletions(-) diff --git a/docs/usage_guide/optimization_notes/index.rst b/docs/usage_guide/optimization_notes/index.rst index aadd813e318..0dcbe5a25d7 100644 --- a/docs/usage_guide/optimization_notes/index.rst +++ b/docs/usage_guide/optimization_notes/index.rst @@ -37,6 +37,38 @@ Range-partitioning is not a silver bullet, meaning that enabling it is not alway a link to the list of operations that have support for range-partitioning and practical advices on when one should enable it: :doc:`operations that support range-partitioning `. +Dynamic-partitioning in Modin +""""""""""""""""""""""""""""" + +Ray engine experiences slowdowns when running a large number of small remote tasks at the same time. Ray Core recommends to `avoid tiny task`_. +When modin DataFrame has a large number of partitions, some functions produce a large number of remote tasks, which can cause slowdowns. +To solve this problem, Modin suggests using dynamic partitioning. This approach reduces the number of remote tasks +by combining multiple partitions into a single virtual partition and perform a common remote task on them. + +Dynamic partitioning is typically used for operations that are fully or partially executed on all partitions separately. + +.. code-block:: python + + import modin.pandas as pd + from modin.config import context + + df = pd.DataFrame(...) + + with context(DynamicPartitioning=True): + df.abs() + +Dynamic partitioning is also not always useful, and this approach is usually used for medium-sized DataFrames with a large number of columns. +If the number of columns is small, the number of partitions will be close to the number of CPUs, and Ray will not have this problem. +If the DataFrame has too many rows, this is also not a good case for using Dynamic-partitioning, since each task is no longer tiny and performing +the combined tasks carries more overhead than assigning them separately. + +Unfortunately, the use of Dynamic-partitioning depends on various factors such as data size, number of CPUs, operations performed, +and it is up to the user to determine whether Dynamic-partitioning will give a boost in his case or not. + +.. + TODO: Define heuristics to automatically enable dynamic partitioning without performance penalty. + `Issue #7370 `_ + Understanding Modin's partitioning mechanism """""""""""""""""""""""""""""""""""""""""""" @@ -311,3 +343,4 @@ an inner join you may want to swap left and right DataFrames. Note that result columns order may differ for first and second ``merge``. .. _range-partitioning: https://www.techopedia.com/definition/31994/range-partitioning +.. 
_`avoid tiny task`: https://docs.ray.io/en/latest/ray-core/tips-for-first-time.html#tip-2-avoid-tiny-tasks
diff --git a/modin/config/__init__.py b/modin/config/__init__.py
index cf5f7895c5d..60806a79231 100644
--- a/modin/config/__init__.py
+++ b/modin/config/__init__.py
@@ -23,6 +23,7 @@
     CpuCount,
     DaskThreadsPerWorker,
     DocModule,
+    DynamicPartitioning,
     Engine,
     EnvironmentVariable,
     GithubCI,
@@ -95,6 +96,7 @@
     "AsyncReadMode",
     "ReadSqlEngine",
     "IsExperimental",
+    "DynamicPartitioning",
     # For tests
     "TrackFileLeaks",
     "TestReadFromSqlServer",
diff --git a/modin/config/envvars.py b/modin/config/envvars.py
index 3635c63d026..60d82e4a22c 100644
--- a/modin/config/envvars.py
+++ b/modin/config/envvars.py
@@ -892,6 +892,18 @@ class DaskThreadsPerWorker(EnvironmentVariable, type=int):
     default = 1
 
 
+class DynamicPartitioning(EnvironmentVariable, type=bool):
+    """
+    Set to true to use Modin's dynamic-partitioning implementation where possible.
+
+    Please refer to the documentation for cases where enabling this option would be beneficial:
+    https://modin.readthedocs.io/en/stable/usage_guide/optimization_notes/index.html#dynamic-partitioning-in-modin
+    """
+
+    varname = "MODIN_DYNAMIC_PARTITIONING"
+    default = False
+
+
 def _check_vars() -> None:
     """
     Check validity of environment variables.
diff --git a/modin/core/dataframe/pandas/partitioning/partition_manager.py b/modin/core/dataframe/pandas/partitioning/partition_manager.py
index 8b4f6788931..05854239206 100644
--- a/modin/core/dataframe/pandas/partitioning/partition_manager.py
+++ b/modin/core/dataframe/pandas/partitioning/partition_manager.py
@@ -30,6 +30,7 @@
 from modin.config import (
     BenchmarkMode,
     CpuCount,
+    DynamicPartitioning,
     Engine,
     MinColumnPartitionSize,
     MinRowPartitionSize,
@@ -675,7 +676,7 @@ def map_partitions(
         NumPy array
             An array of partitions
         """
-        if np.prod(partitions.shape) <= 1.5 * CpuCount.get():
+        if not DynamicPartitioning.get():
             # block-wise map
             new_partitions = cls.base_map_partitions(
                 partitions, map_func, func_args, func_kwargs
diff --git a/modin/tests/core/storage_formats/pandas/test_internals.py b/modin/tests/core/storage_formats/pandas/test_internals.py
index 4113f3ce0ed..b030fe7b216 100644
--- a/modin/tests/core/storage_formats/pandas/test_internals.py
+++ b/modin/tests/core/storage_formats/pandas/test_internals.py
@@ -2658,7 +2658,7 @@ def remote_func():
         ),
     ],
 )
-def test_map_approaches(partitioning_scheme, expected_map_approach):
+def test_dynamic_partitioning(partitioning_scheme, expected_map_approach):
     data_size = MinRowPartitionSize.get() * CpuCount.get()
     data = {f"col{i}": np.ones(data_size) for i in range(data_size)}
     df = pandas.DataFrame(data)
@@ -2672,8 +2672,9 @@ def test_map_approaches(partitioning_scheme, expected_map_approach):
         expected_map_approach,
         wraps=getattr(partition_mgr_cls, expected_map_approach),
     ) as expected_method:
-        partition_mgr_cls.map_partitions(partitions, lambda x: x * 2)
-        expected_method.assert_called()
+        with context(DynamicPartitioning=True):
+            partition_mgr_cls.map_partitions(partitions, lambda x: x * 2)
+        expected_method.assert_called()

From 8fc230a0a624eb61389859eb4fbf55fc0a2bda0c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev
Date: Mon, 26 Aug 2024 13:34:37 +0200
Subject: [PATCH 10/20] FIX-#7373: Try a previous version of `motoserver/moto`
 service, pin to 5.0.13 (#7374)

Signed-off-by: Anatoly Myachev
---
 .github/workflows/ci.yml           | 8 ++++----
 .github/workflows/push-to-main.yml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff 
--git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5f82d4ca7f9..10b17d0e4a6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -293,7 +293,7 @@ jobs: name: test-ubuntu (engine unidist ${{matrix.unidist-backend}}, python ${{matrix.python-version}}) services: moto: - image: motoserver/moto + image: motoserver/moto:5.0.13 ports: - 5000:5000 env: @@ -382,7 +382,7 @@ jobs: # Using workaround https://github.com/actions/runner/issues/822#issuecomment-1524826092 moto: # we only need moto service on Ubuntu and for group_4 task or python engine - image: ${{ (matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.test_task == 'group_4')) && 'motoserver/moto' || '' }} + image: ${{ (matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.test_task == 'group_4')) && 'motoserver/moto:5.0.13' || '' }} ports: - 5000:5000 env: @@ -508,7 +508,7 @@ jobs: name: test-${{ matrix.os }}-sanity (engine ${{ matrix.execution.name }}, python ${{matrix.python-version}}) services: moto: - image: ${{ matrix.os != 'windows' && 'motoserver/moto' || '' }} + image: ${{ matrix.os != 'windows' && 'motoserver/moto:5.0.13' || '' }} ports: - 5000:5000 env: @@ -623,7 +623,7 @@ jobs: name: test experimental services: moto: - image: motoserver/moto + image: motoserver/moto:5.0.13 ports: - 5000:5000 env: diff --git a/.github/workflows/push-to-main.yml b/.github/workflows/push-to-main.yml index f8f05541ce1..dc0e83e102c 100644 --- a/.github/workflows/push-to-main.yml +++ b/.github/workflows/push-to-main.yml @@ -19,7 +19,7 @@ jobs: shell: bash -l {0} services: moto: - image: motoserver/moto + image: motoserver/moto:5.0.13 ports: - 5000:5000 env: From da015711d94787e044624a08b2660377eacab30f Mon Sep 17 00:00:00 2001 From: Arun Jose <40291569+arunjose696@users.noreply.github.com> Date: Mon, 26 Aug 2024 15:26:11 +0200 Subject: [PATCH 11/20] FEAT-#4605: Add native query compiler (#7259) Co-authored-by: Igoshev, Iaroslav Signed-off-by: arunjose696 --- .github/workflows/ci.yml | 35 + modin/config/__init__.py | 2 + modin/config/envvars.py | 22 + .../dispatching/factories/factories.py | 5 + .../pandas/native_query_compiler.py | 1285 +++++++++++++++++ modin/tests/pandas/dataframe/test_binary.py | 6 +- modin/tests/pandas/dataframe/test_default.py | 25 +- modin/tests/pandas/dataframe/test_indexing.py | 10 +- modin/tests/pandas/dataframe/test_iter.py | 8 +- .../tests/pandas/dataframe/test_join_sort.py | 9 +- .../pandas/dataframe/test_map_metadata.py | 22 +- modin/tests/pandas/dataframe/test_pickle.py | 1 - modin/tests/test_utils.py | 18 +- 13 files changed, 1427 insertions(+), 21 deletions(-) create mode 100644 modin/core/storage_formats/pandas/native_query_compiler.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10b17d0e4a6..9186500682a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -246,12 +246,16 @@ jobs: unidist: ${{ steps.filter.outputs.unidist }} engines: ${{ steps.engines.outputs.engines }} experimental: ${{ steps.experimental.outputs.experimental }} + test-native-dataframe-mode: ${{ steps.filter.outputs.test-native-dataframe-mode }} steps: - uses: actions/checkout@v4 - uses: dorny/paths-filter@v3 id: filter with: filters: | + test-native-dataframe-mode: + - 'modin/core/storage_formats/pandas/native_query_compiler.py' + - 'modin/core/storage_formats/base/query_compiler.py' shared: &shared - 'modin/core/execution/dispatching/**' ray: @@ -665,6 +669,37 @@ jobs: python-version: ${{matrix.python-version}} - run: python -m pytest 
modin/tests/experimental/spreadsheet/test_general.py + test-native-dataframe-mode: + needs: [ lint-flake8, execution-filter] + if: ${{ needs.execution-filter.outputs.test-native-dataframe-mode == 'true' }} + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + strategy: + matrix: + python-version: ["3.9"] + env: + MODIN_NATIVE_DATAFRAME_MODE: "Pandas" + name: test-native-dataframe-mode python ${{matrix.python-version}}) + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/mamba-env + with: + environment-file: environment-dev.yml + python-version: ${{matrix.python-version}} + - run: python -m pytest modin/tests/pandas/dataframe/test_binary.py + - run: python -m pytest modin/tests/pandas/dataframe/test_default.py + - run: python -m pytest modin/tests/pandas/dataframe/test_indexing.py + - run: python -m pytest modin/tests/pandas/dataframe/test_iter.py + - run: python -m pytest modin/tests/pandas/dataframe/test_join_sort.py + - run: python -m pytest modin/tests/pandas/dataframe/test_map_metadata.py + - run: python -m pytest modin/tests/pandas/dataframe/test_pickle.py + - run: python -m pytest modin/tests/pandas/dataframe/test_reduce.py + - run: python -m pytest modin/tests/pandas/dataframe/test_udf.py + - run: python -m pytest modin/tests/pandas/dataframe/test_window.py + - uses: ./.github/actions/upload-coverage + merge-coverage-artifacts: needs: [test-internals, test-api-and-no-engine, test-defaults, test-all-unidist, test-all, test-experimental, test-sanity] if: always() # we need to run it regardless of some job being skipped, like in PR diff --git a/modin/config/__init__.py b/modin/config/__init__.py index 60806a79231..d38596eff5c 100644 --- a/modin/config/__init__.py +++ b/modin/config/__init__.py @@ -40,6 +40,7 @@ MinPartitionSize, MinRowPartitionSize, ModinNumpy, + NativeDataframeMode, NPartitions, PersistentPickle, ProgressBar, @@ -69,6 +70,7 @@ "CpuCount", "GpuCount", "Memory", + "NativeDataframeMode", # Ray specific "IsRayCluster", "RayRedisAddress", diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 60d82e4a22c..97ed1579667 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -943,4 +943,26 @@ def _check_vars() -> None: ) +class NativeDataframeMode(EnvironmentVariable, type=str): + """ + Configures the query compiler to process Modin data. + + When this config is set to ``Default``, ``PandasQueryCompiler`` is used, + which leads to Modin executing dataframes in distributed fashion. + When set to a string (e.g., ``pandas``), ``NativeQueryCompiler`` is used, + which handles the dataframes without distributing, + falling back to native library functions (e.g., ``pandas``). + + This could be beneficial for handling relatively small dataframes + without involving additional overhead of communication between processes. 
+ """ + + varname = "MODIN_NATIVE_DATAFRAME_MODE" + choices = ( + "Default", + "Pandas", + ) + default = "Default" + + _check_vars() diff --git a/modin/core/execution/dispatching/factories/factories.py b/modin/core/execution/dispatching/factories/factories.py index e7f2493e404..deda5113287 100644 --- a/modin/core/execution/dispatching/factories/factories.py +++ b/modin/core/execution/dispatching/factories/factories.py @@ -26,7 +26,9 @@ import pandas from pandas.util._decorators import doc +from modin.config import NativeDataframeMode from modin.core.io import BaseIO +from modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler from modin.utils import get_current_execution _doc_abstract_factory_class = """ @@ -168,6 +170,9 @@ def prepare(cls): method="io.from_pandas", ) def _from_pandas(cls, df): + if NativeDataframeMode.get() == "Pandas": + df_copy = df.copy() + return NativeQueryCompiler(df_copy) return cls.io_cls.from_pandas(df) @classmethod diff --git a/modin/core/storage_formats/pandas/native_query_compiler.py b/modin/core/storage_formats/pandas/native_query_compiler.py new file mode 100644 index 00000000000..bfe331cfc6e --- /dev/null +++ b/modin/core/storage_formats/pandas/native_query_compiler.py @@ -0,0 +1,1285 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +""" +Module contains ``NativeQueryCompiler`` class. + +``NativeQueryCompiler`` is responsible for compiling efficient DataFrame algebra +queries for small data and empty ``PandasDataFrame``. +""" + +from typing import Optional + +import numpy as np +import pandas +from pandas.core.dtypes.common import is_list_like, is_scalar + +from modin.config.envvars import NativeDataframeMode +from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler +from modin.utils import ( + MODIN_UNNAMED_SERIES_LABEL, + _inherit_docstrings, + try_cast_to_pandas, +) + + +def _get_axis(axis): + """ + Build index labels getter of the specified axis. + + Parameters + ---------- + axis : {0, 1} + Axis to get labels from. 0 is for index and 1 is for column. + + Returns + ------- + callable(NativeQueryCompiler) -> pandas.Index + """ + if axis == 0: + return lambda self: self._modin_frame.index + else: + return lambda self: self._modin_frame.columns + + +def _set_axis(axis): + """ + Build index labels setter of the specified axis. + + Parameters + ---------- + axis : {0, 1} + Axis to set labels on. 0 is for index and 1 is for column. + + Returns + ------- + callable(NativeQueryCompiler) + """ + if axis == 0: + + def set_axis(self, idx): + self._modin_frame.index = idx + + else: + + def set_axis(self, cols): + self._modin_frame.columns = cols + + return set_axis + + +def _str_map(func_name): + """ + Build function that calls specified string function on frames ``str`` accessor. 
+
+    Parameters
+    ----------
+    func_name : str
+        String function name to execute on the ``str`` accessor.
+
+    Returns
+    -------
+    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
+    """
+
+    def str_op_builder(df, *args, **kwargs):
+        """Apply specified function against `str` accessor of the passed frame."""
+        str_s = df.squeeze(axis=1).str
+        return getattr(pandas.Series.str, func_name)(str_s, *args, **kwargs).to_frame()
+
+    return str_op_builder
+
+
+def _dt_prop_map(property_name):
+    """
+    Build function that accesses the specified property of the ``dt`` property of the passed frame.
+
+    Parameters
+    ----------
+    property_name : str
+        Date-time property name to access.
+
+    Returns
+    -------
+    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
+        Function to be applied to the frame.
+
+    Notes
+    -----
+    This applies non-callable properties of ``Series.dt``.
+    """
+
+    def dt_op_builder(df, *args, **kwargs):
+        """Access specified date-time property of the passed frame."""
+        prop_val = getattr(df.squeeze(axis=1).dt, property_name)
+        if isinstance(prop_val, pandas.Series):
+            return prop_val.to_frame()
+        elif isinstance(prop_val, pandas.DataFrame):
+            return prop_val
+        else:
+            return pandas.DataFrame([prop_val])
+
+    return dt_op_builder
+
+
+def _dt_func_map(func_name):
+    """
+    Build function that applies the specified method against the ``dt`` property of the passed frame.
+
+    Parameters
+    ----------
+    func_name : str
+        Date-time function name to apply.
+
+    Returns
+    -------
+    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
+        Function to be applied to the frame.
+
+    Notes
+    -----
+    This applies callable methods of ``Series.dt``.
+    """
+
+    def dt_op_builder(df, *args, **kwargs):
+        """Apply specified function against ``dt`` accessor of the passed frame."""
+        dt_s = df.squeeze(axis=1).dt
+        dt_func_result = getattr(pandas.Series.dt, func_name)(dt_s, *args, **kwargs)
+        # If we don't specify the dtype for the frame, the frame might get the
+        # wrong dtype, e.g. for to_pydatetime in https://github.com/modin-project/modin/issues/4436
+        return pandas.DataFrame(dt_func_result, dtype=dt_func_result.dtype)
+
+    return dt_op_builder
+
+
+def _rolling_func(func):
+    """
+    Build function that applies the specified rolling method of the passed frame.
+
+    Parameters
+    ----------
+    func : str
+        Rolling function name to apply.
+
+    Returns
+    -------
+    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
+        Function to be applied to the frame.
+    
+ """ + + def rolling_builder(df, fold_axis, rolling_args, *args, **kwargs): + rolling_result = df.rolling(*rolling_args) + rolling_op = getattr(rolling_result, func) + return rolling_op(*args, **kwargs) + + return rolling_builder + + +def _reindex(df, axis, labels, **kwargs): # noqa: GL08 + return df.reindex(labels=labels, axis=axis, **kwargs) + + +def _concat(df, axis, other, join_axes=None, **kwargs): # noqa: GL08 + if not isinstance(other, list): + other = [other] + if ( + isinstance(df, pandas.DataFrame) + and len(df.columns) == 1 + and df.columns[0] == MODIN_UNNAMED_SERIES_LABEL + ): + df = df[df.columns[0]] + + ignore_index = kwargs.get("ignore_index", False) + concat_join = ["outer", "inner"] + if kwargs.get("join", "outer") in concat_join: + if not isinstance(other, list): + other = [other] + other = [df] + other + result = pandas.concat(other, axis=axis, **kwargs) + else: + if isinstance(other, (list, np.ndarray)) and len(other) == 1: + other = other[0] + ignore_index = kwargs.pop("ignore_index", None) + kwargs["how"] = kwargs.pop("join", None) + if isinstance(other, (pandas.DataFrame, pandas.Series)): + result = df.join(other, rsuffix="r_", **kwargs) + else: + result = df.join(other, **kwargs) + if ignore_index: + if axis == 0: + result = result.reset_index(drop=True) + else: + result.columns = pandas.RangeIndex(len(result.columns)) + return result + + +def _to_datetime(df, *args, **kwargs): # noqa: GL08 + return pandas.to_datetime(df.squeeze(axis=1), *args, **kwargs) + + +def _to_numeric(df, *args, **kwargs): # noqa: GL08 + return pandas.to_numeric(df.squeeze(axis=1), *args, **kwargs) + + +def _groupby(agg_name): + """ + Build function that apply specified groupby method of the passed frame. + + Parameters + ---------- + agg_name : str + GroupBy aggregate function name to apply. + + Returns + ------- + callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame + Function to be applied to the frame. 
+ """ + __aggregation_methods_dict = { + "axis_wise": pandas.core.groupby.DataFrameGroupBy.aggregate, + "group_wise": pandas.core.groupby.DataFrameGroupBy.apply, + "transform": pandas.core.groupby.DataFrameGroupBy.transform, + } + + def groupby_callable( + df, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + agg_func=None, + how="axis_wise", + drop=False, + **kwargs, + ): + by_names = [] + if isinstance(by, pandas.DataFrame): + by = by.squeeze(axis=1) + if isinstance(by, list): + for i in range(len(by)): + if isinstance(by[i], pandas.DataFrame): + by[i] = by[i].squeeze(axis=1) + if isinstance(by[i], pandas.Series): + if isinstance(df.index, pandas.MultiIndex): + by[i].name = pandas.MultiIndex.from_tuples(by[i].name) + by_names.append(by[i].name) + elif isinstance(by[i], str): + by_names.append(by[i]) + if isinstance(by, pandas.DataFrame): + by_names = list(by.columns) + to_append = by.columns[[name not in df.columns for name in by_names]] + if len(to_append) > 0: + df = pandas.concat([df, by[to_append]], axis=1) + by = by_names + if isinstance(by, pandas.Series) and drop: + by_names = [by.name] + if ( + is_list_like(by) + and drop + and not any([is_list_like(curr_by) for curr_by in by]) + ): + by = by_names + + groupby_obj = df.groupby(by=by, axis=axis, **groupby_kwargs) + if agg_name == "agg": + if isinstance(agg_func, dict): + agg_func = { + k: v[0] if isinstance(v, list) and len(v) == 1 else v + for k, v in agg_func.items() + } + groupby_agg = __aggregation_methods_dict[how] + result = groupby_agg(groupby_obj, agg_func, *agg_args, **agg_kwargs) + else: + groupby_agg = getattr(groupby_obj, agg_name) + if callable(groupby_agg): + result = groupby_agg(*agg_args, **agg_kwargs) + else: + result = groupby_agg + + return result + + return groupby_callable + + +def _register_binary(op): + """ + Build function that apply specified binary method of the passed frame. + + Parameters + ---------- + op : str + Binary function name to apply. + + Returns + ------- + callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame + Function to be applied to the frame. + """ + + def binary_operator(df, other, **kwargs): + squeeze_other = kwargs.pop("broadcast", False) or kwargs.pop( + "squeeze_other", False + ) + squeeze_self = kwargs.pop("squeeze_self", False) + + if squeeze_other: + other = other.squeeze(axis=1) + + if squeeze_self: + df = df.squeeze(axis=1) + result = getattr(df, op)(other, **kwargs) + if ( + not isinstance(result, pandas.Series) + and not isinstance(result, pandas.DataFrame) + and is_list_like(result) + ): + result = pandas.DataFrame(result) + + return result + + return binary_operator + + +def _register_expanding(func): + """ + Build function that apply specified expanding window functions. + + Parameters + ---------- + func : str + Expanding window functionname to apply. + + Returns + ------- + callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame + Function to be applied to the frame. + """ + + def expanding_operator(df, fold_axis, rolling_args, *args, **kwargs): + squeeze_self = kwargs.pop("squeeze_self", False) + + if squeeze_self: + df = df.squeeze(axis=1) + roller = df.expanding(*rolling_args) + if type(func) is property: + return func.fget(roller) + + return func(roller, *args, **kwargs) + + return expanding_operator + + +def _register_resample(op): + """ + Build function that apply specified resample method of the passed frame. + + Parameters + ---------- + op : str + Resample function name to apply. 
+
+    Returns
+    -------
+    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
+        Function to be applied to the frame.
+    """
+
+    def resample_operator(df, resample_kwargs, *args, **kwargs):
+        resampler = df.resample(**resample_kwargs)
+        result = getattr(resampler, op)(*args, **kwargs)
+        return result
+
+    return resample_operator
+
+
+def _drop(df, **kwargs):  # noqa: GL08
+    if (
+        kwargs.get("labels", None) is not None
+        or kwargs.get("index", None) is not None
+        or kwargs.get("columns", None) is not None
+    ):
+        return df.drop(**kwargs)
+    return df
+
+
+def _fillna(df, value, **kwargs):  # noqa: GL08
+    squeeze_self = kwargs.pop("squeeze_self", False)
+    squeeze_value = kwargs.pop("squeeze_value", False)
+    if squeeze_self and isinstance(df, pandas.DataFrame):
+        df = df.squeeze(axis=1)
+    if squeeze_value and isinstance(value, pandas.DataFrame):
+        value = value.squeeze(axis=1)
+    return df.fillna(value, **kwargs)
+
+
+def _is_monotonic(monotonic_type):  # noqa: GL08
+    def is_monotonic_caller(ser):
+        return pandas.DataFrame([getattr(ser, monotonic_type)])
+
+    return is_monotonic_caller
+
+
+def _sort_index(df, inplace=False, **kwargs):  # noqa: GL08
+    if inplace:
+        df.sort_index(inplace=inplace, **kwargs)
+    else:
+        df = df.sort_index(inplace=inplace, **kwargs)
+    return df
+
+
+def _combine(df, other, func, **kwargs):  # noqa: GL08
+    if isinstance(df, pandas.Series):
+        return func(df, other)
+    return df.combine(other, func)
+
+
+def _getitem_array(df, key):  # noqa: GL08
+    if isinstance(key, pandas.DataFrame):
+        key = key.squeeze(axis=1)
+    return df[key]
+
+
+def _getitem_row_array(df, key):  # noqa: GL08
+    if isinstance(key, pandas.DataFrame):
+        key = key.squeeze(axis=1)
+    return df.iloc[key]
+
+
+def _write_items(
+    df,
+    row_numeric_index,
+    col_numeric_index,
+    item,
+    need_columns_reindex=True,
+):  # noqa: GL08
+    from modin.pandas.utils import broadcast_item, is_scalar
+
+    if not isinstance(row_numeric_index, slice):
+        row_numeric_index = list(row_numeric_index)
+    if not isinstance(col_numeric_index, slice):
+        col_numeric_index = list(col_numeric_index)
+    if not is_scalar(item):
+        broadcasted_items, _ = broadcast_item(
+            df,
+            row_numeric_index,
+            col_numeric_index,
+            item,
+            need_columns_reindex=need_columns_reindex,
+        )
+    else:
+        broadcasted_items = item
+
+    if isinstance(df.iloc[row_numeric_index, col_numeric_index], pandas.Series):
+        broadcasted_items = broadcasted_items.squeeze()
+    df.iloc[row_numeric_index, col_numeric_index] = broadcasted_items
+    return df
+
+
+def _setitem(df, axis, key, value):  # noqa: GL08
+    if is_scalar(key) and isinstance(value, pandas.DataFrame):
+        value = value.squeeze()
+    if not axis:
+        df[key] = value
+    else:
+        df.loc[key] = value
+    return df
+
+
+def _delitem(df, key):  # noqa: GL08
+    return df.drop(columns=[key])
+
+
+def _get_dummies(df, columns, **kwargs):  # noqa: GL08
+    return pandas.get_dummies(df, columns=columns, **kwargs)
+
+
+def _register_default_pandas(
+    func,
+    is_series=False,
+    squeeze_args=False,
+    squeeze_kwargs=False,
+    return_raw=False,
+    in_place=False,
+):
+    """
+    Build function that applies the specified method of the passed frame.
+
+    Parameters
+    ----------
+    func : callable
+        Function to apply.
+    is_series : bool, default: False
+        If True, the passed frame will always be squeezed to a series.
+    squeeze_args : bool, default: False
+        If True, all passed arguments will be squeezed.
+    squeeze_kwargs : bool, default: False
+        If True, all passed keyword arguments will be squeezed.
+    return_raw : bool, default: False
+        If True, and the result is not a DataFrame or Series, it is returned as-is without wrapping in a query compiler.
+    in_place : bool, default: False
+        If True, the specified function will be applied on the passed frame in place.
+
+    Returns
+    -------
+    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
+        Function to be applied to the frame.
+    """
+
+    def caller(query_compiler, *args, **kwargs):
+        df = query_compiler._modin_frame
+        if is_series:
+            df = df.squeeze(axis=1)
+        exclude_names = ["fold_axis", "dtypes"]
+        kwargs = kwargs.copy()
+        for name in exclude_names:
+            kwargs.pop(name, None)
+        args = try_cast_to_pandas(args, squeeze=squeeze_args)
+        kwargs = try_cast_to_pandas(kwargs, squeeze=squeeze_kwargs)
+        result = func(df, *args, **kwargs)
+        inplace_method = kwargs.get("inplace", False)
+        if in_place:
+            inplace_method = in_place
+        if inplace_method:
+            result = df
+        if return_raw and not isinstance(result, (pandas.Series, pandas.DataFrame)):
+            return result
+        if isinstance(result, pandas.Series):
+            if result.name is None:
+                result.name = MODIN_UNNAMED_SERIES_LABEL
+            result = result.to_frame()
+
+        return query_compiler.__constructor__(result)
+
+    return caller
+
+
+@_inherit_docstrings(BaseQueryCompiler)
+class NativeQueryCompiler(BaseQueryCompiler):
+    """
+    Query compiler for the pandas storage format.
+
+    This class translates the common query compiler API into
+    native library functions (e.g., pandas) to execute operations
+    on small data depending on the threshold.
+
+    Parameters
+    ----------
+    pandas_frame : pandas.DataFrame
+        The pandas frame to query with the compiled queries.
+    shape_hint : {"row", "column", None}, default: None
+        Shape hint for frames known to be a column or a row, otherwise None.
+    """
+
+    _modin_frame: pandas.DataFrame
+    _shape_hint: Optional[str]
+
+    def __init__(self, pandas_frame, shape_hint: Optional[str] = None):
+        assert NativeDataframeMode.get() == "Pandas"
+        if hasattr(pandas_frame, "_to_pandas"):
+            pandas_frame = pandas_frame._to_pandas()
+        if is_scalar(pandas_frame):
+            pandas_frame = pandas.DataFrame([pandas_frame])
+        elif not isinstance(pandas_frame, pandas.DataFrame):
+            pandas_frame = pandas.DataFrame(pandas_frame)
+
+        self._modin_frame = pandas_frame
+        self._shape_hint = shape_hint
+
+    def execute(self):
+        pass
+
+    @property
+    def frame_has_materialized_dtypes(self) -> bool:
+        """
+        Check if the underlying dataframe has materialized dtypes.
+
+        Returns
+        -------
+        bool
+        """
+        return True
+
+    def set_frame_dtypes_cache(self, dtypes):
+        """
+        Set dtypes cache for the underlying dataframe.
+
+        Parameters
+        ----------
+        dtypes : pandas.Series, ModinDtypes, callable or None
+
+        Notes
+        -----
+        This function is for consistency with other QCs;
+        dtypes should be assigned directly on the frame.
+        """
+        pass
+
+    def set_frame_index_cache(self, index):
+        """
+        Set index cache for the underlying dataframe.
+
+        Parameters
+        ----------
+        index : sequence, callable or None
+
+        Notes
+        -----
+        This function is for consistency with other QCs;
+        index should be assigned directly on the frame.
+        """
+        pass
+
+    @property
+    def frame_has_index_cache(self):
+        """
+        Check if the index cache exists for the underlying dataframe.
+
+        Returns
+        -------
+        bool
+        """
+        return True
+
+    @property
+    def frame_has_dtypes_cache(self) -> bool:
+        """
+        Check if the dtypes cache exists for the underlying dataframe.
+ + Returns + ------- + bool + """ + return True + + def take_2d_positional(self, index=None, columns=None): + index = slice(None) if index is None else index + columns = slice(None) if columns is None else columns + return self.__constructor__(self._modin_frame.iloc[index, columns]) + + def copy(self): + return self.__constructor__(self._modin_frame.copy()) + + def setitem_bool(self, row_loc, col_loc, item): + + self._modin_frame.loc[row_loc._modin_frame.squeeze(axis=1), col_loc] = item + return self.__constructor__(self._modin_frame) + + __and__ = _register_default_pandas(pandas.DataFrame.__and__) + __dir__ = _register_default_pandas(pandas.DataFrame.__dir__) + __eq__ = _register_default_pandas(pandas.DataFrame.__eq__) + __format__ = _register_default_pandas(pandas.DataFrame.__format__) + __ge__ = _register_default_pandas(pandas.DataFrame.__ge__) + __gt__ = _register_default_pandas(pandas.DataFrame.__gt__) + __le__ = _register_default_pandas(pandas.DataFrame.__le__) + __lt__ = _register_default_pandas(pandas.DataFrame.__lt__) + __ne__ = _register_default_pandas(pandas.DataFrame.__ne__) + __or__ = _register_default_pandas(pandas.DataFrame.__or__) + __rand__ = _register_default_pandas(pandas.DataFrame.__rand__) + __reduce__ = _register_default_pandas(pandas.DataFrame.__reduce__, return_raw=True) + __reduce_ex__ = _register_default_pandas( + pandas.DataFrame.__reduce_ex__, return_raw=True + ) + __ror__ = _register_default_pandas(pandas.DataFrame.__ror__) + __rxor__ = _register_default_pandas(pandas.DataFrame.__rxor__) + __sizeof__ = _register_default_pandas(pandas.DataFrame.__sizeof__) + __xor__ = _register_default_pandas(pandas.DataFrame.__xor__) + abs = _register_default_pandas(pandas.DataFrame.abs) + add = _register_default_pandas(_register_binary("add")) + all = _register_default_pandas(pandas.DataFrame.all) + any = _register_default_pandas(pandas.DataFrame.any) + apply = _register_default_pandas(pandas.DataFrame.apply) + apply_on_series = _register_default_pandas(pandas.Series.apply, is_series=True) + applymap = _register_default_pandas(pandas.DataFrame.applymap) + astype = _register_default_pandas(pandas.DataFrame.astype) + case_when = _register_default_pandas(pandas.Series.case_when) + cat_codes = _register_default_pandas(lambda ser: ser.cat.codes, is_series=True) + combine = _register_default_pandas(_combine) + combine_first = _register_default_pandas(lambda df, other: df.combine_first(other)) + compare = _register_default_pandas(pandas.DataFrame.compare) + concat = _register_default_pandas(_concat) + conj = _register_default_pandas( + lambda df, *args, **kwargs: pandas.DataFrame(np.conj(df)) + ) + convert_dtypes = _register_default_pandas(pandas.DataFrame.convert_dtypes) + count = _register_default_pandas(pandas.DataFrame.count) + corr = _register_default_pandas(pandas.DataFrame.corr) + cov = _register_default_pandas(pandas.DataFrame.cov) + cummax = _register_default_pandas(pandas.DataFrame.cummax) + cummin = _register_default_pandas(pandas.DataFrame.cummin) + cumprod = _register_default_pandas(pandas.DataFrame.cumprod) + cumsum = _register_default_pandas(pandas.DataFrame.cumsum) + delitem = _register_default_pandas(_delitem) + df_update = _register_default_pandas(pandas.DataFrame.update, in_place=True) + diff = _register_default_pandas(pandas.DataFrame.diff) + dot = _register_default_pandas(_register_binary("dot")) + drop = _register_default_pandas(_drop) + dropna = _register_default_pandas(pandas.DataFrame.dropna) # axis values switched? 
+ dt_ceil = _register_default_pandas(_dt_func_map("ceil")) + dt_components = _register_default_pandas(_dt_prop_map("components")) + dt_date = _register_default_pandas(_dt_prop_map("date")) + dt_day = _register_default_pandas(_dt_prop_map("day")) + dt_day_name = _register_default_pandas(_dt_func_map("day_name")) + dt_dayofweek = _register_default_pandas(_dt_prop_map("dayofweek")) + dt_dayofyear = _register_default_pandas(_dt_prop_map("dayofyear")) + dt_days = _register_default_pandas(_dt_prop_map("days")) + dt_days_in_month = _register_default_pandas(_dt_prop_map("days_in_month")) + dt_daysinmonth = _register_default_pandas(_dt_prop_map("daysinmonth")) + dt_end_time = _register_default_pandas(_dt_prop_map("end_time")) + dt_floor = _register_default_pandas(_dt_func_map("floor")) + dt_freq = _register_default_pandas( + lambda df: pandas.DataFrame([df.squeeze(axis=1).dt.freq]) + ) + dt_hour = _register_default_pandas(_dt_prop_map("hour")) + dt_is_leap_year = _register_default_pandas(_dt_prop_map("is_leap_year")) + dt_is_month_end = _register_default_pandas(_dt_prop_map("is_month_end")) + dt_is_month_start = _register_default_pandas(_dt_prop_map("is_month_start")) + dt_is_quarter_end = _register_default_pandas(_dt_prop_map("is_quarter_end")) + dt_is_quarter_start = _register_default_pandas(_dt_prop_map("is_quarter_start")) + dt_is_year_end = _register_default_pandas(_dt_prop_map("is_year_end")) + dt_is_year_start = _register_default_pandas(_dt_prop_map("is_year_start")) + dt_microsecond = _register_default_pandas(_dt_prop_map("microsecond")) + dt_microseconds = _register_default_pandas(_dt_prop_map("microseconds")) + dt_minute = _register_default_pandas(_dt_prop_map("minute")) + dt_month = _register_default_pandas(_dt_prop_map("month")) + dt_month_name = _register_default_pandas(_dt_func_map("month_name")) + dt_nanosecond = _register_default_pandas(_dt_prop_map("nanosecond")) + dt_nanoseconds = _register_default_pandas(_dt_prop_map("nanoseconds")) + dt_normalize = _register_default_pandas(_dt_func_map("normalize")) + dt_quarter = _register_default_pandas(_dt_prop_map("quarter")) + dt_qyear = _register_default_pandas(_dt_prop_map("qyear")) + dt_round = _register_default_pandas(_dt_func_map("round")) + dt_second = _register_default_pandas(_dt_prop_map("second")) + dt_seconds = _register_default_pandas(_dt_prop_map("seconds")) + dt_start_time = _register_default_pandas(_dt_prop_map("start_time")) + dt_strftime = _register_default_pandas(_dt_func_map("strftime")) + dt_time = _register_default_pandas(_dt_prop_map("time")) + dt_timetz = _register_default_pandas(_dt_prop_map("timetz")) + dt_to_period = _register_default_pandas(_dt_func_map("to_period")) + dt_to_pydatetime = _register_default_pandas(_dt_func_map("to_pydatetime")) + dt_to_pytimedelta = _register_default_pandas(_dt_func_map("to_pytimedelta")) + dt_to_timestamp = _register_default_pandas(_dt_func_map("to_timestamp")) + dt_total_seconds = _register_default_pandas(_dt_func_map("total_seconds")) + dt_tz = _register_default_pandas( + lambda df: pandas.DataFrame([df.squeeze(axis=1).dt.tz]) + ) + dt_tz_convert = _register_default_pandas(_dt_func_map("tz_convert")) + dt_tz_localize = _register_default_pandas(_dt_func_map("tz_localize")) + dt_week = _register_default_pandas(_dt_prop_map("week")) + dt_weekday = _register_default_pandas(_dt_prop_map("weekday")) + dt_weekofyear = _register_default_pandas(_dt_prop_map("weekofyear")) + dt_year = _register_default_pandas(_dt_prop_map("year")) + duplicated = 
_register_default_pandas(pandas.DataFrame.duplicated) + eq = _register_default_pandas(_register_binary("eq")) + equals = _register_default_pandas(_register_binary("equals")) + eval = _register_default_pandas(pandas.DataFrame.eval) + explode = _register_default_pandas(pandas.DataFrame.explode) + expanding_count = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.count) + ) + expanding_sum = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.sum) + ) + expanding_mean = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.mean) + ) + expanding_median = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.median) + ) + expanding_std = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.std) + ) + expanding_min = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.min) + ) + expanding_max = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.max) + ) + expanding_skew = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.skew) + ) + expanding_kurt = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.kurt) + ) + expanding_sem = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.sem) + ) + expanding_quantile = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.quantile) + ) + expanding_aggregate = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.aggregate) + ) + expanding_var = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.var) + ) + expanding_rank = _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.rank) + ) + + fillna = _register_default_pandas(_fillna) + first_valid_index = _register_default_pandas( + pandas.DataFrame.first_valid_index, return_raw=True + ) + floordiv = _register_default_pandas(_register_binary("floordiv")) + ge = _register_default_pandas(_register_binary("ge")) + get_dummies = _register_default_pandas(_get_dummies) + getitem_array = _register_default_pandas(_getitem_array) + getitem_row_array = _register_default_pandas(_getitem_row_array) + groupby_agg = _register_default_pandas(_groupby("agg")) + groupby_all = _register_default_pandas(_groupby("all")) + groupby_any = _register_default_pandas(_groupby("any")) + groupby_count = _register_default_pandas(_groupby("count")) + groupby_cummax = _register_default_pandas(_groupby("cummax")) + groupby_cummin = _register_default_pandas(_groupby("cummin")) + groupby_cumprod = _register_default_pandas(_groupby("cumprod")) + groupby_cumsum = _register_default_pandas(_groupby("cumsum")) + groupby_dtypes = _register_default_pandas(_groupby("dtypes")) + groupby_fillna = _register_default_pandas(_groupby("fillna")) + groupby_max = _register_default_pandas(_groupby("max")) + groupby_mean = _register_default_pandas(_groupby("mean")) + groupby_median = _register_default_pandas(_groupby("median")) + groupby_min = _register_default_pandas(_groupby("min")) + groupby_nunique = _register_default_pandas(_groupby("nunique")) + groupby_prod = _register_default_pandas(_groupby("prod")) + groupby_quantile = _register_default_pandas(_groupby("quantile")) + groupby_rank = _register_default_pandas(_groupby("rank")) + groupby_shift = 
_register_default_pandas(_groupby("shift")) + groupby_skew = _register_default_pandas(_groupby("skew")) + groupby_std = _register_default_pandas(_groupby("std")) + groupby_sum = _register_default_pandas(_groupby("sum")) + groupby_var = _register_default_pandas(_groupby("var")) + gt = _register_default_pandas(_register_binary("gt")) + idxmax = _register_default_pandas(pandas.DataFrame.idxmax) + idxmin = _register_default_pandas(pandas.DataFrame.idxmin) + infer_objects = _register_default_pandas( + pandas.DataFrame.infer_objects, return_raw=True + ) + insert = _register_default_pandas( + pandas.DataFrame.insert, in_place=True, squeeze_args=True + ) + invert = _register_default_pandas(pandas.DataFrame.__invert__) + is_monotonic = _register_default_pandas( + _is_monotonic("is_monotonic"), is_series=True + ) + is_monotonic_decreasing = _register_default_pandas( + _is_monotonic("is_monotonic_decreasing"), is_series=True + ) + is_monotonic_increasing = _register_default_pandas( + _is_monotonic("is_monotonic_increasing"), is_series=True + ) + isna = _register_default_pandas(pandas.DataFrame.isna) + join = _register_default_pandas(pandas.DataFrame.join) + kurt = _register_default_pandas(pandas.DataFrame.kurt, return_raw=True) + last_valid_index = _register_default_pandas( + pandas.DataFrame.last_valid_index, return_raw=True + ) + le = _register_default_pandas(_register_binary("le")) + lt = _register_default_pandas(_register_binary("lt")) + # mad = _register_default_pandas(pandas.DataFrame.mad) + mask = _register_default_pandas(pandas.DataFrame.mask) + max = _register_default_pandas(pandas.DataFrame.max) + map = _register_default_pandas(pandas.DataFrame.map) + mean = _register_default_pandas(pandas.DataFrame.mean, return_raw=True) + median = _register_default_pandas(pandas.DataFrame.median, return_raw=True) + melt = _register_default_pandas(pandas.DataFrame.melt) + memory_usage = _register_default_pandas(pandas.DataFrame.memory_usage) + merge = _register_default_pandas(pandas.DataFrame.merge) + min = _register_default_pandas(pandas.DataFrame.min) + mod = _register_default_pandas(_register_binary("mod")) + mode = _register_default_pandas(pandas.DataFrame.mode) + mul = _register_default_pandas(_register_binary("mul")) + ne = _register_default_pandas(_register_binary("ne")) + negative = _register_default_pandas(pandas.DataFrame.__neg__) + nlargest = _register_default_pandas(pandas.DataFrame.nlargest) + notna = _register_default_pandas(pandas.DataFrame.notna) + nsmallest = _register_default_pandas(lambda df, **kwargs: df.nsmallest(**kwargs)) + nunique = _register_default_pandas(pandas.DataFrame.nunique) + pivot = _register_default_pandas(pandas.DataFrame.pivot) + pivot_table = _register_default_pandas(pandas.DataFrame.pivot_table) + pow = _register_default_pandas(_register_binary("pow")) + prod = _register_default_pandas(pandas.DataFrame.prod) + prod_min_count = _register_default_pandas(pandas.DataFrame.prod) + quantile_for_list_of_values = _register_default_pandas(pandas.DataFrame.quantile) + quantile_for_single_value = _register_default_pandas(pandas.DataFrame.quantile) + query = _register_default_pandas(pandas.DataFrame.query) + radd = _register_default_pandas(_register_binary("radd")) + rank = _register_default_pandas(pandas.DataFrame.rank) + reindex = _register_default_pandas(_reindex) + repeat = _register_default_pandas(pandas.Series.repeat, is_series=True) + replace = _register_default_pandas(pandas.DataFrame.replace) + resample_agg_df = _register_default_pandas(_register_resample("agg")) + 
resample_agg_ser = _register_default_pandas( + _register_resample("agg"), is_series=True + ) + resample_app_df = _register_default_pandas(_register_resample("apply")) + resample_app_ser = _register_default_pandas( + _register_resample("apply"), is_series=True + ) + resample_asfreq = _register_default_pandas(_register_resample("asfreq")) + resample_backfill = _register_default_pandas(_register_resample("backfill")) + resample_bfill = _register_default_pandas(_register_resample("bfill")) + resample_count = _register_default_pandas(_register_resample("count")) + resample_ffill = _register_default_pandas(_register_resample("ffill")) + resample_fillna = _register_default_pandas(_register_resample("fillna")) + resample_first = _register_default_pandas(_register_resample("first")) + resample_get_group = _register_default_pandas(_register_resample("get_group")) + resample_interpolate = _register_default_pandas(_register_resample("interpolate")) + resample_last = _register_default_pandas(_register_resample("last")) + resample_max = _register_default_pandas(_register_resample("max")) + resample_mean = _register_default_pandas(_register_resample("mean")) + resample_median = _register_default_pandas(_register_resample("median")) + resample_min = _register_default_pandas(_register_resample("min")) + resample_nearest = _register_default_pandas(_register_resample("nearest")) + resample_nunique = _register_default_pandas(_register_resample("nunique")) + resample_ohlc_df = _register_default_pandas(_register_resample("ohlc")) + resample_ohlc_ser = _register_default_pandas( + _register_resample("ohlc"), is_series=True + ) + resample_pad = _register_default_pandas(_register_resample("pad")) + resample_pipe = _register_default_pandas(_register_resample("pipe")) + resample_prod = _register_default_pandas(_register_resample("prod")) + resample_quantile = _register_default_pandas(_register_resample("quantile")) + resample_sem = _register_default_pandas(_register_resample("sem")) + resample_size = _register_default_pandas(_register_resample("size")) + resample_std = _register_default_pandas(_register_resample("std")) + resample_sum = _register_default_pandas(_register_resample("sum")) + resample_transform = _register_default_pandas(_register_resample("transform")) + resample_var = _register_default_pandas(_register_resample("var")) + reset_index = _register_default_pandas(pandas.DataFrame.reset_index) + rfloordiv = _register_default_pandas(_register_binary("rfloordiv")) + rmod = _register_default_pandas(_register_binary("rmod")) + rolling_aggregate = _register_default_pandas(_rolling_func("aggregate")) + rolling_apply = _register_default_pandas(_rolling_func("apply")) + rolling_corr = _register_default_pandas(_rolling_func("corr")) + rolling_count = _register_default_pandas(_rolling_func("count")) + rolling_cov = _register_default_pandas(_rolling_func("cov")) + rolling_kurt = _register_default_pandas(_rolling_func("kurt")) + rolling_max = _register_default_pandas(_rolling_func("max")) + rolling_mean = _register_default_pandas(_rolling_func("mean")) + rolling_median = _register_default_pandas(_rolling_func("median")) + rolling_min = _register_default_pandas(_rolling_func("min")) + rolling_quantile = _register_default_pandas(_rolling_func("quantile")) + rolling_skew = _register_default_pandas(_rolling_func("skew")) + rolling_std = _register_default_pandas(_rolling_func("std")) + rolling_sum = _register_default_pandas(_rolling_func("sum")) + rolling_var = _register_default_pandas(_rolling_func("var")) + round = 
_register_default_pandas(pandas.DataFrame.round) + rmul = _register_default_pandas(_register_binary("rmul")) + rpow = _register_default_pandas(_register_binary("rpow")) + rsub = _register_default_pandas(_register_binary("rsub")) + rtruediv = _register_default_pandas(_register_binary("rtruediv")) + searchsorted = _register_default_pandas(pandas.Series.searchsorted, is_series=True) + sem = _register_default_pandas(pandas.DataFrame.sem) + series_view = _register_default_pandas(pandas.Series.view, is_series=True) + set_index_from_columns = _register_default_pandas(pandas.DataFrame.set_index) + setitem = _register_default_pandas(_setitem) + skew = _register_default_pandas(pandas.DataFrame.skew, return_raw=True) + sort_index = _register_default_pandas(_sort_index) + sort_columns_by_row_values = _register_default_pandas( + lambda df, columns, **kwargs: df.sort_values(by=columns, axis=1, **kwargs) + ) + sort_rows_by_column_values = _register_default_pandas( + lambda df, columns, **kwargs: df.sort_values(by=columns, axis=0, **kwargs) + ) + stack = _register_default_pandas(pandas.DataFrame.stack) + std = _register_default_pandas(pandas.DataFrame.std) + str___getitem__ = _register_default_pandas(_str_map("__getitem__")) + str_capitalize = _register_default_pandas(_str_map("capitalize")) + str_center = _register_default_pandas(_str_map("center")) + str_contains = _register_default_pandas(_str_map("contains")) + str_count = _register_default_pandas(_str_map("count")) + str_endswith = _register_default_pandas(_str_map("endswith")) + str_find = _register_default_pandas(_str_map("find")) + str_findall = _register_default_pandas(_str_map("findall")) + str_get = _register_default_pandas(_str_map("get")) + str_index = _register_default_pandas(_str_map("index")) + str_isalnum = _register_default_pandas(_str_map("isalnum")) + str_isalpha = _register_default_pandas(_str_map("isalpha")) + str_isdecimal = _register_default_pandas(_str_map("isdecimal")) + str_isdigit = _register_default_pandas(_str_map("isdigit")) + str_islower = _register_default_pandas(_str_map("islower")) + str_isnumeric = _register_default_pandas(_str_map("isnumeric")) + str_isspace = _register_default_pandas(_str_map("isspace")) + str_istitle = _register_default_pandas(_str_map("istitle")) + str_isupper = _register_default_pandas(_str_map("isupper")) + str_join = _register_default_pandas(_str_map("join")) + str_len = _register_default_pandas(_str_map("len")) + str_ljust = _register_default_pandas(_str_map("ljust")) + str_lower = _register_default_pandas(_str_map("lower")) + str_lstrip = _register_default_pandas(_str_map("lstrip")) + str_match = _register_default_pandas(_str_map("match")) + str_normalize = _register_default_pandas(_str_map("normalize")) + str_pad = _register_default_pandas(_str_map("pad")) + str_partition = _register_default_pandas(_str_map("partition")) + str_repeat = _register_default_pandas(_str_map("repeat")) + str_replace = _register_default_pandas(_str_map("replace")) + str_rfind = _register_default_pandas(_str_map("rfind")) + str_rindex = _register_default_pandas(_str_map("rindex")) + str_rjust = _register_default_pandas(_str_map("rjust")) + str_rpartition = _register_default_pandas(_str_map("rpartition")) + str_rsplit = _register_default_pandas(_str_map("rsplit")) + str_rstrip = _register_default_pandas(_str_map("rstrip")) + str_slice = _register_default_pandas(_str_map("slice")) + str_slice_replace = _register_default_pandas(_str_map("slice_replace")) + str_split = _register_default_pandas(_str_map("split")) + 
str_startswith = _register_default_pandas(_str_map("startswith")) + str_strip = _register_default_pandas(_str_map("strip")) + str_swapcase = _register_default_pandas(_str_map("swapcase")) + str_title = _register_default_pandas(_str_map("title")) + str_translate = _register_default_pandas(_str_map("translate")) + str_upper = _register_default_pandas(_str_map("upper")) + str_wrap = _register_default_pandas(_str_map("wrap")) + str_zfill = _register_default_pandas(_str_map("zfill")) + sub = _register_default_pandas(_register_binary("sub")) + sum = _register_default_pandas(pandas.DataFrame.sum) + sum_min_count = _register_default_pandas(pandas.DataFrame.sum) + to_datetime = _register_default_pandas(_to_datetime) + to_numeric = _register_default_pandas(_to_numeric) + to_numpy = _register_default_pandas(pandas.DataFrame.to_numpy, return_raw=True) + to_timedelta = _register_default_pandas( + lambda ser, *args, **kwargs: pandas.to_timedelta(ser, *args, **kwargs), + is_series=True, + ) + transpose = _register_default_pandas(pandas.DataFrame.transpose) + truediv = _register_default_pandas(_register_binary("truediv")) + unstack = _register_default_pandas(pandas.DataFrame.unstack) + var = _register_default_pandas(pandas.DataFrame.var) + where = _register_default_pandas(pandas.DataFrame.where) + window_mean = _register_default_pandas(_rolling_func("mean")) + window_std = _register_default_pandas(_rolling_func("std")) + window_sum = _register_default_pandas(_rolling_func("sum")) + window_var = _register_default_pandas(_rolling_func("var")) + write_items = _register_default_pandas(_write_items) + + T = property(transpose) + + add_prefix = _register_default_pandas(pandas.DataFrame.add_prefix) + add_suffix = _register_default_pandas(pandas.DataFrame.add_suffix) + + def clip(self, lower, upper, **kwargs): + if isinstance(lower, BaseQueryCompiler): + lower = lower.to_pandas().squeeze(1) + if isinstance(upper, BaseQueryCompiler): + upper = upper.to_pandas().squeeze(1) + return _register_default_pandas(pandas.DataFrame.clip)( + self, lower, upper, **kwargs + ) + + def describe(self, percentiles: np.ndarray): + return _register_default_pandas(pandas.DataFrame.describe)( + self, + percentiles=percentiles, + include="all", + ) + + def series_update(self, other, **kwargs): + return _register_default_pandas(_register_binary("update"), in_place=True)( + self, + other=other, + squeeze_self=True, + squeeze_other=True, + **kwargs, + ) + + def expanding_cov( + self, + fold_axis, + expanding_args, + squeeze_self, + squeeze_other, + other=None, + pairwise=None, + ddof=1, + numeric_only=False, + **kwargs, + ): + other_for_default = ( + other + if other is None + else ( + other.to_pandas().squeeze(axis=1) + if squeeze_other + else other.to_pandas() + ) + ) + return _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.cov) + )( + self, + fold_axis, + expanding_args, + other=other_for_default, + pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + squeeze_self=squeeze_self, + **kwargs, + ) + + def expanding_corr( + self, + fold_axis, + expanding_args, + squeeze_self, + squeeze_other, + other=None, + pairwise=None, + ddof=1, + numeric_only=False, + **kwargs, + ): + other_for_default = ( + other + if other is None + else ( + other.to_pandas().squeeze(axis=1) + if squeeze_other + else other.to_pandas() + ) + ) + return _register_default_pandas( + _register_expanding(pandas.core.window.expanding.Expanding.corr) + )( + self, + fold_axis, + expanding_args, + other=other_for_default, + 
pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + squeeze_self=squeeze_self, + **kwargs, + ) + + def groupby_size( + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + ): + result = _register_default_pandas(_groupby("size"))( + self, + by=by, + axis=axis, + groupby_kwargs=groupby_kwargs, + agg_args=agg_args, + agg_kwargs=agg_kwargs, + drop=drop, + method="size", + ) + if not groupby_kwargs.get("as_index", False): + # Renaming 'MODIN_UNNAMED_SERIES_LABEL' to a proper name + + result.columns = result.columns[:-1].append(pandas.Index(["size"])) + return result + + def get_axis(self, axis): + return self._modin_frame.index if axis == 0 else self._modin_frame.columns + + def get_index_name(self, axis=0): + return self.get_axis(axis).name + + def get_index_names(self, axis=0): + return self.get_axis(axis).names + + def set_index_name(self, name, axis=0): + self.get_axis(axis).name = name + + def has_multiindex(self, axis=0): + if axis == 0: + return isinstance(self._modin_frame.index, pandas.MultiIndex) + assert axis == 1 + return isinstance(self._modin_frame.columns, pandas.MultiIndex) + + def isin(self, values, ignore_indices=False, **kwargs): + if isinstance(values, type(self)) and ignore_indices: + # Pandas logic is that it ignores indexing if 'values' is a 1D object + values = values.to_pandas().squeeze(axis=1) + if self._shape_hint == "column": + return _register_default_pandas(pandas.Series.isin, is_series=True)( + self, values, **kwargs + ) + else: + return _register_default_pandas(pandas.DataFrame.isin)( + self, values, **kwargs + ) + + def to_pandas(self): + return self._modin_frame + + @classmethod + def from_pandas(cls, df, data_cls): + return cls(df) + + @classmethod + def from_arrow(cls, at, data_cls): + return cls(at.to_pandas()) + + def free(self): + return + + def finalize(self): + return + + # Dataframe exchange protocol + + def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): + return self._modin_frame.__dataframe__( + nan_as_null=nan_as_null, allow_copy=allow_copy + ) + + @classmethod + def from_dataframe(cls, df, data_cls): + return cls(pandas.api.interchange.from_dataframe(df)) + + # END Dataframe exchange protocol + + index = property(_get_axis(0), _set_axis(0)) + columns = property(_get_axis(1), _set_axis(1)) + + @property + def dtypes(self): + return self._modin_frame.dtypes + + def getitem_column_array(self, key, numeric=False, ignore_order=False): + if numeric: + return self.__constructor__(self._modin_frame.iloc[:, key]) + return self.__constructor__(self._modin_frame.loc[:, key]) + + def is_series_like(self): + return len(self._modin_frame.columns) == 1 or len(self._modin_frame.index) == 1 + + def support_materialization_in_worker_process(self) -> bool: + """ + Whether it's possible to call function `to_pandas` during the pickling process, at the moment of recreating the object. + + Returns + ------- + bool + """ + return False + + def get_pandas_backend(self) -> Optional[str]: + """ + Get backend stored in `_modin_frame`. + + Returns + ------- + str | None + Backend name. 
+ """ + return None diff --git a/modin/tests/pandas/dataframe/test_binary.py b/modin/tests/pandas/dataframe/test_binary.py index 108e2620aac..ea67592097c 100644 --- a/modin/tests/pandas/dataframe/test_binary.py +++ b/modin/tests/pandas/dataframe/test_binary.py @@ -17,7 +17,7 @@ import pytest import modin.pandas as pd -from modin.config import NPartitions, StorageFormat +from modin.config import NativeDataframeMode, NPartitions, StorageFormat from modin.core.dataframe.pandas.partitioning.axis_partition import ( PandasDataframeAxisPartition, ) @@ -210,6 +210,10 @@ def operation(df): StorageFormat.get() != "Pandas", reason="Modin on this engine doesn't create virtual partitions.", ) +@pytest.mark.skipif( + NativeDataframeMode.get() == "Pandas", + reason="NativeQueryCompiler does not contain partitions.", +) @pytest.mark.parametrize( "left_virtual,right_virtual", [(True, False), (False, True), (True, True)] ) diff --git a/modin/tests/pandas/dataframe/test_default.py b/modin/tests/pandas/dataframe/test_default.py index 697a0d7f120..71f49924c94 100644 --- a/modin/tests/pandas/dataframe/test_default.py +++ b/modin/tests/pandas/dataframe/test_default.py @@ -22,7 +22,7 @@ from numpy.testing import assert_array_equal import modin.pandas as pd -from modin.config import Engine, NPartitions, StorageFormat +from modin.config import Engine, NativeDataframeMode, NPartitions, StorageFormat from modin.pandas.io import to_pandas from modin.tests.pandas.utils import ( axis_keys, @@ -123,6 +123,10 @@ def test_to_numpy(data): assert_array_equal(modin_df.values, pandas_df.values) +@pytest.mark.skipif( + NativeDataframeMode.get() == "Pandas", + reason="NativeQueryCompiler does not contain partitions.", +) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_partition_to_numpy(data): frame = pd.DataFrame(data) @@ -294,8 +298,8 @@ def test_corr_min_periods(self, min_periods): {"a": [1, np.nan, 3, 4, 5, 6], "b": [1, 2, 1, 4, 5, np.nan]} ) modin_df = pd.concat([modin_df.iloc[:3], modin_df.iloc[3:]]) - - assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1) + if NativeDataframeMode.get() == "Default": + assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1) eval_general( modin_df, pandas_df, lambda df: df.corr(min_periods=min_periods) ) @@ -313,6 +317,10 @@ def test_corr_non_numeric(self, numeric_only): StorageFormat.get() != "Pandas", reason="doesn't make sense for non-partitioned executions", ) + @pytest.mark.skipif( + NativeDataframeMode.get() == "Pandas", + reason="NativeQueryCompiler does not contain partitions.", + ) def test_corr_nans_in_different_partitions(self): # NaN in the first partition modin_df, pandas_df = create_test_dfs( @@ -602,7 +610,13 @@ def test_pivot(data, index, columns, values, request): in request.node.callspec.id or "default-one_column-several_columns_index" in request.node.callspec.id or "default-one_column-one_column_index" in request.node.callspec.id - or (current_execution in ("BaseOnPython",) and index is lib.no_default) + or ( + ( + current_execution in ("BaseOnPython",) + or NativeDataframeMode.get() == "Pandas" + ) + and index is lib.no_default + ) ): pytest.xfail(reason="https://github.com/modin-project/modin/issues/7010") @@ -980,7 +994,8 @@ def test_resampler_functions_with_arg(rule, axis, method_arg): "DateColumn", marks=pytest.mark.xfail( condition=Engine.get() in ("Ray", "Unidist", "Dask", "Python") - and StorageFormat.get() != "Base", + and StorageFormat.get() != "Base" + and NativeDataframeMode.get() == 
"Default", reason="https://github.com/modin-project/modin/issues/6399", ), ), diff --git a/modin/tests/pandas/dataframe/test_indexing.py b/modin/tests/pandas/dataframe/test_indexing.py index 52603343619..a47474eb76c 100644 --- a/modin/tests/pandas/dataframe/test_indexing.py +++ b/modin/tests/pandas/dataframe/test_indexing.py @@ -21,7 +21,7 @@ from pandas._testing import ensure_clean import modin.pandas as pd -from modin.config import MinRowPartitionSize, NPartitions +from modin.config import MinRowPartitionSize, NativeDataframeMode, NPartitions from modin.pandas.indexing import is_range_like from modin.pandas.testing import assert_index_equal from modin.tests.pandas.utils import ( @@ -586,6 +586,10 @@ def test_loc_setting_single_categorical_column(): df_equals(modin_df, pandas_df) +@pytest.mark.skipif( + NativeDataframeMode.get() == "Pandas", + reason="NativeQueryCompiler does not currently support IO functions.", +) def test_loc_multi_index(): modin_df = pd.read_csv( "modin/tests/pandas/data/blah.csv", header=[0, 1, 2, 3], index_col=0 @@ -2238,6 +2242,10 @@ def test___setitem__partitions_aligning(): df_equals(md_df, pd_df) +@pytest.mark.skipif( + NativeDataframeMode.get() == "Pandas", + reason="NativeQueryCompiler does not currently support IO functions.", +) def test___setitem__with_mismatched_partitions(): with ensure_clean(".csv") as fname: np.savetxt(fname, np.random.randint(0, 100, size=(200_000, 99)), delimiter=",") diff --git a/modin/tests/pandas/dataframe/test_iter.py b/modin/tests/pandas/dataframe/test_iter.py index b00ae056920..38ab70524a2 100644 --- a/modin/tests/pandas/dataframe/test_iter.py +++ b/modin/tests/pandas/dataframe/test_iter.py @@ -142,7 +142,9 @@ def test_display_options_for___repr__(max_rows_columns, expand_frame_repr, frame def test___finalize__(): data = test_data_values[0] - with warns_that_defaulting_to_pandas(): + # Using `force` for `NativeDataframeMode` as the warnings are raised at the API layer, + # before geting into the Query Compiler layer. + with warns_that_defaulting_to_pandas(force=True): pd.DataFrame(data).__finalize__(None) @@ -230,7 +232,9 @@ def test___repr__(): "2016-08-26 09:00:16.413",5,60.193055,24.767427,5,"WALKING",85,"ON_BICYCLE",15,"UNKNOWN",0 "2016-08-26 09:00:20.578",3,60.152996,24.745216,3.90000009536743,"STILL",69,"IN_VEHICLE",31,"UNKNOWN",0""" pandas_df = pandas.read_csv(io.StringIO(string_data)) - with warns_that_defaulting_to_pandas(): + # Using `force` for `NativeDataframeMode` as the warnings are raised at the API layer, + # before geting into the Query Compiler layer. 
+ with warns_that_defaulting_to_pandas(force=True): modin_df = pd.read_csv(io.StringIO(string_data)) assert repr(pandas_df) == repr(modin_df) diff --git a/modin/tests/pandas/dataframe/test_join_sort.py b/modin/tests/pandas/dataframe/test_join_sort.py index 670eb9ff911..06ee419e6ec 100644 --- a/modin/tests/pandas/dataframe/test_join_sort.py +++ b/modin/tests/pandas/dataframe/test_join_sort.py @@ -19,7 +19,7 @@ import pytest import modin.pandas as pd -from modin.config import Engine, NPartitions, StorageFormat +from modin.config import Engine, NativeDataframeMode, NPartitions, StorageFormat from modin.pandas.io import to_pandas from modin.tests.pandas.utils import ( arg_keys, @@ -732,7 +732,7 @@ def test_sort_values_descending_with_only_two_bins(): modin_df = pd.concat([part1, part2]) pandas_df = modin_df._to_pandas() - if StorageFormat.get() == "Pandas": + if StorageFormat.get() == "Pandas" and NativeDataframeMode.get() == "Default": assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1) eval_general( @@ -772,7 +772,7 @@ def test_sort_values_with_one_partition(ascending): np.array([["hello", "goodbye"], ["hello", "Hello"]]) ) - if StorageFormat.get() == "Pandas": + if StorageFormat.get() == "Pandas" and NativeDataframeMode.get() == "Default": assert modin_df._query_compiler._modin_frame._partitions.shape == (1, 1) eval_general( @@ -892,7 +892,8 @@ def test_sort_values_with_only_one_non_na_row_in_partition(ascending, na_positio @pytest.mark.skipif( - Engine.get() not in ("Ray", "Unidist", "Dask"), + Engine.get() not in ("Ray", "Unidist", "Dask") + or NativeDataframeMode.get() == "Pandas", reason="We only need to test this case where sort does not default to pandas.", ) def test_sort_values_with_sort_key_on_partition_boundary(): diff --git a/modin/tests/pandas/dataframe/test_map_metadata.py b/modin/tests/pandas/dataframe/test_map_metadata.py index d6980cd6761..07b195bdafa 100644 --- a/modin/tests/pandas/dataframe/test_map_metadata.py +++ b/modin/tests/pandas/dataframe/test_map_metadata.py @@ -19,7 +19,12 @@ import pytest import modin.pandas as pd -from modin.config import MinRowPartitionSize, NPartitions, StorageFormat +from modin.config import ( + MinRowPartitionSize, + NativeDataframeMode, + NPartitions, + StorageFormat, +) from modin.core.dataframe.pandas.metadata import LazyProxyCategoricalDtype from modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas from modin.pandas.testing import assert_index_equal, assert_series_equal @@ -299,7 +304,10 @@ def test_copy(data): assert new_modin_df.columns is not modin_df.columns assert new_modin_df.dtypes is not modin_df.dtypes - if get_current_execution() != "BaseOnPython": + if ( + get_current_execution() != "BaseOnPython" + and NativeDataframeMode.get() == "Default" + ): assert np.array_equal( new_modin_df._query_compiler._modin_frame._partitions, modin_df._query_compiler._modin_frame._partitions, @@ -565,6 +573,10 @@ def test_astype_int64_to_astype_category_github_issue_6259(): get_current_execution() == "BaseOnPython", reason="BaseOnPython doesn't have proxy categories", ) +@pytest.mark.skipif( + NativeDataframeMode.get() == "Pandas", + reason="NativeQueryCompiler doesn't have proxy categories", +) class TestCategoricalProxyDtype: """This class contains test and test usilities for the ``LazyProxyCategoricalDtype`` class.""" @@ -787,6 +799,10 @@ def comparator(df1, df2): ) +@pytest.mark.skipif( + NativeDataframeMode.get() == "Pandas", + reason="NativeQueryCompiler does not contain partitions.", +) def 
test_convert_dtypes_multiple_row_partitions():
     # Column 0 should have string dtype
     modin_part1 = pd.DataFrame(["a"]).convert_dtypes()
@@ -811,7 +827,7 @@ def test_convert_dtypes_5653():
     modin_part1 = pd.DataFrame({"col1": ["a", "b", "c", "d"]})
     modin_part2 = pd.DataFrame({"col1": [None, None, None, None]})
     modin_df = pd.concat([modin_part1, modin_part2])
-    if StorageFormat.get() == "Pandas":
+    if StorageFormat.get() == "Pandas" and NativeDataframeMode.get() == "Default":
         assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1)
     modin_df = modin_df.convert_dtypes()
     assert len(modin_df.dtypes) == 1
diff --git a/modin/tests/pandas/dataframe/test_pickle.py b/modin/tests/pandas/dataframe/test_pickle.py
index 97c78c9cd74..5450ca4f26c 100644
--- a/modin/tests/pandas/dataframe/test_pickle.py
+++ b/modin/tests/pandas/dataframe/test_pickle.py
@@ -52,7 +52,6 @@ def test__reduce__():
     # pre-processed for the distributed engine.
     dataframe_data = ["Major League Baseball", "National Basketball Association"]
     abbr_md, abbr_pd = create_test_dfs(dataframe_data, index=["MLB", "NBA"])
-    # breakpoint()
 
     dataframe_data = {
         "name": ["Mariners", "Lakers"] * 500,
diff --git a/modin/tests/test_utils.py b/modin/tests/test_utils.py
index bc478d957f9..1597b052853 100644
--- a/modin/tests/test_utils.py
+++ b/modin/tests/test_utils.py
@@ -11,6 +11,7 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
+import contextlib
 import json
 from textwrap import dedent, indent
 from unittest.mock import Mock, patch
@@ -21,6 +22,7 @@
 
 import modin.pandas as pd
 import modin.utils
+from modin.config import NativeDataframeMode
 from modin.error_message import ErrorMessage
 from modin.tests.pandas.utils import create_test_dfs
 
@@ -248,7 +250,7 @@ def test_format_string():
     assert answer == expected
 
 
-def warns_that_defaulting_to_pandas(prefix=None, suffix=None):
+def warns_that_defaulting_to_pandas(prefix=None, suffix=None, force=False):
     """
     Assert that code warns that it's defaulting to pandas.
 
@@ -260,13 +262,21 @@ def warns_that_defaulting_to_pandas(prefix=None, suffix=None):
     suffix : Optional[str]
         If specified, checks that the end of the warning message matches
         this argument after "[Dd]efaulting to pandas".
+    force : bool, default: False
+        If ``True``, return the ``pytest.recwarn.WarningsChecker`` irrespective of ``NativeDataframeMode``.
 
     Returns
    -------
-    pytest.recwarn.WarningsChecker
-        A WarningsChecker checking for a UserWarning saying that Modin is
-        defaulting to Pandas.
+    pytest.recwarn.WarningsChecker or contextlib.nullcontext
+        If Modin is not operating in ``NativeDataframeMode``, a ``WarningsChecker``
+        is returned, which will check for a ``UserWarning`` indicating that Modin
+        is defaulting to Pandas. If ``NativeDataframeMode`` is set, a
+        ``nullcontext`` is returned to suppress the defaulting-to-pandas check,
+        since defaulting to pandas is expected when the user has set ``NativeDataframeMode``.
""" + if NativeDataframeMode.get() == "Pandas" and not force: + return contextlib.nullcontext() + match = "[Dd]efaulting to pandas" if prefix: # Message may be separated by newlines From 82499151f8fecc4c858695cc75ecd2f164457c32 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Mon, 26 Aug 2024 15:31:23 +0200 Subject: [PATCH 12/20] FEAT-#7337: Using dynamic partitionning in `broadcast_apply` (#7338) Signed-off-by: Kirill Suvorov --- modin/core/dataframe/algebra/groupby.py | 8 ++- .../pandas/partitioning/partition_manager.py | 59 ++++++++++++++++++- .../storage_formats/pandas/query_compiler.py | 4 +- modin/tests/pandas/test_groupby.py | 33 ++++++++++- 4 files changed, 95 insertions(+), 9 deletions(-) diff --git a/modin/core/dataframe/algebra/groupby.py b/modin/core/dataframe/algebra/groupby.py index cc9196a422a..fec0fe3c6ac 100644 --- a/modin/core/dataframe/algebra/groupby.py +++ b/modin/core/dataframe/algebra/groupby.py @@ -655,9 +655,11 @@ def aggregate_on_dict(grp_obj, *args, **kwargs): ) native_res_part = [] if native_agg_res is None else [native_agg_res] - result = pandas.concat( - [*native_res_part, *custom_results], axis=1, copy=False - ) + parts = [*native_res_part, *custom_results] + if parts: + result = pandas.concat(parts, axis=1, copy=False) + else: + result = pandas.DataFrame(columns=result_columns) # The order is naturally preserved if there's no custom aggregations if preserve_aggregation_order and len(custom_aggs): diff --git a/modin/core/dataframe/pandas/partitioning/partition_manager.py b/modin/core/dataframe/pandas/partitioning/partition_manager.py index 05854239206..cb207f64d4e 100644 --- a/modin/core/dataframe/pandas/partitioning/partition_manager.py +++ b/modin/core/dataframe/pandas/partitioning/partition_manager.py @@ -440,7 +440,7 @@ def get_partitions(index): @classmethod @wait_computations_if_benchmark_mode - def broadcast_apply(cls, axis, apply_func, left, right): + def base_broadcast_apply(cls, axis, apply_func, left, right): """ Broadcast the `right` partitions to `left` and apply `apply_func` function. @@ -504,6 +504,7 @@ def broadcast_axis_partitions( keep_partitioning=False, num_splits=None, apply_indices=None, + broadcast_all=True, enumerate_partitions=False, lengths=None, apply_func_args=None, @@ -532,6 +533,8 @@ def broadcast_axis_partitions( then the number of splits is preserved. apply_indices : list of ints, default: None Indices of `axis ^ 1` to apply function over. + broadcast_all : bool, default: True + Whether or not to pass all right axis partitions to each of the left axis partitions. enumerate_partitions : bool, default: False Whether or not to pass partition index into `apply_func`. Note that `apply_func` must be able to accept `partition_idx` kwarg. @@ -578,7 +581,6 @@ def broadcast_axis_partitions( # load-balance the data as well. 
kw = { "num_splits": num_splits, - "other_axis_partition": right_partitions, "maintain_partitioning": keep_partitioning, } if lengths: @@ -593,6 +595,9 @@ def broadcast_axis_partitions( left_partitions[i].apply( preprocessed_map_func, *(apply_func_args if apply_func_args else []), + other_axis_partition=( + right_partitions if broadcast_all else right_partitions[i] + ), **kw, **({"partition_idx": idx} if enumerate_partitions else {}), **kwargs, @@ -648,6 +653,56 @@ def base_map_partitions( ] ) + @classmethod + @wait_computations_if_benchmark_mode + def broadcast_apply( + cls, + axis, + apply_func, + left, + right, + ): + """ + Broadcast the `right` partitions to `left` and apply `apply_func` function using different approaches to achieve the best performance. + + Parameters + ---------- + axis : {0, 1} + Axis to apply and broadcast over. + apply_func : callable + Function to apply. + left : np.ndarray + NumPy array of left partitions. + right : np.ndarray + NumPy array of right partitions. + + Returns + ------- + np.ndarray + NumPy array of result partition objects. + """ + if not DynamicPartitioning.get(): + # block-wise broadcast + new_partitions = cls.base_broadcast_apply( + axis, + apply_func, + left, + right, + ) + else: + # The dynamic partitioning behavior of `broadcast_apply` differs from that of `map_partitions`, + # since the columnar approach for `broadcast_apply` results in slowdown. + # axis-wise broadcast + new_partitions = cls.broadcast_axis_partitions( + axis=axis ^ 1, + left=left, + right=right, + apply_func=apply_func, + broadcast_all=False, + keep_partitioning=True, + ) + return new_partitions + @classmethod @wait_computations_if_benchmark_mode def map_partitions( diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 3581516a638..410bd2b50d8 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -3157,9 +3157,7 @@ def dropna(self, **kwargs): lib.no_default, None, ) - # FIXME: this is a naive workaround for this problem: https://github.com/modin-project/modin/issues/5394 - # if there are too many partitions then all non-full-axis implementations start acting very badly. 
-        # The here threshold is pretty random though it works fine on simple scenarios
+        # The map-reduce approach works well for frames with few columnar partitions.
         processable_amount_of_partitions = (
             self._modin_frame.num_parts < CpuCount.get() * 32
         )
diff --git a/modin/tests/pandas/test_groupby.py b/modin/tests/pandas/test_groupby.py
index b82473c674b..36987c0d931 100644
--- a/modin/tests/pandas/test_groupby.py
+++ b/modin/tests/pandas/test_groupby.py
@@ -21,7 +21,13 @@
 import pytest
 
 import modin.pandas as pd
-from modin.config import IsRayCluster, NPartitions, RangePartitioning, StorageFormat
+from modin.config import (
+    IsRayCluster,
+    NPartitions,
+    RangePartitioning,
+    StorageFormat,
+    context,
+)
 from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy
 from modin.core.dataframe.pandas.partitioning.axis_partition import (
     PandasDataframeAxisPartition,
@@ -2431,6 +2437,31 @@ def test_multi_column_groupby_different_partitions(
     )
 
 
+def test_empty_partitions_after_groupby():
+    def func_to_apply(grp):
+        return grp.agg(
+            {
+                list(test_data_values[0].keys())[1]: "sum",
+                list(test_data_values[0].keys())[-1]: "sum",
+            }
+        )
+
+    data = test_data_values[0]
+    md_df, pd_df = create_test_dfs(data)
+    by = pd_df.columns[0]
+
+    with context(DynamicPartitioning=True):
+        md_grp, pd_grp = (
+            md_df.groupby(by),
+            pd_df.groupby(by),
+        )
+        eval_general(
+            md_grp,
+            pd_grp,
+            func_to_apply,
+        )
+
+
 @pytest.mark.parametrize(
     "by",
     [
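(A minimal usage sketch, not part of the patch: it shows how the new axis-wise
path can be selected from user code, assuming `DynamicPartitioning` and the
`context` helper behave as in the test above; the frame contents are
illustrative only.)

    import modin.pandas as pd
    from modin.config import context

    df = pd.DataFrame({"key": [1, 2, 3] * 1_000, "val": range(3_000)})
    # With DynamicPartitioning enabled, broadcast_apply routes through
    # broadcast_axis_partitions(broadcast_all=False) instead of the
    # block-wise base_broadcast_apply.
    with context(DynamicPartitioning=True):
        result = df.groupby("key").agg({"val": "sum"})

From f70176a796db92a5484ae4d3530906bdc3f5eb70 Mon Sep 17 00:00:00 2001
From: Jonathan Shi
Date: Wed, 28 Aug 2024 03:32:28 -0700
Subject: [PATCH 13/20] FIX-#7371: Fix inserting datelike values into a DataFrame (#7372)

Signed-off-by: Jonathan Shi
---
 modin/core/dataframe/pandas/metadata/dtypes.py    |  2 +-
 modin/tests/pandas/dataframe/test_map_metadata.py | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/modin/core/dataframe/pandas/metadata/dtypes.py b/modin/core/dataframe/pandas/metadata/dtypes.py
index 1918cce16fa..9220a2dace4 100644
--- a/modin/core/dataframe/pandas/metadata/dtypes.py
+++ b/modin/core/dataframe/pandas/metadata/dtypes.py
@@ -1225,7 +1225,7 @@ def extract_dtype(value) -> DtypeObj | pandas.Series:
     """
     try:
         dtype = pandas.api.types.pandas_dtype(value)
-    except TypeError:
+    except (TypeError, ValueError):
         dtype = pandas.Series(value).dtype
     return dtype
 
diff --git a/modin/tests/pandas/dataframe/test_map_metadata.py b/modin/tests/pandas/dataframe/test_map_metadata.py
index 07b195bdafa..fc9c3b76ea7 100644
--- a/modin/tests/pandas/dataframe/test_map_metadata.py
+++ b/modin/tests/pandas/dataframe/test_map_metadata.py
@@ -1837,3 +1837,18 @@ def test_constructor_from_index():
     data = pd.Index([1, 2, 3], name="pricing_date")
     modin_df, pandas_df = create_test_dfs(data)
     df_equals(modin_df, pandas_df)
+
+
+def test_insert_datelike_string_issue_7371():
+    # When a new value is inserted into a frame, we call pandas.api.types.pandas_dtype(value) to
+    # extract the dtype of an object like a pandas Series or numpy array. When a scalar value is passed,
+    # this usually raises a TypeError, so we construct a local pandas Series from the object and
+    # extract the dtype from there.
+    # When the passed value is a date-like string, pandas will instead raise a ValueError because
+    # it tries to parse it as a numpy structured dtype. After fixing GH#7371, we now catch
+    # ValueError in addition to TypeError to handle this case.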
+    modin_df = pd.DataFrame({"a": [0]})
+    modin_df["c"] = "2020-01-01"
+    pandas_df = pandas.DataFrame({"a": [0]})
+    pandas_df["c"] = "2020-01-01"
+    df_equals(modin_df, pandas_df)
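(A small illustration, not part of the patch, of the dtype-extraction fallback
the test above describes; the exception behavior follows that test comment.)

    import pandas

    # Most scalars make pandas_dtype() raise TypeError, which triggers the
    # Series-based fallback:
    try:
        dtype = pandas.api.types.pandas_dtype(5)
    except TypeError:
        dtype = pandas.Series(5).dtype  # int64
    # A date-like string is instead parsed as a (malformed) numpy structured
    # dtype spec and surfaces as ValueError, which extract_dtype() now also catches:
    try:
        dtype = pandas.api.types.pandas_dtype("2020-01-01")
    except (TypeError, ValueError):
        dtype = pandas.Series("2020-01-01").dtype  # object

From 5f4d40114ed89ef42116b272583fc2f6f700f72e Mon Sep 17 00:00:00 2001
From: Jonathan Shi
Date: Mon, 2 Sep 2024 01:29:24 -0700
Subject: [PATCH 14/20] FIX-#7379: Fix __imul__ performing addition instead of
 multiplication (#7380)

Signed-off-by: Jonathan Shi
---
 modin/pandas/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 76470ab243c..d18a0bec778 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -517,7 +517,7 @@ def __rtruediv__(self, left) -> Series:
         return self.rtruediv(left)
 
     __iadd__ = __add__
-    __imul__ = __add__
+    __imul__ = __mul__
     __ipow__ = __pow__
     __isub__ = __sub__
     __itruediv__ = __truediv__

(The user-visible effect, sketched with illustrative values, not part of the patch:)

    import modin.pandas as pd

    s = pd.Series([2, 3])
    s *= 2
    # Before this fix, in-place multiply dispatched to __add__ and gave [4, 5];
    # with __imul__ = __mul__ it correctly gives [4, 6].

From cf5d638ec7a69d2d851a7d43f23c96640eaab9dd Mon Sep 17 00:00:00 2001
From: Arun Jose <40291569+arunjose696@users.noreply.github.com>
Date: Mon, 2 Sep 2024 14:29:23 +0200
Subject: [PATCH 15/20] FEAT-#7308: Interoperability between query compilers
 (#7376)

Co-authored-by: Anatoly Myachev
Co-authored-by: Igoshev, Iaroslav
Signed-off-by: arunjose696
---
 .github/workflows/ci.yml                      |   8 +
 .../pandas/native_query_compiler.py           |   5 +-
 .../storage_formats/pandas/query_compiler.py  |   3 +-
 .../pandas/query_compiler_caster.py           | 159 +++++
 modin/pandas/dataframe.py                     |   5 +-
 modin/tests/pandas/native_df_mode/__init__.py |  12 +
 .../pandas/native_df_mode/test_binary.py      | 198 ++++++
 .../pandas/native_df_mode/test_default.py     | 338 +++++++++
 .../pandas/native_df_mode/test_indexing.py    | 668 ++++++++++++++++++
 .../tests/pandas/native_df_mode/test_iter.py  | 137 ++++
 .../pandas/native_df_mode/test_join_sort.py   | 411 +++++++++++
 .../native_df_mode/test_map_metadata.py       | 258 +++++++
 .../pandas/native_df_mode/test_pickle.py      |  73 ++
 .../pandas/native_df_mode/test_window.py      | 101 +++
 modin/tests/pandas/native_df_mode/utils.py    | 133 ++++
 15 files changed, 2502 insertions(+), 7 deletions(-)
 create mode 100644 modin/core/storage_formats/pandas/query_compiler_caster.py
 create mode 100644 modin/tests/pandas/native_df_mode/__init__.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_binary.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_default.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_indexing.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_iter.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_join_sort.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_map_metadata.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_pickle.py
 create mode 100644 modin/tests/pandas/native_df_mode/test_window.py
 create mode 100644 modin/tests/pandas/native_df_mode/utils.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9186500682a..8fb26225613 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -698,6 +698,14 @@ jobs:
       - run: python -m pytest modin/tests/pandas/dataframe/test_reduce.py
      - run: python -m pytest modin/tests/pandas/dataframe/test_udf.py
       - run: python -m pytest modin/tests/pandas/dataframe/test_window.py
+      - run: python -m pytest modin/tests/pandas/native_df_mode/test_binary.py
+      - run: python -m pytest modin/tests/pandas/native_df_mode/test_default.py
+      - run: python -m pytest modin/tests/pandas/native_df_mode/test_indexing.py
+      - run: python -m pytest modin/tests/pandas/native_df_mode/test_iter.py
+      - run: python -m pytest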
modin/tests/pandas/native_df_mode/test_join_sort.py + - run: python -m pytest modin/tests/pandas/native_df_mode/test_map_metadata.py + - run: python -m pytest modin/tests/pandas/native_df_mode/test_pickle.py + - run: python -m pytest modin/tests/pandas/native_df_mode/test_window.py - uses: ./.github/actions/upload-coverage merge-coverage-artifacts: diff --git a/modin/core/storage_formats/pandas/native_query_compiler.py b/modin/core/storage_formats/pandas/native_query_compiler.py index bfe331cfc6e..12f9da6ef46 100644 --- a/modin/core/storage_formats/pandas/native_query_compiler.py +++ b/modin/core/storage_formats/pandas/native_query_compiler.py @@ -24,8 +24,8 @@ import pandas from pandas.core.dtypes.common import is_list_like, is_scalar -from modin.config.envvars import NativeDataframeMode from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler +from modin.core.storage_formats.pandas.query_compiler_caster import QueryCompilerCaster from modin.utils import ( MODIN_UNNAMED_SERIES_LABEL, _inherit_docstrings, @@ -565,7 +565,7 @@ def caller(query_compiler, *args, **kwargs): @_inherit_docstrings(BaseQueryCompiler) -class NativeQueryCompiler(BaseQueryCompiler): +class NativeQueryCompiler(BaseQueryCompiler, QueryCompilerCaster): """ Query compiler for the pandas storage format. @@ -585,7 +585,6 @@ class NativeQueryCompiler(BaseQueryCompiler): _shape_hint: Optional[str] def __init__(self, pandas_frame, shape_hint: Optional[str] = None): - assert NativeDataframeMode.get() == "Pandas" if hasattr(pandas_frame, "_to_pandas"): pandas_frame = pandas_frame._to_pandas() if is_scalar(pandas_frame): diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 410bd2b50d8..c7fb0bae21b 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -66,6 +66,7 @@ extract_dtype, ) from modin.core.storage_formats import BaseQueryCompiler +from modin.core.storage_formats.pandas.query_compiler_caster import QueryCompilerCaster from modin.error_message import ErrorMessage from modin.logging import get_logger from modin.utils import ( @@ -253,7 +254,7 @@ def caller(df, *args, **kwargs): @_inherit_docstrings(BaseQueryCompiler) -class PandasQueryCompiler(BaseQueryCompiler): +class PandasQueryCompiler(BaseQueryCompiler, QueryCompilerCaster): """ Query compiler for the pandas storage format. diff --git a/modin/core/storage_formats/pandas/query_compiler_caster.py b/modin/core/storage_formats/pandas/query_compiler_caster.py new file mode 100644 index 00000000000..211860a8427 --- /dev/null +++ b/modin/core/storage_formats/pandas/query_compiler_caster.py @@ -0,0 +1,159 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
+
+"""
+Module contains ``QueryCompilerCaster`` class.
+
+``QueryCompilerCaster`` is used for automatically casting query compiler
+arguments to the type of the current query compiler for query compiler class functions.
+This ensures compatibility between different query compiler classes.
+"""
+
+import functools
+import inspect
+from types import FunctionType, MethodType
+from typing import Any, Dict, Tuple, TypeVar
+
+from pandas.core.indexes.frozen import FrozenList
+
+from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
+
+Fn = TypeVar("Fn", bound=Any)
+
+
+class QueryCompilerCaster:
+    """Cast all query compiler arguments of the member function to current query compiler."""
+
+    @classmethod
+    def __init_subclass__(
+        cls,
+        **kwargs: Dict,
+    ) -> None:
+        """
+        Apply type casting to all children of ``QueryCompilerCaster``.
+
+        This method is called automatically when a class inherits from
+        ``QueryCompilerCaster``. It ensures that all member functions within the
+        subclass have their arguments automatically cast to the current query
+        compiler type.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed to the parent's ``__init_subclass__``.
+        """
+        super().__init_subclass__(**kwargs)
+        apply_argument_cast(cls)
+
+
+def cast_nested_args_to_current_qc_type(arguments, current_qc):
+    """
+    Cast all query compiler arguments, however deeply nested, to the type of `current_qc`.
+
+    Parameters
+    ----------
+    arguments : tuple or dict
+        Positional or keyword arguments; may contain nested lists and dicts.
+    current_qc : BaseQueryCompiler
+        Query compiler whose type the arguments are cast to.
+
+    Returns
+    -------
+    tuple or dict
+        Returns args and kwargs with all query compilers cast to the type of `current_qc`.
+    """
+
+    def cast_arg_to_current_qc(arg):
+        current_qc_type = type(current_qc)
+        if isinstance(arg, BaseQueryCompiler) and not isinstance(arg, current_qc_type):
+            data_cls = current_qc._modin_frame
+            return current_qc_type.from_pandas(arg.to_pandas(), data_cls)
+        else:
+            return arg
+
+    immutable_types = (FrozenList, tuple)
+    if isinstance(arguments, immutable_types):
+        args_type = type(arguments)
+        arguments = list(arguments)
+        arguments = cast_nested_args_to_current_qc_type(arguments, current_qc)
+
+        return args_type(arguments)
+    if isinstance(arguments, list):
+        for i in range(len(arguments)):
+            if isinstance(arguments[i], (list, dict)):
+                cast_nested_args_to_current_qc_type(arguments[i], current_qc)
+            else:
+                arguments[i] = cast_arg_to_current_qc(arguments[i])
+    elif isinstance(arguments, dict):
+        for key in arguments:
+            if isinstance(arguments[key], (list, dict)):
+                cast_nested_args_to_current_qc_type(arguments[key], current_qc)
+            else:
+                arguments[key] = cast_arg_to_current_qc(arguments[key])
+    return arguments
+
+
+def apply_argument_cast(obj: Fn) -> Fn:
+    """
+    Cast all arguments that are query compilers to the current query compiler.
+
+    Parameters
+    ----------
+    obj : function
+        Function, class, classmethod, or staticmethod to decorate.
+
+    Returns
+    -------
+    function
+        Returns decorated function which does argument casting.
+    """
+    if isinstance(obj, type):
+        all_attrs = dict(inspect.getmembers(obj))
+        all_attrs.pop("__abstractmethods__")
+
+        # This is required because inspect converts class methods to member functions
+        current_class_attrs = vars(obj)
+        for key in current_class_attrs:
+            all_attrs[key] = current_class_attrs[key]
+
+        for attr_name, attr_value in all_attrs.items():
+            if isinstance(
+                attr_value, (FunctionType, MethodType, classmethod, staticmethod)
+            ):
+                wrapped = apply_argument_cast(attr_value)
+                setattr(obj, attr_name, wrapped)
+        return obj  # type: ignore [return-value]
+    elif isinstance(obj, classmethod):
+        return classmethod(apply_argument_cast(obj.__func__))  # type: ignore [return-value, arg-type]
+    elif isinstance(obj, staticmethod):
+        return staticmethod(apply_argument_cast(obj.__func__))
+
+    @functools.wraps(obj)
+    def cast_args(*args: Tuple, **kwargs: Dict) -> Any:
+        """
+        Add casting for query compiler arguments.
+
+        Parameters
+        ----------
+        *args : tuple
+            The function arguments.
+        **kwargs : dict
+            The function keyword arguments.
+
+        Returns
+        -------
+        Any
+            The result of the wrapped function, with any query compiler
+            arguments cast to the calling query compiler's type.
+        """
+        current_qc = args[0]
+        if isinstance(current_qc, BaseQueryCompiler):
+            kwargs = cast_nested_args_to_current_qc_type(kwargs, current_qc)
+            args = cast_nested_args_to_current_qc_type(args, current_qc)
+        return obj(*args, **kwargs)
+
+    return cast_args
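(A rough usage sketch of the interoperability this file enables, not part of
the patch; it assumes ``NativeDataframeMode`` can be toggled through
``modin.config.context`` as in the tests added below, and the frame names are
hypothetical. Operands backed by different query compilers can now be mixed in
one operation, with arguments cast to the receiving compiler's type.)

    import modin.pandas as pd
    from modin.config import context

    # Backed by the default, partitioned PandasQueryCompiler.
    df_partitioned = pd.DataFrame({"a": [1, 2, 3]})

    # Backed by the NativeQueryCompiler, which executes on plain pandas.
    with context(NativeDataframeMode="Pandas"):
        df_native = pd.DataFrame({"a": [10, 20, 30]})

    # The binary operation reaches PandasQueryCompiler with a
    # NativeQueryCompiler argument; apply_argument_cast converts it via the
    # to_pandas()/from_pandas() round trip before the method body runs.
    result = df_partitioned + df_native

(Note the design choice: casting goes through a pandas round trip, which is
simple and works for any compiler pair, at the cost of materializing the data.)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 3d97efb4af4..de96ea0ab26 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -2993,9 +2993,8 @@ def _create_or_update_from_compiler(
         DataFrame or None
             None if update was done, ``DataFrame`` otherwise.
         """
-        assert (
-            isinstance(new_query_compiler, type(self._query_compiler))
-            or type(new_query_compiler) in self._query_compiler.__class__.__bases__
+        assert isinstance(
+            new_query_compiler, self._query_compiler.__class__.__bases__
         ), "Invalid Query Compiler object: {}".format(type(new_query_compiler))
         if not inplace:
             return self.__constructor__(query_compiler=new_query_compiler)
diff --git a/modin/tests/pandas/native_df_mode/__init__.py b/modin/tests/pandas/native_df_mode/__init__.py
new file mode 100644
index 00000000000..cae6413e559
--- /dev/null
+++ b/modin/tests/pandas/native_df_mode/__init__.py
@@ -0,0 +1,12 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
diff --git a/modin/tests/pandas/native_df_mode/test_binary.py b/modin/tests/pandas/native_df_mode/test_binary.py
new file mode 100644
index 00000000000..82c837b6416
--- /dev/null
+++ b/modin/tests/pandas/native_df_mode/test_binary.py
@@ -0,0 +1,198 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership.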
The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +from itertools import product + +import matplotlib +import pytest + +from modin.config import NativeDataframeMode, NPartitions +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + eval_general_interop, +) +from modin.tests.pandas.utils import ( + default_to_pandas_ignore_string, + df_equals, + test_data, + test_data_keys, + test_data_values, +) + +NPartitions.put(4) + +# Force matplotlib to not use any Xwindows backend. +matplotlib.use("Agg") + +# Our configuration in pytest.ini requires that we explicitly catch all +# instances of defaulting to pandas, but some test modules, like this one, +# have too many such instances. +pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string) + + +@pytest.mark.parametrize( + "other", + [ + lambda df, axis: 4, + lambda df, axis: df.iloc[0] if axis == "columns" else list(df[df.columns[0]]), + lambda df, axis: { + label: idx + 1 + for idx, label in enumerate(df.axes[0 if axis == "rows" else 1]) + }, + lambda df, axis: { + label if idx % 2 else f"random_key{idx}": idx + 1 + for idx, label in enumerate(df.axes[0 if axis == "rows" else 1][::-1]) + }, + ], + ids=[ + "scalar", + "series_or_list", + "dictionary_keys_equal_columns", + "dictionary_keys_unequal_columns", + ], +) +@pytest.mark.parametrize("axis", ["rows", "columns"]) +@pytest.mark.parametrize( + "op", + [ + *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"), + *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"), + ], +) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +@pytest.mark.parametrize("backend", [None, "pyarrow"]) +def test_math_functions(other, axis, op, backend, df_mode_pair): + data = test_data["float_nan_data"] + if (op == "floordiv" or op == "rfloordiv") and axis == "rows": + # lambda == "series_or_list" + pytest.xfail(reason="different behavior") + + if op == "rmod" and axis == "rows": + # lambda == "series_or_list" + pytest.xfail(reason="different behavior") + + if op in ("mod", "rmod") and backend == "pyarrow": + pytest.skip(reason="These functions are not implemented in pandas itself") + + eval_general_interop( + data, + backend, + lambda df1, df2: getattr(df1, op)(other(df2, axis), axis=axis), + df_mode_pair, + ) + + +@pytest.mark.parametrize("other", [lambda df: 2, lambda df: df]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test___divmod__(other, df_mode_pair): + data = test_data["float_nan_data"] + eval_general_interop( + data, None, lambda df1, df2: divmod(df1, other(df2)), df_mode_pair + ) + + +@pytest.mark.parametrize("other", ["as_left", 4]) +@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"]) +@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def 
test_comparison(data, op, other, request, df_mode_pair): + def operation(df1, df2): + return getattr(df1, op)(df2 if other == "as_left" else other) + + expected_exception = None + if "int_data" in request.node.callspec.id and other == "a": + pytest.xfail(reason="https://github.com/modin-project/modin/issues/7019") + elif "float_nan_data" in request.node.callspec.id and other == "a": + expected_exception = TypeError( + "Invalid comparison between dtype=float64 and str" + ) + eval_general_interop( + data, + None, + operation, + df_mode_pair, + expected_exception=expected_exception, + ) + + +@pytest.mark.parametrize( + "frame1_data,frame2_data,expected_pandas_equals", + [ + pytest.param({}, {}, True, id="two_empty_dataframes"), + pytest.param([[1]], [[0]], False, id="single_unequal_values"), + pytest.param([[None]], [[None]], True, id="single_none_values"), + pytest.param( + [[1, 2], [3, 4]], + [[1, 2], [3, 4]], + True, + id="equal_two_by_two_dataframes", + ), + pytest.param( + [[1, 2], [3, 4]], + [[5, 2], [3, 4]], + False, + id="unequal_two_by_two_dataframes", + ), + ], +) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_equals(frame1_data, frame2_data, expected_pandas_equals, df_mode_pair): + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + frame1_data, df_mode=df_mode_pair[0] + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + frame2_data, df_mode=df_mode_pair[1] + ) + + pandas_equals = pandas_df1.equals(pandas_df2) + assert pandas_equals == expected_pandas_equals, ( + "Test expected pandas to say the dataframes were" + + f"{'' if expected_pandas_equals else ' not'} equal, but they were" + + f"{' not' if expected_pandas_equals else ''} equal." + ) + + assert modin_df1.equals(modin_df2) == pandas_equals + assert modin_df1.equals(pandas_df2) == pandas_equals + + +@pytest.mark.parametrize("empty_operand", ["right", "left", "both"]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_empty_df(empty_operand, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode( + [0, 1, 2, 0, 1, 2], df_mode=df_mode_pair[0] + ) + modin_df_empty, pandas_df_empty = create_test_df_in_defined_mode( + df_mode=df_mode_pair[1] + ) + + if empty_operand == "right": + modin_res = modin_df + modin_df_empty + pandas_res = pandas_df + pandas_df_empty + elif empty_operand == "left": + modin_res = modin_df_empty + modin_df + pandas_res = pandas_df_empty + pandas_df + else: + modin_res = modin_df_empty + modin_df_empty + pandas_res = pandas_df_empty + pandas_df_empty + + df_equals(modin_res, pandas_res) diff --git a/modin/tests/pandas/native_df_mode/test_default.py b/modin/tests/pandas/native_df_mode/test_default.py new file mode 100644 index 00000000000..03d6d372fd4 --- /dev/null +++ b/modin/tests/pandas/native_df_mode/test_default.py @@ -0,0 +1,338 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + + +from itertools import product + +import matplotlib +import numpy as np +import pandas +import pytest +from numpy.testing import assert_array_equal + +import modin.pandas as pd +from modin.config import NativeDataframeMode, NPartitions +from modin.pandas.io import to_pandas +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, + eval_general_interop, +) +from modin.tests.pandas.utils import ( + default_to_pandas_ignore_string, + df_equals, + test_data, + test_data_diff_dtype, + test_data_keys, + test_data_large_categorical_dataframe, + test_data_values, +) +from modin.tests.test_utils import warns_that_defaulting_to_pandas + +NPartitions.put(4) + +# Force matplotlib to not use any Xwindows backend. +matplotlib.use("Agg") + +# Our configuration in pytest.ini requires that we explicitly catch all +# instances of defaulting to pandas, but some test modules, like this one, +# have too many such instances. +pytestmark = [ + pytest.mark.filterwarnings(default_to_pandas_ignore_string), + # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT + pytest.mark.filterwarnings( + "ignore:.*bool is now deprecated and will be removed:FutureWarning" + ), + pytest.mark.filterwarnings( + "ignore:first is deprecated and will be removed:FutureWarning" + ), + pytest.mark.filterwarnings( + "ignore:last is deprecated and will be removed:FutureWarning" + ), +] + + +@pytest.mark.parametrize( + "op, make_args", + [ + ("align", lambda df: {"other": df}), + ("corrwith", lambda df: {"other": df}), + ("ewm", lambda df: {"com": 0.5}), + ("from_dict", lambda df: {"data": None}), + ("from_records", lambda df: {"data": to_pandas(df)}), + ("hist", lambda df: {"column": "int_col"}), + ("interpolate", None), + ("mask", lambda df: {"cond": df != 0}), + ("pct_change", None), + ("to_xarray", None), + ("flags", None), + ("set_flags", lambda df: {"allows_duplicate_labels": False}), + ], +) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_ops_defaulting_to_pandas(op, make_args, df_mode_pair): + modin_df1, _ = create_test_df_in_defined_mode( + test_data_diff_dtype, + post_fn=lambda df: df.drop(["str_col", "bool_col"], axis=1), + df_mode=df_mode_pair[0], + ) + modin_df2, _ = create_test_df_in_defined_mode( + test_data_diff_dtype, + post_fn=lambda df: df.drop(["str_col", "bool_col"], axis=1), + df_mode=df_mode_pair[1], + ) + with warns_that_defaulting_to_pandas(): + operation = getattr(modin_df1, op) + if make_args is not None: + operation(**make_args(modin_df2)) + else: + try: + operation() + # `except` for non callable attributes + except TypeError: + pass + + +@pytest.mark.parametrize( + "data", + test_data_values + [test_data_large_categorical_dataframe], + ids=test_data_keys + ["categorical_ints"], +) +def test_to_numpy(data): + modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data) + assert_array_equal(modin_df.values, pandas_df.values) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_asfreq(df_mode_pair): + index = 
pd.date_range("1/1/2000", periods=4, freq="min")
+    series, _ = create_test_series_in_defined_mode(
+        [0.0, None, 2.0, 3.0], index=index, df_mode=df_mode_pair[0]
+    )
+    df, _ = create_test_df_in_defined_mode({"s": series}, df_mode=df_mode_pair[1])
+    with warns_that_defaulting_to_pandas():
+        # We are only testing that this defaults to pandas, so we will just check for
+        # the warning
+        df.asfreq(freq="30S")
+
+
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_assign(df_mode_pair):
+    data = test_data_values[0]
+
+    def assign_one_column(df1, df2):
+        df1.assign(new_column=pd.Series(df2.iloc[:, 0]))
+
+    eval_general_interop(data, None, assign_one_column, df_mode_pair)
+
+    def assign_multiple_columns(df1, df2):
+        df1.assign(
+            new_column=pd.Series(df2.iloc[:, 0]), new_column2=pd.Series(df2.iloc[:, 1])
+        )
+
+    eval_general_interop(data, None, assign_multiple_columns, df_mode_pair)
+
+
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_combine_first(df_mode_pair):
+    data1 = {"A": [None, 0], "B": [None, 4]}
+    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
+        data1, df_mode=df_mode_pair[0]
+    )
+    data2 = {"A": [1, 1], "B": [3, 3]}
+    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
+        data2, df_mode=df_mode_pair[1]
+    )
+
+    df_equals(
+        modin_df1.combine_first(modin_df2),
+        pandas_df1.combine_first(pandas_df2),
+        # https://github.com/modin-project/modin/issues/5959
+        check_dtypes=False,
+    )
+
+
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_dot(data, df_mode_pair):
+
+    modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0])
+    col_len = len(modin_df.columns)
+
+    # Test series input
+    modin_series, pandas_series = create_test_series_in_defined_mode(
+        np.arange(col_len),
+        index=pandas_df.columns,
+        df_mode=df_mode_pair[1],
+    )
+    modin_result = modin_df.dot(modin_series)
+    pandas_result = pandas_df.dot(pandas_series)
+    df_equals(modin_result, pandas_result)
+
+    def dot_func(df1, df2):
+        return df1.dot(df2.T)
+
+    # Test dataframe input
+    eval_general_interop(data, None, dot_func, df_mode_pair)
+
+    # Test when input series index doesn't line up with columns
+    with pytest.raises(ValueError):
+        modin_series_without_index, _ = create_test_series_in_defined_mode(
+            np.arange(col_len), df_mode=df_mode_pair[1]
+        )
+        modin_df.dot(modin_series_without_index)
+
+    # Test case when left dataframe has size (n x 1)
+    # and right dataframe has size (1 x n)
+    eval_general_interop(pandas_series, None, dot_func, df_mode_pair)
+
+
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_matmul(data, df_mode_pair):
+    modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0])
+    col_len = len(modin_df.columns)
+
+    # Test list input
+    arr = np.arange(col_len)
+    modin_result = modin_df @ arr
+    pandas_result = pandas_df @ arr
+    df_equals(modin_result, pandas_result)
+
+    # Test bad dimensions
+    with pytest.raises(ValueError):
+        modin_df @ np.arange(col_len + 10)
+
+    # Test series input
+    modin_series, pandas_series = create_test_series_in_defined_mode(
+        
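+        # (illustrative note) `@` dispatches through DataFrame.__matmul__,
+        # which in pandas simply calls DataFrame.dot, so this 1-D operand
+        # must align with `pandas_df.columns`, exactly like the series input
+        # in test_dot above.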
np.arange(col_len), + index=pandas_df.columns, + df_mode=df_mode_pair[1], + ) + modin_result = modin_df @ modin_series + pandas_result = pandas_df @ pandas_series + df_equals(modin_result, pandas_result) + + # Test dataframe input + def matmul_func(df1, df2): + return df1 @ df2.T + + # Test dataframe input + eval_general_interop(data, None, matmul_func, df_mode_pair) + + # Test when input series index doesn't line up with columns + with pytest.raises(ValueError): + modin_series_without_index, _ = create_test_series_in_defined_mode( + np.arange(col_len), df_mode=df_mode_pair[1] + ) + modin_df @ modin_series_without_index + + +@pytest.mark.parametrize("data", [test_data["int_data"]], ids=["int_data"]) +@pytest.mark.parametrize( + "index", + [ + pytest.param(lambda _, df: df.columns[0], id="single_index_col"), + pytest.param( + lambda _, df: [*df.columns[0:2], *df.columns[-7:-4]], + id="multiple_index_cols", + ), + pytest.param(None, id="default_index"), + ], +) +@pytest.mark.parametrize( + "columns", + [ + pytest.param(lambda _, df: df.columns[len(df.columns) // 2], id="single_col"), + pytest.param( + lambda _, df: [ + *df.columns[(len(df.columns) // 2) : (len(df.columns) // 2 + 4)], + df.columns[-7], + ], + id="multiple_cols", + ), + pytest.param(None, id="default_columns"), + ], +) +@pytest.mark.parametrize( + "values", + [ + pytest.param(lambda _, df: df.columns[-1], id="single_value_col"), + pytest.param(lambda _, df: df.columns[-4:-1], id="multiple_value_cols"), + ], +) +@pytest.mark.parametrize( + "aggfunc", + [ + pytest.param(lambda df, _: np.mean(df), id="callable_tree_reduce_func"), + pytest.param("mean", id="tree_reduce_func"), + pytest.param("nunique", id="full_axis_func"), + ], +) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_pivot_table_data(data, index, columns, values, aggfunc, request, df_mode_pair): + if ( + "callable_tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols" + in request.node.callspec.id + or "callable_tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols" + in request.node.callspec.id + or "tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols" + in request.node.callspec.id + or "tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols" + in request.node.callspec.id + or "full_axis_func-single_value_col-multiple_cols-multiple_index_cols" + in request.node.callspec.id + or "full_axis_func-multiple_value_cols-multiple_cols-multiple_index_cols" + in request.node.callspec.id + ): + pytest.xfail(reason="https://github.com/modin-project/modin/issues/7011") + + expected_exception = None + if "default_columns-default_index" in request.node.callspec.id: + expected_exception = ValueError("No group keys passed!") + elif ( + "callable_tree_reduce_func" in request.node.callspec.id + and "int_data" in request.node.callspec.id + ): + expected_exception = TypeError("'numpy.float64' object is not callable") + + eval_general_interop( + data, + None, + operation=lambda df, _, *args, **kwargs: df.pivot_table( + *args, **kwargs + ).sort_index(axis=int(index is not None)), + df_mode_pair=df_mode_pair, + index=index, + columns=columns, + values=values, + aggfunc=aggfunc, + expected_exception=expected_exception, + ) diff --git a/modin/tests/pandas/native_df_mode/test_indexing.py b/modin/tests/pandas/native_df_mode/test_indexing.py new file mode 100644 index 00000000000..b434026394a --- /dev/null +++ b/modin/tests/pandas/native_df_mode/test_indexing.py @@ -0,0 
+1,668 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. +from itertools import product + +import matplotlib +import numpy as np +import pandas +import pytest + +import modin.pandas as pd +from modin.config import NativeDataframeMode, NPartitions +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, + eval_general_interop, +) +from modin.tests.pandas.utils import ( + RAND_HIGH, + RAND_LOW, + default_to_pandas_ignore_string, + df_equals, + eval_general, + test_data, + test_data_keys, + test_data_values, +) + +NPartitions.put(4) + +# Force matplotlib to not use any Xwindows backend. +matplotlib.use("Agg") + +# Our configuration in pytest.ini requires that we explicitly catch all +# instances of defaulting to pandas, but some test modules, like this one, +# have too many such instances. +# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances +# of defaulting to pandas. +pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string) + + +def eval_setitem(md_df, pd_df, value, col=None, loc=None, expected_exception=None): + if loc is not None: + col = pd_df.columns[loc] + + value_getter = value if callable(value) else (lambda *args, **kwargs: value) + + eval_general( + md_df, + pd_df, + lambda df: df.__setitem__(col, value_getter(df)), + __inplace__=True, + expected_exception=expected_exception, + ) + df_mode_pair_list = list(product(NativeDataframeMode.choices, repeat=2)) + for df_mode_pair in df_mode_pair_list: + eval_general_interop( + pd_df, + None, + lambda df1, df2: df1.__setitem__(col, value_getter(df2)), + df_mode_pair, + __inplace__=True, + expected_exception=expected_exception, + ) + + +def eval_loc(md_df, pd_df, value, key): + if isinstance(value, tuple): + assert len(value) == 2 + # case when value for pandas different + md_value, pd_value = value + else: + md_value, pd_value = value, value + + eval_general( + md_df, + pd_df, + lambda df: df.loc.__setitem__( + key, pd_value if isinstance(df, pandas.DataFrame) else md_value + ), + __inplace__=True, + ) + + +@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) +@pytest.mark.parametrize( + "key_func", + [ + # test for the case from https://github.com/modin-project/modin/issues/4308 + lambda df: "non_existing_column", + lambda df: df.columns[0], + lambda df: df.index, + lambda df: [df.index, df.columns[0]], + lambda df: ( + pandas.Series(list(range(len(df.index)))) + if isinstance(df, pandas.DataFrame) + else pd.Series(list(range(len(df)))) + ), + ], + ids=[ + "non_existing_column", + "first_column_name", + "original_index", + "list_of_index_and_first_column_name", + "series_of_integers", + ], +) +@pytest.mark.parametrize( + "drop_kwargs", + [{"drop": True}, {"drop": False}, {}], + 
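+    # (note) `drop_kwargs` is forwarded to DataFrame.set_index below:
+    # drop=True removes the key column(s) from the frame, and the empty dict
+    # exercises the pandas default, which is also drop=True.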
ids=["drop_True", "drop_False", "no_drop_param"], +) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_set_index(data, key_func, drop_kwargs, request, df_mode_pair): + if ( + "list_of_index_and_first_column_name" in request.node.name + and "drop_False" in request.node.name + ): + pytest.xfail( + reason="KeyError: https://github.com/modin-project/modin/issues/5636" + ) + expected_exception = None + if "non_existing_column" in request.node.callspec.id: + expected_exception = KeyError( + "None of ['non_existing_column'] are in the columns" + ) + + eval_general_interop( + data, + None, + lambda df1, df2: df1.set_index(key_func(df2), **drop_kwargs), + expected_exception=expected_exception, + df_mode_pair=df_mode_pair, + ) + + +@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_loc(data, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) + + indices = [i % 3 == 0 for i in range(len(modin_df.index))] + columns = [i % 5 == 0 for i in range(len(modin_df.columns))] + + # Key is a Modin or pandas series of booleans + series1, _ = create_test_series_in_defined_mode(indices, df_mode=df_mode_pair[0]) + series2, _ = create_test_series_in_defined_mode( + columns, index=modin_df.columns, df_mode=df_mode_pair[0] + ) + df_equals( + modin_df.loc[series1, series2], + pandas_df.loc[ + pandas.Series(indices), pandas.Series(columns, index=modin_df.columns) + ], + ) + + +@pytest.mark.parametrize("left, right", [(2, 1), (6, 1), (lambda df: 70, 1), (90, 70)]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_loc_insert_row(left, right, df_mode_pair): + # This test case comes from + # https://github.com/modin-project/modin/issues/3764 + data = [[1, 2, 3], [4, 5, 6]] + + def _test_loc_rows(df1, df2): + df1.loc[left] = df2.loc[right] + return df1 + + expected_exception = None + if right == 70: + pytest.xfail(reason="https://github.com/modin-project/modin/issues/7024") + + eval_general_interop( + data, + None, + _test_loc_rows, + expected_exception=expected_exception, + df_mode_pair=df_mode_pair, + ) + + +@pytest.fixture(params=list(product(NativeDataframeMode.choices, repeat=2))) +def loc_iter_dfs_interop(request): + df_mode_pair = request.param + columns = ["col1", "col2", "col3"] + index = ["row1", "row2", "row3"] + md_df1, pd_df1 = create_test_df_in_defined_mode( + {col: ([idx] * len(index)) for idx, col in enumerate(columns)}, + columns=columns, + index=index, + df_mode=df_mode_pair[0], + ) + md_df2, pd_df2 = create_test_df_in_defined_mode( + {col: ([idx] * len(index)) for idx, col in enumerate(columns)}, + columns=columns, + index=index, + df_mode=df_mode_pair[1], + ) + return md_df1, pd_df1, md_df2, pd_df2 + + +@pytest.mark.parametrize("reverse_order", [False, True]) +@pytest.mark.parametrize("axis", [0, 1]) +def test_loc_iter_assignment(loc_iter_dfs_interop, reverse_order, axis): + if reverse_order and axis: + pytest.xfail( + "Due to internal sorting of lookup values assignment order is lost, see GH-#2552" + ) + + md_df1, pd_df1, md_df2, pd_df2 = loc_iter_dfs_interop + + select = [slice(None), slice(None)] + select[axis] = sorted(pd_df1.axes[axis][:-1], reverse=reverse_order) + select = tuple(select) + + pd_df1.loc[select] = pd_df1.loc[select] + pd_df2.loc[select] + md_df1.loc[select] = md_df1.loc[select] + 
md_df2.loc[select]
+    df_equals(md_df1, pd_df1)
+
+
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_loc_series(df_mode_pair):
+    md_df1, pd_df1 = create_test_df_in_defined_mode(
+        {"a": [1, 2], "b": [3, 4]}, df_mode=df_mode_pair[0]
+    )
+    md_df2, pd_df2 = create_test_df_in_defined_mode(
+        {"a": [1, 2], "b": [3, 4]}, df_mode=df_mode_pair[1]
+    )
+
+    pd_df1.loc[pd_df2["a"] > 1, "b"] = np.log(pd_df1["b"])
+    md_df1.loc[md_df2["a"] > 1, "b"] = np.log(md_df1["b"])
+
+    df_equals(pd_df1, md_df1)
+
+
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_reindex_like(df_mode_pair):
+    o_data = [
+        [24.3, 75.7, "high"],
+        [31, 87.8, "high"],
+        [22, 71.6, "medium"],
+        [35, 95, "medium"],
+    ]
+    o_columns = ["temp_celsius", "temp_fahrenheit", "windspeed"]
+    o_index = pd.date_range(start="2014-02-12", end="2014-02-15", freq="D")
+    new_data = [[28, "low"], [30, "low"], [35.1, "medium"]]
+    new_columns = ["temp_celsius", "windspeed"]
+    new_index = pd.DatetimeIndex(["2014-02-12", "2014-02-13", "2014-02-15"])
+    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
+        o_data,
+        columns=o_columns,
+        index=o_index,
+        df_mode=df_mode_pair[0],
+    )
+    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
+        new_data,
+        columns=new_columns,
+        index=new_index,
+        df_mode=df_mode_pair[1],
+    )
+    modin_result = modin_df2.reindex_like(modin_df1)
+    pandas_result = pandas_df2.reindex_like(pandas_df1)
+    df_equals(modin_result, pandas_result)
+
+
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_reindex_multiindex(df_mode_pair):
+    data1, data2 = np.random.randint(1, 20, (5, 5)), np.random.randint(10, 25, 6)
+    index = np.array(["AUD", "BRL", "CAD", "EUR", "INR"])
+    pandas_midx = pandas.MultiIndex.from_product(
+        [["Bank_1", "Bank_2"], ["AUD", "CAD", "EUR"]], names=["Bank", "Currency"]
+    )
+    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
+        data=data1, index=index, columns=index, df_mode=df_mode_pair[0]
+    )
+    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
+        data=data2, index=pandas_midx, df_mode=df_mode_pair[1]
+    )
+
+    modin_df2.columns, pandas_df2.columns = ["Notional"], ["Notional"]
+    md_midx = pd.MultiIndex.from_product([modin_df2.index.levels[0], modin_df1.index])
+    pd_midx = pandas.MultiIndex.from_product(
+        [pandas_df2.index.levels[0], pandas_df1.index]
+    )
+    # reindex without axis, index, or columns
+    modin_result = modin_df1.reindex(md_midx, fill_value=0)
+    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0)
+    df_equals(modin_result, pandas_result)
+    # reindex with only axis
+    modin_result = modin_df1.reindex(md_midx, fill_value=0, axis=0)
+    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, axis=0)
+    df_equals(modin_result, pandas_result)
+    # reindex with axis and level
+    modin_result = modin_df1.reindex(md_midx, fill_value=0, axis=0, level=0)
+    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, axis=0, level=0)
+    df_equals(modin_result, pandas_result)
+
+
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_getitem_empty_mask(df_mode_pair):
+    # modin-project/modin#517
+    modin_frames = []
+    pandas_frames = []
+    data1 = np.random.randint(0, 100, size=(100, 4))
+    mdf1, pdf1 = create_test_df_in_defined_mode(
+        data1, columns=list("ABCD"), df_mode=df_mode_pair[0]
+    )
+
+    modin_frames.append(mdf1)
+    pandas_frames.append(pdf1)
+
+    data2 = np.random.randint(0, 
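+        # (note) a second, independently generated block; concatenating
+        # several frames below presumably spreads the data across multiple
+        # partitions before the all-False mask is applied.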
100, size=(100, 4)) + mdf2, pdf2 = create_test_df_in_defined_mode( + data2, columns=list("ABCD"), df_mode=df_mode_pair[1] + ) + modin_frames.append(mdf2) + pandas_frames.append(pdf2) + + data3 = np.random.randint(0, 100, size=(100, 4)) + mdf3, pdf3 = create_test_df_in_defined_mode( + data3, columns=list("ABCD"), df_mode=df_mode_pair[0] + ) + modin_frames.append(mdf3) + pandas_frames.append(pdf3) + + modin_data = pd.concat(modin_frames) + pandas_data = pandas.concat(pandas_frames) + df_equals( + modin_data[[False for _ in modin_data.index]], + pandas_data[[False for _ in modin_data.index]], + ) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test___setitem__mask(df_mode_pair): + # DataFrame mask: + data = test_data["int_data"] + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + data, df_mode=df_mode_pair[0] + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + data, df_mode=df_mode_pair[0] + ) + + mean = int((RAND_HIGH + RAND_LOW) / 2) + pandas_df1[pandas_df2 > mean] = -50 + modin_df1[modin_df2 > mean] = -50 + + df_equals(modin_df1, pandas_df1) + + +@pytest.mark.parametrize( + "data", + [ + {}, + {"id": [], "max_speed": [], "health": []}, + {"id": [1], "max_speed": [2], "health": [3]}, + {"id": [4, 40, 400], "max_speed": [111, 222, 333], "health": [33, 22, 11]}, + ], + ids=["empty_frame", "empty_cols", "1_length_cols", "2_length_cols"], +) +@pytest.mark.parametrize( + "value", + [[11, 22], [11, 22, 33]], + ids=["2_length_val", "3_length_val"], +) +@pytest.mark.parametrize("convert_to_series", [False, True]) +@pytest.mark.parametrize("new_col_id", [123, "new_col"], ids=["integer", "string"]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_setitem_on_empty_df(data, value, convert_to_series, new_col_id, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) + + def applyier(df): + if convert_to_series: + converted_value = ( + pandas.Series(value) + if isinstance(df, pandas.DataFrame) + else create_test_series_in_defined_mode(value, df_mode=df_mode_pair[1])[ + 1 + ] + ) + else: + converted_value = value + df[new_col_id] = converted_value + return df + + expected_exception = None + if not convert_to_series: + values_length = len(value) + index_length = len(pandas_df.index) + expected_exception = ValueError( + f"Length of values ({values_length}) does not match length of index ({index_length})" + ) + + eval_general( + modin_df, + pandas_df, + applyier, + # https://github.com/modin-project/modin/issues/5961 + comparator_kwargs={ + "check_dtypes": not (len(pandas_df) == 0 and len(pandas_df.columns) != 0) + }, + expected_exception=expected_exception, + ) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_setitem_on_empty_df_4407(df_mode_pair): + data = {} + index = pd.date_range(end="1/1/2018", periods=0, freq="D") + column = pd.date_range(end="1/1/2018", periods=1, freq="h")[0] + modin_df, pandas_df = create_test_df_in_defined_mode( + data, columns=index, df_mode=df_mode_pair[0] + ) + modin_ser, pandas_ser = create_test_series_in_defined_mode( + [1], df_mode=df_mode_pair[1] + ) + modin_df[column] = modin_ser + pandas_df[column] = pandas_ser + + df_equals(modin_df, pandas_df) + assert modin_df.columns.freq == pandas_df.columns.freq + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def 
test_setitem_2d_insertion(df_mode_pair): + def build_value_picker(modin_value, pandas_value): + """Build a function that returns either Modin or pandas DataFrame depending on the passed frame.""" + return lambda source_df, *args, **kwargs: ( + modin_value + if isinstance(source_df, (pd.DataFrame, pd.Series)) + else pandas_value + ) + + modin_df, pandas_df = create_test_df_in_defined_mode( + test_data["int_data"], df_mode=df_mode_pair[0] + ) + + # Easy case - key and value.columns are equal + modin_value, pandas_value = create_test_df_in_defined_mode( + { + "new_value1": np.arange(len(modin_df)), + "new_value2": np.arange(len(modin_df)), + }, + df_mode=df_mode_pair[1], + ) + eval_setitem( + modin_df, + pandas_df, + build_value_picker(modin_value, pandas_value), + col=["new_value1", "new_value2"], + ) + + # Key and value.columns have equal values but in different order + new_columns = ["new_value3", "new_value4"] + modin_value.columns, pandas_value.columns = new_columns, new_columns + eval_setitem( + modin_df, + pandas_df, + build_value_picker(modin_value, pandas_value), + col=["new_value4", "new_value3"], + ) + + # Key and value.columns have different values + new_columns = ["new_value5", "new_value6"] + modin_value.columns, pandas_value.columns = new_columns, new_columns + eval_setitem( + modin_df, + pandas_df, + build_value_picker(modin_value, pandas_value), + col=["__new_value5", "__new_value6"], + ) + + # Key and value.columns have different lengths, testing that both raise the same exception + eval_setitem( + modin_df, + pandas_df, + build_value_picker(modin_value.iloc[:, [0]], pandas_value.iloc[:, [0]]), + col=["new_value7", "new_value8"], + expected_exception=ValueError("Columns must be same length as key"), + ) + + +@pytest.mark.parametrize("does_value_have_different_columns", [True, False]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_setitem_2d_update(does_value_have_different_columns, df_mode_pair): + def test(dfs, iloc): + """Update columns on the given numeric indices.""" + df1, df2 = dfs + cols1 = df1.columns[iloc].tolist() + cols2 = df2.columns[iloc].tolist() + df1[cols1] = df2[cols2] + return df1 + + modin_df, pandas_df = create_test_df_in_defined_mode( + test_data["int_data"], df_mode=df_mode_pair[0] + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + test_data["int_data"], df_mode=df_mode_pair[1] + ) + modin_df2 *= 10 + pandas_df2 *= 10 + + if does_value_have_different_columns: + new_columns = [f"{col}_new" for col in modin_df.columns] + modin_df2.columns = new_columns + pandas_df2.columns = new_columns + + modin_dfs = (modin_df, modin_df2) + pandas_dfs = (pandas_df, pandas_df2) + + eval_general(modin_dfs, pandas_dfs, test, iloc=[0, 1, 2]) + eval_general(modin_dfs, pandas_dfs, test, iloc=[0, -1]) + eval_general( + modin_dfs, pandas_dfs, test, iloc=slice(1, None) + ) # (start=1, stop=None) + eval_general( + modin_dfs, pandas_dfs, test, iloc=slice(None, -2) + ) # (start=None, stop=-2) + eval_general( + modin_dfs, + pandas_dfs, + test, + iloc=[0, 1, 5, 6, 9, 10, -2, -1], + ) + eval_general( + modin_dfs, + pandas_dfs, + test, + iloc=[5, 4, 0, 10, 1, -1], + ) + eval_general( + modin_dfs, pandas_dfs, test, iloc=slice(None, None, 2) + ) # (start=None, stop=None, step=2) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test___setitem__single_item_in_series(df_mode_pair): + # Test assigning a single item in a Series for issue + # 
https://github.com/modin-project/modin/issues/3860
+    modin_series1, pandas_series1 = create_test_series_in_defined_mode(
+        99, df_mode=df_mode_pair[0]
+    )
+    modin_series2, pandas_series2 = create_test_series_in_defined_mode(
+        100, df_mode=df_mode_pair[1]
+    )
+    modin_series1[:1] = modin_series2
+    pandas_series1[:1] = pandas_series2
+    df_equals(modin_series1, pandas_series1)
+
+
+@pytest.mark.parametrize(
+    "value",
+    [
+        1,
+        np.int32(1),
+        1.0,
+        "str val",
+        pandas.Timestamp("1/4/2018"),
+        np.datetime64(0, "ms"),
+        True,
+    ],
+)
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_loc_boolean_assignment_scalar_dtypes(value, df_mode_pair):
+    modin_df, pandas_df = create_test_df_in_defined_mode(
+        {
+            "a": [1, 2, 3],
+            "b": [3.0, 5.0, 6.0],
+            "c": ["a", "b", "c"],
+            "d": [1.0, "c", 2.0],
+            "e": pandas.to_datetime(["1/1/2018", "1/2/2018", "1/3/2018"]),
+            "f": [True, False, True],
+        },
+        df_mode=df_mode_pair[1],
+    )
+    modin_idx, pandas_idx = create_test_series_in_defined_mode(
+        [False, True, True], df_mode=df_mode_pair[1]
+    )
+
+    modin_df.loc[modin_idx] = value
+    pandas_df.loc[pandas_idx] = value
+    df_equals(modin_df, pandas_df)
+
+
+# This is a very subtle bug that comes from:
+# https://github.com/modin-project/modin/issues/4945
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_lazy_eval_index(df_mode_pair):
+    data = {"col0": [0, 1]}
+
+    def func(df1, df2):
+        df_copy = df1[df2["col0"] < 6].copy()
+        # The problem here is that the index is not copied over, so it needs
+        # to get recomputed at some point. Our implementation of __setitem__
+        # requires us to build a mask and insert the value from the right-hand
+        # side into the new DataFrame. However, it's possible that we won't
+        # have any new partitions, so we will end up computing an empty index.
+        df_copy["col0"] = df_copy["col0"].apply(lambda x: x + 1)
+        return df_copy
+
+    eval_general_interop(data, None, func, df_mode_pair=df_mode_pair)
+
+
+@pytest.mark.parametrize(
+    "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2))
+)
+def test_index_of_empty_frame(df_mode_pair):
+    # Test on an empty frame produced by Modin's logic
+    data = test_data_values[0]
+    md_df1, pd_df1 = create_test_df_in_defined_mode(
+        data,
+        index=pandas.RangeIndex(len(next(iter(data.values()))), name="index name"),
+        df_mode=df_mode_pair[0],
+    )
+    md_df2, pd_df2 = create_test_df_in_defined_mode(
+        data,
+        index=pandas.RangeIndex(len(next(iter(data.values()))), name="index name"),
+        df_mode=df_mode_pair[1],
+    )
+
+    md_res = md_df1.query(f"{md_df2.columns[0]} > {RAND_HIGH}")
+    pd_res = pd_df1.query(f"{pd_df2.columns[0]} > {RAND_HIGH}")
+
+    assert md_res.empty and pd_res.empty
+    df_equals(md_res.index, pd_res.index)
diff --git a/modin/tests/pandas/native_df_mode/test_iter.py b/modin/tests/pandas/native_df_mode/test_iter.py
new file mode 100644
index 00000000000..a2e176d4372
--- /dev/null
+++ b/modin/tests/pandas/native_df_mode/test_iter.py
@@ -0,0 +1,137 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +import warnings +from itertools import product + +import matplotlib +import pytest + +import modin.pandas as pd +from modin.config import NativeDataframeMode, NPartitions +from modin.pandas.utils import SET_DATAFRAME_ATTRIBUTE_WARNING +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, +) +from modin.tests.pandas.utils import df_equals, eval_general + +NPartitions.put(4) + +# Force matplotlib to not use any Xwindows backend. +matplotlib.use("Agg") + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test___setattr__mutating_column(df_mode_pair): + # Use case from issue #4577 + modin_df, pandas_df = create_test_df_in_defined_mode( + [[1]], columns=["col0"], df_mode=df_mode_pair[0] + ) + # Replacing a column with a list should mutate the column in place. + pandas_df.col0 = [3] + modin_df.col0 = [3] + modin_ser, pandas_ser = create_test_series_in_defined_mode( + [3], df_mode=df_mode_pair[1] + ) + df_equals(modin_df, pandas_df) + # Check that the col0 attribute reflects the value update. + df_equals(modin_df.col0, pandas_df.col0) + + pandas_df.col0 = pandas_ser + modin_df.col0 = modin_ser + + # Check that the col0 attribute reflects this update + df_equals(modin_df, pandas_df) + + pandas_df.loc[0, "col0"] = 4 + modin_df.loc[0, "col0"] = 4 + + # Check that the col0 attribute reflects update via loc + df_equals(modin_df, pandas_df) + assert modin_df.col0.equals(modin_df["col0"]) + + # Check that attempting to add a new col via attributes raises warning + # and adds the provided list as a new attribute and not a column. + with pytest.warns( + UserWarning, + match=SET_DATAFRAME_ATTRIBUTE_WARNING, + ): + modin_df.col1 = [4] + + with warnings.catch_warnings(): + warnings.filterwarnings( + action="error", + message=SET_DATAFRAME_ATTRIBUTE_WARNING, + ) + modin_df.col1 = [5] + modin_df.new_attr = 6 + modin_df.col0 = 7 + + assert "new_attr" in dir( + modin_df + ), "Modin attribute was not correctly added to the df." + assert ( + "new_attr" not in modin_df + ), "New attribute was not correctly added to columns." + assert modin_df.new_attr == 6, "Modin attribute value was set incorrectly." + assert isinstance( + modin_df.col0, pd.Series + ), "Scalar was not broadcasted properly to an existing column." 
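+
+# A minimal sketch of the pandas-level rules the test above mirrors
+# (illustrative only, not executed):
+#
+#     df = pandas.DataFrame({"col0": [1]})
+#     df.col0 = [3]        # existing column: assignment updates the column
+#     df.col1 = [4]        # new name + list-like value: pandas warns and sets
+#                          # a plain attribute, not a column
+#     df.new_attr = 6      # new name + scalar: silently becomes an attribute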
+ + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_isin_with_modin_objects(df_mode_pair): + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + {"a": [1, 2], "b": [3, 4]}, df_mode=df_mode_pair[0] + ) + modin_series, pandas_series = create_test_series_in_defined_mode( + [1, 4, 5, 6], df_mode=df_mode_pair[1] + ) + + eval_general( + (modin_df1, modin_series), + (pandas_df1, pandas_series), + lambda srs: srs[0].isin(srs[1]), + ) + + modin_df2 = modin_series.to_frame("a") + pandas_df2 = pandas_series.to_frame("a") + + eval_general( + (modin_df1, modin_df2), + (pandas_df1, pandas_df2), + lambda srs: srs[0].isin(srs[1]), + ) + + # Check case when indices are not matching + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + {"a": [1, 2], "b": [3, 4]}, + index=[10, 11], + df_mode=df_mode_pair[0], + ) + + eval_general( + (modin_df1, modin_series), + (pandas_df1, pandas_series), + lambda srs: srs[0].isin(srs[1]), + ) + eval_general( + (modin_df1, modin_df2), + (pandas_df1, pandas_df2), + lambda srs: srs[0].isin(srs[1]), + ) diff --git a/modin/tests/pandas/native_df_mode/test_join_sort.py b/modin/tests/pandas/native_df_mode/test_join_sort.py new file mode 100644 index 00000000000..62565dde382 --- /dev/null +++ b/modin/tests/pandas/native_df_mode/test_join_sort.py @@ -0,0 +1,411 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +from itertools import product + +import matplotlib +import numpy as np +import pandas +import pytest + +import modin.pandas as pd +from modin.config import NativeDataframeMode, NPartitions +from modin.pandas.io import to_pandas +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, + eval_general_interop, +) +from modin.tests.pandas.utils import ( + default_to_pandas_ignore_string, + df_equals, + eval_general, + random_state, + test_data_keys, + test_data_values, +) + +NPartitions.put(4) + +# Force matplotlib to not use any Xwindows backend. +matplotlib.use("Agg") + +# Our configuration in pytest.ini requires that we explicitly catch all +# instances of defaulting to pandas, but some test modules, like this one, +# have too many such instances. 
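+# (`default_to_pandas_ignore_string` comes from modin.tests.pandas.utils and
+# is a filterwarnings() argument that silences Modin's "defaulting to pandas"
+# UserWarning, roughly "ignore:.*defaulting to pandas.*:UserWarning".)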
+pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string) + +# Initialize env for storage format detection in @pytest.mark.* +pd.DataFrame() + + +def df_equals_and_sort(df1, df2): + """Sort dataframe's rows and run ``df_equals()`` for them.""" + df1 = df1.sort_values(by=df1.columns.tolist(), ignore_index=True) + df2 = df2.sort_values(by=df2.columns.tolist(), ignore_index=True) + df_equals(df1, df2) + + +@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_combine(data, df_mode_pair): + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + data, df_mode=df_mode_pair[0] + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + data, df_mode=df_mode_pair[1] + ) + modin_df_1.combine( + modin_df_2 + 1, lambda s1, s2: s1 if s1.count() < s2.count() else s2 + ) + pandas_df_1.combine( + pandas_df_2 + 1, lambda s1, s2: s1 if s1.count() < s2.count() else s2 + ) + + +@pytest.mark.parametrize( + "test_data, test_data2", + [ + ( + np.random.randint(0, 100, size=(64, 64)), + np.random.randint(0, 100, size=(128, 64)), + ), + ( + np.random.randint(0, 100, size=(128, 64)), + np.random.randint(0, 100, size=(64, 64)), + ), + ( + np.random.randint(0, 100, size=(64, 64)), + np.random.randint(0, 100, size=(64, 128)), + ), + ( + np.random.randint(0, 100, size=(64, 128)), + np.random.randint(0, 100, size=(64, 64)), + ), + ], +) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_join(test_data, test_data2, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode( + test_data, + columns=["col{}".format(i) for i in range(test_data.shape[1])], + index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name="key"), + df_mode=df_mode_pair[0], + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + test_data2, + columns=["col{}".format(i) for i in range(test_data2.shape[1])], + index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name="key"), + df_mode=df_mode_pair[1], + ) + + hows = ["inner", "left", "right", "outer"] + ons = ["col33", "col34"] + sorts = [False, True] + assert len(ons) == len(sorts), "the loop below is designed for this condition" + for i in range(len(hows)): + for j in range(len(ons)): + modin_result = modin_df.join( + modin_df2, + how=hows[i], + on=ons[j], + sort=sorts[j], + lsuffix="_caller", + rsuffix="_other", + ) + pandas_result = pandas_df.join( + pandas_df2, + how=hows[i], + on=ons[j], + sort=sorts[j], + lsuffix="_caller", + rsuffix="_other", + ) + if sorts[j]: + # sorting in `join` is implemented through range partitioning technique + # therefore the order of the rows after it does not match the pandas, + # so additional sorting is needed in order to get the same result as for pandas + df_equals_and_sort(modin_result, pandas_result) + else: + df_equals(modin_result, pandas_result) + + frame_data = { + "col1": [0, 1, 2, 3], + "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], + "col4": [2, 4, 5, 6], + } + + modin_df = pd.DataFrame(frame_data) + pandas_df = pandas.DataFrame(frame_data) + + frame_data2 = {"col5": [0], "col6": [1]} + modin_df2 = pd.DataFrame(frame_data2) + pandas_df2 = pandas.DataFrame(frame_data2) + + join_types = ["left", "right", "outer", "inner"] + for how in join_types: + modin_join = modin_df.join(modin_df2, how=how) + pandas_join = pandas_df.join(pandas_df2, how=how) + df_equals(modin_join, pandas_join) + + frame_data3 = {"col7": 
[1, 2, 3, 5, 6, 7, 8]} + + modin_df3 = pd.DataFrame(frame_data3) + pandas_df3 = pandas.DataFrame(frame_data3) + + join_types = ["left", "outer", "inner"] + for how in join_types: + modin_join = modin_df.join([modin_df2, modin_df3], how=how) + pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how) + df_equals(modin_join, pandas_join) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_join_cross_6786(df_mode_pair): + data = [[7, 8, 9], [10, 11, 12]] + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + data, columns=["x", "y", "z"], df_mode=df_mode_pair[0] + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + data, columns=["x", "y", "z"], df_mode=df_mode_pair[1] + ) + modin_join = modin_df_1.join( + modin_df_2[["x"]].set_axis(["p", "q"], axis=0), how="cross", lsuffix="p" + ) + pandas_join = pandas_df_1.join( + pandas_df_2[["x"]].set_axis(["p", "q"], axis=0), how="cross", lsuffix="p" + ) + df_equals(modin_join, pandas_join) + + +@pytest.mark.parametrize( + "test_data, test_data2", + [ + ( + np.random.randint(0, 100, size=(64, 64)), + np.random.randint(0, 100, size=(128, 64)), + ), + ( + np.random.randint(0, 100, size=(128, 64)), + np.random.randint(0, 100, size=(64, 64)), + ), + ( + np.random.randint(0, 100, size=(64, 64)), + np.random.randint(0, 100, size=(64, 128)), + ), + ( + np.random.randint(0, 100, size=(64, 128)), + np.random.randint(0, 100, size=(64, 64)), + ), + ], +) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_merge(test_data, test_data2, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode( + test_data, + columns=["col{}".format(i) for i in range(test_data.shape[1])], + index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name="key"), + df_mode=df_mode_pair[0], + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + test_data2, + columns=["col{}".format(i) for i in range(test_data2.shape[1])], + index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name="key"), + df_mode=df_mode_pair[1], + ) + hows = ["left", "inner", "right"] + ons = ["col33", ["col33", "col34"]] + sorts = [False, True] + assert len(ons) == len(sorts), "the loop below is designed for this condition" + for i in range(len(hows)): + for j in range(len(ons)): + modin_result = modin_df.merge( + modin_df2, how=hows[i], on=ons[j], sort=sorts[j] + ) + pandas_result = pandas_df.merge( + pandas_df2, how=hows[i], on=ons[j], sort=sorts[j] + ) + # FIXME: https://github.com/modin-project/modin/issues/2246 + df_equals_and_sort(modin_result, pandas_result) + + modin_result = modin_df.merge( + modin_df2, + how=hows[i], + left_on="key", + right_on="key", + sort=sorts[j], + ) + pandas_result = pandas_df.merge( + pandas_df2, + how=hows[i], + left_on="key", + right_on="key", + sort=sorts[j], + ) + # FIXME: https://github.com/modin-project/modin/issues/2246 + df_equals_and_sort(modin_result, pandas_result) + + +@pytest.mark.parametrize("how", ["left", "inner", "right"]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_merge_empty( + how, + df_mode_pair, +): + data = np.random.randint(0, 100, size=(64, 64)) + eval_general_interop( + data, + None, + lambda df1, df2: df1.merge(df2.iloc[:0], how=how), + df_mode_pair, + ) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_merge_with_mi_columns(df_mode_pair): + 
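+    # Both frames below carry two-level column labels, so the shared merge
+    # key must be addressed as a tuple, e.g. on=[("col0", "a")].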
modin_df1, pandas_df1 = create_test_df_in_defined_mode( + { + ("col0", "a"): [1, 2, 3, 4], + ("col0", "b"): [2, 3, 4, 5], + ("col1", "a"): [3, 4, 5, 6], + }, + df_mode=df_mode_pair[0], + ) + + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + { + ("col0", "a"): [1, 2, 3, 4], + ("col0", "c"): [2, 3, 4, 5], + ("col1", "a"): [3, 4, 5, 6], + }, + df_mode=df_mode_pair[1], + ) + + eval_general( + (modin_df1, modin_df2), + (pandas_df1, pandas_df2), + lambda dfs: dfs[0].merge(dfs[1], on=[("col0", "a")]), + ) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_where(df_mode_pair): + columns = list("abcdefghij") + + frame_data = random_state.randn(100, 10) + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + frame_data, columns=columns, df_mode=df_mode_pair[0] + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + frame_data, columns=columns, df_mode=df_mode_pair[1] + ) + pandas_cond_df = pandas_df_2 % 5 < 2 + modin_cond_df = modin_df_2 % 5 < 2 + + pandas_result = pandas_df_1.where(pandas_cond_df, -pandas_df_2) + modin_result = modin_df_1.where(modin_cond_df, -modin_df_2) + assert all((to_pandas(modin_result) == pandas_result).all()) + + # test case when other is Series + other_data = random_state.randn(len(pandas_df_1)) + modin_other, pandas_other = create_test_series_in_defined_mode( + other_data, df_mode=df_mode_pair[0] + ) + pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other, axis=0) + modin_result = modin_df_1.where(modin_cond_df, modin_other, axis=0) + df_equals(modin_result, pandas_result) + + # Test that we choose the right values to replace when `other` == `True` + # everywhere. + other_data = np.full(shape=pandas_df_1.shape, fill_value=True) + modin_other, pandas_other = create_test_df_in_defined_mode( + other_data, columns=columns, df_mode=df_mode_pair[0] + ) + pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other) + modin_result = modin_df_1.where(modin_cond_df, modin_other) + df_equals(modin_result, pandas_result) + + other = pandas_df_1.loc[3] + pandas_result = pandas_df_1.where(pandas_cond_df, other, axis=1) + modin_result = modin_df_1.where(modin_cond_df, other, axis=1) + assert all((to_pandas(modin_result) == pandas_result).all()) + + other = pandas_df_1["e"] + pandas_result = pandas_df_1.where(pandas_cond_df, other, axis=0) + modin_result = modin_df_1.where(modin_cond_df, other, axis=0) + assert all((to_pandas(modin_result) == pandas_result).all()) + + pandas_result = pandas_df_1.where(pandas_df_2 < 2, True) + modin_result = modin_df_1.where(modin_df_2 < 2, True) + assert all((to_pandas(modin_result) == pandas_result).all()) + + +@pytest.mark.parametrize("align_axis", ["index", "columns"]) +@pytest.mark.parametrize("keep_shape", [False, True]) +@pytest.mark.parametrize("keep_equal", [False, True]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_compare(align_axis, keep_shape, keep_equal, df_mode_pair): + kwargs = { + "align_axis": align_axis, + "keep_shape": keep_shape, + "keep_equal": keep_equal, + } + frame_data1 = random_state.randn(100, 10) + frame_data2 = random_state.randn(100, 10) + modin_df, pandas_df = create_test_df_in_defined_mode( + frame_data1, columns=list("abcdefghij"), df_mode=df_mode_pair[0] + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + frame_data2, columns=list("abcdefghij"), df_mode=df_mode_pair[0] + ) + modin_result = modin_df.compare(modin_df2, **kwargs) + pandas_result = 
pandas_df.compare(pandas_df2, **kwargs) + assert to_pandas(modin_result).equals(pandas_result) + + modin_result = modin_df2.compare(modin_df, **kwargs) + pandas_result = pandas_df2.compare(pandas_df, **kwargs) + assert to_pandas(modin_result).equals(pandas_result) + + series_data1 = ["a", "b", "c", "d", "e"] + series_data2 = ["a", "a", "c", "b", "e"] + modin_series1, pandas_series1 = create_test_series_in_defined_mode( + series_data1, df_mode=df_mode_pair[0] + ) + modin_series2, pandas_series2 = create_test_series_in_defined_mode( + series_data2, df_mode=df_mode_pair[1] + ) + + modin_result = modin_series1.compare(modin_series2, **kwargs) + pandas_result = pandas_series1.compare(pandas_series2, **kwargs) + assert to_pandas(modin_result).equals(pandas_result) + + modin_result = modin_series2.compare(modin_series1, **kwargs) + pandas_result = pandas_series2.compare(pandas_series1, **kwargs) + assert to_pandas(modin_result).equals(pandas_result) diff --git a/modin/tests/pandas/native_df_mode/test_map_metadata.py b/modin/tests/pandas/native_df_mode/test_map_metadata.py new file mode 100644 index 00000000000..e9e460ffbc8 --- /dev/null +++ b/modin/tests/pandas/native_df_mode/test_map_metadata.py @@ -0,0 +1,258 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + + +from itertools import product + +import matplotlib +import numpy as np +import pandas +import pytest + +import modin.pandas as pd +from modin.config import NativeDataframeMode, NPartitions, StorageFormat +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, +) +from modin.tests.pandas.utils import ( + RAND_HIGH, + RAND_LOW, + axis_keys, + axis_values, + default_to_pandas_ignore_string, + df_equals, + eval_general, + name_contains, + numeric_dfs, + random_state, + test_data, + test_data_keys, + test_data_values, +) + +NPartitions.put(4) + +# Force matplotlib to not use any Xwindows backend. +matplotlib.use("Agg") + +# Our configuration in pytest.ini requires that we explicitly catch all +# instances of defaulting to pandas, but some test modules, like this one, +# have too many such instances. 
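+# (A module-level `pytestmark` applies the filter to every test in this file.
+# The create_test_*_in_defined_mode helpers used throughout return a
+# (modin, pandas) pair built from the same data, with the Modin object
+# constructed under the requested NativeDataframeMode, so each test can run
+# one operation on both objects and compare the results via df_equals.)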
+pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string) + + +def eval_insert(modin_df, pandas_df, **kwargs): + if "col" in kwargs and "column" not in kwargs: + kwargs["column"] = kwargs.pop("col") + _kwargs = {"loc": 0, "column": "New column"} + _kwargs.update(kwargs) + + eval_general( + modin_df, + pandas_df, + operation=lambda df, **kwargs: df.insert(**kwargs), + __inplace__=True, + **_kwargs, + ) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_empty_df(df_mode_pair): + modin_df, pd_df = create_test_df_in_defined_mode(None, df_mode=df_mode_pair[0]) + md_series, pd_series = create_test_series_in_defined_mode( + [1, 2, 3, 4, 5], df_mode=df_mode_pair[1] + ) + modin_df["a"] = md_series + pd_df["a"] = pd_series + df_equals(modin_df, pd_df) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_astype(df_mode_pair): + td = pandas.DataFrame(test_data["int_data"])[["col1", "index", "col3", "col4"]] + modin_df, pandas_df = create_test_df_in_defined_mode( + td.values, + index=td.index, + columns=td.columns, + df_mode=df_mode_pair[0], + ) + + def astype_func(df): + md_ser, pd_ser = create_test_series_in_defined_mode( + [str, str], index=["col1", "col1"], df_mode=df_mode_pair[1] + ) + if isinstance(df, pd.DataFrame): + return df.astype(md_ser) + else: + return df.astype(pd_ser) + + # The dtypes series must have a unique index. + eval_general( + modin_df, + pandas_df, + astype_func, + expected_exception=ValueError( + "cannot reindex on an axis with duplicate labels" + ), + ) + + +########################################################################### + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_convert_dtypes_5653(df_mode_pair): + modin_part1, _ = create_test_df_in_defined_mode( + {"col1": ["a", "b", "c", "d"]}, df_mode=df_mode_pair[0] + ) + modin_part2, _ = create_test_df_in_defined_mode( + {"col1": [None, None, None, None]}, df_mode=df_mode_pair[1] + ) + modin_df = pd.concat([modin_part1, modin_part2]) + if StorageFormat.get() == "Pandas" and NativeDataframeMode.get() == "Default": + assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1) + modin_df = modin_df.convert_dtypes() + assert len(modin_df.dtypes) == 1 + assert modin_df.dtypes.iloc[0] == "string" + + +@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) +@pytest.mark.parametrize("axis", axis_values, ids=axis_keys) +@pytest.mark.parametrize("bound_type", ["list", "series"], ids=["list", "series"]) +@pytest.mark.exclude_in_sanity +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_clip(request, data, axis, bound_type, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) + + if name_contains(request.node.name, numeric_dfs): + ind_len = ( + len(modin_df.index) + if not pandas.DataFrame()._get_axis_number(axis) + else len(modin_df.columns) + ) + + lower = random_state.randint(RAND_LOW, RAND_HIGH, ind_len) + upper = random_state.randint(RAND_LOW, RAND_HIGH, ind_len) + + if bound_type == "series": + modin_lower, pandas_lower = create_test_series_in_defined_mode( + lower, df_mode=df_mode_pair[1] + ) + modin_upper, pandas_upper = create_test_series_in_defined_mode( + upper, df_mode=df_mode_pair[0] + ) + else: + modin_lower = pandas_lower = lower + modin_upper = pandas_upper = upper + + # test 
lower and upper list bound on each column + modin_result = modin_df.clip(modin_lower, modin_upper, axis=axis) + pandas_result = pandas_df.clip(pandas_lower, pandas_upper, axis=axis) + df_equals(modin_result, pandas_result) + + # test only upper list bound on each column + modin_result = modin_df.clip(np.nan, modin_upper, axis=axis) + pandas_result = pandas_df.clip(np.nan, pandas_upper, axis=axis) + df_equals(modin_result, pandas_result) + + with pytest.raises(ValueError): + modin_df.clip(lower=[1, 2, 3], axis=None) + + +@pytest.mark.parametrize( + "data, other_data", + [ + ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "C": [7, 8, 9]}), + ({"C": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "A": [7, 8, 9]}), + ( + {"A": ["a", "b", "c"], "B": ["x", "y", "z"]}, + {"B": ["d", "e", "f", "g", "h", "i"]}, + ), + ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, np.nan, 6]}), + ], +) +@pytest.mark.parametrize("errors", ["raise", "ignore"]) +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_update(data, other_data, errors, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) + other_modin_df, other_pandas_df = create_test_df_in_defined_mode( + other_data, df_mode=df_mode_pair[1] + ) + expected_exception = None + if errors == "raise": + expected_exception = ValueError("Data overlaps.") + eval_general( + modin_df, + pandas_df, + lambda df: ( + df.update(other_modin_df, errors=errors) + if isinstance(df, pd.DataFrame) + else df.update(other_pandas_df, errors=errors) + ), + __inplace__=True, + expected_exception=expected_exception, + ) + + +@pytest.mark.parametrize( + "get_index", + [ + pytest.param(lambda idx: None, id="None_idx"), + pytest.param(lambda idx: ["a", "b", "c"], id="No_intersection_idx"), + pytest.param(lambda idx: idx, id="Equal_idx"), + pytest.param(lambda idx: idx[::-1], id="Reversed_idx"), + ], +) +@pytest.mark.parametrize( + "get_columns", + [ + pytest.param(lambda idx: None, id="None_idx"), + pytest.param(lambda idx: ["a", "b", "c"], id="No_intersection_idx"), + pytest.param(lambda idx: idx, id="Equal_idx"), + pytest.param(lambda idx: idx[::-1], id="Reversed_idx"), + ], +) +@pytest.mark.parametrize("dtype", [None, "str"]) +@pytest.mark.exclude_in_sanity +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_constructor_from_modin_series(get_index, get_columns, dtype, df_mode_pair): + modin_df, pandas_df = create_test_df_in_defined_mode( + test_data_values[0], df_mode=df_mode_pair[0] + ) + + modin_data = {f"new_col{i}": modin_df.iloc[:, i] for i in range(modin_df.shape[1])} + pandas_data = { + f"new_col{i}": pandas_df.iloc[:, i] for i in range(pandas_df.shape[1]) + } + + index = get_index(modin_df.index) + columns = get_columns(list(modin_data.keys())) + + new_modin = pd.DataFrame(modin_data, index=index, columns=columns, dtype=dtype) + new_pandas = pandas.DataFrame( + pandas_data, index=index, columns=columns, dtype=dtype + ) + df_equals(new_modin, new_pandas) diff --git a/modin/tests/pandas/native_df_mode/test_pickle.py b/modin/tests/pandas/native_df_mode/test_pickle.py new file mode 100644 index 00000000000..cf9b4dfcb9c --- /dev/null +++ b/modin/tests/pandas/native_df_mode/test_pickle.py @@ -0,0 +1,73 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. 
The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +from itertools import product + +import numpy as np +import pytest + +import modin.pandas as pd +from modin.config import NativeDataframeMode, PersistentPickle +from modin.tests.pandas.native_df_mode.utils import create_test_df_in_defined_mode +from modin.tests.pandas.utils import df_equals + + +@pytest.fixture +def modin_df(): + return pd.DataFrame({"col1": np.arange(1000), "col2": np.arange(2000, 3000)}) + + +@pytest.fixture +def modin_column(modin_df): + return modin_df["col1"] + + +@pytest.fixture(params=[True, False]) +def persistent(request): + old = PersistentPickle.get() + PersistentPickle.put(request.param) + yield request.param + PersistentPickle.put(old) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test__reduce__(df_mode_pair): + # `DataFrame.__reduce__` will be called implicitly when lambda expressions are + # pre-processed for the distributed engine. + dataframe_data = ["Major League Baseball", "National Basketball Association"] + abbr_md, abbr_pd = create_test_df_in_defined_mode( + dataframe_data, index=["MLB", "NBA"], df_mode=df_mode_pair[0] + ) + + dataframe_data = { + "name": ["Mariners", "Lakers"] * 500, + "league_abbreviation": ["MLB", "NBA"] * 500, + } + teams_md, teams_pd = create_test_df_in_defined_mode( + dataframe_data, df_mode=df_mode_pair[1] + ) + + result_md = ( + teams_md.set_index("name") + .league_abbreviation.apply(lambda abbr: abbr_md[0].loc[abbr]) + .rename("league") + ) + + result_pd = ( + teams_pd.set_index("name") + .league_abbreviation.apply(lambda abbr: abbr_pd[0].loc[abbr]) + .rename("league") + ) + df_equals(result_md, result_pd) diff --git a/modin/tests/pandas/native_df_mode/test_window.py b/modin/tests/pandas/native_df_mode/test_window.py new file mode 100644 index 00000000000..7e8e5da9342 --- /dev/null +++ b/modin/tests/pandas/native_df_mode/test_window.py @@ -0,0 +1,101 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
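+
+# These tests create each operand under its own `NativeDataframeMode` (via the
+# `df_mode_pair` parameter) to check that native and distributed DataFrames
+# interoperate in `fillna`-style operations.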
+ +from itertools import product + +import matplotlib +import numpy as np +import pandas +import pytest + +import modin.pandas as pd +from modin.config import NativeDataframeMode, NPartitions +from modin.tests.pandas.native_df_mode.utils import create_test_df_in_defined_mode +from modin.tests.pandas.utils import df_equals + +NPartitions.put(4) + +# Force matplotlib to not use any Xwindows backend. +matplotlib.use("Agg") + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_fillna_4660(df_mode_pair): + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + {"a": ["a"], "b": ["b"], "c": [pd.NA]}, + index=["row1"], + df_mode=df_mode_pair[0], + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + {"a": ["a"], "b": ["b"], "c": [pd.NA]}, + index=["row1"], + df_mode=df_mode_pair[1], + ) + modin_result = modin_df_1["c"].fillna(modin_df_2["b"]) + pandas_result = pandas_df_1["c"].fillna(pandas_df_2["b"]) + df_equals(modin_result, pandas_result) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_fillna_dict_series(df_mode_pair): + frame_data = { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + df = pandas.DataFrame(frame_data) + modin_df = pd.DataFrame(frame_data) + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + frame_data, df_mode=df_mode_pair[0] + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + frame_data, df_mode=df_mode_pair[1] + ) + + df_equals(modin_df.fillna({"a": 0, "b": 5}), df.fillna({"a": 0, "b": 5})) + + df_equals( + modin_df.fillna({"a": 0, "b": 5, "d": 7}), + df.fillna({"a": 0, "b": 5, "d": 7}), + ) + + # Series treated same as dict + df_equals( + modin_df_1.fillna(modin_df_2.max()), pandas_df_1.fillna(pandas_df_2.max()) + ) + + +@pytest.mark.parametrize( + "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) +) +def test_fillna_dataframe(df_mode_pair): + frame_data = { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + frame_data, index=list("VWXYZ"), df_mode=df_mode_pair[0] + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + {"a": [np.nan, 10, 20, 30, 40], "b": [50, 60, 70, 80, 90], "foo": ["bar"] * 5}, + index=list("VWXuZ"), + df_mode=df_mode_pair[1], + ) + + # only those columns and indices which are shared get filled + df_equals(modin_df_1.fillna(modin_df_2), pandas_df_1.fillna(pandas_df_2)) diff --git a/modin/tests/pandas/native_df_mode/utils.py b/modin/tests/pandas/native_df_mode/utils.py new file mode 100644 index 00000000000..9e9d77ac1f7 --- /dev/null +++ b/modin/tests/pandas/native_df_mode/utils.py @@ -0,0 +1,133 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. 
See the License for the specific language +# governing permissions and limitations under the License. + +from modin.config import Engine +from modin.config.pubsub import context +from modin.tests.pandas.utils import ( + NoModinException, + create_test_dfs, + create_test_series, + df_equals, +) +from modin.utils import try_cast_to_pandas + + +def create_test_df_in_defined_mode( + *args, post_fn=None, backend=None, df_mode=None, **kwargs +): + with context(NativeDataframeMode=df_mode): + return create_test_dfs(*args, post_fn=post_fn, backend=backend, **kwargs) + + +def create_test_series_in_defined_mode( + vals, sort=False, backend=None, df_mode=None, **kwargs +): + with context(NativeDataframeMode=df_mode): + return create_test_series(vals, sort=sort, backend=backend, **kwargs) + + +def eval_general_interop( + data, + backend, + operation, + df_mode_pair, + comparator=df_equals, + __inplace__=False, + expected_exception=None, + check_kwargs_callable=True, + md_extra_kwargs=None, + comparator_kwargs=None, + **kwargs, +): + df_mode1, df_mode2 = df_mode_pair + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + data, backend=backend, df_mode=df_mode1 + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + data, backend=backend, df_mode=df_mode2 + ) + md_kwargs, pd_kwargs = {}, {} + + def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}): + try: + pd_result = fn(pandas_df1, pandas_df2, **pd_kwargs) + except Exception as pd_e: + try: + if inplace: + _ = fn(modin_df1, modin_df2, **md_kwargs) + try_cast_to_pandas(modin_df1) # force materialization + else: + try_cast_to_pandas( + fn(modin_df1, modin_df2, **md_kwargs) + ) # force materialization + except Exception as md_e: + assert isinstance( + md_e, type(pd_e) + ), "Got Modin Exception type {}, but pandas Exception type {} was expected".format( + type(md_e), type(pd_e) + ) + if expected_exception: + if Engine.get() == "Ray": + from ray.exceptions import RayTaskError + + # unwrap ray exceptions from remote worker + if isinstance(md_e, RayTaskError): + md_e = md_e.args[0] + assert ( + type(md_e) is type(expected_exception) + and md_e.args == expected_exception.args + ), f"not acceptable Modin's exception: [{repr(md_e)}]" + assert ( + pd_e.args == expected_exception.args + ), f"not acceptable Pandas' exception: [{repr(pd_e)}]" + elif expected_exception is False: + # The only way to disable exception message checking. + pass + else: + # It’s not enough that Modin and pandas have the same types of exceptions; + # we need to explicitly specify the instance of an exception + # (using `expected_exception`) in tests so that we can check exception messages. + # This allows us to eliminate situations where exceptions are thrown + # that we don't expect, which could hide different bugs. 
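+                    # Re-raise the original pandas exception so an unexpected
+                    # mismatch fails loudly instead of passing unnoticed.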
+                    raise pd_e
+            else:
+                raise NoModinException(
+                    f"Modin doesn't throw an exception, while pandas does: [{repr(pd_e)}]"
+                )
+        else:
+            md_result = fn(modin_df1, modin_df2, **md_kwargs)
+            return (md_result, pd_result) if not inplace else (modin_df1, pandas_df1)
+
+    for key, value in kwargs.items():
+        if check_kwargs_callable and callable(value):
+            values = execute_callable(value)
+            # `None` means the callable raised an exception that was already
+            # validated inside `execute_callable`
+            if values is None:
+                return
+            else:
+                md_value, pd_value = values
+        else:
+            md_value, pd_value = value, value
+
+        md_kwargs[key] = md_value
+        pd_kwargs[key] = pd_value
+
+    if md_extra_kwargs:
+        assert isinstance(md_extra_kwargs, dict)
+        md_kwargs.update(md_extra_kwargs)
+
+    values = execute_callable(
+        operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
+    )
+    if values is not None:
+        comparator(*values, **(comparator_kwargs or {}))

From 156cd51fd779fbff5a9e5da928c5b3624114b185 Mon Sep 17 00:00:00 2001
From: Arun Jose <40291569+arunjose696@users.noreply.github.com>
Date: Fri, 6 Sep 2024 15:33:08 +0200
Subject: [PATCH 16/20] DOCS-#7382: Add documentation on how to use Modin
 Native query compiler (#7386)

Co-authored-by: Iaroslav Igoshev
Signed-off-by: arunjose696
---
 docs/usage_guide/optimization_notes/index.rst | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/docs/usage_guide/optimization_notes/index.rst b/docs/usage_guide/optimization_notes/index.rst
index 0dcbe5a25d7..6e9d1ca7d63 100644
--- a/docs/usage_guide/optimization_notes/index.rst
+++ b/docs/usage_guide/optimization_notes/index.rst
@@ -314,6 +314,37 @@ Copy-pastable example, showing how mixing pandas and Modin DataFrames in a singl
     # Possible output: TypeError
 
+
+Execute DataFrame operations using NativeQueryCompiler
+""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+By default, Modin distributes data across partitions and performs operations
+using the ``PandasQueryCompiler``. However, for certain scenarios such as handling
+small or empty DataFrames, distributing them may introduce unnecessary overhead.
+In such cases, it's more efficient to default to pandas at the query compiler layer.
+This can be achieved by setting the ``cfg.NativeDataframeMode``
+:doc:`configuration variable: ` to ``Pandas``. When set to ``Pandas``, all operations
+in Modin default to pandas, and the DataFrames are not distributed, avoiding
+additional overhead. This configuration can be toggled on or off depending on
+whether DataFrame distribution is required.
+
+DataFrames created while ``NativeDataframeMode`` is set to ``Pandas`` keep using the
+``NativeQueryCompiler`` even after the config is switched back to ``Default``. Modin
+supports interoperability between distributed Modin DataFrames and those using the
+``NativeQueryCompiler``.
+
+.. code-block:: python
+
+    import modin.pandas as pd
+    import modin.config as cfg
+
+    # This dataframe will be distributed and use `PandasQueryCompiler` by default
+    df_distributed = pd.DataFrame(...)
+
+    # Set mode to "Pandas" to avoid distribution and use `NativeQueryCompiler`
+    cfg.NativeDataframeMode.put("Pandas")
+    df_native_qc = pd.DataFrame(...)
+
+    # Revert to default settings for distributed dataframes
+    cfg.NativeDataframeMode.put("Default")
+    df_distributed = pd.DataFrame(...)
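+
+Because the two kinds of DataFrames interoperate, they can be combined in a
+single operation. A minimal sketch (the column values are only illustrative):
+
+.. code-block:: python
+
+    import modin.pandas as pd
+    import modin.config as cfg
+
+    cfg.NativeDataframeMode.put("Pandas")
+    df_native_qc = pd.DataFrame({"a": [1, 2, 3]})  # uses `NativeQueryCompiler`
+
+    cfg.NativeDataframeMode.put("Default")
+    df_distributed = pd.DataFrame({"a": [4, 5, 6]})  # distributed as usual
+
+    # Both DataFrames can take part in the same operation.
+    result = pd.concat([df_native_qc, df_distributed])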
+
 Operation-specific optimizations
 """"""""""""""""""""""""""""""""

From f3c0a63579bb6cee861ea04344ddedd72221634e Mon Sep 17 00:00:00 2001
From: Iaroslav Igoshev
Date: Fri, 6 Sep 2024 18:34:43 +0200
Subject: [PATCH 17/20] FIX-#7387: Limit the number of pytest workers for
 tests with Ray engine on Windows (#7388)

Signed-off-by: Igoshev, Iaroslav
---
 .github/workflows/ci.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8fb26225613..9b0d5b49783 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -443,12 +443,28 @@ jobs:
       - run: python -m pytest -n 2 modin/tests/experimental/test_pipeline.py
         if: matrix.engine == 'python' || matrix.test_task == 'group_1'
       - uses: ./.github/actions/run-core-tests/group_1
+        with:
+          # When running with the Ray engine on Windows, tests fail in CI when using 2 pytest workers.
+          # See https://github.com/modin-project/modin/issues/7387.
+          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
         if: matrix.engine == 'python' || matrix.test_task == 'group_1'
       - uses: ./.github/actions/run-core-tests/group_2
+        with:
+          # When running with the Ray engine on Windows, tests fail in CI when using 2 pytest workers.
+          # See https://github.com/modin-project/modin/issues/7387.
+          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
         if: matrix.engine == 'python' || matrix.test_task == 'group_2'
       - uses: ./.github/actions/run-core-tests/group_3
+        with:
+          # When running with the Ray engine on Windows, tests fail in CI when using 2 pytest workers.
+          # See https://github.com/modin-project/modin/issues/7387.
+          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
         if: matrix.engine == 'python' || matrix.test_task == 'group_3'
       - uses: ./.github/actions/run-core-tests/group_4
+        with:
+          # When running with the Ray engine on Windows, tests fail in CI when using 2 pytest workers.
+          # See https://github.com/modin-project/modin/issues/7387.
+ parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }} if: matrix.engine == 'python' || matrix.test_task == 'group_4' - run: python -m pytest -n 2 modin/tests/numpy if: matrix.engine == 'python' || matrix.test_task == 'group_4' From 33577098afa51d6c96ac154af88f9680fc4abf8b Mon Sep 17 00:00:00 2001 From: Iaroslav Igoshev Date: Mon, 9 Sep 2024 16:32:37 +0200 Subject: [PATCH 18/20] FIX-#7389: Fix uploading artifacts (#7390) Signed-off-by: Igoshev, Iaroslav --- .github/actions/upload-coverage/action.yml | 1 + .github/workflows/ci.yml | 2 ++ .github/workflows/fuzzydata-test.yml | 1 + .github/workflows/publish-to-pypi.yml | 1 + 4 files changed, 5 insertions(+) diff --git a/.github/actions/upload-coverage/action.yml b/.github/actions/upload-coverage/action.yml index 07c08984111..3d918f12c1c 100644 --- a/.github/actions/upload-coverage/action.yml +++ b/.github/actions/upload-coverage/action.yml @@ -15,3 +15,4 @@ runs: with: name: coverage-data-${{ env.COVERAGE_UUID }} path: .coverage* + include-hidden-files: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b0d5b49783..450d1d01a5d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -235,6 +235,7 @@ jobs: with: name: Benchmarks log path: asv_bench/benchmarks.log + include-hidden-files: true if: failure() execution-filter: @@ -737,6 +738,7 @@ jobs: with: name: coverage-data pattern: coverage-data-* + include-hidden-files: true delete-merged: true upload-coverage: diff --git a/.github/workflows/fuzzydata-test.yml b/.github/workflows/fuzzydata-test.yml index c9b2b2a4a29..2dd86ad9dd6 100644 --- a/.github/workflows/fuzzydata-test.yml +++ b/.github/workflows/fuzzydata-test.yml @@ -48,3 +48,4 @@ jobs: name: fuzzydata-test-workflow-${{matrix.engine}} path: /tmp/fuzzydata-test-wf-${{matrix.engine}}/* # Must match output dir in test_fuzzydata.py if-no-files-found: error + include-hidden-files: true diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 514a72481cb..e2beac1eac9 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -38,6 +38,7 @@ jobs: with: name: modin-wheel-and-source-tarball path: ./dist/ + include-hidden-files: true - name: Publish Modin wheel to PyPI if: github.event_name == 'push' From 3e951a63084a9cbfd5e73f6f36653ee12d2a2bfa Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 11 Sep 2024 14:51:21 +0200 Subject: [PATCH 19/20] Release version 0.32.0 (#7393) Signed-off-by: Anatoly Myachev From 05f5e7d2f9aedcecc3f26e42be76d94ec5faf713 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Sat, 14 Sep 2024 06:43:16 -0700 Subject: [PATCH 20/20] FIX-#7375: Fix Series.duplicated dropping name (#7395) * FIX-#7375: Fix Series.duplicated dropping name Signed-off-by: Jonathan Shi * Update modin/pandas/series.py Co-authored-by: Anatoly Myachev --------- Signed-off-by: Jonathan Shi Co-authored-by: Anatoly Myachev --- modin/pandas/series.py | 7 ++++++- modin/tests/pandas/test_series.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/modin/pandas/series.py b/modin/pandas/series.py index d18a0bec778..00083200762 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -1022,7 +1022,12 @@ def duplicated(self, keep="first") -> Series: # noqa: PR01, RT01, D200 """ Indicate duplicate Series values. 
""" - return self.to_frame().duplicated(keep=keep) + name = self.name + result = self.to_frame().duplicated(keep=keep) + # DataFrame.duplicated drops the name, so we need to manually restore it + if name is not None: + result.name = name + return result def eq( self, other, level=None, fill_value=None, axis=0 diff --git a/modin/tests/pandas/test_series.py b/modin/tests/pandas/test_series.py index 9dd8b98aac3..b283a7a1ede 100644 --- a/modin/tests/pandas/test_series.py +++ b/modin/tests/pandas/test_series.py @@ -1942,6 +1942,12 @@ def test_duplicated(data, keep): df_equals(modin_result, pandas_series.duplicated(keep=keep)) +def test_duplicated_keeps_name_issue_7375(): + # Ensure that the name property of a series is preserved across duplicated + modin_series, pandas_series = create_test_series([1, 2, 3, 1], name="a") + df_equals(modin_series.duplicated(), pandas_series.duplicated()) + + @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_empty(data): modin_series, pandas_series = create_test_series(data)