
[DNM] Try dask-expr #837

Draft · wants to merge 10 commits into base: main
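The diff itself is mechanical: every "import dask.dataframe as dd" becomes "import dask_expr as dd", and dask.datasets.timeseries calls move to dask_expr.datasets.timeseries; the benchmark bodies are otherwise untouched. A rough sketch of the intended drop-in usage is below; the column layout and the groupby call are illustrative assumptions, not taken from this PR, and it presumes dask_expr mirrors the dask.dataframe collection API for the operations the benchmarks exercise.

# Minimal sketch of the swap this PR makes, not part of the PR itself.
# Assumes dask_expr exposes the same collection API (column access,
# groupby, mean, compute) as dask.dataframe.
import dask_expr as dd  # previously: import dask.dataframe as dd

# dask_expr ships its own timeseries generator (see tests/utils_test.py below).
df = dd.datasets.timeseries(
    "2020",
    "2021",
    partition_freq="2w",
    dtypes={"name": str, "x": float},  # hypothetical columns for this example
)

# Ordinary dataframe operations are unchanged; only the import line differs.
print(df.groupby("name").x.mean().compute())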
.github/workflows/tests.yml (3 additions & 0 deletions)

@@ -93,6 +93,9 @@ jobs:
           echo $EXTRA_OPTIONS
           echo EXTRA_OPTIONS=$EXTRA_OPTIONS >> $GITHUB_ENV

+      - name: Install dask-expr
+        run: python -m pip install git+https://github.com/phofl/dask-expr.git@benchmarks
+
       - name: Run Coiled Runtime Tests
         id: test
         env:
tests/benchmarks/test_csv.py (1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-import dask.dataframe as dd
+import dask_expr as dd
 import pandas as pd
 
 from ..utils_test import run_up_to_nthreads
tests/benchmarks/test_h2o.py (1 addition & 1 deletion)

@@ -6,7 +6,7 @@
 """
 import os
 
-import dask.dataframe as dd
+import dask_expr as dd
 import pandas as pd
 import pytest
tests/benchmarks/test_join.py (1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-import dask.dataframe as dd
+import dask_expr as dd
 import pytest
 
 from ..utils_test import cluster_memory, run_up_to_nthreads, timeseries_of_size, wait
tests/benchmarks/test_parquet.py (2 additions & 3 deletions)

@@ -5,8 +5,7 @@
 import uuid
 
 import boto3
-import dask.dataframe as dd
-import dask.datasets
+import dask_expr as dd
 import distributed
 import fsspec
 import pandas

@@ -79,7 +78,7 @@ def test_read_hive_partitioned_data(parquet_client):
 @run_up_to_nthreads("parquet_cluster", 100, reason="fixed dataset")
 def test_write_wide_data(parquet_client, s3_url):
     # Write a ~700 partition, ~200 GB dataset with a lot of columns
-    ddf = dask.datasets.timeseries(
+    ddf = dd.datasets.timeseries(
         dtypes={
             **{f"name-{i}": str for i in range(25)},
             **{f"price-{i}": float for i in range(25)},
tests/benchmarks/test_xgboost.py (1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-import dask.dataframe as dd
+import dask_expr as dd
 import numpy as np
 import pandas as pd
 import pytest
tests/runtime/test_xgboost.py (1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-import dask.dataframe as dd
+import dask_expr as dd
 import pytest
 
 # `coiled-runtime=0.0.4` don't contain `xgboost`
tests/stability/test_deadlock.py (3 additions & 3 deletions)

@@ -1,6 +1,6 @@
 import uuid
 
-import dask
+import dask_expr as dx
 import distributed
 import pytest
 from coiled import Cluster

@@ -27,12 +27,12 @@ def test_repeated_merge_spill(
     ) as cluster:
         with Client(cluster) as client:
             with upload_cluster_dump(client), benchmark_all(client):
-                ddf = dask.datasets.timeseries(
+                ddf = dx.datasets.timeseries(
                     "2020",
                     "2025",
                     partition_freq="2w",
                 )
-                ddf2 = dask.datasets.timeseries(
+                ddf2 = dx.datasets.timeseries(
                     "2020",
                     "2023",
                     partition_freq="2w",
tests/utils_test.py (2 additions & 2 deletions)

@@ -4,14 +4,14 @@
 import dask
 import dask.array as da
-import dask.dataframe as dd
+import dask_expr as dd
 import distributed
 import numpy as np
 import pandas as pd
 import pytest
-from dask.datasets import timeseries
 from dask.sizeof import sizeof
 from dask.utils import format_bytes, parse_bytes
+from dask_expr.datasets import timeseries
 
 
 def scaled_array_shape(
tests/workflows/test_uber_lyft.py (1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-import dask.dataframe as dd
+import dask_expr as dd
 import pytest