Fix CI Build Issues (#110)
* Fix crossfit build

Signed-off-by: Vibhu Jawa <[email protected]>

* Fix crossfit build

Signed-off-by: Vibhu Jawa <[email protected]>

* Test on 3.10+ only

Signed-off-by: Vibhu Jawa <[email protected]>

* Test on 3.10+ only

Signed-off-by: Vibhu Jawa <[email protected]>

* Update rapids to 25.02

* Try a different docker image

Signed-off-by: Vibhu Jawa <[email protected]>

* Switch dask min dependency to 2024.12.1

Signed-off-by: Vibhu Jawa <[email protected]>

---------

Signed-off-by: Vibhu Jawa <[email protected]>
Signed-off-by: Vibhu Jawa <[email protected]>
Co-authored-by: Vibhu Jawa <[email protected]>
VibhuJawa and Vibhu Jawa authored Feb 19, 2025
1 parent 654e1dc commit 745208d
Showing 7 changed files with 15 additions and 69 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/base.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
2 changes: 1 addition & 1 deletion .github/workflows/cf_backends.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.8]
+        python-version: ["3.10"]
         os: [ubuntu-latest]
         torch-version: ["~=1.11.0", "~=1.12.0", "~=1.13.0"]
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
@@ -13,16 +13,16 @@ jobs:
     needs:
       - python-gpu-tests
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.08
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02

   python-gpu-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/base:24.08-cuda12.2-py3.11"
+      container_image: "rapidsai/base:25.02-cuda12.8-py3.11"
       run_script: "ci/test_gpu.sh"

   # benchmark:
12 changes: 6 additions & 6 deletions conda/environments/cuda_dev.yaml
@@ -7,17 +7,17 @@ dependencies:
   - bandit
   - black
   - cuda-version=12.0
-  - cudf>=24.12
-  - cuml>=24.12
+  - cudf>=25.02
+  - cuml>=25.02
   - cupy>=12.0.0
-  - cuvs>=24.12
-  - dask-cuda>=24.12
-  - dask-cudf>=24.12
+  - cuvs>=25.02
+  - dask-cuda>=25.02
+  - dask-cudf>=25.02
   - flake8
   - isort
   - pip
   - pre_commit
-  - pylibraft>=24.12
+  - pylibraft>=25.02
   - pytest
   - pytest-benchmark
   - pytest-cov>=2
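
As a usage note rather than part of the diff: this file is a standard conda environment spec, so it can be materialized with conda env create -f conda/environments/cuda_dev.yaml. The RAPIDS packages here (cudf, cuml, cuvs, dask-cuda, dask-cudf, pylibraft) are released in lockstep, which is why all six pins move from 24.12 to 25.02 together.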
55 changes: 0 additions & 55 deletions crossfit/backend/dask/cluster.py
@@ -20,7 +20,6 @@

 import dask
 import distributed
-from dask.dataframe.optimize import optimize as dd_optimize
 from dask.distributed import Client, get_client

 from crossfit.backend.gpu import HAS_GPU
@@ -93,60 +92,6 @@ def increase_gc_threshold():
     gc.set_threshold(g0 * 3, g1 * 3, g2 * 3)


-def ensure_optimize_dataframe_graph(ddf=None, dsk=None, keys=None):
-    """Perform HLG DataFrame optimizations
-    If `ddf` is specified, an optimized Dataframe
-    collection will be returned. If `dsk` and `keys`
-    are specified, an optimized graph will be returned.
-    These optimizations are performed automatically
-    when a DataFrame collection is computed/persisted,
-    but they are NOT always performed when statistics
-    are computed. The purpose of this utility is to
-    ensure that the Dataframe-based optimizations are
-    always applied.
-    Parameters
-    ----------
-    ddf : dask_cudf.DataFrame, optional
-        The dataframe to optimize, by default None
-    dsk : dask.highlevelgraph.HighLevelGraph, optional
-        Dask high level graph, by default None
-    keys : List[str], optional
-        The keys to optimize, by default None
-    Returns
-    -------
-    Union[dask_cudf.DataFrame, dask.highlevelgraph.HighLevelGraph]
-        A dask_cudf DataFrame or dask HighLevelGraph depending
-        on the parameters provided.
-    Raises
-    ------
-    ValueError
-        If ddf is not provided and one of dsk or keys are None.
-    """
-
-    if ddf is None:
-        if dsk is None or keys is None:
-            raise ValueError("Must specify both `dsk` and `keys` if `ddf` is not supplied.")
-    dsk = ddf.dask if dsk is None else dsk
-    keys = ddf.__dask_keys__() if keys is None else keys
-
-    if isinstance(dsk, dask.highlevelgraph.HighLevelGraph):
-        with dask.config.set({"optimization.fuse.active": False}):
-            dsk = dd_optimize(dsk, keys=keys)
-
-    if ddf is None:
-        # Return optimized graph
-        return dsk
-
-    # Return optimized ddf
-    ddf.dask = dsk
-    return ddf


 class Distributed:
     """Distributed-Execution Context Manager
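
The deletion above is possible because in dask>=2024.12 the DataFrame API is expression-based and optimizes its own graph whenever a collection is computed or persisted, so a manual HighLevelGraph optimization helper is no longer needed. A minimal sketch of the modern behavior (illustrative only, not code from this repository):

import dask
import dask.dataframe as dd
import pandas as pd

# Expression-backed DataFrame: graph optimization happens automatically
# inside compute()/persist(), with no helper required.
ddf = dd.from_pandas(pd.DataFrame({"x": range(8)}), npartitions=2)

# dask.optimize() is still available if an explicitly optimized
# collection is wanted up front; it returns a tuple of collections.
(optimized,) = dask.optimize(ddf)
print(optimized.x.sum().compute())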
5 changes: 3 additions & 2 deletions requirements/base.txt
@@ -1,9 +1,10 @@
 astunparse
 pandas
+pyarrow
 numpy
 numba
-dask
-distributed>=2022.11.1
+dask[dataframe]>=2024.12.1
+distributed>=2024.12.1
 scikit-learn>=1.2.0
 fsspec>=2022.7.1
 tensorflow_metadata
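
A hypothetical guard (not present in the repository) showing what the new floor amounts to at runtime; the use of the packaging module and the error message are illustrative assumptions:

import dask
from packaging.version import Version  # packaging assumed to be installed

# Fail fast if the environment predates the pinned minimum.
if Version(dask.__version__) < Version("2024.12.1"):
    raise RuntimeError(f"crossfit needs dask>=2024.12.1, found {dask.__version__}")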
2 changes: 1 addition & 1 deletion setup.py
@@ -78,6 +78,6 @@ def read_requirements(filename):
         **dev_requirements,
         "all": list(itertools.chain(*list(requirements.values()))),
     },
-    python_requires=">=3.8, <3.13",
+    python_requires=">=3.10, <3.13",
     test_suite="tests",
 )
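
For illustration only: pip enforces python_requires from package metadata at install time, so this release will simply be skipped on Python 3.8/3.9. A runtime equivalent of the tightened constraint would look like:

import sys

# Mirrors python_requires=">=3.10, <3.13" (pip performs the real check).
if not ((3, 10) <= sys.version_info[:2] < (3, 13)):
    raise RuntimeError("Python >=3.10 and <3.13 is required")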
