From bf87ef75cbf1d1d45dd6c9796cb36f321d2a48a0 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 7 Aug 2024 10:41:56 -0400 Subject: [PATCH 1/3] Update Changelog [skip ci] --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 264cef755..faa934484 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,30 @@ +# cuspatial 24.08.00 (7 Aug 2024) + +## 🐛 Bug Fixes + +- Unskip taxi notebook from CI ([#1422](https://github.com/rapidsai/cuspatial/pull/1422)) [@harrism](https://github.com/harrism) +- Use CuPy array in `pip_bitmap_column_to_binary_array` ([#1418](https://github.com/rapidsai/cuspatial/pull/1418)) [@isVoid](https://github.com/isVoid) +- Conda: Move `cmake` to `requirements/build` ([#1409](https://github.com/rapidsai/cuspatial/pull/1409)) [@jakirkham](https://github.com/jakirkham) +- test notebooks in 'docs/', make cuspatial_api_examples self-contained, skip long-running notebook, fix some docs ([#1407](https://github.com/rapidsai/cuspatial/pull/1407)) [@jameslamb](https://github.com/jameslamb) +- Fix geopandas upgrades ([#1404](https://github.com/rapidsai/cuspatial/pull/1404)) [@quasiben](https://github.com/quasiben) +- Fix multipolygon geometry iterator. 
([#1402](https://github.com/rapidsai/cuspatial/pull/1402)) [@bdice](https://github.com/bdice) +- Change _GeoSeriesUtility._from_data(index=) default to None ([#1400](https://github.com/rapidsai/cuspatial/pull/1400)) [@mroeschke](https://github.com/mroeschke) +- Fix test_intersections.py post cudf refactor ([#1398](https://github.com/rapidsai/cuspatial/pull/1398)) [@mroeschke](https://github.com/mroeschke) +- Allow anonymous user in devcontainer name ([#1396](https://github.com/rapidsai/cuspatial/pull/1396)) [@bdice](https://github.com/bdice) + +## 🛠️ Improvements + +- Ensure _from_data accepts columns objects only ([#1415](https://github.com/rapidsai/cuspatial/pull/1415)) [@mroeschke](https://github.com/mroeschke) +- split up CUDA-suffixed dependencies in dependencies.yaml ([#1414](https://github.com/rapidsai/cuspatial/pull/1414)) [@jameslamb](https://github.com/jameslamb) +- Build and test with CUDA 12.5.1 ([#1405](https://github.com/rapidsai/cuspatial/pull/1405)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- skip CMake 3.30.0 ([#1401](https://github.com/rapidsai/cuspatial/pull/1401)) [@jameslamb](https://github.com/jameslamb) +- Use verify-alpha-spec hook ([#1399](https://github.com/rapidsai/cuspatial/pull/1399)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Adopt CI/packaging codeowners ([#1397](https://github.com/rapidsai/cuspatial/pull/1397)) [@bdice](https://github.com/bdice) +- Remove text builds of documentation ([#1394](https://github.com/rapidsai/cuspatial/pull/1394)) [@vyasr](https://github.com/vyasr) +- use rapids-build-backend ([#1393](https://github.com/rapidsai/cuspatial/pull/1393)) [@jameslamb](https://github.com/jameslamb) +- Fix forward-merge `branch-24.06` into `branch-24.08` ([#1390](https://github.com/rapidsai/cuspatial/pull/1390)) [@trxcllnt](https://github.com/trxcllnt) +- remove unnecessary 'setuptools' dependencies ([#1389](https://github.com/rapidsai/cuspatial/pull/1389)) [@jameslamb](https://github.com/jameslamb) + # 
cuspatial 24.06.00 (5 Jun 2024) ## 🚨 Breaking Changes From 3906fb9dd47bc0e2923e471a1319439fed218838 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 7 Aug 2024 05:30:32 -1000 Subject: [PATCH 2/3] Address cudf.DataFrame.insert API change (#1428) closes https://github.com/rapidsai/cuspatial/issues/1427 The API of `cudf.DataFrame.insert` will change in 24.10 to better align with pandas, so adjusting the usage here xref https://github.com/rapidsai/cudf/pull/16402 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cuspatial/pull/1428 --- python/cuspatial/cuspatial/core/geodataframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuspatial/cuspatial/core/geodataframe.py b/python/cuspatial/cuspatial/core/geodataframe.py index 90dcaf24e..08f752759 100644 --- a/python/cuspatial/cuspatial/core/geodataframe.py +++ b/python/cuspatial/cuspatial/core/geodataframe.py @@ -270,7 +270,7 @@ def reset_index( if not drop: if not isinstance(cudf_data.index, cudf.MultiIndex): recombiner.insert( - loc=0, name="index", value=cudf_reindexed["index"] + loc=0, column="index", value=cudf_reindexed["index"] ) # If the index is a MultiIndex, we need to insert the # individual levels into the GeoDataFrame. 
@@ -288,7 +288,7 @@ def reset_index( for n, name in enumerate(levels): recombiner.insert( loc=n, - name=name, + column=name, value=cudf_reindexed[name].reset_index(drop=True), ) recombiner.index = cudf_reindexed.index From 6865f7ced3409fe72ce35af8061052e7f65fdc5a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 7 Aug 2024 05:35:19 -1000 Subject: [PATCH 3/3] Pass columns instead of Series to `cudf.DataFrame` in split-combine workflow (#1429) closes https://github.com/rapidsai/cuspatial/issues/1426 It appears there was a change in 24.08 that broke a notebook demonstrating a `merge` on two geometry columns. It seems like the merge result tries to reconstruct a `GeoDataFrame` from a `dict[Any, GeoSeries | Series]` but the `Series.index` alignment requires the types to be recognized cudf types (not `"geometry"`). I don't think this alignment is entirely necessary though since it goes through the `_split_out_geometry_columns`/`_recombine_columns` methods which appear to be used on operations that maintain row ordering so index alignment isn't required. This PR instead passes a `dict[Any, GeoColumn | Column]` to `cudf.DataFrame._from_data` given that this row ordering is preserved. 
(This PR also includes the fix for https://github.com/rapidsai/cuspatial/issues/1427) Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cuspatial/pull/1429 --- .../cuspatial/cuspatial/core/geodataframe.py | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/python/cuspatial/cuspatial/core/geodataframe.py b/python/cuspatial/cuspatial/core/geodataframe.py index 08f752759..79d2de665 100644 --- a/python/cuspatial/cuspatial/core/geodataframe.py +++ b/python/cuspatial/cuspatial/core/geodataframe.py @@ -161,18 +161,25 @@ def _split_out_geometry_columns( def _recombine_columns( self, geo_columns: GeoDataFrame, data_columns: cudf.DataFrame - ) -> dict[Any, GeoSeries | cudf.Series]: + ) -> dict[Any, GeoColumn | cudf.core.column.ColumnBase]: """ Combine a GeoDataFrame of only geometry columns with a DataFrame of non-geometry columns in the same order as the columns in `self` + + The output is meant for GeoDataFrame._from_data. 
""" + if not geo_columns.index.equals(data_columns.index): + raise ValueError("geo_columns.index must equal data_columns.index") + columns_mask = self.columns - geocolumn_mask = ( - isinstance(self[col], GeoSeries) for col in columns_mask - ) + col_is_geo = (isinstance(self[col], GeoSeries) for col in columns_mask) return { - name: (geo_columns[name] if mask else data_columns[name]) - for name, mask in zip(columns_mask, geocolumn_mask) + name: ( + geo_columns[name]._column + if is_geo + else data_columns[name]._column + ) + for name, is_geo in zip(columns_mask, col_is_geo) } def _slice(self: T, arg: slice) -> T: @@ -184,10 +191,10 @@ def _slice(self: T, arg: slice) -> T: {name: geo_columns[name].iloc[arg] for name in geo_columns.columns} ) sliced_data_columns = data_columns._slice(arg) - result = self._recombine_columns( - sliced_geo_columns, sliced_data_columns + return self._from_data( + self._recombine_columns(sliced_geo_columns, sliced_data_columns), + index=sliced_data_columns.index, ) - return self.__class__(result) def _apply_boolean_mask(self, mask: BooleanMask, keep_index=True) -> T: geo_columns, data_columns = self._split_out_geometry_columns() @@ -197,7 +204,7 @@ def _apply_boolean_mask(self, mask: BooleanMask, keep_index=True) -> T: {name: geo_columns[name][mask.column] for name in geo_columns} ) - res = self.__class__(self._recombine_columns(geo, data)) + res = self._from_data(self._recombine_columns(geo, data)) if keep_index: res.index = data.index return res @@ -215,14 +222,14 @@ def _gather(self, gather_map: GatherMap, keep_index=True): for geo in geo_data.keys() } geo_gathered = GeoDataFrame(gathered) + # _gather may have discarded the index, so re-add + cudf_gathered.index = geo_gathered.index # combine - result = GeoDataFrame( - self._recombine_columns(geo_gathered, cudf_gathered) + return GeoDataFrame._from_data( + self._recombine_columns(geo_gathered, cudf_gathered), + index=geo_gathered.index, ) - result.index = geo_gathered.index - # return - 
return result def reset_index( self, level=None, drop=False, inplace=False, col_level=0, col_fill="" @@ -301,11 +308,10 @@ def reset_index( # Reset the index of the GeoDataFrame to match the # cudf DataFrame and recombine. geo_data.index = cudf_reindexed.index - result = GeoDataFrame( - recombiner._recombine_columns(geo_data, cudf_reindexed) + return GeoDataFrame._from_data( + recombiner._recombine_columns(geo_data, cudf_reindexed), + index=cudf_reindexed.index, ) - result.index = geo_data.index - return result class _GeoSeriesUtility: