Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into cln/column_empty/masked
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored Dec 12, 2024
2 parents b8ade91 + 98d9856 commit d183bea
Show file tree
Hide file tree
Showing 165 changed files with 2,327 additions and 3,924 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ jobs:
OTEL_SERVICE_NAME: 'pr-cudf'
steps:
- name: Telemetry setup
if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
changed-files:
secrets: inherit
Expand Down Expand Up @@ -329,7 +330,7 @@ jobs:
telemetry-summarize:
runs-on: ubuntu-latest
needs: pr-builder
if: always()
if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }}
continue-on-error: true
steps:
- name: Load stashed telemetry env vars
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/trigger-breaking-change-alert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
trigger-notifier:
if: contains(github.event.pull_request.labels.*.name, 'breaking')
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12
uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02
with:
sender_login: ${{ github.event.sender.login }}
sender_avatar: ${{ github.event.sender.avatar_url }}
Expand Down
3 changes: 1 addition & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0
- cuda-python>=11.8.5,<12.0a0
- cuda-sanitizer-api=11.8.86
- cuda-version=11.8
- cudatoolkit
Expand Down Expand Up @@ -87,7 +87,6 @@ dependencies:
- s3fs>=2022.3.0
- scikit-build-core>=0.10.0
- scipy
- spdlog>=1.14.1,<1.15
- sphinx
- sphinx-autobuild
- sphinx-copybutton
Expand Down
3 changes: 1 addition & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-python>=12.0,<13.0a0
- cuda-python>=12.6.2,<13.0a0
- cuda-sanitizer-api
- cuda-version=12.5
- cupy>=12.0.0
Expand Down Expand Up @@ -86,7 +86,6 @@ dependencies:
- s3fs>=2022.3.0
- scikit-build-core>=0.10.0
- scipy
- spdlog>=1.14.1,<1.15
- sphinx
- sphinx-autobuild
- sphinx-copybutton
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ requirements:
- cudatoolkit
- ptxcompiler >=0.7.0
- cubinlinker # CUDA enhanced compatibility.
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.8.5,<12.0a0
{% else %}
- cuda-cudart
- libcufile # [linux64]
Expand All @@ -100,7 +100,7 @@ requirements:
# TODO: Add nvjitlink here
# xref: https://github.com/rapidsai/cudf/issues/12822
- cuda-nvrtc
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.6.2,<13.0a0
- pynvjitlink
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
Expand Down
3 changes: 0 additions & 3 deletions conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ fmt_version:
flatbuffers_version:
- "=24.3.25"

spdlog_version:
- ">=1.14.1,<1.15"

nvcomp_version:
- "=4.1.0.6"

Expand Down
1 change: 0 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ requirements:
- librdkafka {{ librdkafka_version }}
- fmt {{ fmt_version }}
- flatbuffers {{ flatbuffers_version }}
- spdlog {{ spdlog_version }}
- zlib {{ zlib_version }}

outputs:
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ requirements:
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.8.5,<12.0a0
{% else %}
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.6.2,<13.0a0
{% endif %}
- nvtx >=0.2.1
- packaging
Expand Down
26 changes: 16 additions & 10 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,14 @@ endif()

# add third party dependencies using CPM
rapids_cpm_init()

# Not using rapids-cmake since we never want to find, always download.
CPMAddPackage(
NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW TRUE GIT_TAG
c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55
)
rapids_make_logger(cudf EXPORT_SET cudf-exports)

# find jitify
include(cmake/thirdparty/get_jitify.cmake)
# find NVTX
Expand All @@ -299,8 +307,6 @@ include(cmake/Modules/JitifyPreprocessKernels.cmake)
include(cmake/thirdparty/get_kvikio.cmake)
# find fmt
include(cmake/thirdparty/get_fmt.cmake)
# find spdlog
include(cmake/thirdparty/get_spdlog.cmake)
# find nanoarrow
include(cmake/thirdparty/get_nanoarrow.cmake)
# find thread_pool
Expand Down Expand Up @@ -772,7 +778,6 @@ add_library(
src/utilities/default_stream.cpp
src/utilities/host_memory.cpp
src/utilities/linked_column.cpp
src/utilities/logger.cpp
src/utilities/prefetch.cpp
src/utilities/stacktrace.cpp
src/utilities/stream_pool.cpp
Expand Down Expand Up @@ -910,11 +915,8 @@ if(CUDF_LARGE_STRINGS_DISABLED)
target_compile_definitions(cudf PRIVATE CUDF_LARGE_STRINGS_DISABLED)
endif()

# Define RMM logging level
target_compile_definitions(cudf PRIVATE "RMM_LOGGING_LEVEL=LIBCUDF_LOGGING_LEVEL")

# Define spdlog level
target_compile_definitions(cudf PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${LIBCUDF_LOGGING_LEVEL}")
# Define logging level
target_compile_definitions(cudf PRIVATE "CUDF_LOG_ACTIVE_LEVEL=${LIBCUDF_LOGGING_LEVEL}")

# Enable remote IO through KvikIO
target_compile_definitions(cudf PRIVATE $<$<BOOL:${CUDF_KVIKIO_REMOTE_IO}>:CUDF_KVIKIO_REMOTE_IO>)
Expand All @@ -928,14 +930,17 @@ if(TARGET CUDA::cuFile${_cufile_suffix})
target_compile_definitions(cudf PRIVATE CUDF_CUFILE_FOUND)
endif()

# Remove this after upgrading to a CCCL that has a proper CMake option. See
# https://github.com/NVIDIA/cccl/pull/2844
target_compile_definitions(cudf PRIVATE THRUST_FORCE_32_BIT_OFFSET_TYPE=1)

# Compile stringified JIT sources first
add_dependencies(cudf jitify_preprocess_run)

# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC CCCL::CCCL rmm::rmm rmm::rmm_logger $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
spdlog::spdlog_header_only
PUBLIC CCCL::CCCL rmm::rmm rmm::rmm_logger $<BUILD_LOCAL_INTERFACE:BS::thread_pool> cudf_logger
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
cuco::cuco
ZLIB::ZLIB
Expand All @@ -944,6 +949,7 @@ target_link_libraries(
$<TARGET_NAME_IF_EXISTS:CUDA::cuFile${_cufile_suffix}>
nanoarrow
rmm::rmm_logger_impl
cudf_logger_impl
)

# Add Conda library, and include paths if specified
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <benchmarks/io/cuio_common.hpp>

#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>
#include <cudf/logger.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <rmm/mr/pinned_host_memory_resource.hpp>
Expand Down
8 changes: 4 additions & 4 deletions cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ void apply_boolean_mask_benchmark(nvbench::state& state, nvbench::type_list<Data
data_profile profile = data_profile_builder().cardinality(0).no_validity().distribution(
input_type, distribution_id::UNIFORM, 0, 20);

auto source_table =
create_random_table(cycle_dtypes({input_type}, n_cols), row_count{n_rows}, profile);
auto source_table = create_random_table(
cycle_dtypes({input_type, cudf::type_id::STRING}, n_cols), row_count{n_rows}, profile);

profile.set_bool_probability_true(percent_true / 100.0);
profile.set_null_probability(std::nullopt); // no null mask
Expand All @@ -85,6 +85,6 @@ using data_type = nvbench::type_list<int32_t, int64_t, double, cudf::string_view
NVBENCH_BENCH_TYPES(apply_boolean_mask_benchmark, NVBENCH_TYPE_AXES(data_type))
.set_name("apply_boolean_mask")
.set_type_axes_names({"type"})
.add_int64_axis("columns", {1, 4})
.add_int64_axis("columns", {1, 4, 9})
.add_int64_axis("rows", {100'000, 1'000'000, 10'000'000})
.add_int64_axis("hits_%", {10, 50, 100});
.add_int64_axis("hits_%", {10, 20, 50, 80, 90, 100});
4 changes: 3 additions & 1 deletion cpp/benchmarks/stream_compaction/distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
cudf::size_type const num_rows = state.get_int64("NumRows");
auto const keep = get_keep(state.get_string("keep"));
cudf::size_type const cardinality = state.get_int64("cardinality");
auto const null_probability = state.get_float64("null_probability");

if (cardinality > num_rows) {
state.skip("cardinality > num_rows");
Expand All @@ -42,7 +43,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)

data_profile profile = data_profile_builder()
.cardinality(cardinality)
.null_probability(0.01)
.null_probability(null_probability)
.distribution(cudf::type_to_id<Type>(),
distribution_id::UNIFORM,
static_cast<Type>(0),
Expand All @@ -65,6 +66,7 @@ using data_type = nvbench::type_list<int32_t, int64_t>;
NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type))
.set_name("distinct")
.set_type_axes_names({"Type"})
.add_float64_axis("null_probability", {0.01})
.add_string_axis("keep", {"any", "first", "last", "none"})
.add_int64_axis("cardinality", {100, 100'000, 10'000'000, 1'000'000'000})
.add_int64_axis("NumRows", {100, 100'000, 10'000'000, 1'000'000'000});
Expand Down
5 changes: 2 additions & 3 deletions cpp/benchmarks/text/minhash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@ static void bench_minhash(nvbench::state& state)
state.add_global_memory_writes<nvbench::int32_t>(num_rows); // output are hashes

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = base64
? nvtext::minhash64_permuted(input, 0, parameters_a, parameters_b, hash_width)
: nvtext::minhash_permuted(input, 0, parameters_a, parameters_b, hash_width);
auto result = base64 ? nvtext::minhash64(input, 0, parameters_a, parameters_b, hash_width)
: nvtext::minhash(input, 0, parameters_a, parameters_b, hash_width);
});
}

Expand Down
27 changes: 0 additions & 27 deletions cpp/cmake/thirdparty/get_spdlog.cmake

This file was deleted.

5 changes: 0 additions & 5 deletions cpp/cmake/thirdparty/patches/cccl_override.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
"packages" : {
"CCCL" : {
"patches" : [
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
Expand Down
22 changes: 0 additions & 22 deletions cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff

This file was deleted.

6 changes: 3 additions & 3 deletions cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -1082,15 +1082,15 @@ initialization. If this setting is higher than the compile-time CMake variable,
in between the two settings will be excluded from the written log. The available levels are the same
as for the CMake variable.
* Global logger object exposed via `cudf::default_logger()` - sets the minimum logging level at runtime.
For example, calling `cudf::logger().set_level(spdlog::level::err)`, will exclude any messages that
For example, calling `cudf::default_logger().set_level(level_enum::err)` will exclude any messages that
are not errors or critical errors. This API should not be used within libcudf to manipulate logging;
its purpose is to allow upstream users to configure libcudf logging to fit their application.

By default, logging messages are output to stderr.
Setting the environment variable `LIBCUDF_DEBUG_LOG_FILE` redirects the log to a file with the
specified path (can be relative to the current directory).
Upstream users can also manipulate `cudf::logger().sinks()` to add sinks or divert the log to
standard output or even a custom spdlog sink.
Upstream users can also manipulate `cudf::default_logger().sinks()` to add sinks or divert the log to
standard output.

# Data Types

Expand Down
Loading

0 comments on commit d183bea

Please sign in to comment.