diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h index d81ff530d..6b101ce3a 100644 --- a/include/roaring/containers/containers.h +++ b/include/roaring/containers/containers.h @@ -2583,7 +2583,7 @@ bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode, bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode, roaring_container_iterator_t *it, uint64_t high48, uint64_t *buf, - uint64_t count, uint32_t *consumed, + uint32_t count, uint32_t *consumed, uint16_t *value_out); #ifdef __cplusplus diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h index af01e7486..aa7bd376d 100644 --- a/include/roaring/roaring64.h +++ b/include/roaring/roaring64.h @@ -522,6 +522,17 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr); +/** + * Write every value of the bitmap into `out` as a sorted array. + * + * The caller must ensure `out` has enough memory allocated to hold them, e.g. + * ``` + * out = malloc(roaring64_bitmap_get_cardinality(bitmap) * sizeof(uint64_t)); + * ``` + */ +void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, + uint64_t *out); + /** * Create an iterator object that can be used to iterate through the values. * Caller is responsible for calling `roaring64_iterator_free()`. 
diff --git a/microbenchmarks/bench.cpp b/microbenchmarks/bench.cpp index 8b2e74210..1bfb0d0d5 100644 --- a/microbenchmarks/bench.cpp +++ b/microbenchmarks/bench.cpp @@ -224,6 +224,19 @@ struct to_array { auto ToArray = BasicBench<to_array>; BENCHMARK(ToArray); +struct to_array64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i < count; ++i) { + roaring64_bitmap_to_uint64_array(bitmaps64[i], array_buffer64); + marker += array_buffer64[0]; /* read the 64-bit buffer that was just filled, not the 32-bit array_buffer */ + } + return marker; + } +}; +auto ToArray64 = BasicBench<to_array64>; +BENCHMARK(ToArray64); + struct iterate_all { static uint64_t run() { uint64_t marker = 0; diff --git a/microbenchmarks/bench.h b/microbenchmarks/bench.h index 20904abac..5e1382ccd 100644 --- a/microbenchmarks/bench.h +++ b/microbenchmarks/bench.h @@ -37,6 +37,7 @@ roaring_bitmap_t **bitmaps = NULL; roaring64_bitmap_t **bitmaps64 = NULL; Roaring64Map **bitmaps64cpp = NULL; uint32_t *array_buffer; +uint64_t *array_buffer64; /* output buffer for the 64-bit to-array benchmark */ uint32_t maxvalue = 0; uint32_t maxcard = 0; @@ -194,6 +195,7 @@ static roaring_bitmap_t **create_all_bitmaps(size_t *howmany, roaring_bitmap_set_copy_on_write(answer[i], copy_on_write); } array_buffer = (uint32_t *)malloc(maxcard * sizeof(uint32_t)); + array_buffer64 = (uint64_t *)malloc(maxcard * sizeof(uint64_t)); /* room for maxcard 64-bit values */ return answer; } diff --git a/src/containers/containers.c b/src/containers/containers.c index 9259b8229..04c391ec3 100644 --- a/src/containers/containers.c +++ b/src/containers/containers.c @@ -625,7 +625,7 @@ bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode, bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode, roaring_container_iterator_t *it, uint64_t high48, uint64_t *buf, - uint64_t count, uint32_t *consumed, + uint32_t count, uint32_t *consumed, uint16_t *value_out) { *consumed = 0; if (count == 0) { diff --git a/src/roaring64.c b/src/roaring64.c index 2b633b416..df5fb0a61 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -1906,6 +1906,13 @@ bool 
roaring64_bitmap_iterate(const roaring64_bitmap_t *r, return true; } +void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, + uint64_t *out) { + roaring64_iterator_t it = {0}; + roaring64_iterator_init_at(r, &it, /*first=*/true); + roaring64_iterator_read(&it, out, UINT64_MAX); /* UINT64_MAX: no cap on the number of values read */ +} + roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) { roaring64_iterator_t *it = (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); @@ -2028,9 +2035,13 @@ uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint32_t container_consumed; leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; + uint32_t container_count = UINT32_MAX; /* remaining count, clamped to fit the uint32_t count argument below */ + if (count - consumed < (uint64_t)UINT32_MAX) { + container_count = (uint32_t)(count - consumed); + } bool has_value = container_iterator_read_into_uint64( leaf->container, leaf->typecode, &it->container_it, it->high48, buf, - count - consumed, &container_consumed, &low16); + container_count, &container_consumed, &low16); consumed += container_consumed; buf += container_consumed; if (has_value) { diff --git a/tests/roaring64_unit.cpp b/tests/roaring64_unit.cpp index d888c0e8b..47df238eb 100644 --- a/tests/roaring64_unit.cpp +++ b/tests/roaring64_unit.cpp @@ -12,6 +12,17 @@ using namespace roaring::api; namespace { +void assert_vector_equal(const std::vector<uint64_t>& lhs, + const std::vector<uint64_t>& rhs) { + assert_int_equal(lhs.size(), rhs.size()); + for (size_t i = 0; i < lhs.size(); ++i) { + if (lhs[i] != rhs[i]) { + printf("Mismatch at %zu\n", i); /* print the offending index before the assertion below fails */ + assert_int_equal(lhs[i], rhs[i]); + } + } +} + DEFINE_TEST(test_copy) { roaring64_bitmap_t* r1 = roaring64_bitmap_create(); @@ -1186,6 +1197,21 @@ DEFINE_TEST(test_iterate) { roaring64_bitmap_free(r); } +DEFINE_TEST(test_to_uint64_array) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + std::vector<uint64_t> a1 = {0, 1ULL << 35, (1ULL << 35) + 1, + (1ULL << 35) + 2, 1ULL << 36}; + for (uint64_t val : a1) { + roaring64_bitmap_add(r, val); 
+ } + + std::vector<uint64_t> a2(a1.size(), 0); + roaring64_bitmap_to_uint64_array(r, a2.data()); + assert_vector_equal(a2, a1); + + roaring64_bitmap_free(r); +} + DEFINE_TEST(test_iterator_create) { roaring64_bitmap_t* r = roaring64_bitmap_create(); { @@ -1439,11 +1465,13 @@ DEFINE_TEST(test_iterator_read) { roaring64_bitmap_add_bulk(r, &context, v); } - // Check that a zero count results in zero elements read. - roaring64_iterator_t* it = roaring64_iterator_create(r); - uint64_t buf[1]; - assert_int_equal(roaring64_iterator_read(it, buf, 0), 0); - roaring64_iterator_free(it); + { + // Check that a zero count results in zero elements read. + roaring64_iterator_t* it = roaring64_iterator_create(r); + uint64_t buf[1]; + assert_int_equal(roaring64_iterator_read(it, buf, 0), 0); + roaring64_iterator_free(it); + } readCompare(values, r, 1); readCompare(values, r, 2); @@ -1451,6 +1479,25 @@ DEFINE_TEST(test_iterator_read) { readCompare(values, r, values.size()); readCompare(values, r, values.size() + 1); + { + // A count of UINT64_MAX must read every value. + roaring64_iterator_t* it = roaring64_iterator_create(r); + std::vector<uint64_t> buf(values.size(), 0); + assert_int_equal(roaring64_iterator_read(it, buf.data(), UINT64_MAX), + values.size()); + assert_vector_equal(buf, values); + roaring64_iterator_free(it); + } + { + // A count that becomes zero if cast to uint32. + roaring64_iterator_t* it = roaring64_iterator_create(r); + std::vector<uint64_t> buf(values.size(), 0); + assert_int_equal( + roaring64_iterator_read(it, buf.data(), 0xFFFFFFFF00000000), values.size()); + assert_vector_equal(buf, values); + roaring64_iterator_free(it); + } + roaring64_bitmap_free(r); } @@ -1504,6 +1551,7 @@ int main() { cmocka_unit_test(test_flip_inplace), cmocka_unit_test(test_portable_serialize), cmocka_unit_test(test_iterate), + cmocka_unit_test(test_to_uint64_array), cmocka_unit_test(test_iterator_create), cmocka_unit_test(test_iterator_create_last), cmocka_unit_test(test_iterator_reinit),