From 23a8441cf3765aec5bc6b0dc4f4c61df39fdb7d1 Mon Sep 17 00:00:00 2001
From: Soerian
Date: Sun, 12 Jan 2025 18:58:58 +0000
Subject: [PATCH] Link free nodes together

This adds the index of the next free node into a newly freed node, or
`capacity` if there are no more free indices. This significantly speeds
up finding the next free index, which is important for add+remove
workloads.

Benchmarks

Old:
------------------------------------------------------------------
Benchmark                        Time             CPU   Iterations
------------------------------------------------------------------
r64InsertRemoveRandom/0        127 ns          127 ns      5461079
r64InsertRemoveRandom/1      31633 ns        31604 ns        24028
r64InsertRemoveRandom/2      30782 ns        30769 ns        21859
r64InsertRemoveRandom/3      31985 ns        31969 ns        21558
r64InsertRemoveRandom/4        356 ns          356 ns      1962694
r64InsertRemoveRandom/5      28972 ns        28962 ns        21366
r64InsertRemoveRandom/6      30632 ns        30623 ns        22682
r64InsertRemoveRandom/7        448 ns          448 ns      1601550
r64InsertRemoveRandom/8      32506 ns        32495 ns        21591
r64InsertRemoveRandom/9        689 ns          689 ns      1002237
cppInsertRemoveRandom/0        131 ns          131 ns      5319673
cppInsertRemoveRandom/1      16106 ns        16104 ns        43632
cppInsertRemoveRandom/2       3881 ns         3881 ns       180087
cppInsertRemoveRandom/3       3582 ns         3582 ns       171298
cppInsertRemoveRandom/4        403 ns          402 ns      1666697
cppInsertRemoveRandom/5        993 ns          993 ns       706038
cppInsertRemoveRandom/6       4039 ns         4038 ns       172421
cppInsertRemoveRandom/7        469 ns          469 ns      1440197
cppInsertRemoveRandom/8       1454 ns         1454 ns       633551
cppInsertRemoveRandom/9        654 ns          654 ns      1091588
setInsertRemoveRandom/0       1944 ns         1943 ns       368926
setInsertRemoveRandom/1       1955 ns         1953 ns       404931
setInsertRemoveRandom/2       1911 ns         1910 ns       358466
setInsertRemoveRandom/3       1953 ns         1951 ns       362351
setInsertRemoveRandom/4       2104 ns         2102 ns       321387
setInsertRemoveRandom/5       1944 ns         1943 ns       354836
setInsertRemoveRandom/6       1835 ns         1835 ns       359099
setInsertRemoveRandom/7       1970 ns         1968 ns       372625
setInsertRemoveRandom/8       1894 ns         1892 ns       355456
setInsertRemoveRandom/9       1659 ns         1659 ns       355902

New:
------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------ r64InsertRemoveRandom/0 128 ns 128 ns 5614266 r64InsertRemoveRandom/1 935 ns 935 ns 739679 r64InsertRemoveRandom/2 916 ns 916 ns 739944 r64InsertRemoveRandom/3 936 ns 936 ns 690708 r64InsertRemoveRandom/4 368 ns 368 ns 1957642 r64InsertRemoveRandom/5 1141 ns 1140 ns 592505 r64InsertRemoveRandom/6 1139 ns 1138 ns 657840 r64InsertRemoveRandom/7 481 ns 481 ns 1434967 r64InsertRemoveRandom/8 1447 ns 1446 ns 484463 r64InsertRemoveRandom/9 721 ns 721 ns 1017456 cppInsertRemoveRandom/0 134 ns 134 ns 5524804 cppInsertRemoveRandom/1 15616 ns 15608 ns 47666 cppInsertRemoveRandom/2 3855 ns 3854 ns 180265 cppInsertRemoveRandom/3 3809 ns 3808 ns 183595 cppInsertRemoveRandom/4 412 ns 412 ns 1695708 cppInsertRemoveRandom/5 1012 ns 1011 ns 713501 cppInsertRemoveRandom/6 3410 ns 3409 ns 199214 cppInsertRemoveRandom/7 474 ns 474 ns 1496740 cppInsertRemoveRandom/8 1421 ns 1420 ns 465868 cppInsertRemoveRandom/9 564 ns 564 ns 1148076 setInsertRemoveRandom/0 1956 ns 1956 ns 351283 setInsertRemoveRandom/1 1959 ns 1958 ns 355766 setInsertRemoveRandom/2 1886 ns 1885 ns 357406 setInsertRemoveRandom/3 1905 ns 1904 ns 355235 setInsertRemoveRandom/4 1945 ns 1944 ns 364599 setInsertRemoveRandom/5 1902 ns 1902 ns 350312 setInsertRemoveRandom/6 1907 ns 1906 ns 346962 setInsertRemoveRandom/7 1937 ns 1936 ns 356168 setInsertRemoveRandom/8 1881 ns 1880 ns 341472 setInsertRemoveRandom/9 1962 ns 1961 ns 350643 --- src/art/art.c | 356 ++++++++++++++++++++++++++++---------------------- 1 file changed, 197 insertions(+), 159 deletions(-) diff --git a/src/art/art.c b/src/art/art.c index 6ed82ad5..0d00a379 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -44,8 +44,13 @@ typedef void art_node_t; typedef struct art_leaf_s { bool occupied; - art_key_chunk_t key[ART_KEY_BYTES]; - art_val_t val; + union { + struct { + art_key_chunk_t 
key[ART_KEY_BYTES]; + art_val_t val; + }; + size_t next_free; // Used if !occupied. + }; } art_leaf_t; // Inner node, with prefix. @@ -62,16 +67,26 @@ typedef struct art_inner_node_s { typedef struct art_node4_s { art_inner_node_t base; uint8_t count; - uint8_t keys[4]; - art_ref_t children[4]; + union { + struct { + uint8_t keys[4]; + art_ref_t children[4]; + }; + size_t next_free; // Used if count == 0. + }; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node16_s { art_inner_node_t base; uint8_t count; - uint8_t keys[16]; - art_ref_t children[16]; + union { + struct { + uint8_t keys[16]; + art_ref_t children[16]; + }; + size_t next_free; // Used if count == 0. + }; } art_node16_t; // Node48: key[i] corresponds with children[key[i]] if key[i] != @@ -80,11 +95,17 @@ typedef struct art_node16_s { typedef struct art_node48_s { art_inner_node_t base; uint8_t count; - // Bitset where the ith bit is set if children[i] is available - // Because there are at most 48 children, only the bottom 48 bits are used. - uint64_t available_children; - uint8_t keys[256]; - art_ref_t children[48]; + union { + struct { + // Bitset where the ith bit is set if children[i] is available + // Because there are at most 48 children, only the bottom 48 bits + // are used. + uint64_t available_children; + uint8_t keys[256]; + art_ref_t children[48]; + }; + size_t next_free; // Used if count == 0. + }; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if @@ -92,7 +113,12 @@ typedef struct art_node48_s { typedef struct art_node256_s { art_inner_node_t base; uint16_t count; - art_ref_t children[256]; + union { + struct { + art_ref_t children[256]; + }; + size_t next_free; // Used if count == 0. + }; } art_node256_t; // Helper struct to refer to a child within a node at a specific index. 
@@ -182,129 +208,6 @@ static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, return art_to_ref(art_get_index(art, node, typecode), typecode); } -/** - * Extends the array of nodes of the given typecode. Invalidates pointers into - * the array obtained by `art_deref`. - * - * Must only be called when the node array of the given type is "full" - * (first_free == capacity). - */ -static void art_extend(art_t *art, art_typecode_t typecode) { - size_t size = art->first_free[typecode]; - size_t capacity = art->capacities[typecode]; - if (size < capacity) { - return; - } - size_t new_capacity; - if (capacity == 0) { - new_capacity = 2; - } else if (capacity < 1024) { - new_capacity = 2 * capacity; - } else { - new_capacity = 5 * capacity / 4; - } - art->capacities[typecode] = new_capacity; - size_t increase = new_capacity - capacity; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - art->leaves = - roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); - memset(art->leaves + capacity, 0, increase * sizeof(art_leaf_t)); - break; - case CROARING_ART_NODE4_TYPE: - art->node4s = roaring_realloc(art->node4s, - new_capacity * sizeof(art_node4_t)); - memset(art->node4s + capacity, 0, increase * sizeof(art_node4_t)); - break; - case CROARING_ART_NODE16_TYPE: - art->node16s = roaring_realloc(art->node16s, - new_capacity * sizeof(art_node16_t)); - memset(art->node16s + capacity, 0, increase * sizeof(art_node16_t)); - break; - case CROARING_ART_NODE48_TYPE: - art->node48s = roaring_realloc(art->node48s, - new_capacity * sizeof(art_node48_t)); - memset(art->node48s + capacity, 0, increase * sizeof(art_node48_t)); - break; - case CROARING_ART_NODE256_TYPE: - art->node256s = roaring_realloc( - art->node256s, new_capacity * sizeof(art_node256_t)); - memset(art->node256s + capacity, 0, - increase * sizeof(art_node256_t)); - break; - default: - assert(false); - } -} - -/** - * Returns the next free index for the given typecode, may be equal to the - * 
capacity of the array. - */ -static size_t art_next_free(const art_t *art, art_typecode_t typecode, - size_t start_index) { - size_t capacity = art->capacities[typecode]; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (!art->leaves[i].occupied) { - return i; - } - } - break; - } - case CROARING_ART_NODE4_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node4s[i].count == 0) { - return i; - } - } - break; - } - case CROARING_ART_NODE16_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node16s[i].count == 0) { - return i; - } - } - break; - } - case CROARING_ART_NODE48_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node48s[i].count == 0) { - return i; - } - } - break; - } - case CROARING_ART_NODE256_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node256s[i].count == 0) { - return i; - } - } - break; - } - default: - assert(false); - return 0; - } - return capacity; -} - -/** - * Marks an index for the given typecode as used, expanding the relevant node - * array if necessary. 
- */ -static size_t art_allocate_index(art_t *art, art_typecode_t typecode) { - size_t first_free = art->first_free[typecode]; - if (first_free == art->capacities[typecode]) { - art_extend(art, typecode); - } - art->first_free[typecode] = art_next_free(art, typecode, first_free + 1); - return first_free; -} - static inline bool art_is_leaf(art_ref_t ref) { return art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE; } @@ -319,6 +222,8 @@ static inline void art_init_inner_node(art_inner_node_t *node, static void art_node_free(art_t *art, art_node_t *node, art_typecode_t typecode); +static size_t art_allocate_index(art_t *art, art_typecode_t typecode); + // ===================== Start of node-specific functions ====================== static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], @@ -331,7 +236,10 @@ static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], return art_to_ref(index, CROARING_ART_LEAF_TYPE); } -static inline void art_leaf_clear(art_leaf_t *leaf) { leaf->occupied = false; } +static inline void art_leaf_clear(art_leaf_t *leaf, art_ref_t next_free) { + leaf->occupied = false; + leaf->next_free = next_free; +} static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size); @@ -363,7 +271,10 @@ static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], return node; } -static inline void art_node4_clear(art_node4_t *node) { node->count = 0; } +static inline void art_node4_clear(art_node4_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static inline art_ref_t art_node4_find_child(const art_node4_t *node, art_key_chunk_t key) { @@ -566,7 +477,10 @@ static art_node16_t *art_node16_create(art_t *art, return node; } -static inline void art_node16_clear(art_node16_t *node) { node->count = 0; } +static inline void art_node16_clear(art_node16_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static 
inline art_ref_t art_node16_find_child(const art_node16_t *node, art_key_chunk_t key) { @@ -751,7 +665,10 @@ static art_node48_t *art_node48_create(art_t *art, return node; } -static inline void art_node48_clear(art_node48_t *node) { node->count = 0; } +static inline void art_node48_clear(art_node48_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static inline art_ref_t art_node48_find_child(const art_node48_t *node, art_key_chunk_t key) { @@ -955,7 +872,10 @@ static art_node256_t *art_node256_create(art_t *art, return node; } -static inline void art_node256_clear(art_node256_t *node) { node->count = 0; } +static inline void art_node256_clear(art_node256_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static inline art_ref_t art_node256_find_child(const art_node256_t *node, art_key_chunk_t key) { @@ -1180,25 +1100,24 @@ static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, // Marks the node as unoccopied and frees its index. 
static void art_node_free(art_t *art, art_node_t *node, art_typecode_t typecode) { - uint64_t index = art_get_index(art, node, typecode); - if (index < art->first_free[typecode]) { - art->first_free[typecode] = index; - } + size_t index = art_get_index(art, node, typecode); + size_t next_free = art->first_free[typecode]; + art->first_free[typecode] = index; switch (typecode) { case CROARING_ART_LEAF_TYPE: - art_leaf_clear((art_leaf_t *)node); + art_leaf_clear((art_leaf_t *)node, next_free); break; case CROARING_ART_NODE4_TYPE: - art_node4_clear((art_node4_t *)node); + art_node4_clear((art_node4_t *)node, next_free); break; case CROARING_ART_NODE16_TYPE: - art_node16_clear((art_node16_t *)node); + art_node16_clear((art_node16_t *)node, next_free); break; case CROARING_ART_NODE48_TYPE: - art_node48_clear((art_node48_t *)node); + art_node48_clear((art_node48_t *)node, next_free); break; case CROARING_ART_NODE256_TYPE: - art_node256_clear((art_node256_t *)node); + art_node256_clear((art_node256_t *)node, next_free); break; default: assert(false); @@ -1348,6 +1267,124 @@ static uint8_t art_common_prefix(const art_key_chunk_t key1[], return offset; } +/** + * Extends the array of nodes of the given typecode. Invalidates pointers into + * the array obtained by `art_deref`. + * + * Must only be called when the node array of the given type is "full" + * (first_free == capacity). 
+ */ +static void art_extend(art_t *art, art_typecode_t typecode) { + size_t size = art->first_free[typecode]; + size_t capacity = art->capacities[typecode]; + if (size < capacity) { + return; + } + size_t new_capacity; + if (capacity == 0) { + new_capacity = 2; + } else if (capacity < 1024) { + new_capacity = 2 * capacity; + } else { + new_capacity = 5 * capacity / 4; + } + art->capacities[typecode] = new_capacity; + size_t increase = new_capacity - capacity; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: { + art->leaves = + roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); + memset(art->leaves + capacity, 0, increase * sizeof(art_leaf_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_leaf_clear(art->leaves + i, i + 1); + } + break; + } + case CROARING_ART_NODE4_TYPE: { + art->node4s = roaring_realloc(art->node4s, + new_capacity * sizeof(art_node4_t)); + memset(art->node4s + capacity, 0, increase * sizeof(art_node4_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node4_clear(art->node4s + i, i + 1); + } + break; + } + case CROARING_ART_NODE16_TYPE: { + art->node16s = roaring_realloc(art->node16s, + new_capacity * sizeof(art_node16_t)); + memset(art->node16s + capacity, 0, increase * sizeof(art_node16_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node16_clear(art->node16s + i, i + 1); + } + break; + } + case CROARING_ART_NODE48_TYPE: { + art->node48s = roaring_realloc(art->node48s, + new_capacity * sizeof(art_node48_t)); + memset(art->node48s + capacity, 0, increase * sizeof(art_node48_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node48_clear(art->node48s + i, i + 1); + } + break; + } + case CROARING_ART_NODE256_TYPE: { + art->node256s = roaring_realloc( + art->node256s, new_capacity * sizeof(art_node256_t)); + memset(art->node256s + capacity, 0, + increase * sizeof(art_node256_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node256_clear(art->node256s + i, i + 
1); + } + break; + } + default: + assert(false); + } +} + +/** + * Returns the next free index for the given typecode, may be equal to the + * capacity of the array. + */ +static size_t art_next_free(const art_t *art, art_typecode_t typecode) { + size_t index = art->first_free[typecode]; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: { + return art->leaves[index].next_free; + } + case CROARING_ART_NODE4_TYPE: { + return art->node4s[index].next_free; + } + case CROARING_ART_NODE16_TYPE: { + return art->node16s[index].next_free; + } + case CROARING_ART_NODE48_TYPE: { + return art->node48s[index].next_free; + } + case CROARING_ART_NODE256_TYPE: { + return art->node256s[index].next_free; + } + default: + assert(false); + return 0; + } +} + +/** + * Marks an index for the given typecode as used, expanding the relevant node + * array if necessary. + */ +static size_t art_allocate_index(art_t *art, art_typecode_t typecode) { + size_t first_free = art->first_free[typecode]; + if (first_free == art->capacities[typecode]) { + art_extend(art, typecode); + art->first_free[typecode]++; + return first_free; + } + art->first_free[typecode] = art_next_free(art, typecode); + return first_free; +} + // Returns a pointer to the rootmost node where the value was inserted, may // not be equal to `node`. 
static art_ref_t art_insert_at(art_t *art, art_ref_t ref, @@ -1659,31 +1696,36 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { size_t to = first_free; switch (typecode) { case CROARING_ART_LEAF_TYPE: { + size_t next_free = art->leaves[to].next_free; memcpy(art->leaves + to, art->leaves + from, sizeof(art_leaf_t)); - art_leaf_clear(&art->leaves[from]); + art_leaf_clear(&art->leaves[from], next_free); break; } case CROARING_ART_NODE4_TYPE: { + size_t next_free = art->node4s[to].next_free; memcpy(art->node4s + to, art->node4s + from, sizeof(art_node4_t)); - art_node4_clear(&art->node4s[from]); + art_node4_clear(&art->node4s[from], next_free); break; } case CROARING_ART_NODE16_TYPE: { + size_t next_free = art->node16s[to].next_free; memcpy(art->node16s + to, art->node16s + from, sizeof(art_node16_t)); - art_node16_clear(&art->node16s[from]); + art_node16_clear(&art->node16s[from], next_free); break; } case CROARING_ART_NODE48_TYPE: { + size_t next_free = art->node48s[to].next_free; memcpy(art->node48s + to, art->node48s + from, sizeof(art_node48_t)); - art_node48_clear(&art->node48s[from]); + art_node48_clear(&art->node48s[from], next_free); break; } case CROARING_ART_NODE256_TYPE: { + size_t next_free = art->node256s[to].next_free; memcpy(art->node256s + to, art->node256s + from, sizeof(art_node256_t)); - art_node256_clear(&art->node256s[from]); + art_node256_clear(&art->node256s[from], next_free); break; } default: { @@ -1691,7 +1733,7 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { return 0; } } - art->first_free[typecode] = art_next_free(art, typecode, to + 1); + art->first_free[typecode] = from; return art_to_ref(to, typecode); } @@ -2314,10 +2356,6 @@ bool art_internal_validate(const art_t *art, const char **reason, if (first_free > capacity) { return art_validate_fail(&validator, "first_free > capacity"); } - size_t next_free = art_next_free(art, type, 0); - if (first_free != next_free) { - return 
art_validate_fail(&validator, "first_free != next_free"); - } } } return art_internal_validate_at(art, art->root, validator);