Skip to content

Commit

Permalink
Merge pull request #7992 from realm/je/optimize
Browse files Browse the repository at this point in the history
Optimize size of Node and Array
  • Loading branch information
jedelbo authored Aug 27, 2024
2 parents ba9b374 + 2b1c3d4 commit bc0a677
Show file tree
Hide file tree
Showing 15 changed files with 105 additions and 212 deletions.
9 changes: 0 additions & 9 deletions src/realm/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,15 +220,6 @@ void Array::init_from_mem(MemRef mem) noexcept
update_width_cache_from_header();
}

void Array::update_from_parent() noexcept
{
REALM_ASSERT_DEBUG(is_attached());
ArrayParent* parent = get_parent();
REALM_ASSERT_DEBUG(parent);
ref_type new_ref = get_ref_from_parent();
init_from_ref(new_ref);
}

void Array::set_type(Type type)
{
REALM_ASSERT(is_attached());
Expand Down
8 changes: 1 addition & 7 deletions src/realm/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,6 @@ class Array : public Node, public ArrayParent {
init_from_ref(ref);
}

/// Called in the context of Group::commit() to ensure that attached
/// accessors stay valid across a commit. Please note that this works only
/// for non-transactional commits. Accessors obtained during a transaction
/// are always detached when the transaction ends.
void update_from_parent() noexcept;

/// Change the type of an already attached array node.
///
/// The effect of calling this function on an unattached accessor is
Expand Down Expand Up @@ -532,10 +526,10 @@ class Array : public Node, public ArrayParent {
Getter m_getter = nullptr; // cached to avoid indirection
const VTable* m_vtable = nullptr;

uint_least8_t m_width = 0; // Size of an element (meaning depend on type of array).
int64_t m_lbound; // min number that can be stored with current m_width
int64_t m_ubound; // max number that can be stored with current m_width

uint8_t m_width = 0; // Size of an element (meaning depend on type of array).
bool m_is_inner_bptree_node; // This array is an inner node of B+-tree.
bool m_has_refs; // Elements whose first bit is zero are refs to subarrays.
bool m_context_flag; // Meaning depends on context.
Expand Down
8 changes: 2 additions & 6 deletions src/realm/array_blobs_small.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,11 @@ void ArraySmallBlobs::init_from_mem(MemRef mem) noexcept
Array::init_from_mem(mem);
ref_type offsets_ref = get_as_ref(0);
ref_type blob_ref = get_as_ref(1);
ref_type nulls_ref = get_as_ref(2);

m_offsets.init_from_ref(offsets_ref);
m_blob.init_from_ref(blob_ref);

// In theory you could have an array that survived from ancient days where this array was not present
if (Array::size() > 2) {
ref_type nulls_ref = get_as_ref(2);
m_nulls.init_from_ref(nulls_ref);
}
m_nulls.init_from_ref(nulls_ref);
}

void ArraySmallBlobs::add(BinaryData value, bool add_zero_term)
Expand Down
10 changes: 0 additions & 10 deletions src/realm/array_blobs_small.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ class ArraySmallBlobs : public Array {
/// initialization value).
static MemRef create_array(size_t size, Allocator&, BinaryData defaults);

void update_from_parent() noexcept;

private:
friend class ArrayString;
Array m_offsets;
Expand Down Expand Up @@ -258,14 +256,6 @@ inline size_t ArraySmallBlobs::get_size_from_header(const char* header, Allocato
return Array::get_size_from_header(offsets_header);
}

/// Refresh this accessor and its three member sub-array accessors
/// (m_blob, m_offsets, m_nulls) by calling update_from_parent() on each.
inline void ArraySmallBlobs::update_from_parent() noexcept
{
// The top array is refreshed first.
// NOTE(review): presumably the sub-arrays read their refs from the refreshed top array, so this call must stay first — confirm.
Array::update_from_parent();
m_blob.update_from_parent();
m_offsets.update_from_parent();
m_nulls.update_from_parent();
}

} // namespace realm

#endif // REALM_ARRAY_BINARY_HPP
2 changes: 1 addition & 1 deletion src/realm/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ void Cluster::init(MemRef mem)

void Cluster::update_from_parent() noexcept
{
Array::update_from_parent();
Array::init_from_parent();
auto rot = Array::get_as_ref_or_tagged(0);
if (!rot.is_tagged()) {
m_keys.update_from_parent();
Expand Down
2 changes: 1 addition & 1 deletion src/realm/cluster_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ void ClusterNodeInner::init(MemRef mem)

void ClusterNodeInner::update_from_parent() noexcept
{
Array::update_from_parent();
Array::init_from_parent();
ref_type ref = Array::get_as_ref(s_key_ref_index);
if (ref) {
m_keys.update_from_parent();
Expand Down
4 changes: 2 additions & 2 deletions src/realm/group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,8 +1163,8 @@ void Group::update_refs(ref_type top_ref) noexcept
m_top.init_from_ref(top_ref);

// Now we can update its child arrays
m_table_names.update_from_parent();
m_tables.update_from_parent();
m_table_names.init_from_parent();
m_tables.init_from_parent();

// Update all attached table accessors.
for (auto& table_accessor : m_table_accessors) {
Expand Down
142 changes: 72 additions & 70 deletions src/realm/index_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,39 +930,38 @@ void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_da
m_array->insert(ins_pos + 1, ref);
}


void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
void StringIndex::new_node(const NodeChange& nc)
{
NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
switch (nc.type) {
case NodeChange::change_None:
return;
break;
case NodeChange::change_InsertBefore: {
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
new_node.node_add_key(nc.ref1);
new_node.node_add_key(get_ref());
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
return;
break;
}
case NodeChange::change_InsertAfter: {
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
new_node.node_add_key(get_ref());
new_node.node_add_key(nc.ref1);
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
return;
break;
}
case NodeChange::change_Split: {
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
new_node.node_add_key(nc.ref1);
new_node.node_add_key(nc.ref2);
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
return;
break;
}
}
REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
}

/// Run do_insert() with the given arguments and, when the returned
/// NodeChange is anything other than change_None, pass it on to new_node().
void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
{
    const NodeChange change = do_insert(obj_key, key, offset, index_data, value);
    if (change.type == NodeChange::change_None) {
        // Nothing further to do for this insertion.
        return;
    }
    new_node(change);
}


Expand Down Expand Up @@ -1155,58 +1154,61 @@ bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, Strin
throw LogicError(ErrorCodes::LimitExceeded,
util::format("String of length %1 exceeds maximum string length of %2.", len, max));
}

// Get subnode table
Allocator& alloc = m_array->get_alloc();
Array keys(alloc);
get_child(*m_array, 0, keys);
REALM_ASSERT(m_array->size() == keys.size() + 1);
size_t ins_pos_refs; // first entry in refs points to offsets

// If we are keeping the complete string in the index
// we want to know if this is the last part
bool is_at_string_end = offset + 4 >= index_data.size();
{
// Get subnode table
Array keys(alloc);
get_child(*m_array, 0, keys);
REALM_ASSERT(m_array->size() == keys.size() + 1);

size_t ins_pos = keys.lower_bound_int(key);
size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
// If we are keeping the complete string in the index
// we want to know if this is the last part
bool is_at_string_end = offset + 4 >= index_data.size();

if (ins_pos == keys.size()) {
if (noextend)
return false;
size_t ins_pos = keys.lower_bound_int(key);
ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets

// When key is outside current range, we can just add it
keys.add(key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->add(shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->add(subindex.get_ref());
if (ins_pos == keys.size()) {
if (noextend)
return false;

// When key is outside current range, we can just add it
keys.add(key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->add(shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->add(subindex.get_ref());
}
return true;
}
return true;
}

key_type k = key_type(keys.get(ins_pos));
key_type k = key_type(keys.get(ins_pos));

// If key is not present we add it at the correct location
if (k != key) {
if (noextend)
return false;
// If key is not present we add it at the correct location
if (k != key) {
if (noextend)
return false;

keys.insert(ins_pos, key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->insert(ins_pos_refs, shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->insert(ins_pos_refs, subindex.get_ref());
keys.insert(ins_pos, key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->insert(ins_pos_refs, shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->insert(ins_pos_refs, subindex.get_ref());
}
return true;
}
return true;
}

// This leaf already has a slot for the key
Expand Down Expand Up @@ -1266,28 +1268,28 @@ bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, Strin
IntegerColumn sub(alloc, ref); // Throws
sub.set_parent(m_array.get(), ins_pos_refs);

IntegerColumn::const_iterator it_end = sub.cend();
IntegerColumn::const_iterator lower = it_end;
IntegerColumn::const_iterator lower = sub.cend();

auto value_exists_in_list = [&]() {
if (m_target_column.full_word()) {
lower = sub.cbegin();
return reconstruct_string(offset, key, index_data) == value.get_string();
}
bool value_exists_in_list = false;
if (m_target_column.full_word()) {
lower = sub.cbegin();
value_exists_in_list = reconstruct_string(offset, key, index_data) == value.get_string();
}
else {
SortedListComparator slc(m_target_column);
IntegerColumn::const_iterator it_end = lower;
lower = slc.find_start_of_unsorted(value, sub);

if (lower != it_end) {
Mixed lower_value = get(ObjKey(*lower));
if (lower_value == value) {
return true;
value_exists_in_list = true;
}
}
return false;
};
}

// If we found the value in this list, add the duplicate to the list.
if (value_exists_in_list()) {
if (value_exists_in_list) {
insert_to_existing_list_at_lower(obj_key, value, sub, lower);
}
else {
Expand Down
1 change: 1 addition & 0 deletions src/realm/index_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ class StringIndex : public SearchIndex {
// B-Tree functions
void TreeInsert(ObjKey obj_key, key_type, size_t offset, StringData index_data, const Mixed& value);
NodeChange do_insert(ObjKey, key_type, size_t offset, StringData index_data, const Mixed& value);
void new_node(const NodeChange&);
/// Returns true if there is room or it can join existing entries
bool leaf_insert(ObjKey obj_key, key_type, size_t offset, StringData index_data, const Mixed& value,
bool noextend = false);
Expand Down
8 changes: 2 additions & 6 deletions src/realm/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,7 @@ class Node : public NodeHeader {
void set_parent(ArrayParent* parent, size_t ndx_in_parent) noexcept
{
m_parent = parent;
m_ndx_in_parent = ndx_in_parent;
}
void set_ndx_in_parent(size_t ndx) noexcept
{
m_ndx_in_parent = ndx;
m_ndx_in_parent = unsigned(ndx_in_parent);
}

void clear_missing_parent_update()
Expand Down Expand Up @@ -339,7 +335,7 @@ class Node : public NodeHeader {
private:
friend class NodeTree;
ArrayParent* m_parent = nullptr;
size_t m_ndx_in_parent = 0; // Ignored if m_parent is null.
unsigned m_ndx_in_parent = 0; // Ignored if m_parent is null.
bool m_missing_parent_update = false;

void do_copy_on_write(size_t minimum_size = 0);
Expand Down
8 changes: 1 addition & 7 deletions src/realm/search_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ class SearchIndex {
bool is_attached() const noexcept;
void set_parent(ArrayParent* parent, size_t ndx_in_parent) noexcept;
size_t get_ndx_in_parent() const noexcept;
void set_ndx_in_parent(size_t ndx_in_parent) noexcept;
void update_from_parent() noexcept;
void refresh_accessor_tree(const ClusterColumn& target_column);
ref_type get_ref() const noexcept;
Expand Down Expand Up @@ -174,14 +173,9 @@ inline size_t SearchIndex::get_ndx_in_parent() const noexcept
return m_root_array->get_ndx_in_parent();
}

/// Forward the given index-in-parent to the root array accessor.
///
/// @param ndx_in_parent value passed unchanged to m_root_array's
///                      set_ndx_in_parent().
inline void SearchIndex::set_ndx_in_parent(size_t ndx_in_parent) noexcept
{
m_root_array->set_ndx_in_parent(ndx_in_parent);
}

inline void SearchIndex::update_from_parent() noexcept
{
m_root_array->update_from_parent();
m_root_array->init_from_parent();
}

} // namespace realm
Expand Down
Loading

0 comments on commit bc0a677

Please sign in to comment.