From 731b7d298827df108891b8de2b7949b042a54d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Edelbo?= Date: Wed, 12 Jun 2024 11:02:34 +0200 Subject: [PATCH] Add compression of strings in Mixed, Lst and Dictionary --- src/realm/array_backlink.cpp | 4 +- src/realm/array_mixed.cpp | 77 ++++++++++++++++++--------------- src/realm/array_mixed.hpp | 10 +++++ src/realm/array_string.cpp | 47 ++++++++++++++------ src/realm/array_string.hpp | 6 +-- src/realm/bplustree.hpp | 24 ++++++++-- src/realm/cluster.cpp | 35 +++++---------- src/realm/cluster_tree.cpp | 2 +- src/realm/collection.hpp | 6 ++- src/realm/dictionary.cpp | 7 ++- src/realm/impl/array_writer.hpp | 2 + src/realm/list.cpp | 6 ++- src/realm/list.hpp | 4 ++ src/realm/obj.cpp | 68 +++++++++++++++++------------ src/realm/query_expression.hpp | 4 +- src/realm/set.hpp | 3 ++ src/realm/table.cpp | 44 ++++++++++--------- src/realm/table.hpp | 11 +++-- src/realm/to_json.cpp | 4 +- test/test_lang_bind_helper.cpp | 4 +- test/test_list.cpp | 10 ++++- 21 files changed, 226 insertions(+), 152 deletions(-) diff --git a/src/realm/array_backlink.cpp b/src/realm/array_backlink.cpp index bf4cfddb8da..4190a648a1b 100644 --- a/src/realm/array_backlink.cpp +++ b/src/realm/array_backlink.cpp @@ -225,12 +225,12 @@ void ArrayBacklink::verify() const REALM_ASSERT(src_obj.get(src_col_key).get_link() == target_link); } else if (val.is_type(type_List)) { - DummyParent parent(src_table, val.get_ref()); + DummyParent parent(src_table, val.get_ref(), src_col_key); Lst list(parent, 0); REALM_ASSERT(list.find_any(target_link) != npos); } else if (val.is_type(type_Dictionary)) { - DummyParent parent(src_table, val.get_ref()); + DummyParent parent(src_table, val.get_ref(), src_col_key); Dictionary dict(parent, 0); REALM_ASSERT(dict.find_any(target_link) != npos); } diff --git a/src/realm/array_mixed.cpp b/src/realm/array_mixed.cpp index 7d00991ad5b..f20429a92fa 100644 --- a/src/realm/array_mixed.cpp +++ b/src/realm/array_mixed.cpp @@ -360,9 +360,8 @@ ref_type ArrayMixed::typed_write(ref_type top_ref, _impl::ArrayWriterBase& out, 2. int and pair int arrays, they are used for storing integers, timestamps, floats, doubles, decimals, links. In general we can compress them, but we need to be careful, controlling the col_type should prevent compressing data that we want to leave in the current format. - 3. string array is for strings and binary data (no compression for now) - 4. ref array is actually storing refs to collections. they can only be BPlusTree or - BPlusTree. + 3. string array is for strings and binary data + 4. ref array is actually storing refs to collections. They can only be Lst or Dictionary. 5. key array stores unique identifiers for collections in mixed (integers that can be compressed) */ Array composite(alloc); @@ -372,41 +371,48 @@ ref_type ArrayMixed::typed_write(ref_type top_ref, _impl::ArrayWriterBase& out, auto ref = top.get(i); ref_type new_ref = ref; if (ref && !(out.only_modified && alloc.is_read_only(ref))) { - if (i < 3) { // int, and pair_int - // integer arrays - new_ref = Array::write(ref, alloc, out, out.only_modified, out.compress); - } - else if (i == 4) { // collection in mixed - ArrayRef arr_ref(alloc); - arr_ref.init_from_ref(ref); - auto ref_sz = arr_ref.size(); - TempArray written_ref_leaf(ref_sz); - - for (size_t k = 0; k < ref_sz; k++) { - ref_type new_sub_ref = 0; - if (auto sub_ref = arr_ref.get(k)) { - auto header = alloc.translate(sub_ref); - // Now we have to find out if the nested collection is a - // dictionary or a list. If the top array has a size of 2 - // and it is not a BplusTree inner node, then it is a dictionary - if (NodeHeader::get_size_from_header(header) == 2 && - !NodeHeader::get_is_inner_bptree_node_from_header(header)) { - new_sub_ref = Dictionary::typed_write(sub_ref, out, alloc); - } - else { - new_sub_ref = BPlusTree::typed_write(sub_ref, out, alloc); + switch (i) { + case payload_idx_int: + // integer array + new_ref = Array::write(ref, alloc, out, out.only_modified, out.compress); + break; + case payload_idx_pair: + // integer array + new_ref = Array::write(ref, alloc, out, out.only_modified, out.compress); + break; + case payload_idx_str: + new_ref = ArrayString::typed_write(ref, out, alloc); + break; + case payload_idx_ref: { + // collection in mixed + ArrayRef arr_ref(alloc); + arr_ref.init_from_ref(ref); + auto ref_sz = arr_ref.size(); + TempArray written_ref_leaf(ref_sz); + + for (size_t k = 0; k < ref_sz; k++) { + ref_type new_sub_ref = 0; + if (auto sub_ref = arr_ref.get(k)) { + auto header = alloc.translate(sub_ref); + // Now we have to find out if the nested collection is a + // dictionary or a list. If the top array has a size of 2 + // and it is not a BplusTree inner node, then it is a dictionary + if (NodeHeader::get_size_from_header(header) == 2 && + !NodeHeader::get_is_inner_bptree_node_from_header(header)) { + new_sub_ref = Dictionary::typed_write(sub_ref, out, alloc); + } + else { + new_sub_ref = BPlusTree::typed_write(sub_ref, out, alloc); + } } + written_ref_leaf.set_as_ref(k, new_sub_ref); } - written_ref_leaf.set_as_ref(k, new_sub_ref); + new_ref = written_ref_leaf.write(out); + break; } - new_ref = written_ref_leaf.write(out); - } - else if (i == 5) { // unique keys associated to collections in mixed - new_ref = Array::write(ref, alloc, out, out.only_modified, out.compress); - } - else { - // all the rest we don't want to compress it, at least for now (strings will be needed) - new_ref = Array::write(ref, alloc, out, out.only_modified, false); + case payload_idx_key: + new_ref = Array::write(ref, alloc, out, out.only_modified, out.compress); + break; } } written_leaf.set(i, new_ref); @@ -451,6 +457,7 @@ void ArrayMixed::ensure_string_array() const m_strings.create(); m_strings.update_parent(); } + m_strings.set_string_interner(m_string_interner); } } diff --git a/src/realm/array_mixed.hpp b/src/realm/array_mixed.hpp index a0de93b8339..04025ef33a3 100644 --- a/src/realm/array_mixed.hpp +++ b/src/realm/array_mixed.hpp @@ -64,6 +64,15 @@ class ArrayMixed : public ArrayPayload, private Array { { Array::set_parent(parent, ndx_in_parent); } + bool need_string_interner() const override + { + return true; + } + virtual void set_string_interner(StringInterner* interner) const + { + m_string_interner = interner; + } + void init_from_parent() { ref_type ref = get_ref_from_parent(); @@ -135,6 +144,7 @@ class ArrayMixed : public ArrayPayload, private Array { mutable ArrayString m_strings; // Used to store nested collection refs mutable ArrayRef m_refs; + mutable StringInterner* m_string_interner = nullptr; DataType get_type(size_t ndx) const { diff --git a/src/realm/array_string.cpp b/src/realm/array_string.cpp index cb2aa6fb3f5..fac7e3bb667 100644 --- a/src/realm/array_string.cpp +++ b/src/realm/array_string.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include using namespace realm; @@ -536,18 +536,39 @@ void ArrayString::verify() const #endif } -ref_type ArrayString::write(_impl::ArrayWriterBase& out, StringInterner* interner) +template <> +ref_type ArrayString::typed_write(ref_type ref, _impl::ArrayWriterBase& out, Allocator& alloc) { - REALM_ASSERT(interner); - // we have to write out all, modified or not, to match the total cleanup - Array interned(Allocator::get_default()); - auto sz = size(); - interned.create(NodeHeader::type_Normal, true, sz); - for (size_t i = 0; i < sz; ++i) { - interned.set(i, interner->intern(get(i))); + Array leaf(alloc); + leaf.init_from_ref(ref); + ref_type ret_val; + auto header = leaf.get_header(); + if (NodeHeader::get_hasrefs_from_header(header) || + NodeHeader::get_wtype_from_header(header) == NodeHeader::wtype_Multiply) { + // We're interning these strings + ArrayString as(alloc); + as.init_from_ref(ref); + StringInterner* interner = out.table->get_string_interner(out.col_key); + auto sz = as.size(); + Array interned(Allocator::get_default()); + interned.create(NodeHeader::type_Normal, true, sz); + for (size_t i = 0; i < sz; ++i) { + interned.set(i, interner->intern(as.get(i))); + } + ret_val = interned.write(out, false, false, out.compress); + interned.destroy(); + // in a transactional setting: + // Destroy all sub-arrays if present, in order to release memory in file + // This is contrary to the rest of the handling in this function, but needed + // here since sub-arrays may not have been COW'ed and therefore not freed in file. + // We rely on 'only_modified' to indicate that we're in a transactional setting. + if (out.only_modified) + leaf.destroy_deep(true); + } + else { + // whether it's the old enum strings or the new interned strings, + // just write out the array using integer leaf compression + ret_val = leaf.write(out, false, out.only_modified, out.compress); } - auto retval = interned.write(out, false, false, out.compress); - interned.destroy(); - return retval; - // return m_arr->write(out, true, false, false); + return ret_val; } diff --git a/src/realm/array_string.hpp b/src/realm/array_string.hpp index df121c50b2c..6c8400c4055 100644 --- a/src/realm/array_string.hpp +++ b/src/realm/array_string.hpp @@ -126,10 +126,8 @@ class ArrayString : public ArrayPayload { static StringData get(const char* header, size_t ndx, Allocator& alloc) noexcept; void verify() const; - // Write to 'out', if needed using 'interner' to intern any strings. - // An interner of 0 will disable interning. Interned values may be further - // compressed using leaf compression for integer arrays. - ref_type write(_impl::ArrayWriterBase& out, StringInterner* interner); + template + static ref_type typed_write(ref_type ref, T& out, Allocator& alloc); private: static constexpr size_t small_string_max_size = 15; // ArrayStringShort diff --git a/src/realm/bplustree.hpp b/src/realm/bplustree.hpp index 1f78d32ac26..7b8fec8ce93 100644 --- a/src/realm/bplustree.hpp +++ b/src/realm/bplustree.hpp @@ -30,6 +30,7 @@ namespace realm { class BPlusTreeBase; class BPlusTreeInner; +class StringInterner; /*****************************************************************************/ /* BPlusTreeNode */ @@ -207,6 +208,16 @@ class BPlusTreeBase { m_root->bp_set_parent(parent, ndx_in_parent); } + void set_interner(StringInterner* interner) + { + m_interner = interner; + } + + StringInterner* get_interner() + { + return m_interner; + } + virtual void erase(size_t) = 0; virtual void clear() = 0; virtual void swap(size_t, size_t) = 0; @@ -234,6 +245,7 @@ class BPlusTreeBase { std::unique_ptr m_root; Allocator& m_alloc; ArrayParent* m_parent = nullptr; + StringInterner* m_interner = nullptr; size_t m_ndx_in_parent = 0; size_t m_size = 0; size_t m_cached_leaf_begin; @@ -300,6 +312,9 @@ class BPlusTree : public BPlusTreeBase { void init_from_ref(ref_type ref) noexcept override { LeafArray::init_from_ref(ref); + if constexpr (realm::is_any_v) { + LeafArray::set_string_interner(m_tree->get_interner()); + } } ref_type get_ref() const override @@ -574,13 +589,16 @@ class BPlusTree : public BPlusTreeBase { std::unique_ptr create_leaf_node() override { - std::unique_ptr leaf = std::make_unique(this); - static_cast(leaf.get())->create(); + auto leaf = std::make_unique(this); + leaf->create(); + if constexpr (realm::is_any_v) { + leaf->set_string_interner(m_interner); + } return leaf; } std::unique_ptr init_leaf_node(ref_type ref) override { - std::unique_ptr leaf = std::make_unique(this); + auto leaf = std::make_unique(this); leaf->init_from_ref(ref); return leaf; } diff --git a/src/realm/cluster.cpp b/src/realm/cluster.cpp index 75deb0707c2..6edec52a9e2 100644 --- a/src/realm/cluster.cpp +++ b/src/realm/cluster.cpp @@ -261,6 +261,12 @@ inline void Cluster::set_string_interner(ArrayString& arr, ColKey col_key) const m_tree_top.set_string_interner(arr, col_key); } +template <> +inline void Cluster::set_string_interner(ArrayMixed& arr, ColKey col_key) const +{ + m_tree_top.set_string_interner(arr, col_key); +} + template inline void Cluster::set_spec(T&, ColKey::Idx) const { @@ -314,6 +320,7 @@ inline void Cluster::do_insert_mixed(size_t ndx, ColKey col_key, Mixed init_valu { ArrayMixed arr(m_alloc); arr.set_parent(this, col_key.get_index().val + s_first_col_index); + set_string_interner(arr, col_key); arr.init_from_parent(); arr.insert(ndx, init_value); @@ -798,6 +805,7 @@ inline void Cluster::do_erase_mixed(size_t ndx, ColKey col_key, ObjKey key, Casc ArrayMixed values(m_alloc); values.set_parent(this, col_ndx.val + s_first_col_index); + set_string_interner(values, col_key); values.init_from_parent(); Mixed value = values.get(ndx); @@ -1447,6 +1455,7 @@ void Cluster::dump_objects(int64_t key_offset, std::string lead) const } case col_type_Mixed: { ArrayMixed arr(m_alloc); + set_string_interner(arr, col); ref_type ref = Array::get_as_ref(j); arr.init_from_ref(ref); std::cout << ", " << arr.get(i); @@ -1651,32 +1660,8 @@ ref_type Cluster::typed_write(ref_type ref, _impl::ArrayWriterBase& out) const else { // Columns auto col_key = out.table->m_leaf_ndx2colkey[j - 1]; + out.col_key = col_key; auto col_type = col_key.get_type(); - // String columns are interned at this point - if (out.compress && col_type == col_type_String && !col_key.is_collection()) { - ArrayRef leaf(m_alloc); - leaf.init_from_ref(ref); - auto header = leaf.get_header(); - if (NodeHeader::get_hasrefs_from_header(header) || - NodeHeader::get_wtype_from_header(header) == wtype_Multiply) { - // We're interning these strings - ArrayString as(m_alloc); - as.init_from_ref(leaf_rot.get_as_ref()); - written_cluster.set_as_ref(j, as.write(out, out.table->get_string_interner(col_key))); - // in a transactional setting: - // Destroy all sub-arrays if present, in order to release memory in file - // This is contrary to the rest of the handling in this function, but needed - // here since sub-arrays may not have been COW'ed and therefore not freed in file. - // We rely on 'only_modified' to indicate that we're in a transactional setting. - if (only_modified) - leaf.destroy_deep(true); - continue; - } - // whether it's the old enum strings or the new interned strings, - // just write out the array using integer leaf compression - written_cluster.set_as_ref(j, leaf.write(out, false, false, false)); - continue; - } if (col_key.is_collection()) { ArrayRef arr_ref(m_alloc); arr_ref.init_from_ref(ref); diff --git a/src/realm/cluster_tree.cpp b/src/realm/cluster_tree.cpp index 3021f684911..5821a9f465b 100644 --- a/src/realm/cluster_tree.cpp +++ b/src/realm/cluster_tree.cpp @@ -1140,7 +1140,7 @@ void ClusterTree::set_string_interner(ArrayPayload& arr, ColKey col_key) const // Check for owner. This function may be called in context of DictionaryClusterTree // in which case m_owner is null (and spec never needed). if (m_owner) { - arr.set_string_interner(_impl::TableFriend::get_string_interner(*m_owner, col_key)); + arr.set_string_interner(m_owner->get_string_interner(col_key)); } } diff --git a/src/realm/collection.hpp b/src/realm/collection.hpp index e26158a63ee..c4278cdbf5b 100644 --- a/src/realm/collection.hpp +++ b/src/realm/collection.hpp @@ -18,9 +18,10 @@ struct CollectionIterator; // Used in Cluster when removing owning object class DummyParent : public CollectionParent { public: - DummyParent(TableRef t, ref_type ref) + DummyParent(TableRef t, ref_type ref, ColKey ck) : m_obj(t, MemRef(), ObjKey(), 0) , m_ref(ref) + , m_col_key(ck) { } FullPath get_path() const noexcept final @@ -37,7 +38,7 @@ class DummyParent : public CollectionParent { } ColKey get_col_key() const noexcept final { - return {}; + return m_col_key; } void add_index(Path&, const Index&) const noexcept final {} size_t find_index(const Index&) const noexcept final @@ -62,6 +63,7 @@ class DummyParent : public CollectionParent { protected: Obj m_obj; ref_type m_ref; + ColKey m_col_key; UpdateStatus update_if_needed() const final { return UpdateStatus::Updated; diff --git a/src/realm/dictionary.cpp b/src/realm/dictionary.cpp index 7c9f3b76801..03d1d5be024 100644 --- a/src/realm/dictionary.cpp +++ b/src/realm/dictionary.cpp @@ -850,9 +850,11 @@ UpdateStatus Dictionary::init_from_parent(bool allow_create) const Allocator& alloc = get_alloc(); m_dictionary_top.reset(new Array(alloc)); m_dictionary_top->set_parent(const_cast(this), 0); + StringInterner* interner = m_col_key ? get_table()->get_string_interner(m_col_key) : nullptr; switch (m_key_type) { case type_String: { m_keys.reset(new BPlusTree(alloc)); + m_keys->set_interner(interner); break; } case type_Int: { @@ -865,6 +867,7 @@ UpdateStatus Dictionary::init_from_parent(bool allow_create) const m_keys->set_parent(m_dictionary_top.get(), 0); m_values.reset(new BPlusTreeMixed(alloc)); m_values->set_parent(m_dictionary_top.get(), 1); + m_values->set_interner(interner); } if (ref) { @@ -1153,12 +1156,12 @@ void Dictionary::to_json(std::ostream& out, JSONOutputMode output_mode, fn(val); } else if (val.is_type(type_Dictionary)) { - DummyParent parent(this->get_table(), val.get_ref()); + DummyParent parent(this->get_table(), val.get_ref(), m_col_key); Dictionary dict(parent, 0); dict.to_json(out, output_mode, fn); } else if (val.is_type(type_List)) { - DummyParent parent(this->get_table(), val.get_ref()); + DummyParent parent(this->get_table(), val.get_ref(), m_col_key); Lst list(parent, 0); list.to_json(out, output_mode, fn); } diff --git a/src/realm/impl/array_writer.hpp b/src/realm/impl/array_writer.hpp index 4096805e0fa..c6a7e18e413 100644 --- a/src/realm/impl/array_writer.hpp +++ b/src/realm/impl/array_writer.hpp @@ -20,6 +20,7 @@ #define REALM_ARRAY_WRITER_HPP #include +#include namespace realm { class Table; @@ -30,6 +31,7 @@ class ArrayWriterBase { bool only_modified = true; bool compress = true; const Table* table; + ColKey col_key; virtual ~ArrayWriterBase() { } diff --git a/src/realm/list.cpp b/src/realm/list.cpp index d3f94bd895a..4d35e033cbf 100644 --- a/src/realm/list.cpp +++ b/src/realm/list.cpp @@ -386,6 +386,8 @@ UpdateStatus Lst::init_from_parent(bool allow_create) const m_tree.reset(new BPlusTreeMixed(get_alloc())); const ArrayParent* parent = this; m_tree->set_parent(const_cast(parent), 0); + if (m_col_key) + m_tree->set_interner(get_table()->get_string_interner(m_col_key)); } try { return do_init_from_parent(m_tree.get(), Base::get_collection_ref(), allow_create); @@ -744,12 +746,12 @@ void Lst::to_json(std::ostream& out, JSONOutputMode output_mode, fn(val); } else if (val.is_type(type_Dictionary)) { - DummyParent parent(this->get_table(), val.get_ref()); + DummyParent parent(this->get_table(), val.get_ref(), m_col_key); Dictionary dict(parent, i); dict.to_json(out, output_mode, fn); } else if (val.is_type(type_List)) { - DummyParent parent(this->get_table(), val.get_ref()); + DummyParent parent(this->get_table(), val.get_ref(), m_col_key); Lst list(parent, i); list.to_json(out, output_mode, fn); } diff --git a/src/realm/list.hpp b/src/realm/list.hpp index f0646d2a176..398988e02bc 100644 --- a/src/realm/list.hpp +++ b/src/realm/list.hpp @@ -258,6 +258,10 @@ class Lst final : public CollectionBaseImpl { m_tree.reset(new BPlusTree(get_alloc())); const ArrayParent* parent = this; m_tree->set_parent(const_cast(parent), 0); + if constexpr (realm::is_any_v) { + if (m_col_key) + m_tree->set_interner(get_table()->get_string_interner(m_col_key)); + } } Base::update_content_version(); return do_init_from_parent(m_tree.get(), Base::get_collection_ref(), allow_create); diff --git a/src/realm/obj.cpp b/src/realm/obj.cpp index fc34b755d57..3f3e23b03bf 100644 --- a/src/realm/obj.cpp +++ b/src/realm/obj.cpp @@ -239,13 +239,15 @@ bool Obj::compare_list_in_mixed(Lst& val1, Lst& val2, ColKey ck, O auto m1 = val1.get_any(i); auto m2 = val2.get_any(i); + auto other_table = other.get_table(); + auto other_col_key = other_table->get_column_key(col_name); if (m1.is_type(type_List) && m2.is_type(type_List)) { - DummyParent parent(other.get_table(), m2.get_ref()); + DummyParent parent(other_table, m2.get_ref(), other_col_key); Lst list(parent, 0); return compare_list_in_mixed(*val1.get_list(i), list, ck, other, col_name); } else if (m1.is_type(type_Dictionary) && m2.is_type(type_Dictionary)) { - DummyParent parent(other.get_table(), m2.get_ref()); + DummyParent parent(other_table, m2.get_ref(), other_col_key); Dictionary dict(parent, 0); return compare_dict_in_mixed(*val1.get_dictionary(i), dict, ck, other, col_name); } @@ -268,13 +270,15 @@ bool Obj::compare_dict_in_mixed(Dictionary& val1, Dictionary& val2, ColKey ck, O if (k1 != k2) return false; + auto other_table = other.get_table(); + auto other_col_key = other_table->get_column_key(col_name); if (m1.is_type(type_List) && m2.is_type(type_List)) { - DummyParent parent(other.get_table(), m2.get_ref()); + DummyParent parent(other_table, m2.get_ref(), other_col_key); Lst list(parent, 0); return compare_list_in_mixed(*val1.get_list(k1.get_string()), list, ck, other, col_name); } else if (m1.is_type(type_Dictionary) && m2.is_type(type_Dictionary)) { - DummyParent parent(other.get_table(), m2.get_ref()); + DummyParent parent(other_table, m2.get_ref(), other_col_key); Dictionary dict(parent, 0); return compare_dict_in_mixed(*val1.get_dictionary(k1.get_string()), dict, ck, other, col_name); } @@ -495,6 +499,7 @@ Mixed Obj::get_unfiltered_mixed(ColKey::Idx col_ndx) const ArrayMixed values(get_alloc()); ref_type ref = to_ref(Array::get(m_mem.get_addr(), col_ndx.val + 1)); values.init_from_ref(ref); + values.set_string_interner(m_table->get_string_interner(col_ndx)); return values.get(m_row_ndx); } @@ -1168,6 +1173,35 @@ REALM_FORCEINLINE void Obj::sync(Node& arr) } } +// helper functions for filtering out calls to set_string_interner() +template +inline void Obj::set_string_interner(T&, ColKey) +{ +} +template <> +inline void Obj::set_string_interner(ArrayString& values, ColKey col_key) +{ + values.set_string_interner(m_table->get_string_interner(col_key)); +} +template <> +inline void Obj::set_string_interner(ArrayMixed& values, ColKey col_key) +{ + values.set_string_interner(m_table->get_string_interner(col_key)); +} + +// helper functions for filtering out calls to set_spec() +template +inline void Obj::set_spec(T&, ColKey) +{ +} +template <> +inline void Obj::set_spec(ArrayString& values, ColKey col_key) +{ + size_t spec_ndx = m_table->colkey2spec_ndx(col_key); + Spec* spec = const_cast(&get_spec()); + values.set_spec(spec, spec_ndx); +} + template <> Obj& Obj::set(ColKey col_key, Mixed value, bool is_default) { @@ -1225,6 +1259,7 @@ Obj& Obj::set(ColKey col_key, Mixed value, bool is_default) REALM_ASSERT(col_ndx.val + 1 < fields.size()); ArrayMixed values(alloc); values.set_parent(&fields, col_ndx.val + 1); + set_string_interner(values, col_key); values.init_from_parent(); values.set(m_row_ndx, value); if (value.is_type(type_Dictionary, type_List)) { @@ -1369,6 +1404,7 @@ Obj& Obj::add_int(ColKey col_key, int64_t value) if (col_key.get_type() == col_type_Mixed) { ArrayMixed values(alloc); values.set_parent(&fields, col_ndx.val + 1); + set_string_interner(values, col_key); values.init_from_parent(); Mixed old = values.get(m_row_ndx); if (old.is_type(type_Int)) { @@ -1603,30 +1639,6 @@ inline void check_range(const BinaryData& val) } } // namespace -// helper functions for filtering out calls to set_string_interner() -template -inline void Obj::set_string_interner(T&, ColKey) -{ -} -template <> -inline void Obj::set_string_interner(ArrayString& values, ColKey col_key) -{ - values.set_string_interner(m_table->get_string_interner(col_key)); -} - -// helper functions for filtering out calls to set_spec() -template -inline void Obj::set_spec(T&, ColKey) -{ -} -template <> -inline void Obj::set_spec(ArrayString& values, ColKey col_key) -{ - size_t spec_ndx = m_table->colkey2spec_ndx(col_key); - Spec* spec = const_cast(&get_spec()); - values.set_spec(spec, spec_ndx); -} - #if REALM_ENABLE_GEOSPATIAL template <> diff --git a/src/realm/query_expression.hpp b/src/realm/query_expression.hpp index 24b43af8c66..a4f21179755 100644 --- a/src/realm/query_expression.hpp +++ b/src/realm/query_expression.hpp @@ -2626,12 +2626,12 @@ class SizeOperator : public Subexpr2 { destination.set(i, int64_t(elem.get_string().size())); } else if (elem.is_type(type_List)) { - DummyParent parent(m_expr->get_base_table().cast_away_const(), elem.get_ref()); + DummyParent parent(m_expr->get_base_table().cast_away_const(), elem.get_ref(), ColKey()); Lst list(parent, 0); destination.set(i, int64_t(list.size())); } else if (elem.is_type(type_Dictionary)) { - DummyParent parent(m_expr->get_base_table().cast_away_const(), elem.get_ref()); + DummyParent parent(m_expr->get_base_table().cast_away_const(), elem.get_ref(), ColKey()); Dictionary dict(parent, 0); destination.set(i, int64_t(dict.size())); } diff --git a/src/realm/set.hpp b/src/realm/set.hpp index e3d7fac3d60..dd42bfd26d1 100644 --- a/src/realm/set.hpp +++ b/src/realm/set.hpp @@ -532,6 +532,9 @@ UpdateStatus Set::init_from_parent(bool allow_create) const m_tree.reset(new BPlusTree(get_alloc())); const ArrayParent* parent = this; m_tree->set_parent(const_cast(parent), 0); + if constexpr (realm::is_any_v) { + m_tree->set_interner(get_table()->get_string_interner(m_col_key)); + } } return do_init_from_parent(m_tree.get(), Base::get_collection_ref(), allow_create); } diff --git a/src/realm/table.cpp b/src/realm/table.cpp index 977339ade0d..04eb6920074 100644 --- a/src/realm/table.cpp +++ b/src/realm/table.cpp @@ -264,6 +264,11 @@ using namespace realm::util; Replication* Table::g_dummy_replication = nullptr; +static inline bool needs_string_interner(ColKey col_key) +{ + return col_key.get_type() == col_type_String || col_key.get_type() == col_type_Mixed || col_key.is_dictionary(); +} + bool TableVersions::operator==(const TableVersions& other) const { if (size() != other.size()) @@ -540,9 +545,6 @@ void Table::remove_column(ColKey col_key) erase_root_column(col_key); // Throws m_has_any_embedded_objects.reset(); - auto i = col_key.get_index().val; - if (i < m_string_interners.size() && m_string_interners[i]) - m_string_interners[i].reset(); } @@ -1070,19 +1072,18 @@ ColKey Table::do_insert_root_column(ColKey col_key, ColumnType type, StringData if (m_tombstones) { m_tombstones->insert_column(col_key); } - // create string interners internal rep as well as data area - REALM_ASSERT_DEBUG(m_interner_data.is_attached()); - while (col_ndx >= m_string_interners.size()) { - m_string_interners.push_back({}); - } - while (col_ndx >= m_interner_data.size()) { - m_interner_data.add(0); + if (needs_string_interner(col_key)) { + // create string interners internal rep as well as data area + REALM_ASSERT_DEBUG(m_interner_data.is_attached()); + while (col_ndx >= m_string_interners.size()) { + m_string_interners.push_back({}); + } + while (col_ndx >= m_interner_data.size()) { + m_interner_data.add(0); + } + REALM_ASSERT(!m_string_interners[col_ndx]); + m_string_interners[col_ndx] = std::make_unique(m_alloc, m_interner_data, col_key, true); } - REALM_ASSERT(!m_string_interners[col_ndx]); - // FIXME: Limit creation of interners to EXACTLY the columns, where they can be - // relevant. - // if (col_key.get_type() == col_type_String) - m_string_interners[col_ndx] = std::make_unique(m_alloc, m_interner_data, col_key, true); bump_storage_version(); return col_key; @@ -1114,8 +1115,7 @@ void Table::do_erase_root_column(ColKey col_key) REALM_ASSERT(m_index_accessors.back() == nullptr); m_index_accessors.pop_back(); } - REALM_ASSERT_DEBUG(col_ndx < m_string_interners.size()); - if (m_string_interners[col_ndx]) { + if (col_ndx < m_string_interners.size() && m_string_interners[col_ndx]) { REALM_ASSERT_DEBUG(m_interner_data.is_attached()); REALM_ASSERT_DEBUG(col_ndx < m_interner_data.size()); auto data_ref = m_interner_data.get_as_ref(col_ndx); @@ -2231,6 +2231,9 @@ void Table::refresh_string_interners(bool writable) m_string_interners[idx].reset(); continue; } + if (!needs_string_interner(col_key)) + continue; + REALM_ASSERT_DEBUG(col_key.get_index().val == idx); // maintain sufficient size of interner arrays to cover all columns while (idx >= m_string_interners.size()) { @@ -3525,11 +3528,10 @@ void Table::typed_print(std::string prefix, ref_type ref) const std::cout << prefix << "}" << std::endl; } -StringInterner* Table::get_string_interner(ColKey col_key) const +StringInterner* Table::get_string_interner(ColKey::Idx idx) const { - auto idx = col_key.get_index().val; - REALM_ASSERT(idx < m_string_interners.size()); - auto interner = m_string_interners[idx].get(); + REALM_ASSERT(idx.val < m_string_interners.size()); + auto interner = m_string_interners[idx.val].get(); REALM_ASSERT(interner); return interner; } diff --git a/src/realm/table.hpp b/src/realm/table.hpp index 1f02e0540ac..3635d265835 100644 --- a/src/realm/table.hpp +++ b/src/realm/table.hpp @@ -573,7 +573,11 @@ class Table { ColKey::Idx spec_ndx2leaf_ndx(size_t idx) const; ColKey leaf_ndx2colkey(ColKey::Idx idx) const; ColKey spec_ndx2colkey(size_t ndx) const; - StringInterner* get_string_interner(ColKey col_key) const; + StringInterner* get_string_interner(ColKey::Idx idx) const; + StringInterner* get_string_interner(ColKey col_key) const + { + return get_string_interner(col_key.get_index()); + } // Queries // Using where(tv) is the new method to perform queries on TableView. The 'tv' can have any order; it does not // need to be sorted, and, resulting view retains its order. @@ -1417,11 +1421,6 @@ class _impl::TableFriend { return table.m_spec; } - static StringInterner* get_string_interner(const Table& table, ColKey col_key) - { - return table.get_string_interner(col_key); - } - static TableRef get_opposite_link_table(const Table& table, ColKey col_key); static Group* get_parent_group(const Table& table) noexcept diff --git a/src/realm/to_json.cpp b/src/realm/to_json.cpp index e9b9b049d72..ed1a11839d0 100644 --- a/src/realm/to_json.cpp +++ b/src/realm/to_json.cpp @@ -297,12 +297,12 @@ void Obj::to_json(std::ostream& out, JSONOutputMode output_mode) const print_link(val); } else if (val.is_type(type_Dictionary)) { - DummyParent parent(m_table, val.get_ref()); + DummyParent parent(m_table, val.get_ref(), ck); Dictionary dict(parent, 0); dict.to_json(out, output_mode, print_link); } else if (val.is_type(type_List)) { - DummyParent parent(m_table, val.get_ref()); + DummyParent parent(m_table, val.get_ref(), ck); Lst list(parent, 0); list.to_json(out, output_mode, print_link); } diff --git a/test/test_lang_bind_helper.cpp b/test/test_lang_bind_helper.cpp index 7bdd650d225..04467f781d0 100644 --- a/test/test_lang_bind_helper.cpp +++ b/test/test_lang_bind_helper.cpp @@ -521,7 +521,7 @@ TEST(LangBindHelper_AdvanceReadTransact_Basics) rt->verify(); CHECK_EQUAL(0, rt->size()); - // Create a table via the other SharedGroup + // Create a table in a separate transaction ObjKey k0; { WriteTransaction wt(sg); @@ -542,7 +542,7 @@ TEST(LangBindHelper_AdvanceReadTransact_Basics) CHECK_EQUAL(0, foo->get_object(k0).get(cols[0])); uint_fast64_t version = foo->get_content_version(); - // Modify the table via the other SharedGroup + // Modify the table in a separate transaction ObjKey k1; { WriteTransaction wt(sg); diff --git a/test/test_list.cpp b/test/test_list.cpp index d8e3f1fc1de..f3c5dd0b938 100644 --- a/test/test_list.cpp +++ b/test/test_list.cpp @@ -108,9 +108,12 @@ TEST(List_basic) TEST(List_SimpleTypes) { - Group g; + SHARED_GROUP_TEST_PATH(path); + DBRef db = DB::create(make_in_realm_history(), path); + + auto tr = db->start_write(); std::vector lists; - TableRef t = g.add_table("table"); + TableRef t = tr->add_table("table"); ColKey int_col = t->add_column_list(type_Int, "integers"); ColKey bool_col = t->add_column_list(type_Bool, "booleans"); ColKey string_col = t->add_column_list(type_String, "strings"); @@ -135,6 +138,9 @@ TEST(List_SimpleTypes) Timestamp(seconds_since_epoc + 60, 0)}; obj.set_list_values(timestamp_col, timestamp_vector); + tr->commit_and_continue_as_read(); + tr->promote_to_write(); + auto int_list = obj.get_list(int_col); lists.push_back(&int_list); std::vector vec(int_list.size());