diff --git a/src/realm/array.cpp b/src/realm/array.cpp index b95d081f4d5..7486c8092c9 100644 --- a/src/realm/array.cpp +++ b/src/realm/array.cpp @@ -314,18 +314,6 @@ void Array::destroy_children(size_t offset, bool ro_only) noexcept } } -// size_t Array::get_byte_size() const noexcept -//{ -// const auto header = get_header(); -// auto num_bytes = get_byte_size_from_header(header); -// auto read_only = m_alloc.is_read_only(m_ref) == true; -// auto capacity = get_capacity_from_header(header); -// auto bytes_ok = num_bytes <= capacity; -// REALM_ASSERT(read_only || bytes_ok); -// REALM_ASSERT_7(m_alloc.is_read_only(m_ref), ==, true, ||, num_bytes, <=, get_capacity_from_header(header)); -// return num_bytes; -// } - ref_type Array::do_write_shallow(_impl::ArrayWriterBase& out) const { // here we might want to compress the array and write down. @@ -607,14 +595,6 @@ void Array::do_ensure_minimum_width(int_fast64_t value) } } -size_t Array::size() const noexcept -{ - // in case the array is in compressed format. Never read directly - // from the header the size, since it will result very likely in a cache miss. - // For compressed arrays m_size should always be kept updated, due to init_from_mem - return m_size; -} - bool Array::compress_array(Array& arr) const { if (m_integer_compressor.get_encoding() == NodeHeader::Encoding::WTypBits) { diff --git a/src/realm/array.hpp b/src/realm/array.hpp index 0611068bd12..047349846f8 100644 --- a/src/realm/array.hpp +++ b/src/realm/array.hpp @@ -210,8 +210,6 @@ class Array : public Node, public ArrayParent { update_width_cache_from_header(); } - size_t size() const noexcept; - bool is_empty() const noexcept { return size() == 0; diff --git a/src/realm/array_string.cpp b/src/realm/array_string.cpp index cb2aa6fb3f5..8731c99fac9 100644 --- a/src/realm/array_string.cpp +++ b/src/realm/array_string.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -549,5 +550,4 @@ ref_type ArrayString::write(_impl::ArrayWriterBase& out, StringInterner* interne auto retval = interned.write(out, false, false, out.compress); interned.destroy(); return retval; - // return m_arr->write(out, true, false, false); } diff --git a/src/realm/node.hpp b/src/realm/node.hpp index 21ee61eddde..9b684f25246 100644 --- a/src/realm/node.hpp +++ b/src/realm/node.hpp @@ -21,7 +21,6 @@ #include #include -#include #include @@ -352,6 +351,7 @@ class ArrayWriterBase; } /// Base class for all nodes holding user data +class StringInterner; class ArrayPayload { public: virtual ~ArrayPayload(); diff --git a/src/realm/string_compressor.cpp b/src/realm/string_compressor.cpp index 99dcb50dac5..13de772b6a3 100644 --- a/src/realm/string_compressor.cpp +++ b/src/realm/string_compressor.cpp @@ -17,19 +17,18 @@ **************************************************************************/ #include +#include #include -#include - #include namespace realm { StringCompressor::StringCompressor(Allocator& alloc, Array& parent, size_t index, bool writable) + : m_data(alloc) { m_compression_map.resize(16); // start with a very small compression map m_symbols.reserve(65536); - m_data = std::make_unique(alloc); - m_data->set_parent(&parent, index); + m_data.set_parent(&parent, index); refresh(writable); } @@ -37,16 +36,16 @@ void StringCompressor::refresh(bool writable) { // we assume that compressors are only created from a valid parent. // String interners in 'dead' mode should never instantiate a string compressor. - if (m_data->get_ref_from_parent() == 0) { + if (m_data.get_ref_from_parent() == 0) { REALM_ASSERT(writable); - m_data->create(0, 65535); - m_data->update_parent(); + m_data.create(0, 65535); + m_data.update_parent(); } else { - if (m_data->is_attached()) - m_data->update_from_parent(); + if (m_data.is_attached()) + m_data.update_from_parent(); else - m_data->init_from_ref(m_data->get_ref_from_parent()); + m_data.init_from_ref(m_data.get_ref_from_parent()); } rebuild_internal(); } @@ -111,7 +110,7 @@ void StringCompressor::expand_compression_map() void StringCompressor::rebuild_internal() { - auto num_symbols = m_data->size(); + auto num_symbols = m_data.size(); if (num_symbols == m_symbols.size()) return; if (num_symbols < m_symbols.size()) { @@ -132,7 +131,7 @@ void StringCompressor::rebuild_internal() } // we have new symbols to add for (size_t i = m_symbols.size(); i < num_symbols; ++i) { - auto pair = m_data->get(i); + auto pair = m_data.get(i); SymbolDef def; def.id = (CompressionSymbol)(i + 256); def.expansion_a = 0xFFFF & (pair >> 16); @@ -198,13 +197,13 @@ CompressedString StringCompressor::compress(StringData sd, bool learn) if (m_symbols.size() < (65536 - 256) && learn) { // define a new symbol for this entry and use it. REALM_ASSERT_DEBUG(m_compression_map[hash].id == 0); - REALM_ASSERT_DEBUG(m_symbols.size() == m_data->size()); - REALM_ASSERT_DEBUG(m_data->is_attached()); + REALM_ASSERT_DEBUG(m_symbols.size() == m_data.size()); + REALM_ASSERT_DEBUG(m_data.is_attached()); CompressionSymbol id = (CompressionSymbol)(256 + m_symbols.size()); SymbolDef def{id, from[0], from[1]}; m_compression_map[hash] = def; add_expansion(def); - m_data->add(((uint64_t)from[0]) << 16 | from[1]); + m_data.add(((uint64_t)from[0]) << 16 | from[1]); // std::cerr << id << " = {" << from[0] << ", " << from[1] << "}" << std::endl; *to++ = id; from += 2; diff --git a/src/realm/string_compressor.hpp b/src/realm/string_compressor.hpp index 2c866ecb781..bd10948e25c 100644 --- a/src/realm/string_compressor.hpp +++ b/src/realm/string_compressor.hpp @@ -19,11 +19,13 @@ #ifndef REALM_STRING_COMPRESSOR_HPP #define REALM_STRING_COMPRESSOR_HPP +#include #include #include using CompressionSymbol = uint16_t; using CompressedString = std::vector; + struct CompressedStringView { CompressionSymbol* data = 0; uint32_t size = 0; @@ -51,11 +53,6 @@ struct CompressedStringView { }; namespace realm { - -class ArrayUnsigned; -class Array; -class Allocator; - class StringCompressor { public: StringCompressor(Allocator& alloc, Array& parent, size_t index, bool writable); @@ -90,7 +87,7 @@ class StringCompressor { std::vector m_symbols; // map from symbol -> symbolpair, 2 elements pr entry std::vector m_compression_map; // perfect hash from symbolpair to its symbol - std::unique_ptr m_data; + ArrayUnsigned m_data; constexpr static size_t storage_chunk_size = 4096; std::vector m_expansion_storage; }; diff --git a/src/realm/string_interner.cpp b/src/realm/string_interner.cpp index fb801b1fd6a..a3e898c8236 100644 --- a/src/realm/string_interner.cpp +++ b/src/realm/string_interner.cpp @@ -17,9 +17,8 @@ **************************************************************************/ #include +#include #include - -#include #include namespace realm { @@ -100,7 +99,7 @@ struct HashMapIter { // Attempt to build a hash leaf from a smaller hash leaf or a non-hash leaf. static bool rehash(Array& from, Array& to, uint8_t hash_size) { - REALM_ASSERT_DEBUG(from.size() * 2 == to.size()); + REALM_ASSERT_DEBUG(from.size() * 2 <= to.size()); for (size_t i = 0; i < from.size(); ++i) { auto entry = (size_t)from.get(i); @@ -271,48 +270,47 @@ struct StringInterner::DataLeaf { StringInterner::StringInterner(Allocator& alloc, Array& parent, ColKey col_key, bool writable) : m_parent(parent) + , m_top(alloc) + , m_data(alloc) + , m_hash_map(alloc) + , m_current_string_leaf(alloc) + , m_current_long_string_node(alloc) { REALM_ASSERT_DEBUG(col_key != ColKey()); size_t index = col_key.get_index().val; // ensure that m_top and m_data is well defined and reflect any existing data // We'll have to extend this to handle no defined backing - m_top = std::make_unique(alloc); - m_top->set_parent(&parent, index); - m_data = std::make_unique(alloc); - m_data->set_parent(m_top.get(), Pos_Data); - m_hash_map = std::make_unique(alloc); - m_hash_map->set_parent(m_top.get(), Pos_Map); - m_current_string_leaf = std::make_unique(alloc); + m_top.set_parent(&parent, index); + m_data.set_parent(&m_top, Pos_Data); + m_hash_map.set_parent(&m_top, Pos_Map); m_col_key = col_key; update_from_parent(writable); } void StringInterner::update_from_parent(bool writable) { - auto parent_idx = m_top->get_ndx_in_parent(); + auto parent_idx = m_top.get_ndx_in_parent(); bool valid_top_ref_spot = m_parent.is_attached() && parent_idx < m_parent.size(); bool valid_top = valid_top_ref_spot && m_parent.get_as_ref(parent_idx); if (valid_top) { - m_top->update_from_parent(); - m_data->update_from_parent(); - m_hash_map->update_from_parent(); + m_top.update_from_parent(); + m_data.update_from_parent(); + m_hash_map.update_from_parent(); } else if (writable && valid_top_ref_spot) { - m_top->create(NodeHeader::type_HasRefs, false, Top_Size, 0); - m_top->set(Pos_Version, (1 << 1) + 1); // version number 1. - m_top->set(Pos_Size, (0 << 1) + 1); // total size 0 - m_top->set(Pos_ColKey, (m_col_key.value << 1) + 1); - m_top->set(Pos_Compressor, 0); + m_top.create(NodeHeader::type_HasRefs, false, Top_Size, 0); + m_top.set(Pos_Version, (1 << 1) + 1); // version number 1. + m_top.set(Pos_Size, (0 << 1) + 1); // total size 0 + m_top.set(Pos_ColKey, (m_col_key.value << 1) + 1); + m_top.set(Pos_Compressor, 0); + // create first level of data tree here (to simplify other stuff) - m_data = std::make_unique(m_parent.get_alloc()); - m_data->set_parent(m_top.get(), Pos_Data); - m_data->create(NodeHeader::type_HasRefs, false, 0); - m_data->update_parent(); - m_hash_map = std::make_unique(m_parent.get_alloc()); - m_hash_map->set_parent(m_top.get(), Pos_Map); - m_hash_map->create(NodeHeader::type_Normal); - m_hash_map->update_parent(); - m_top->update_parent(); + m_data.create(NodeHeader::type_HasRefs, false, 0); + m_data.update_parent(); + + m_hash_map.create(NodeHeader::type_Normal); + m_hash_map.update_parent(); + m_top.update_parent(); valid_top = true; } if (!valid_top) { @@ -320,44 +318,43 @@ void StringInterner::update_from_parent(bool writable) m_compressor.reset(); m_compressed_leafs.clear(); // m_compressed_string_map.clear(); - m_top->detach(); // <-- indicates "dead" mode - m_data->detach(); - m_hash_map->detach(); + m_top.detach(); + m_data.detach(); + m_hash_map.detach(); m_compressor.reset(); return; } // validate we're accessing data for the correct column. A combination of column erase // and insert could lead to an interner being paired with wrong data in the file. // If so, we clear internal data forcing rebuild_internal() to rebuild from scratch. - int64_t data_colkey = m_top->get_as_ref_or_tagged(Pos_ColKey).get_as_int(); + int64_t data_colkey = m_top.get_as_ref_or_tagged(Pos_ColKey).get_as_int(); if (m_col_key.value != data_colkey) { // new column, new data m_compressor.reset(); m_decompressed_strings.clear(); } if (!m_compressor) - m_compressor = std::make_unique(m_top->get_alloc(), *m_top, Pos_Compressor, writable); + m_compressor = std::make_unique(m_top.get_alloc(), m_top, Pos_Compressor, writable); else m_compressor->refresh(writable); - if (m_data->size()) { - auto ref_to_write_buffer = m_data->get_as_ref(m_data->size() - 1); - const char* header = m_top->get_alloc().translate(ref_to_write_buffer); + if (m_data.size()) { + auto ref_to_write_buffer = m_data.get_as_ref(m_data.size() - 1); + const char* header = m_top.get_alloc().translate(ref_to_write_buffer); bool is_array_of_cprs = NodeHeader::get_hasrefs_from_header(header); if (is_array_of_cprs) { - m_current_long_string_node = std::make_unique(m_top->get_alloc()); - m_current_long_string_node->set_parent(m_data.get(), m_data->size() - 1); - m_current_long_string_node->update_from_parent(); + m_current_long_string_node.set_parent(&m_data, m_data.size() - 1); + m_current_long_string_node.update_from_parent(); } else { - m_current_long_string_node.reset(); + m_current_long_string_node.detach(); } } else - m_current_long_string_node.reset(); // just in case... + m_current_long_string_node.detach(); // just in case... // rebuild internal structures...... rebuild_internal(); - m_current_string_leaf->detach(); + m_current_string_leaf.detach(); } void StringInterner::rebuild_internal() @@ -382,21 +379,19 @@ void StringInterner::rebuild_internal() } } - size_t target_size = (size_t)m_top->get_as_ref_or_tagged(Pos_Size).get_as_int(); + size_t target_size = (size_t)m_top.get_as_ref_or_tagged(Pos_Size).get_as_int(); m_decompressed_strings.resize(target_size); - if (m_data->size() != m_compressed_leafs.size()) { - m_compressed_leafs.resize(m_data->size()); + if (m_data.size() != m_compressed_leafs.size()) { + m_compressed_leafs.resize(m_data.size()); } - // allways force new setup of all leafs: + // always force new setup of all leafs: // update m_compressed_leafs to reflect m_data for (size_t idx = 0; idx < m_compressed_leafs.size(); ++idx) { - auto ref = m_data->get_as_ref(idx); + auto ref = m_data.get_as_ref(idx); auto& leaf_meta = m_compressed_leafs[idx]; - // if (ref != leaf_meta.m_leaf_ref) { leaf_meta.m_is_loaded = false; leaf_meta.m_compressed.clear(); leaf_meta.m_leaf_ref = ref; - //} } } @@ -404,13 +399,13 @@ StringInterner::~StringInterner() {} StringID StringInterner::intern(StringData sd) { - REALM_ASSERT(m_top->is_attached()); + REALM_ASSERT(m_top.is_attached()); std::lock_guard lock(m_mutex); // special case for null string if (sd.data() == nullptr) return 0; uint32_t h = (uint32_t)sd.hash(); - auto candidates = hash_to_id(*m_hash_map.get(), h, 32); + auto candidates = hash_to_id(m_hash_map, h, 32); for (auto& candidate : candidates) { auto candidate_cpr = get_compressed(candidate); if (m_compressor->compare(sd, candidate_cpr) == 0) @@ -422,56 +417,52 @@ StringID StringInterner::intern(StringData sd) m_decompressed_strings.push_back({64, std::make_unique(sd)}); auto id = m_decompressed_strings.size(); m_in_memory_strings.push_back(id); - add_to_hash_map(*m_hash_map.get(), h, id, 32); - size_t index = (size_t)m_top->get_as_ref_or_tagged(Pos_Size).get_as_int(); + add_to_hash_map(m_hash_map, h, id, 32); + size_t index = (size_t)m_top.get_as_ref_or_tagged(Pos_Size).get_as_int(); REALM_ASSERT_DEBUG(index == id - 1); bool need_long_string_node = c_str.size() >= 65536; // TODO: update_internal must set up m_current_long_string_node if it is in use + if (need_long_string_node && !m_current_long_string_node.is_attached()) { + + m_current_long_string_node.create(NodeHeader::type_HasRefs); - if (need_long_string_node && !m_current_long_string_node) { if ((index & 0xFF) == 0) { // if we're starting on a new leaf, extend parent array for it - m_data->add(0); + m_data.add(0); m_compressed_leafs.push_back({}); - m_current_long_string_node = std::make_unique(m_top->get_alloc()); - m_current_long_string_node->set_parent(m_data.get(), m_data->size() - 1); - m_current_long_string_node->create(NodeHeader::type_HasRefs); - m_current_long_string_node->update_parent(); - REALM_ASSERT_DEBUG(!m_current_string_leaf->is_attached() || m_current_string_leaf->size() == 0); - m_current_string_leaf->detach(); + m_current_long_string_node.set_parent(&m_data, m_data.size() - 1); + m_current_long_string_node.update_parent(); + REALM_ASSERT_DEBUG(!m_current_string_leaf.is_attached() || m_current_string_leaf.size() == 0); + m_current_string_leaf.detach(); } else { // we have been building an existing leaf and need to shift representation. // but first we need to update leaf accessor for existing leaf - if (m_current_string_leaf->is_attached()) { - m_current_string_leaf->update_from_parent(); + if (m_current_string_leaf.is_attached()) { + m_current_string_leaf.update_from_parent(); } else { - m_current_string_leaf->init_from_ref(m_current_string_leaf->get_ref_from_parent()); + m_current_string_leaf.init_from_ref(m_current_string_leaf.get_ref_from_parent()); } - REALM_ASSERT_DEBUG(m_current_string_leaf->size() > 0); - m_current_long_string_node = std::make_unique(m_top->get_alloc()); - m_current_long_string_node->set_parent(m_data.get(), m_data->size() - 1); - m_current_long_string_node->create(NodeHeader::type_HasRefs); - m_current_long_string_node->update_parent(); + REALM_ASSERT_DEBUG(m_current_string_leaf.size() > 0); + m_current_long_string_node.set_parent(&m_data, m_data.size() - 1); + m_current_long_string_node.update_parent(); // convert the current leaf into a long string node. (array of strings in separate arrays) for (auto s : m_compressed_leafs.back().m_compressed) { - ArrayUnsigned arr(m_top->get_alloc()); + ArrayUnsigned arr(m_top.get_alloc()); arr.create(s.size, 65535); unsigned short* dest = reinterpret_cast(arr.m_data); std::copy_n(s.data, s.size, dest); - m_current_long_string_node->add(arr.get_ref()); + m_current_long_string_node.add(arr.get_ref()); } - m_current_string_leaf->destroy(); - m_current_string_leaf->detach(); + m_current_string_leaf.destroy(); // force later reload of leaf m_compressed_leafs.back().m_is_loaded = false; - // m_compressed_leafs.back().m_leaf_ref = m_data->get_as_ref(m_data->size() - 1); } } - if (m_current_long_string_node) { - ArrayUnsigned arr(m_top->get_alloc()); + if (m_current_long_string_node.is_attached()) { + ArrayUnsigned arr(m_top.get_alloc()); arr.create(c_str.size(), 65535); unsigned short* begin = c_str.data(); if (begin) { @@ -480,11 +471,11 @@ StringID StringInterner::intern(StringData sd) unsigned short* dest = reinterpret_cast(arr.m_data); std::copy_n(begin, n, dest); } - m_current_long_string_node->add(arr.get_ref()); - m_current_long_string_node->update_parent(); - if (m_current_long_string_node->size() == 256) { + m_current_long_string_node.add(arr.get_ref()); + m_current_long_string_node.update_parent(); + if (m_current_long_string_node.size() == 256) { // exit from "long string mode" - m_current_long_string_node.reset(); + m_current_long_string_node.detach(); } CompressionSymbol* p_start = reinterpret_cast(arr.m_data); m_compressed_leafs.back().m_compressed.push_back({p_start, arr.size()}); @@ -492,40 +483,40 @@ StringID StringInterner::intern(StringData sd) else { // Append to leaf with up to 256 entries. // First create a new leaf if needed (limit number of entries to 256 pr leaf) - bool need_leaf_update = !m_current_string_leaf->is_attached() || (index & 0xFF) == 0; + bool need_leaf_update = !m_current_string_leaf.is_attached() || (index & 0xFF) == 0; if (need_leaf_update) { - m_current_string_leaf->set_parent(m_data.get(), index >> 8); + m_current_string_leaf.set_parent(&m_data, index >> 8); if ((index & 0xFF) == 0) { // create new leaf - m_current_string_leaf->create(0, 65535); - m_data->add(m_current_string_leaf->get_ref()); + m_current_string_leaf.create(0, 65535); + m_data.add(m_current_string_leaf.get_ref()); m_compressed_leafs.push_back({}); } else { // just setup leaf accessor - if (m_current_string_leaf->is_attached()) { - m_current_string_leaf->update_from_parent(); + if (m_current_string_leaf.is_attached()) { + m_current_string_leaf.update_from_parent(); } else { - m_current_string_leaf->init_from_ref(m_current_string_leaf->get_ref_from_parent()); + m_current_string_leaf.init_from_ref(m_current_string_leaf.get_ref_from_parent()); } } } REALM_ASSERT(c_str.size() < 65535); // Add compressed string at end of leaf - m_current_string_leaf->add(c_str.size()); + m_current_string_leaf.add(c_str.size()); for (auto c : c_str) { - m_current_string_leaf->add(c); + m_current_string_leaf.add(c); } REALM_ASSERT_DEBUG(m_compressed_leafs.size()); - CompressionSymbol* p = reinterpret_cast(m_current_string_leaf->m_data); - auto p_limit = p + m_current_string_leaf->size(); + CompressionSymbol* p = reinterpret_cast(m_current_string_leaf.m_data); + auto p_limit = p + m_current_string_leaf.size(); auto p_start = p_limit - c_str.size(); m_compressed_leafs.back().m_compressed.push_back({p_start, c_str.size()}); REALM_ASSERT(m_compressed_leafs.back().m_compressed.size() <= 256); } - m_top->adjust(Pos_Size, 2); // type is has_Refs, so increment is by 2 - load_leaf_if_new_ref(m_compressed_leafs.back(), m_data->get_as_ref(m_data->size() - 1)); + m_top.adjust(Pos_Size, 2); // type is has_Refs, so increment is by 2 + load_leaf_if_new_ref(m_compressed_leafs.back(), m_data.get_as_ref(m_data.size() - 1)); #ifdef REALM_DEBUG auto csv = get_compressed(id); CompressedStringView csv2(c_str); @@ -543,11 +534,11 @@ bool StringInterner::load_leaf_if_needed(DataLeaf& leaf) // must interpret leaf first - the leaf is either a single array holding all strings, // or an array with each (compressed) string placed in its own array. - const char* header = m_top->get_alloc().translate(leaf.m_leaf_ref); + const char* header = m_top.get_alloc().translate(leaf.m_leaf_ref); bool is_single_array = !NodeHeader::get_hasrefs_from_header(header); if (is_single_array) { size_t leaf_offset = 0; - ArrayUnsigned leaf_array(m_top->get_alloc()); + ArrayUnsigned leaf_array(m_top.get_alloc()); leaf_array.init_from_ref(leaf.m_leaf_ref); REALM_ASSERT(NodeHeader::get_encoding(leaf_array.get_header()) == NodeHeader::Encoding::WTypBits); REALM_ASSERT(NodeHeader::get_width_from_header(leaf_array.get_header()) == 16); @@ -565,10 +556,10 @@ bool StringInterner::load_leaf_if_needed(DataLeaf& leaf) } else { // Not a single leaf - instead an array of strings - Array arr(m_top->get_alloc()); + Array arr(m_top.get_alloc()); arr.init_from_ref(leaf.m_leaf_ref); for (size_t idx = 0; idx < arr.size(); ++idx) { - ArrayUnsigned str_array(m_top->get_alloc()); + ArrayUnsigned str_array(m_top.get_alloc()); ref_type ref = arr.get_as_ref(idx); str_array.init_from_ref(ref); REALM_ASSERT(NodeHeader::get_encoding(str_array.get_header()) == NodeHeader::Encoding::WTypBits); @@ -607,7 +598,7 @@ CompressedStringView& StringInterner::get_compressed(StringID id) std::optional StringInterner::lookup(StringData sd) { - if (!m_top->is_attached()) { + if (!m_top.is_attached()) { // "dead" mode return {}; } @@ -615,7 +606,7 @@ std::optional StringInterner::lookup(StringData sd) if (sd.data() == nullptr) return 0; uint32_t h = (uint32_t)sd.hash(); - auto candidates = hash_to_id(*m_hash_map.get(), h, 32); + auto candidates = hash_to_id(m_hash_map, h, 32); for (auto& candidate : candidates) { auto candidate_cpr = get_compressed(candidate); if (m_compressor->compare(sd, candidate_cpr) == 0) @@ -667,10 +658,9 @@ StringData StringInterner::get(StringID id) REALM_ASSERT_DEBUG(id <= m_decompressed_strings.size()); CachedString& cs = m_decompressed_strings[id - 1]; if (cs.m_decompressed) { - std::string* ref_str = cs.m_decompressed.get(); if (cs.m_weight < 128) cs.m_weight += 64; - return {ref_str->c_str(), ref_str->size()}; + return {cs.m_decompressed->c_str(), cs.m_decompressed->size()}; } cs.m_weight = 64; cs.m_decompressed = std::make_unique(m_compressor->decompress(get_compressed(id))); diff --git a/src/realm/string_interner.hpp b/src/realm/string_interner.hpp index 2a36c9e38dc..93c1eec45be 100644 --- a/src/realm/string_interner.hpp +++ b/src/realm/string_interner.hpp @@ -19,24 +19,25 @@ #ifndef REALM_STRING_INTERNER_HPP #define REALM_STRING_INTERNER_HPP +#include #include -#include +#include #include #include #include #include #include +#include +struct CompressedStringView; namespace realm { - using StringID = size_t; -class Array; -class ArrayUnsigned; -class Allocator; +class StringCompressor; + struct CachedString { uint8_t m_weight = 0; std::unique_ptr m_decompressed; @@ -58,22 +59,22 @@ class StringInterner { private: Array& m_parent; // need to be able to check if this is attached or not - std::unique_ptr m_top; + Array m_top; // Compressed strings are stored in blocks of 256. // One array holds refs to all blocks: - std::unique_ptr m_data; + Array m_data; // In-memory representation of a block. Either only the ref to it, // or a full vector of views into the block. struct DataLeaf; // in-memory metadata for faster access to compressed strings. Mirrors m_data. std::vector m_compressed_leafs; // 'm_hash_map' is used for mapping hash of uncompressed string to string id. - std::unique_ptr m_hash_map; + Array m_hash_map; // the block of compressed strings we're currently appending to: - std::unique_ptr m_current_string_leaf; + ArrayUnsigned m_current_string_leaf; // an array of strings we're currently appending to. This is used instead // when ever we meet a string too large to be placed inline. - std::unique_ptr m_current_long_string_node; + Array m_current_long_string_node; void rebuild_internal(); CompressedStringView& get_compressed(StringID id); // return true if the leaf was reloaded diff --git a/src/realm/table.cpp b/src/realm/table.cpp index 977339ade0d..210bc049bc3 100644 --- a/src/realm/table.cpp +++ b/src/realm/table.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -541,6 +542,7 @@ void Table::remove_column(ColKey col_key) erase_root_column(col_key); // Throws m_has_any_embedded_objects.reset(); auto i = col_key.get_index().val; + if (i < m_string_interners.size() && m_string_interners[i]) m_string_interners[i].reset(); } @@ -1070,19 +1072,19 @@ ColKey Table::do_insert_root_column(ColKey col_key, ColumnType type, StringData if (m_tombstones) { m_tombstones->insert_column(col_key); } - // create string interners internal rep as well as data area - REALM_ASSERT_DEBUG(m_interner_data.is_attached()); - while (col_ndx >= m_string_interners.size()) { - m_string_interners.push_back({}); - } - while (col_ndx >= m_interner_data.size()) { - m_interner_data.add(0); + if (col_key.get_type() == col_type_String || col_key.get_type() == col_type_Mixed) { + // create string interners internal rep as well as data area + REALM_ASSERT_DEBUG(m_interner_data.is_attached()); + while (col_ndx >= m_string_interners.size()) { + m_string_interners.push_back({}); + } + while (col_ndx >= m_interner_data.size()) { + m_interner_data.add(0); + } + REALM_ASSERT(!m_string_interners[col_ndx]); + m_string_interners[col_ndx] = std::make_unique(m_alloc, m_interner_data, col_key, true); } - REALM_ASSERT(!m_string_interners[col_ndx]); - // FIXME: Limit creation of interners to EXACTLY the columns, where they can be - // relevant. - // if (col_key.get_type() == col_type_String) - m_string_interners[col_ndx] = std::make_unique(m_alloc, m_interner_data, col_key, true); + bump_storage_version(); return col_key; @@ -1114,16 +1116,16 @@ void Table::do_erase_root_column(ColKey col_key) REALM_ASSERT(m_index_accessors.back() == nullptr); m_index_accessors.pop_back(); } - REALM_ASSERT_DEBUG(col_ndx < m_string_interners.size()); - if (m_string_interners[col_ndx]) { - REALM_ASSERT_DEBUG(m_interner_data.is_attached()); - REALM_ASSERT_DEBUG(col_ndx < m_interner_data.size()); - auto data_ref = m_interner_data.get_as_ref(col_ndx); - if (data_ref) - Array::destroy_deep(data_ref, m_alloc); - m_interner_data.set(col_ndx, 0); - // m_string_interners[col_ndx]->update_from_parent(true); - m_string_interners[col_ndx].reset(); + if (col_key.get_type() == col_type_String || col_key.get_type() == col_type_Mixed) { + if (col_ndx < m_string_interners.size() && m_string_interners[col_ndx]) { + REALM_ASSERT_DEBUG(m_interner_data.is_attached()); + REALM_ASSERT_DEBUG(col_ndx < m_interner_data.size()); + auto data_ref = m_interner_data.get_as_ref(col_ndx); + if (data_ref) + Array::destroy_deep(data_ref, m_alloc); + m_interner_data.set(col_ndx, 0); + m_string_interners[col_ndx].reset(); + } } bump_content_version(); bump_storage_version(); @@ -2231,6 +2233,10 @@ void Table::refresh_string_interners(bool writable) m_string_interners[idx].reset(); continue; } + + if (col_key.get_type() != col_type_String && col_key.get_type() != col_type_Mixed) + continue; + REALM_ASSERT_DEBUG(col_key.get_index().val == idx); // maintain sufficient size of interner arrays to cover all columns while (idx >= m_string_interners.size()) { diff --git a/test/object-store/sync/client_reset.cpp b/test/object-store/sync/client_reset.cpp index e75a61d5bcb..d2bcd6c893a 100644 --- a/test/object-store/sync/client_reset.cpp +++ b/test/object-store/sync/client_reset.cpp @@ -1046,7 +1046,7 @@ TEST_CASE("sync: client reset", "[sync][pbs][client reset][baas]") { realm->cancel_transaction(); return value == 6; }, - std::chrono::seconds(20)); + std::chrono::seconds(20), std::chrono::milliseconds(500)); } auto session = test_app_session.sync_manager()->get_existing_session(local_config.path); if (session) {