Skip to content

Commit

Permalink
Squashed into single commit relative to next-major
Browse files Browse the repository at this point in the history
  • Loading branch information
finnschiermer committed Jun 6, 2024
1 parent a6bb5e9 commit fcb50cd
Show file tree
Hide file tree
Showing 31 changed files with 1,725 additions and 57 deletions.
4 changes: 4 additions & 0 deletions src/realm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ set(REALM_SOURCES
table.cpp
table_ref.cpp
obj_list.cpp
string_interner.cpp
string_compressor.cpp
object_id.cpp
table_view.cpp
tokenizer.cpp
Expand Down Expand Up @@ -178,6 +180,8 @@ set(REALM_INSTALL_HEADERS
null.hpp
obj.hpp
obj_list.hpp
string_interner.hpp
string_compressor.hpp
object_id.hpp
path.hpp
owned_data.hpp
Expand Down
12 changes: 10 additions & 2 deletions src/realm/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ void Array::set_type(Type type)
set_hasrefs_in_header(init_has_refs, header);
}

void Array::destroy_children(size_t offset) noexcept
void Array::destroy_children(size_t offset, bool ro_only) noexcept
{
for (size_t i = offset; i != m_size; ++i) {
int64_t value = get(i);
Expand All @@ -310,7 +310,7 @@ void Array::destroy_children(size_t offset) noexcept
continue;

ref_type ref = to_ref(value);
destroy_deep(ref, m_alloc);
destroy_deep(ref, m_alloc, ro_only);
}
}

Expand Down Expand Up @@ -607,6 +607,14 @@ void Array::do_ensure_minimum_width(int_fast64_t value)
}
}

size_t Array::size() const noexcept
{
// Return the cached element count (m_size) instead of reading the size
// from the node header: a header read would very likely result in a
// cache miss. This also applies when the array is in compressed format;
// for compressed arrays m_size is expected to be kept up to date by
// init_from_mem.
return m_size;
}

bool Array::compress_array(Array& arr) const
{
if (m_integer_compressor.get_encoding() == NodeHeader::Encoding::WTypBits) {
Expand Down
31 changes: 18 additions & 13 deletions src/realm/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class Array : public Node, public ArrayParent {
/// pointer.
void init_from_mem(MemRef) noexcept;

/// Same as `init_from_ref(get_ref_from_parent())`.
/// Same as `init_from_ref(get_ref_from_parent())`.
void init_from_parent() noexcept
{
ref_type ref = get_ref_from_parent();
Expand Down Expand Up @@ -210,6 +210,8 @@ class Array : public Node, public ArrayParent {
update_width_cache_from_header();
}

size_t size() const noexcept;

bool is_empty() const noexcept
{
return size() == 0;
Expand Down Expand Up @@ -362,7 +364,8 @@ class Array : public Node, public ArrayParent {
/// state (as if calling detach()), then free the allocated memory. If this
/// accessor is already in the detached state, this function has no effect
/// (idempotency).
void destroy_deep() noexcept;
/// If 'ro_only', only free space in read-only memory (the file)
void destroy_deep(bool ro_only = false) noexcept;

/// check if the array is encoded (in B format)
inline bool is_compressed() const;
Expand All @@ -377,13 +380,13 @@ class Array : public Node, public ArrayParent {
bool try_decompress();

/// Shorthand for `destroy_deep(MemRef(ref, alloc), alloc, ro_only)`.
static void destroy_deep(ref_type ref, Allocator& alloc) noexcept;
static void destroy_deep(ref_type ref, Allocator& alloc, bool ro_only = false) noexcept;

/// Destroy the specified array node and all of its children, recursively.
///
/// This is done by freeing the specified array node after calling
/// destroy_deep() for every contained 'ref' element.
static void destroy_deep(MemRef, Allocator&) noexcept;
static void destroy_deep(MemRef, Allocator&, bool ro_only = false) noexcept;

// Clone deep
static MemRef clone(MemRef, Allocator& from_alloc, Allocator& target_alloc);
Expand Down Expand Up @@ -540,7 +543,7 @@ class Array : public Node, public ArrayParent {
// Overriding method in ArrayParent
ref_type get_child_ref(size_t) const noexcept override;

void destroy_children(size_t offset = 0) noexcept;
void destroy_children(size_t offset = 0, bool ro_only = false) noexcept;

protected:
// Getters and Setters for adaptive-packed arrays
Expand Down Expand Up @@ -912,16 +915,17 @@ inline void Array::set_context_flag(bool value) noexcept
}
}

inline void Array::destroy_deep() noexcept
inline void Array::destroy_deep(bool ro_only) noexcept
{
if (!is_attached())
return;

if (m_has_refs)
destroy_children();
destroy_children(0, ro_only);

char* header = get_header_from_data(m_data);
m_alloc.free_(m_ref, header);
if (!ro_only || is_read_only())
m_alloc.free_(m_ref, header);
m_data = nullptr;
}

Expand Down Expand Up @@ -964,20 +968,21 @@ inline void Array::clear_and_destroy_children()
truncate_and_destroy_children(0);
}

inline void Array::destroy_deep(ref_type ref, Allocator& alloc) noexcept
inline void Array::destroy_deep(ref_type ref, Allocator& alloc, bool ro_only) noexcept
{
destroy_deep(MemRef(ref, alloc), alloc);
destroy_deep(MemRef(ref, alloc), alloc, ro_only);
}

inline void Array::destroy_deep(MemRef mem, Allocator& alloc) noexcept
inline void Array::destroy_deep(MemRef mem, Allocator& alloc, bool ro_only) noexcept
{
if (!get_hasrefs_from_header(mem.get_addr())) {
alloc.free_(mem);
if (!ro_only || alloc.is_read_only(mem.get_ref()))
alloc.free_(mem);
return;
}
Array array(alloc);
array.init_from_mem(mem);
array.destroy_deep();
array.destroy_deep(ro_only);
}


Expand Down
2 changes: 2 additions & 0 deletions src/realm/array_integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <realm/impl/destroy_guard.hpp>
#include <realm/column_integer.hpp>

#include <iostream>

using namespace realm;

ArrayInteger::ArrayInteger(Allocator& allocator) noexcept
Expand Down
1 change: 1 addition & 0 deletions src/realm/array_integer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ inline ArrayIntNull::~ArrayIntNull() noexcept {}

inline size_t ArrayIntNull::size() const noexcept
{
// Reports one element fewer than the underlying Array.
// NOTE(review): this looks suspicious. Array::size() returns size_t, so if
// the underlying array were ever empty (size 0) the subtraction would wrap
// around to a huge value. Presumably the underlying array always holds at
// least one element; confirm, and consider asserting it here.
return Array::size() - 1;
}

Expand Down
95 changes: 84 additions & 11 deletions src/realm/array_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
**************************************************************************/

#include <realm/array_string.hpp>
#include <realm/impl/array_writer.hpp>
#include <realm/spec.hpp>
#include <realm/mixed.hpp>

Expand Down Expand Up @@ -52,14 +53,24 @@ void ArrayString::init_from_mem(MemRef mem) noexcept
else {
auto arr = new (&m_storage) Array(m_alloc);
arr->init_from_mem(mem);
m_string_enum_values = std::make_unique<ArrayString>(m_alloc);
ArrayParent* p;
REALM_ASSERT(m_spec != nullptr);
REALM_ASSERT(m_col_ndx != realm::npos);
ref_type r = m_spec->get_enumkeys_ref(m_col_ndx, p);
m_string_enum_values->init_from_ref(r);
m_string_enum_values->set_parent(p, m_col_ndx);
m_type = Type::enum_strings;
// The context flag is used to indicate interned strings vs old enum strings
// (in conjunction with has_refs() == false)
if (arr->get_context_flag_from_header(arr->get_header())) {
// init for new interned strings (replacing old enum strings)
m_type = Type::interned_strings;
// consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
}
else {
// init for old enum strings
m_string_enum_values = std::make_unique<ArrayString>(m_alloc);
ArrayParent* p;
REALM_ASSERT(m_spec != nullptr);
REALM_ASSERT(m_col_ndx != realm::npos);
ref_type r = m_spec->get_enumkeys_ref(m_col_ndx, p);
m_string_enum_values->init_from_ref(r);
m_string_enum_values->set_parent(p, m_col_ndx);
m_type = Type::enum_strings;
}
}
}
else {
Expand Down Expand Up @@ -111,6 +122,7 @@ size_t ArrayString::size() const
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->size();
case Type::enum_strings:
case Type::interned_strings:
return static_cast<Array*>(m_arr)->size();
}
return {};
Expand All @@ -128,7 +140,8 @@ void ArrayString::add(StringData value)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->add_string(value);
break;
case Type::enum_strings: {
case Type::enum_strings:
case Type::interned_strings: {
auto a = static_cast<Array*>(m_arr);
size_t ndx = a->size();
a->add(0);
Expand All @@ -150,6 +163,11 @@ void ArrayString::set(size_t ndx, StringData value)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->set_string(ndx, value);
break;
case Type::interned_strings: {
auto id = m_string_interner->intern(value);
static_cast<Array*>(m_arr)->set(ndx, id);
break;
}
case Type::enum_strings: {
size_t sz = m_string_enum_values->size();
size_t res = m_string_enum_values->find_first(value, 0, sz);
Expand Down Expand Up @@ -178,6 +196,12 @@ void ArrayString::insert(size_t ndx, StringData value)
case Type::enum_strings: {
static_cast<Array*>(m_arr)->insert(ndx, 0);
set(ndx, value);
break;
}
case Type::interned_strings: {
static_cast<Array*>(m_arr)->insert(ndx, 0);
set(ndx, value);
break;
}
}
}
Expand All @@ -195,6 +219,10 @@ StringData ArrayString::get(size_t ndx) const
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->get(index);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_interner->get(id);
}
}
return {};
}
Expand All @@ -212,6 +240,10 @@ StringData ArrayString::get_legacy(size_t ndx) const
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->get(index);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_interner->get(id);
}
}
return {};
}
Expand All @@ -231,8 +263,12 @@ bool ArrayString::is_null(size_t ndx) const
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->is_null(ndx);
case Type::enum_strings: {
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->is_null(index);
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->is_null(id);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return id == 0;
}
}
return {};
Expand All @@ -250,6 +286,7 @@ void ArrayString::erase(size_t ndx)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->erase(ndx);
break;
case Type::interned_strings:
case Type::enum_strings:
static_cast<Array*>(m_arr)->erase(ndx);
break;
Expand Down Expand Up @@ -277,6 +314,9 @@ void ArrayString::move(ArrayString& dst, size_t ndx)
// this operation will never be called for enumerated columns
REALM_UNREACHABLE();
break;
case Type::interned_strings:
m_arr->truncate(ndx);
break;
}
}

Expand All @@ -293,6 +333,7 @@ void ArrayString::clear()
static_cast<ArrayBigBlobs*>(m_arr)->clear();
break;
case Type::enum_strings:
case Type::interned_strings:
static_cast<Array*>(m_arr)->clear();
break;
}
Expand Down Expand Up @@ -321,6 +362,15 @@ size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const
}
break;
}
case Type::interned_strings: {
// We need a way to avoid doing this lookup for each leaf array. The lookup
// should be done higher up the call stack and its result passed down.
auto id = m_string_interner->lookup(value);
if (id) {
return static_cast<Array*>(m_arr)->find_first(*id, begin, end);
}
break;
}
}
return not_found;
}
Expand Down Expand Up @@ -371,6 +421,9 @@ size_t ArrayString::lower_bound(StringData value)
return lower_bound_string(static_cast<ArrayBigBlobs*>(m_arr), value);
case Type::enum_strings:
break;
case Type::interned_strings:
REALM_UNREACHABLE();
break;
}
return realm::npos;
}
Expand All @@ -383,6 +436,9 @@ ArrayString::Type ArrayString::upgrade_leaf(size_t value_size)
if (m_type == Type::enum_strings)
return Type::enum_strings;

if (m_type == Type::interned_strings)
return Type::interned_strings;

if (m_type == Type::medium_strings) {
if (value_size <= medium_string_max_size)
return Type::medium_strings;
Expand Down Expand Up @@ -473,8 +529,25 @@ void ArrayString::verify() const
static_cast<ArrayBigBlobs*>(m_arr)->verify();
break;
case Type::enum_strings:
case Type::interned_strings:
static_cast<Array*>(m_arr)->verify();
break;
}
#endif
}

/// Write this leaf to `out` as an array of interned string ids.
///
/// Every element is read with get(), interned through `interner`, and the
/// resulting id is stored in a temporary integer array which is then written
/// with `out`. All elements are written, modified or not, to match the total
/// cleanup.
///
/// \param out      writer receiving the temporary array; `out.compress` is
///                 forwarded to Array::write.
/// \param interner string interner used to map each string to an id; must
///                 not be null (asserted).
/// \return the ref produced by writing the temporary array.
ref_type ArrayString::write(_impl::ArrayWriterBase& out, StringInterner* interner)
{
    REALM_ASSERT(interner);
    // Temporary array holding one interned id per element of this leaf.
    Array interned(Allocator::get_default());
    auto sz = size();
    interned.create(NodeHeader::type_Normal, true, sz);
    // `interned` is released with an explicit destroy() call, so make sure
    // that call also happens if interning or writing throws.
    try {
        for (size_t i = 0; i < sz; ++i) {
            interned.set(i, interner->intern(get(i)));
        }
        auto retval = interned.write(out, false, false, out.compress);
        interned.destroy();
        return retval;
    }
    catch (...) {
        interned.destroy();
        throw;
    }
}
Loading

0 comments on commit fcb50cd

Please sign in to comment.