Skip to content

Commit

Permalink
Remove enum string feature (#7858)
Browse files Browse the repository at this point in the history
  • Loading branch information
jedelbo authored Jul 10, 2024
1 parent 499e630 commit ce6f196
Show file tree
Hide file tree
Showing 37 changed files with 486 additions and 1,358 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
-----------

### Internals
* None.
* Ability to enumerate a string column has been removed.

----------------------------------------------

Expand Down
88 changes: 3 additions & 85 deletions src/realm/array_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,9 @@ void ArrayString::init_from_mem(MemRef mem) noexcept
else {
auto arr = new (&m_storage) Array(m_alloc);
arr->init_from_mem(mem);
// The context flag is used to indicate interned strings vs old enum strings
// (in conjunction with has_refs() == false)
if (arr->get_context_flag_from_header(arr->get_header())) {
// init for new interned strings (replacing old enum strings)
m_type = Type::interned_strings;
// consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
}
else {
// init for old enum strings
m_string_enum_values = std::make_unique<ArrayString>(m_alloc);
ArrayParent* p;
REALM_ASSERT(m_spec != nullptr);
REALM_ASSERT(m_col_ndx != realm::npos);
ref_type r = m_spec->get_enumkeys_ref(m_col_ndx, p);
m_string_enum_values->init_from_ref(r);
m_string_enum_values->set_parent(p, m_col_ndx);
m_type = Type::enum_strings;
}
// init for new interned strings
m_type = Type::interned_strings;
// consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
}
}
else {
Expand Down Expand Up @@ -122,7 +107,6 @@ size_t ArrayString::size() const
return static_cast<ArraySmallBlobs*>(m_arr)->size();
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->size();
case Type::enum_strings:
case Type::interned_strings:
return static_cast<Array*>(m_arr)->size();
}
Expand All @@ -141,7 +125,6 @@ void ArrayString::add(StringData value)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->add_string(value);
break;
case Type::enum_strings:
case Type::interned_strings: {
auto a = static_cast<Array*>(m_arr);
size_t ndx = a->size();
Expand Down Expand Up @@ -169,16 +152,6 @@ void ArrayString::set(size_t ndx, StringData value)
static_cast<Array*>(m_arr)->set(ndx, id);
break;
}
case Type::enum_strings: {
size_t sz = m_string_enum_values->size();
size_t res = m_string_enum_values->find_first(value, 0, sz);
if (res == realm::not_found) {
m_string_enum_values->add(value);
res = sz;
}
static_cast<Array*>(m_arr)->set(ndx, res);
break;
}
}
}

Expand All @@ -194,11 +167,6 @@ void ArrayString::insert(size_t ndx, StringData value)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->insert_string(ndx, value);
break;
case Type::enum_strings: {
static_cast<Array*>(m_arr)->insert(ndx, 0);
set(ndx, value);
break;
}
case Type::interned_strings: {
static_cast<Array*>(m_arr)->insert(ndx, 0);
set(ndx, value);
Expand All @@ -216,31 +184,6 @@ StringData ArrayString::get(size_t ndx) const
return static_cast<ArraySmallBlobs*>(m_arr)->get_string(ndx);
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->get_string(ndx);
case Type::enum_strings: {
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->get(index);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_interner->get(id);
}
}
return {};
}

StringData ArrayString::get_legacy(size_t ndx) const
{
switch (m_type) {
case Type::small_strings:
return static_cast<ArrayStringShort*>(m_arr)->get(ndx);
case Type::medium_strings:
return static_cast<ArraySmallBlobs*>(m_arr)->get_string_legacy(ndx);
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->get_string(ndx);
case Type::enum_strings: {
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->get(index);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_interner->get(id);
Expand All @@ -263,10 +206,6 @@ bool ArrayString::is_null(size_t ndx) const
return static_cast<ArraySmallBlobs*>(m_arr)->is_null(ndx);
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->is_null(ndx);
case Type::enum_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->is_null(id);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return id == 0;
Expand All @@ -288,7 +227,6 @@ void ArrayString::erase(size_t ndx)
static_cast<ArrayBigBlobs*>(m_arr)->erase(ndx);
break;
case Type::interned_strings:
case Type::enum_strings:
static_cast<Array*>(m_arr)->erase(ndx);
break;
}
Expand All @@ -311,10 +249,6 @@ void ArrayString::move(ArrayString& dst, size_t ndx)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->truncate(ndx);
break;
case Type::enum_strings:
// this operation will never be called for enumerated columns
REALM_UNREACHABLE();
break;
case Type::interned_strings:
m_arr->truncate(ndx);
break;
Expand All @@ -333,7 +267,6 @@ void ArrayString::clear()
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->clear();
break;
case Type::enum_strings:
case Type::interned_strings:
static_cast<Array*>(m_arr)->clear();
break;
Expand All @@ -355,14 +288,6 @@ size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const
return static_cast<ArrayBigBlobs*>(m_arr)->find_first(as_binary, true, begin, end);
break;
}
case Type::enum_strings: {
size_t sz = m_string_enum_values->size();
size_t res = m_string_enum_values->find_first(value, 0, sz);
if (res != realm::not_found) {
return static_cast<Array*>(m_arr)->find_first(res, begin, end);
}
break;
}
case Type::interned_strings: {
// we need a way to avoid this lookup for each leaf array. The lookup must appear
// higher up the call stack and passed down.
Expand Down Expand Up @@ -420,8 +345,6 @@ size_t ArrayString::lower_bound(StringData value)
return lower_bound_string(static_cast<ArraySmallBlobs*>(m_arr), value);
case Type::big_strings:
return lower_bound_string(static_cast<ArrayBigBlobs*>(m_arr), value);
case Type::enum_strings:
break;
case Type::interned_strings:
REALM_UNREACHABLE();
break;
Expand All @@ -434,9 +357,6 @@ ArrayString::Type ArrayString::upgrade_leaf(size_t value_size)
if (m_type == Type::big_strings)
return Type::big_strings;

if (m_type == Type::enum_strings)
return Type::enum_strings;

if (m_type == Type::interned_strings)
return Type::interned_strings;

Expand Down Expand Up @@ -529,7 +449,6 @@ void ArrayString::verify() const
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->verify();
break;
case Type::enum_strings:
case Type::interned_strings:
static_cast<Array*>(m_arr)->verify();
break;
Expand Down Expand Up @@ -567,7 +486,6 @@ ref_type ArrayString::typed_write(ref_type ref, _impl::ArrayWriterBase& out, All
leaf.destroy_deep(true);
}
else {
// whether it's the old enum strings or the new interned strings,
// just write out the array using integer leaf compression
ret_val = leaf.write(out, false, out.only_modified, out.compress);
}
Expand Down
14 changes: 1 addition & 13 deletions src/realm/array_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,6 @@ class ArrayString : public ArrayPayload {
{
m_string_interner = string_interner;
}
bool need_spec() const override
{
return true;
}
void set_spec(Spec* spec, size_t col_ndx) const override
{
m_spec = spec;
m_col_ndx = col_ndx;
}

void update_parent()
{
Expand All @@ -108,7 +99,6 @@ class ArrayString : public ArrayPayload {
}
void insert(size_t ndx, StringData value);
StringData get(size_t ndx) const;
StringData get_legacy(size_t ndx) const;
Mixed get_any(size_t ndx) const override;
bool is_null(size_t ndx) const;
void erase(size_t ndx);
Expand Down Expand Up @@ -137,16 +127,14 @@ class ArrayString : public ArrayPayload {
static constexpr size_t storage_size =
std::max({sizeof(ArrayStringShort), sizeof(ArraySmallBlobs), sizeof(ArrayBigBlobs), sizeof(Array)});

enum class Type { small_strings, medium_strings, big_strings, enum_strings, interned_strings };
enum class Type { small_strings, medium_strings, big_strings, interned_strings };

Type m_type = Type::small_strings;

Allocator& m_alloc;
alignas(storage_alignment) std::byte m_storage[storage_size];
Array* m_arr;
bool m_nullable = true;
mutable Spec* m_spec = nullptr;
mutable size_t m_col_ndx = realm::npos;
std::unique_ptr<ArrayString> m_string_enum_values;
mutable StringInterner* m_string_interner = nullptr;

Expand Down
53 changes: 3 additions & 50 deletions src/realm/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,7 @@ void Cluster::create()
do_create<ArrayDoubleNull>(col_key);
break;
case col_type_String: {
if (m_tree_top.is_string_enum_type(col_ndx)) {
do_create<ArrayInteger>(col_key);
}
else {
do_create<ArrayString>(col_key);
}
do_create<ArrayString>(col_key);
break;
}
case col_type_Binary:
Expand Down Expand Up @@ -267,17 +262,6 @@ inline void Cluster::set_string_interner(ArrayMixed& arr, ColKey col_key) const
m_tree_top.set_string_interner(arr, col_key);
}

template <class T>
inline void Cluster::set_spec(T&, ColKey::Idx) const
{
}

template <>
inline void Cluster::set_spec(ArrayString& arr, ColKey::Idx col_ndx) const
{
m_tree_top.set_spec(arr, col_ndx);
}

template <class T>
inline void Cluster::do_insert_row(size_t ndx, ColKey col, Mixed init_val, bool nullable)
{
Expand All @@ -286,7 +270,6 @@ inline void Cluster::do_insert_row(size_t ndx, ColKey col, Mixed init_val, bool
T arr(m_alloc);
auto col_ndx = col.get_index();
arr.set_parent(this, col_ndx.val + s_first_col_index);
set_spec<T>(arr, col_ndx);
set_string_interner<T>(arr, col);
arr.init_from_parent();
if (init_val.is_null()) {
Expand Down Expand Up @@ -507,13 +490,9 @@ void Cluster::move(size_t ndx, ClusterNode* new_node, int64_t offset)
case col_type_Double:
do_move<ArrayDouble>(ndx, col_key, new_leaf);
break;
case col_type_String: {
if (m_tree_top.is_string_enum_type(col_key.get_index()))
do_move<ArrayInteger>(ndx, col_key, new_leaf);
else
do_move<ArrayString>(ndx, col_key, new_leaf);
case col_type_String:
do_move<ArrayString>(ndx, col_key, new_leaf);
break;
}
case col_type_Binary:
do_move<ArrayBinary>(ndx, col_key, new_leaf);
break;
Expand Down Expand Up @@ -781,7 +760,6 @@ inline void Cluster::do_erase(size_t ndx, ColKey col_key)
auto col_ndx = col_key.get_index();
T values(m_alloc);
values.set_parent(this, col_ndx.val + s_first_col_index);
set_spec<T>(values, col_ndx);
set_string_interner<T>(values, col_key);
values.init_from_parent();
if constexpr (std::is_same_v<T, ArrayTypedLink>) {
Expand Down Expand Up @@ -1048,26 +1026,6 @@ void Cluster::nullify_incoming_links(RowKey key, CascadeState& state)
m_tree_top.get_owning_table()->for_each_backlink_column(nullify_fwd_links);
}

void Cluster::upgrade_string_to_enum(ColKey col_key, ArrayString& keys)
{
auto col_ndx = col_key.get_index();
Array indexes(m_alloc);
indexes.create(Array::type_Normal, false);
ArrayString values(m_alloc);
ref_type ref = Array::get_as_ref(col_ndx.val + s_first_col_index);
set_string_interner(values, col_key);
values.init_from_ref(ref);
size_t sz = values.size();
for (size_t i = 0; i < sz; i++) {
auto v = values.get(i);
size_t pos = keys.lower_bound(v);
REALM_ASSERT_3(pos, !=, keys.size());
indexes.add(pos);
}
Array::set(col_ndx.val + s_first_col_index, indexes.get_ref());
Array::destroy_deep(ref, m_alloc);
}

void Cluster::init_leaf(ColKey col_key, ArrayPayload* leaf) const
{
auto col_ndx = col_key.get_index();
Expand All @@ -1080,9 +1038,6 @@ void Cluster::init_leaf(ColKey col_key, ArrayPayload* leaf) const
if (leaf->need_string_interner()) {
m_tree_top.set_string_interner(*leaf, col_key);
}
if (leaf->need_spec()) {
m_tree_top.set_spec(*leaf, col_ndx);
}
leaf->init_from_ref(ref);
leaf->set_parent(const_cast<Cluster*>(this), col_ndx.val + 1);
}
Expand All @@ -1098,7 +1053,6 @@ template <typename ArrayType>
void Cluster::verify(ref_type ref, size_t index, util::Optional<size_t>& sz) const
{
ArrayType arr(get_alloc());
set_spec(arr, ColKey::Idx{unsigned(index) - 1});
auto table = get_owning_table();
REALM_ASSERT(index <= table->m_leaf_ndx2colkey.size());
auto col_key = table->m_leaf_ndx2colkey[index - 1];
Expand Down Expand Up @@ -1440,7 +1394,6 @@ void Cluster::dump_objects(int64_t key_offset, std::string lead) const
}
case col_type_String: {
ArrayString arr(m_alloc);
set_spec(arr, col.get_index());
set_string_interner(arr, col);
ref_type ref = Array::get_as_ref(j);
arr.init_from_ref(ref);
Expand Down
1 change: 0 additions & 1 deletion src/realm/cluster.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ class Cluster : public ClusterNode {
size_t get_ndx(RowKey key, size_t ndx) const noexcept override;
size_t erase(RowKey k, CascadeState& state) override;
void nullify_incoming_links(RowKey key, CascadeState& state) override;
void upgrade_string_to_enum(ColKey col, ArrayString& keys);

void init_leaf(ColKey col, ArrayPayload* leaf) const;
void add_leaf(ColKey col, ref_type ref);
Expand Down
Loading

0 comments on commit ce6f196

Please sign in to comment.