-
Notifications
You must be signed in to change notification settings - Fork 168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Index on list of strings #7142
Index on list of strings #7142
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ | |
#include "realm/table_view.hpp" | ||
#include "realm/group.hpp" | ||
#include "realm/replication.hpp" | ||
#include "realm/index_string.hpp" | ||
|
||
namespace realm { | ||
|
||
|
@@ -192,6 +193,77 @@ void Lst<T>::distinct(std::vector<size_t>& indices, util::Optional<bool> sort_or | |
} | ||
} | ||
|
||
/***************************** Lst<Stringdata> ******************************/ | ||
|
||
template <> | ||
void Lst<StringData>::do_insert(size_t ndx, StringData value) | ||
{ | ||
if (auto index = m_obj.get_table()->get_search_index(m_col_key)) { | ||
if (m_tree->find_first(value) == realm::not_found) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose the idea behind handling duplicates at this level is to optimize the find operation. But the cost of O(N) on insert/set/remove seems very high. What would you think about allowing duplicates into the index to make these operations faster and less error prone? Then we would have a small sort/unique cost applied in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll give it a try. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a bit wary of changing the invariant that there are no duplicate object keys in the index. I have now changed the code so that inserting a value twice is idempotent, so at least we can avoid the O(N) behavior on insertions. Would that be an acceptable compromise? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm good with that. Thanks! |
||
// Value not present - insert | ||
index->insert(m_obj.get_key(), value); | ||
} | ||
} | ||
m_tree->insert(ndx, value); | ||
} | ||
|
||
template <> | ||
void Lst<StringData>::do_set(size_t ndx, StringData value) | ||
{ | ||
if (auto index = m_obj.get_table()->get_search_index(m_col_key)) { | ||
auto old_value = m_tree->get(ndx); | ||
size_t nb_old = 0; | ||
size_t nb_new = 0; | ||
m_tree->for_all([&](StringData val) { | ||
if (val == old_value) { | ||
nb_old++; | ||
} | ||
if (val == value) { | ||
nb_new++; | ||
} | ||
return !(nb_new && nb_old > 1); | ||
}); | ||
|
||
if (nb_old == 1) { | ||
index->erase_string(m_obj.get_key(), old_value); | ||
} | ||
if (!nb_new) { | ||
// Value not present - insert | ||
index->insert(m_obj.get_key(), value); | ||
} | ||
} | ||
m_tree->set(ndx, value); | ||
} | ||
|
||
template <> | ||
inline void Lst<StringData>::do_remove(size_t ndx) | ||
{ | ||
if (auto index = m_obj.get_table()->get_search_index(m_col_key)) { | ||
auto old_value = m_tree->get(ndx); | ||
size_t nb_old = 0; | ||
m_tree->for_all([&](StringData val) { | ||
if (val == old_value) { | ||
nb_old++; | ||
} | ||
return !(nb_old > 1); | ||
}); | ||
|
||
if (nb_old == 1) { | ||
index->erase_string(m_obj.get_key(), old_value); | ||
} | ||
} | ||
m_tree->erase(ndx); | ||
} | ||
|
||
template <> | ||
inline void Lst<StringData>::do_clear() | ||
{ | ||
if (auto index = m_obj.get_table()->get_search_index(m_col_key)) { | ||
index->erase_list(m_obj.get_key()); | ||
} | ||
m_tree->clear(); | ||
} | ||
|
||
/********************************* Lst<Key> *********************************/ | ||
|
||
template <> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good idea to share this logic with the fulltext index 👍