Skip to content

Commit

Permalink
RCORE-2065 use compressed string view for quick comparison if the lea…
Browse files Browse the repository at this point in the history
…f is compressed (#7880)

* use CompressedViewString cmp for compressed leaves

* fix asan

* code review

* fix redudant code
  • Loading branch information
nicola-cab authored Jul 16, 2024
1 parent d35c8fc commit aec4ca0
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 19 deletions.
18 changes: 10 additions & 8 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1744,29 +1744,31 @@ class StringNode : public StringNodeBase {
TConditionFunction cond;

for (size_t s = start; s < end; ++s) {
if constexpr (std::is_same_v<TConditionFunction, NotEqual>) {

// special handling for !=, <, <= , >, >= if the leaf is compressed and we have got a compressed string
// id.
if constexpr (realm::is_any_v<TConditionFunction, NotEqual, Greater, Less, GreaterEqual, LessEqual>) {
if (m_leaf->is_compressed()) {
if (m_interned_string_id) {
// The search string has been interned, so there might be a match
// We can compare the string IDs directly
const auto id = m_leaf->get_string_id(s);
if (m_string_interner->compare(*m_interned_string_id, *id) == 0) {
// The value matched, so we continue to the next value
if (cond(m_string_interner->compare(*id, *m_interned_string_id), 0))
return s;
else
continue;
}
}
return s;
}
}

StringData t = get_string(s);
if constexpr (case_sensitive_comparison) {
// case insensitive not implemented for: >, >=, <, <=
if (cond(t, m_string_value))
return s;
}
else {
if (cond(m_string_value, m_ucase.c_str(), m_lcase.c_str(), t))
if (cond(m_string_value, m_ucase.c_str(), m_lcase.c_str(), t)) {
return s;
}
}
}
return not_found;
Expand Down
12 changes: 6 additions & 6 deletions src/realm/string_compressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ int StringCompressor::compare(CompressedStringView& A, CompressedStringView& B)
// symbols did not match:
// 1. both symbols are single characters
if (code_A < 256 && code_B < 256)
return code_B - code_A;
return code_A - code_B;
std::string a_str(code_A, 1);
auto str_A = std::string_view(code_A < 256 ? a_str : m_symbols[code_A - 256].expansion);
std::string b_str(code_B, 1);
Expand All @@ -302,17 +302,17 @@ int StringCompressor::compare(CompressedStringView& A, CompressedStringView& B)
StringData sd_b(str_B.data(), str_B.size());
REALM_ASSERT_DEBUG(sd_a != sd_b);
if (sd_a < sd_b)
return 1;
else
return -1;
else
return 1;
}
// The compressed strings are identical or one is the prefix of the other
return B.size - A.size;
return static_cast<int>(A.size - B.size);
// ^ a faster way of producing same positive / negative / zero as:
// if (A.size() < B.size())
// return 1;
// if (A.size() > B.size())
// return -1;
// if (A.size() > B.size())
// return 1;
// return 0;
}

Expand Down
4 changes: 2 additions & 2 deletions src/realm/string_interner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,9 +642,9 @@ int StringInterner::compare(StringData s, StringID A)
if (s.data() == nullptr && A == 0)
return 0;
if (s.data() == nullptr)
return 1;
if (A == 0)
return -1;
if (A == 0)
return 1;
// ok, no nulls
REALM_ASSERT(m_compressor);
return m_compressor->compare(s, get_compressed(A));
Expand Down
1 change: 0 additions & 1 deletion test/test_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,6 @@ See TEST(StringData_Substrings) for more unit tests for null, isolated to using
columns or queries involved
*/


TEST_TYPES(Query_NextGen_StringConditions, std::true_type, std::false_type)
{
SHARED_GROUP_TEST_PATH(path);
Expand Down
85 changes: 83 additions & 2 deletions test/test_string_compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,83 @@ TEST(StringInterner_TestLookup)
parent.destroy_deep();
}

TEST(StringInterner_VerifyComparison)
{
Array parent(Allocator::get_default());
parent.create(NodeHeader::type_HasRefs, false, 1, 0);
StringInterner interner(Allocator::get_default(), parent, ColKey(0), true);

auto null_id = interner.intern({});
auto test_lower_case_id = interner.intern({"test"});
auto test_upper_case_id = interner.intern({"TEST"});

// check NULL vs empty string
auto res = interner.compare("", null_id);
CHECK_GREATER(StringData(""), StringData());
CHECK_EQUAL(res, 1);

// check that NULL filtering actually works
res = interner.compare(test_lower_case_id, null_id);
CHECK_GREATER(interner.get(test_lower_case_id), StringData());
CHECK_EQUAL(res, 1);

res = interner.compare(null_id, test_lower_case_id);
CHECK_LESS(StringData(), interner.get(test_lower_case_id));
CHECK_EQUAL(res, -1);

//"aaa" < "test"
res = interner.compare({"aaa"}, test_lower_case_id);
CHECK_LESS(StringData("aaa"), interner.get(test_lower_case_id));
CHECK_EQUAL(res, -1);

//"zzz" > "test"
res = interner.compare({"zzz"}, test_lower_case_id);
CHECK_GREATER(StringData("zzz"), interner.get(test_lower_case_id));
CHECK_EQUAL(res, 1);

//"AAA" < "test"
res = interner.compare({"AAA"}, test_lower_case_id);
CHECK_LESS(StringData("AAA"), interner.get(test_lower_case_id));
CHECK_EQUAL(res, -1);

//"ZZZ" < "test"
res = interner.compare({"ZZZ"}, test_lower_case_id);
CHECK_LESS(StringData("ZZZ"), interner.get(test_lower_case_id));
CHECK_EQUAL(res, -1);

//"aaa" > "TEST"
res = interner.compare({"aaa"}, test_upper_case_id);
CHECK_GREATER(StringData("aaa"), interner.get(test_upper_case_id));
CHECK_EQUAL(res, 1);

//"zzz" > "TEST"
res = interner.compare({"zzz"}, test_upper_case_id);
CHECK_GREATER(StringData("zzz"), interner.get(test_upper_case_id));
CHECK_EQUAL(res, 1);

//"AAA" < "TEST"
res = interner.compare({"AAA"}, test_upper_case_id);
CHECK_LESS(StringData("AAA"), interner.get(test_upper_case_id));
CHECK_EQUAL(res, -1);

//"ZZZ" > "TEST"
res = interner.compare({"ZZZ"}, test_upper_case_id);
CHECK_GREATER(StringData("ZZZ"), interner.get(test_upper_case_id));
CHECK_EQUAL(res, 1);

// test > TEST
res = interner.compare(test_lower_case_id, test_upper_case_id);
CHECK_GREATER(interner.get(test_lower_case_id), interner.get(test_upper_case_id));
CHECK_EQUAL(res, 1);

// TEST < test
res = interner.compare(test_upper_case_id, test_lower_case_id);
CHECK_LESS(interner.get(test_upper_case_id), interner.get(test_lower_case_id));
CHECK_EQUAL(res, -1);

parent.destroy_deep();
}

TEST(StringInterner_VerifyInterningNull)
{
Array parent(Allocator::get_default());
Expand All @@ -115,13 +192,17 @@ TEST(StringInterner_VerifyInterningNull)
// interned string id vs null id
auto str_id = interner.intern(StringData("test"));
CHECK_EQUAL(interner.compare(str_id, null_id), 1);
CHECK_GREATER(interner.get(str_id), interner.get(null_id)); // compare via StringData
// null id vs interned string id
CHECK_EQUAL(interner.compare(null_id, str_id), -1);
CHECK_LESS(interner.get(null_id), interner.get(str_id));

// comparison String vs StringID
CHECK_EQUAL(interner.compare(StringData{}, null_id), 0);
CHECK_EQUAL(interner.compare(StringData{}, str_id), 1);
CHECK_EQUAL(interner.compare(StringData{"test"}, null_id), -1);
CHECK_EQUAL(interner.compare(StringData{}, str_id), -1);
CHECK_LESS(StringData{}, interner.get(str_id)); // compare via StringData
CHECK_EQUAL(interner.compare(StringData{"test"}, null_id), 1);
CHECK_GREATER(StringData{"test"}, interner.get(null_id));

parent.destroy_deep();
}
Expand Down

0 comments on commit aec4ca0

Please sign in to comment.