diff --git a/Package.swift b/Package.swift index 55a07398f7b..1a2e581eddb 100644 --- a/Package.swift +++ b/Package.swift @@ -52,6 +52,7 @@ let notSyncServerSources: [String] = [ "realm/array_blobs_small.cpp", "realm/array_decimal128.cpp", "realm/array_fixed_bytes.cpp", + "realm/array_aggregate_optimizations.cpp", "realm/array_integer.cpp", "realm/array_key.cpp", "realm/array_mixed.cpp", @@ -78,6 +79,9 @@ let notSyncServerSources: [String] = [ "realm/group.cpp", "realm/group_writer.cpp", "realm/history.cpp", + "realm/integer_compressor.cpp", + "realm/integer_flex_compressor.cpp", + "realm/integer_packed_compressor.cpp", "realm/impl", "realm/index_string.cpp", "realm/link_translator.cpp", diff --git a/evergreen/config.yml b/evergreen/config.yml index 8b0fed20697..debf15fd1fa 100644 --- a/evergreen/config.yml +++ b/evergreen/config.yml @@ -137,6 +137,10 @@ functions: set_cmake_var realm_vars REALM_LLVM_COVERAGE BOOL On fi + if [[ -n "${compress|}" ]]; then + set_cmake_var realm_vars REALM_COMPRESS PATH "${cmake_toolchain_file}" + fi + set_cmake_var realm_vars REALM_BUILD_COMMANDLINE_TOOLS BOOL "${build_command_line_tools|On}" set_cmake_var realm_vars REALM_ENABLE_ENCRYPTION BOOL "${enable_realm_encryption|On}" if [[ -n "${compress|}" ]]; then diff --git a/src/realm/CMakeLists.txt b/src/realm/CMakeLists.txt index b5aebd5d3bf..18583f3549a 100644 --- a/src/realm/CMakeLists.txt +++ b/src/realm/CMakeLists.txt @@ -13,6 +13,7 @@ set(REALM_SOURCES array_blobs_big.cpp array_decimal128.cpp array_fixed_bytes.cpp + array_aggregate_optimizations.cpp array_integer.cpp array_key.cpp array_mixed.cpp @@ -36,6 +37,9 @@ set(REALM_SOURCES db.cpp group_writer.cpp history.cpp + integer_compressor.cpp + integer_flex_compressor.cpp + integer_packed_compressor.cpp impl/copy_replication.cpp impl/output_stream.cpp impl/simulated_failure.cpp @@ -163,6 +167,9 @@ set(REALM_INSTALL_HEADERS handover_defs.hpp history.hpp index_string.hpp + integer_compressor.hpp + integer_flex_compressor.hpp + integer_packed_compressor.hpp keys.hpp list.hpp mixed.hpp diff --git a/src/realm/alloc_slab.cpp b/src/realm/alloc_slab.cpp index 24b122e50d6..5465603c882 100644 --- a/src/realm/alloc_slab.cpp +++ b/src/realm/alloc_slab.cpp @@ -388,6 +388,10 @@ SlabAlloc::FreeBlock* SlabAlloc::allocate_block(int size) if (remaining) push_freelist_entry(remaining); REALM_ASSERT_EX(size_from_block(block) >= size, size_from_block(block), size, get_file_path_for_assertions()); + const auto block_before = bb_before(block); + REALM_ASSERT_DEBUG(block_before && block_before->block_after_size >= size); + const auto after_block_size = size_from_block(block); + REALM_ASSERT_DEBUG(after_block_size >= size); return block; } diff --git a/src/realm/array.cpp b/src/realm/array.cpp index 2f96b15877d..be70388bb2b 100644 --- a/src/realm/array.cpp +++ b/src/realm/array.cpp @@ -42,7 +42,6 @@ #pragma warning(disable : 4127) // Condition is constant warning #endif - // Header format (8 bytes): // ------------------------ // @@ -190,38 +189,79 @@ using namespace realm::util; void QueryStateBase::dyncast() {} -size_t Array::bit_width(int64_t v) +uint8_t Array::bit_width(int64_t v) { // FIXME: Assuming there is a 64-bit CPU reverse bitscan // instruction and it is fast, then this function could be // implemented as a table lookup on the result of the scan - if ((uint64_t(v) >> 4) == 0) { static const int8_t bits[] = {0, 1, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; return bits[int8_t(v)]; } - - // First flip all bits if bit 63 is set (will now always be zero) if (v < 0) v = ~v; 
-    // Then check if bits 15-31 used (32b), 7-31 used (16b), else (8b)
     return uint64_t(v) >> 31 ? 64 : uint64_t(v) >> 15 ? 32 : uint64_t(v) >> 7 ? 16 : 8;
 }
 
+template <size_t width>
+struct Array::VTableForWidth {
+    struct PopulatedVTable : VTable {
+        PopulatedVTable()
+        {
+            getter = &Array::get<width>;
+            setter = &Array::set<width>;
+            chunk_getter = &Array::get_chunk<width>;
+            finder[cond_Equal] = &Array::find_vtable<Equal>;
+            finder[cond_NotEqual] = &Array::find_vtable<NotEqual>;
+            finder[cond_Greater] = &Array::find_vtable<Greater>;
+            finder[cond_Less] = &Array::find_vtable<Less>;
+        }
+    };
+    static const PopulatedVTable vtable;
+};
+
+template <size_t width>
+const typename Array::VTableForWidth<width>::PopulatedVTable Array::VTableForWidth<width>::vtable;
+
 void Array::init_from_mem(MemRef mem) noexcept
 {
-    char* header = Node::init_from_mem(mem);
-    // Parse header
+    // `header` is the kind of header that was actually allocated. When we are
+    // decompressing, the header is of kind A, which partly defeats the purpose of the
+    // checks below: we fetch data from the just-initialised header, yet fields that only
+    // matter for type-A arrays (width, lower/upper bound) are never reset, even though
+    // they are used both for expanding the array and for querying the data.
+    const auto header = mem.get_addr();
+    const auto is_extended = m_integer_compressor.init(header);
+
+    m_is_inner_bptree_node = get_is_inner_bptree_node_from_header(header);
     m_has_refs = get_hasrefs_from_header(header);
     m_context_flag = get_context_flag_from_header(header);
-    update_width_cache_from_header();
+
+    if (is_extended) {
+        m_ref = mem.get_ref();
+        m_data = get_data_from_header(header);
+        m_size = m_integer_compressor.size();
+        m_width = m_integer_compressor.v_width();
+        m_lbound = -m_integer_compressor.v_mask();
+        m_ubound = m_integer_compressor.v_mask() - 1;
+        m_integer_compressor.set_vtable(*this);
+        m_getter = m_vtable->getter;
+    }
+    else {
+        // Old init phase.
+        Node::init_from_mem(mem);
+        update_width_cache_from_header();
+    }
+}
+
+MemRef Array::get_mem() const noexcept
+{
+    return MemRef(get_header_from_data(m_data), m_ref, m_alloc);
 }
 
 void Array::update_from_parent() noexcept
 {
-    REALM_ASSERT_DEBUG(is_attached());
     ArrayParent* parent = get_parent();
     REALM_ASSERT_DEBUG(parent);
     ref_type new_ref = get_ref_from_parent();
@@ -230,7 +270,7 @@ void Array::update_from_parent() noexcept
 
 void Array::set_type(Type type)
 {
-    REALM_ASSERT(is_attached());
+    REALM_ASSERT_DEBUG(is_attached());
 
     copy_on_write(); // Throws
 
@@ -254,7 +294,6 @@ void Array::set_type(Type type)
         set_hasrefs_in_header(init_has_refs, header);
 }
 
-
 void Array::destroy_children(size_t offset) noexcept
 {
     for (size_t i = offset; i != m_size; ++i) {
@@ -275,15 +314,28 @@ void Array::destroy_children(size_t offset) noexcept
     }
 }
 
+// size_t Array::get_byte_size() const noexcept
+//{
+//    const auto header = get_header();
+//    auto num_bytes = get_byte_size_from_header(header);
+//    auto read_only = m_alloc.is_read_only(m_ref) == true;
+//    auto capacity = get_capacity_from_header(header);
+//    auto bytes_ok = num_bytes <= capacity;
+//    REALM_ASSERT(read_only || bytes_ok);
+//    REALM_ASSERT_7(m_alloc.is_read_only(m_ref), ==, true, ||, num_bytes, <=, get_capacity_from_header(header));
+//    return num_bytes;
+// }
 
 ref_type Array::do_write_shallow(_impl::ArrayWriterBase& out) const
 {
-    // Write flat array
+    // This is where we may compress the array before writing it out.
     const char* header = get_header_from_data(m_data);
     size_t byte_size = get_byte_size();
-    uint32_t dummy_checksum = 0x41414141UL;                                // "AAAA" in ASCII
-    ref_type new_ref = out.write_array(header, byte_size, dummy_checksum); // Throws
-    REALM_ASSERT_3(new_ref % 8, ==, 0);                                    // 8-byte alignment
+    const auto compressed = is_compressed();
+    uint32_t dummy_checksum = compressed ? 0x42424242UL : 0x41414141UL; // "BBBB" / "AAAA" in ASCII
+    uint32_t dummy_checksum_bytes = compressed ? 2 : 4; // extended (compressed) arrays only write 2 checksum bytes
+    ref_type new_ref = out.write_array(header, byte_size, dummy_checksum, dummy_checksum_bytes); // Throws
+    REALM_ASSERT_3(new_ref % 8, ==, 0); // 8-byte alignment
     return new_ref;
 }
 
@@ -308,7 +360,6 @@ ref_type Array::do_write_deep(_impl::ArrayWriterBase& out, bool only_if_modified
         }
         new_array.add(value); // Throws
     }
-
     return new_array.do_write_shallow(out); // Throws
 }
 
@@ -333,8 +384,8 @@ void Array::move(size_t begin, size_t end, size_t dest_begin)
     if (bits_per_elem < 8) {
         // FIXME: Should be optimized
         for (size_t i = begin; i != end; ++i) {
-            int_fast64_t v = (this->*m_getter)(i);
-            (this->*(m_vtable->setter))(dest_begin++, v);
+            int_fast64_t v = m_getter(*this, i);
+            m_vtable->setter(*this, dest_begin++, v);
         }
         return;
     }
@@ -360,8 +411,8 @@ void Array::move(Array& dst, size_t ndx)
 
     size_t sz = m_size;
     for (size_t i = ndx; i < sz; i++) {
-        auto v = (this->*getter)(i);
-        (dst.*setter)(dest_begin++, v);
+        auto v = getter(*this, i);
+        setter(dst, dest_begin++, v);
     }
 
     truncate(ndx);
@@ -370,17 +421,15 @@ void Array::move(Array& dst, size_t ndx)
 void Array::set(size_t ndx, int64_t value)
 {
     REALM_ASSERT_3(ndx, <, m_size);
-    if ((this->*(m_vtable->getter))(ndx) == value)
+    if (m_vtable->getter(*this, ndx) == value)
         return;
 
     // Check if we need to copy before modifying
     copy_on_write(); // Throws
-
     // Grow the array if needed to store this value
     ensure_minimum_width(value); // Throws
-
     // Set the value
-    (this->*(m_vtable->setter))(ndx, value);
+    m_vtable->setter(*this, ndx, value);
 }
 
 void Array::set_as_ref(size_t ndx, ref_type ref)
@@ -428,6 +477,7 @@ void Array::insert(size_t ndx, int_fast64_t value)
 {
     REALM_ASSERT_DEBUG(ndx <= m_size);
 
+    decompress_array(*this);
     const auto old_width = m_width;
     const auto old_size = m_size;
     const Getter old_getter = m_getter; // Save old getter before potential width expansion
@@ -447,8 +497,8 @@ void Array::insert(size_t ndx, int_fast64_t value)
         size_t i = old_size;
         while (i > ndx) {
             --i;
-            int64_t v = (this->*old_getter)(i);
-            (this->*(m_vtable->setter))(i + 1, v);
+            int64_t v = old_getter(*this, i);
+            m_vtable->setter(*this, i + 1, v);
         }
     }
     else if (ndx != old_size) {
@@ -462,19 +512,30 @@ void Array::insert(size_t ndx, int_fast64_t value)
     }
 
     // Insert the new value
-    (this->*(m_vtable->setter))(ndx, value);
+    m_vtable->setter(*this, ndx, value);
 
     // Expand values above insertion
     if (do_expand) {
        size_t i = ndx;
        while (i != 0) {
            --i;
-            int64_t v = (this->*old_getter)(i);
-            (this->*(m_vtable->setter))(i, v);
+            int64_t v = old_getter(*this, i);
+            m_vtable->setter(*this, i, v);
        }
    }
 }
 
+void Array::copy_on_write()
+{
+    if (is_read_only() && !decompress_array(*this))
+        Node::copy_on_write();
+}
+
+void Array::copy_on_write(size_t min_size)
+{
+    if (is_read_only() && !decompress_array(*this))
+        Node::copy_on_write(min_size);
+}
 
 void Array::truncate(size_t new_size)
 {
@@ -499,7 +560,6 @@ void Array::truncate(size_t new_size)
     }
 }
 
-
 void Array::truncate_and_destroy_children(size_t new_size)
 {
     REALM_ASSERT(is_attached());
@@ -528,10 +588,8 @@ void Array::truncate_and_destroy_children(size_t new_size)
     }
 }
 
-
 void Array::do_ensure_minimum_width(int_fast64_t value)
 {
-
     // Make room for the new value
     const size_t width = bit_width(value);
@@ -544,353 +602,32 @@ void Array::do_ensure_minimum_width(int_fast64_t value)
     size_t i = m_size;
     while (i != 0) {
         --i;
-        int64_t v = (this->*old_getter)(i);
-        (this->*(m_vtable->setter))(i, v);
+        int64_t v = old_getter(*this, i);
+        m_vtable->setter(*this, i, v);
     }
 }
 
-int64_t Array::sum(size_t start, size_t end) const
+bool Array::compress_array(Array& arr) const
 {
-    REALM_TEMPEX(return sum, m_width, (start, end));
+    if (m_integer_compressor.get_encoding() == NodeHeader::Encoding::WTypBits) {
+        return m_integer_compressor.compress(*this, arr);
+    }
+    return false;
 }
 
-template <size_t w>
-int64_t Array::sum(size_t start, size_t end) const
+bool Array::decompress_array(Array& arr) const
 {
-    if (end == size_t(-1))
-        end = m_size;
-    REALM_ASSERT_EX(end <= m_size && start <= end, start, end, m_size);
-
-    if (w == 0 || start == end)
-        return 0;
-
-    int64_t s = 0;
-
-    // Sum manually until 128 bit aligned
-    for (; (start < end) && (((size_t(m_data) & 0xf) * 8 + start * w) % 128 != 0); start++) {
-        s += get<w>(start);
-    }
-
-    if (w == 1 || w == 2 || w == 4) {
-        // Sum of bitwidths less than a byte (which are always positive)
-        // uses a divide and conquer algorithm that is a variation of popolation count:
-        // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-
-        // static values needed for fast sums
-        const uint64_t m2 = 0x3333333333333333ULL;
-        const uint64_t m4 = 0x0f0f0f0f0f0f0f0fULL;
-        const uint64_t h01 = 0x0101010101010101ULL;
-
-        int64_t* data = reinterpret_cast<int64_t*>(m_data + start * w / 8);
-        size_t chunks = (end - start) * w / 8 / sizeof(int64_t);
-
-        for (size_t t = 0; t < chunks; t++) {
-            if (w == 1) {
-#if 0
-#if defined(USE_SSE42) && defined(_MSC_VER) && defined(REALM_PTR_64)
-                s += __popcnt64(data[t]);
-#elif !defined(_MSC_VER) && defined(USE_SSE42) && defined(REALM_PTR_64)
-                s += __builtin_popcountll(data[t]);
-#else
-                uint64_t a = data[t];
-                const uint64_t m1 = 0x5555555555555555ULL;
-                a -= (a >> 1) & m1;
-                a = (a & m2) + ((a >> 2) & m2);
-                a = (a + (a >> 4)) & m4;
-                a = (a * h01) >> 56;
-                s += a;
-#endif
-#endif
-                s += fast_popcount64(data[t]);
-            }
-            else if (w == 2) {
-                uint64_t a = data[t];
-                a = (a & m2) + ((a >> 2) & m2);
-                a = (a + (a >> 4)) & m4;
-                a = (a * h01) >> 56;
-
-                s += a;
-            }
-            else if (w == 4) {
-                uint64_t a = data[t];
-                a = (a & m4) + ((a >> 4) & m4);
-                a = (a * h01) >> 56;
-                s += a;
-            }
-        }
-        start += sizeof(int64_t) * 8 / no0(w) * chunks;
-    }
-
-#ifdef REALM_COMPILER_SSE
-    if (sseavx<42>()) {
-        // 2000 items summed 500000 times, 8/16/32 bits, miliseconds:
-        // Naive, templated get<>: 391 371 374
-        // SSE: 97 148 282
-
-        if ((w == 8 || w == 16 || w == 32) && end - start > sizeof(__m128i) * 8 / no0(w)) {
-            __m128i* data = reinterpret_cast<__m128i*>(m_data + start * w / 8);
-            __m128i sum_result = {0};
-            __m128i sum2;
-
-            size_t chunks = (end - start) * w / 8 / sizeof(__m128i);
-
-            for (size_t t = 0; t < chunks; t++) {
-                if (w == 8) {
-                    /*
-                    // 469 ms AND disadvantage of handling max 64k elements before overflow
-                    __m128i vl = _mm_cvtepi8_epi16(data[t]);
-                    __m128i vh = data[t];
-                    vh.m128i_i64[0] = vh.m128i_i64[1];
-                    vh = _mm_cvtepi8_epi16(vh);
-                    sum_result = _mm_add_epi16(sum_result, vl);
-                    sum_result = _mm_add_epi16(sum_result, vh);
-                    */
-
-                    /*
-                    // 424 ms
-                    __m128i vl = _mm_unpacklo_epi8(data[t], _mm_set1_epi8(0));
-                    __m128i vh = _mm_unpackhi_epi8(data[t], _mm_set1_epi8(0));
-                    sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vl, _mm_set1_epi16(1)));
-                    sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vh, _mm_set1_epi16(1)));
-                    */
-
-                    __m128i vl = _mm_cvtepi8_epi16(data[t]); // sign extend lower words 8->16
-                    __m128i vh = data[t];
-                    vh = _mm_srli_si128(vh, 8); // v >>= 64
-                    vh = _mm_cvtepi8_epi16(vh); // sign extend lower words 8->16
-                    __m128i sum1 = _mm_add_epi16(vl, vh);
-                    __m128i sumH = _mm_cvtepi16_epi32(sum1);
-                    __m128i sumL = _mm_srli_si128(sum1, 8); // v >>= 64
-                    sumL = _mm_cvtepi16_epi32(sumL);
-                    sum_result = _mm_add_epi32(sum_result, sumL);
-                    sum_result = _mm_add_epi32(sum_result, sumH);
-                }
-                else if (w == 16) {
-                    // todo, can overflow for array size > 2^32
-                    __m128i vl = _mm_cvtepi16_epi32(data[t]); // sign extend lower words 16->32
-                    __m128i vh = data[t];
-                    vh = _mm_srli_si128(vh, 8); // v >>= 64
-                    vh = _mm_cvtepi16_epi32(vh); // sign extend lower words 16->32
-                    sum_result = _mm_add_epi32(sum_result, vl);
-                    sum_result = _mm_add_epi32(sum_result, vh);
-                }
-                else if (w == 32) {
-                    __m128i v = data[t];
-                    __m128i v0 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64
-                    v = _mm_srli_si128(v, 8);           // v >>= 64
-                    __m128i v1 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64
-                    sum_result = _mm_add_epi64(sum_result, v0);
-                    sum_result = _mm_add_epi64(sum_result, v1);
-
-                    /*
-                    __m128i m = _mm_set1_epi32(0xc000); // test if overflow could happen (still need
-                    underflow test).
-                    __m128i mm = _mm_and_si128(data[t], m);
-                    zz = _mm_or_si128(mm, zz);
-                    sum_result = _mm_add_epi32(sum_result, data[t]);
-                    */
-                }
-            }
-            start += sizeof(__m128i) * 8 / no0(w) * chunks;
-
-            // prevent taking address of 'state' to make the compiler keep it in SSE register in above loop
-            // (vc2010/gcc4.6)
-            sum2 = sum_result;
-
-            // Avoid aliasing bug where sum2 might not yet be initialized when accessed by get_universal
-            char sum3[sizeof sum2];
-            memcpy(&sum3, &sum2, sizeof sum2);
-
-            // Sum elements of sum
-            for (size_t t = 0; t < sizeof(__m128i) * 8 / ((w == 8 || w == 16) ? 32 : 64); ++t) {
-                int64_t v = get_universal < (w == 8 || w == 16) ? 32 : 64 > (reinterpret_cast<char*>(&sum3), t);
-                s += v;
-            }
-        }
-    }
-#endif
-
-    // Sum remaining elements
-    for (; start < end; ++start)
-        s += get<w>(start);
-
-    return s;
+    return arr.is_compressed() ? m_integer_compressor.decompress(arr) : false;
 }
 
-size_t Array::count(int64_t value) const noexcept
+bool Array::try_compress(Array& arr) const
 {
-    const uint64_t* next = reinterpret_cast<const uint64_t*>(m_data);
-    size_t value_count = 0;
-    const size_t end = m_size;
-    size_t i = 0;
-
-    // static values needed for fast population count
-    const uint64_t m1 = 0x5555555555555555ULL;
-    const uint64_t m2 = 0x3333333333333333ULL;
-    const uint64_t m4 = 0x0f0f0f0f0f0f0f0fULL;
-    const uint64_t h01 = 0x0101010101010101ULL;
-
-    if (m_width == 0) {
-        if (value == 0)
-            return m_size;
-        return 0;
-    }
-    if (m_width == 1) {
-        if (uint64_t(value) > 1)
-            return 0;
-
-        const size_t chunkvals = 64;
-        for (; i + chunkvals <= end; i += chunkvals) {
-            uint64_t a = next[i / chunkvals];
-            if (value == 0)
-                a = ~a; // reverse
-
-            a -= (a >> 1) & m1;
-            a = (a & m2) + ((a >> 2) & m2);
-            a = (a + (a >> 4)) & m4;
-            a = (a * h01) >> 56;
-
-            // Could use intrinsic instead:
-            // a = __builtin_popcountll(a); // gcc intrinsic
-
-            value_count += to_size_t(a);
-        }
-    }
-    else if (m_width == 2) {
-        if (uint64_t(value) > 3)
-            return 0;
-
-        const uint64_t v = ~0ULL / 0x3 * value;
-
-        // Masks to avoid spillover between segments in cascades
-        const uint64_t c1 = ~0ULL / 0x3 * 0x1;
-
-        const size_t chunkvals = 32;
-        for (; i + chunkvals <= end; i += chunkvals) {
-            uint64_t a = next[i / chunkvals];
-            a ^= v;             // zero matching bit segments
-            a |= (a >> 1) & c1; // cascade ones in non-zeroed segments
-            a &= m1;            // isolate single bit in each segment
-            a ^= m1;            // reverse isolated bits
-            // if (!a) continue;
-
-            // Population count
-            a = (a & m2) + ((a >> 2) & m2);
-            a = (a + (a >> 4)) & m4;
-            a = (a * h01) >> 56;
-
-            value_count += to_size_t(a);
-        }
-    }
-    else if (m_width == 4) {
-        if (uint64_t(value) > 15)
-            return 0;
-
-        const uint64_t v = ~0ULL / 0xF * value;
-        const uint64_t m = ~0ULL / 0xF * 0x1;
-
-        // Masks to avoid spillover between segments in cascades
-        const uint64_t c1 = ~0ULL / 0xF * 0x7;
-        const uint64_t c2 = ~0ULL / 0xF * 0x3;
-
-        const size_t chunkvals = 16;
-        for (; i + chunkvals <= end; i += chunkvals) {
-            uint64_t a = next[i / chunkvals];
-            a ^= v;             // zero matching bit segments
-            a |= (a >> 1) & c1; // cascade ones in non-zeroed segments
-            a |= (a >> 2) & c2;
-            a &= m; // isolate single bit in each segment
-            a ^= m; // reverse isolated bits
-
-            // Population count
-            a = (a + (a >> 4)) & m4;
-            a = (a * h01) >> 56;
-
-            value_count += to_size_t(a);
-        }
-    }
-    else if (m_width == 8) {
-        if (value > 0x7FLL || value < -0x80LL)
-            return 0; // by casting?
-
-        const uint64_t v = ~0ULL / 0xFF * value;
-        const uint64_t m = ~0ULL / 0xFF * 0x1;
-
-        // Masks to avoid spillover between segments in cascades
-        const uint64_t c1 = ~0ULL / 0xFF * 0x7F;
-        const uint64_t c2 = ~0ULL / 0xFF * 0x3F;
-        const uint64_t c3 = ~0ULL / 0xFF * 0x0F;
-
-        const size_t chunkvals = 8;
-        for (; i + chunkvals <= end; i += chunkvals) {
-            uint64_t a = next[i / chunkvals];
-            a ^= v;             // zero matching bit segments
-            a |= (a >> 1) & c1; // cascade ones in non-zeroed segments
-            a |= (a >> 2) & c2;
-            a |= (a >> 4) & c3;
-            a &= m; // isolate single bit in each segment
-            a ^= m; // reverse isolated bits
-
-            // Population count
-            a = (a * h01) >> 56;
-
-            value_count += to_size_t(a);
-        }
-    }
-    else if (m_width == 16) {
-        if (value > 0x7FFFLL || value < -0x8000LL)
-            return 0; // by casting?
-
-        const uint64_t v = ~0ULL / 0xFFFF * value;
-        const uint64_t m = ~0ULL / 0xFFFF * 0x1;
-
-        // Masks to avoid spillover between segments in cascades
-        const uint64_t c1 = ~0ULL / 0xFFFF * 0x7FFF;
-        const uint64_t c2 = ~0ULL / 0xFFFF * 0x3FFF;
-        const uint64_t c3 = ~0ULL / 0xFFFF * 0x0FFF;
-        const uint64_t c4 = ~0ULL / 0xFFFF * 0x00FF;
-
-        const size_t chunkvals = 4;
-        for (; i + chunkvals <= end; i += chunkvals) {
-            uint64_t a = next[i / chunkvals];
-            a ^= v;             // zero matching bit segments
-            a |= (a >> 1) & c1; // cascade ones in non-zeroed segments
-            a |= (a >> 2) & c2;
-            a |= (a >> 4) & c3;
-            a |= (a >> 8) & c4;
-            a &= m; // isolate single bit in each segment
-            a ^= m; // reverse isolated bits
-
-            // Population count
-            a = (a * h01) >> 56;
-
-            value_count += to_size_t(a);
-        }
-    }
-    else if (m_width == 32) {
-        int32_t v = int32_t(value);
-        const int32_t* d = reinterpret_cast<const int32_t*>(m_data);
-        for (; i < end; ++i) {
-            if (d[i] == v)
-                ++value_count;
-        }
-        return value_count;
-    }
-    else if (m_width == 64) {
-        const int64_t* d = reinterpret_cast<const int64_t*>(m_data);
-        for (; i < end; ++i) {
-            if (d[i] == value)
-                ++value_count;
-        }
-        return value_count;
-    }
-
-    // Check remaining elements
-    for (; i < end; ++i)
-        if (value == get(i))
-            ++value_count;
+    return compress_array(arr);
+}
 
-    return value_count;
+bool Array::try_decompress()
+{
+    return decompress_array(*this);
 }
 
 size_t Array::calc_aligned_byte_size(size_t size, int width)
@@ -990,9 +727,9 @@ MemRef Array::create(Type type, bool context_flag, WidthType width_type, size_t
 {
     REALM_ASSERT_DEBUG(value == 0 || width_type == wtype_Bits);
     REALM_ASSERT_DEBUG(size == 0 || width_type != wtype_Ignore);
-    int width = 0;
+    uint8_t width = 0;
     if (value != 0)
-        width = static_cast<int>(bit_width(value));
+        width = bit_width(value);
     auto mem = Node::create_node(size, alloc, context_flag, type, width_type, width);
     if (value != 0) {
         const auto header = mem.get_addr();
@@ -1004,52 +741,32 @@ MemRef Array::create(Type type, bool context_flag, WidthType width_type, size_t
 }
 
 // This is the one installed into the m_vtable->finder slots.
-template <class cond, size_t bitwidth>
-bool Array::find_vtable(int64_t value, size_t start, size_t end, size_t baseindex, QueryStateBase* state) const
+template <class cond>
+bool Array::find_vtable(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
+                        QueryStateBase* state)
 {
-    return ArrayWithFind(*this).find_optimized<cond, bitwidth>(value, start, end, baseindex, state);
+    REALM_TEMPEX2(return ArrayWithFind(arr).find_optimized, cond, arr.m_width, (value, start, end, baseindex, state));
 }
 
-
-template <size_t w>
-struct Array::VTableForWidth {
-    struct PopulatedVTable : Array::VTable {
-        PopulatedVTable()
-        {
-            getter = &Array::get<w>;
-            setter = &Array::set<w>;
-            chunk_getter = &Array::get_chunk<w>;
-            finder[cond_Equal] = &Array::find_vtable<Equal, w>;
-            finder[cond_NotEqual] = &Array::find_vtable<NotEqual, w>;
-            finder[cond_Greater] = &Array::find_vtable<Greater, w>;
-            finder[cond_Less] = &Array::find_vtable<Less, w>;
-        }
-    };
-    static const PopulatedVTable vtable;
-};
-
-template <size_t w>
-const typename Array::VTableForWidth<w>::PopulatedVTable Array::VTableForWidth<w>::vtable;
-
 void Array::update_width_cache_from_header() noexcept
 {
-    auto width = get_width_from_header(get_header());
-    m_lbound = lbound_for_width(width);
-    m_ubound = ubound_for_width(width);
-
-    m_width = width;
-
-    REALM_TEMPEX(m_vtable = &VTableForWidth, width, ::vtable);
+    m_width = get_width_from_header(get_header());
+    m_lbound = lbound_for_width(m_width);
+    m_ubound = ubound_for_width(m_width);
+    REALM_ASSERT_DEBUG(m_lbound <= m_ubound);
+    REALM_ASSERT_DEBUG(m_width >= m_lbound);
+    REALM_ASSERT_DEBUG(m_width <= m_ubound);
+    REALM_TEMPEX(m_vtable = &VTableForWidth, m_width, ::vtable);
     m_getter = m_vtable->getter;
 }
 
 // This method reads 8 consecutive values into res[8], starting from index 'ndx'. It is allowed for the 8 values to
 // exceed the array length; in that case, the remainder of res[8] will be set to 0.
 template <size_t w>
-void Array::get_chunk(size_t ndx, int64_t res[8]) const noexcept
+void Array::get_chunk(const Array& arr, size_t ndx, int64_t res[8]) noexcept
 {
-    REALM_ASSERT_3(ndx, <, m_size);
-
+    auto sz = arr.size();
+    REALM_ASSERT_3(ndx, <, sz);
     size_t i = 0;
 
     // if constexpr to avoid producing spurious warnings resulting from
@@ -1061,7 +778,7 @@ void Array::get_chunk(size_t ndx, int64_t res[8]) const noexcept
 
         // Round m_size down to byte granularity as the trailing bits in the last
         // byte are uninitialized
-        size_t bytes_available = m_size / elements_per_byte;
+        size_t bytes_available = sz / elements_per_byte;
 
         // Round start and end to be byte-aligned. Start is rounded down and
         // end is rounded up as we may read up to 7 unused bits at each end.
@@ -1073,7 +790,7 @@
         uint64_t c = 0;
         for (size_t i = end; i > start; --i) {
             c <<= 8;
-            c += *reinterpret_cast<const uint8_t*>(m_data + i - 1);
+            c += *reinterpret_cast<const uint8_t*>(arr.m_data + i - 1);
         }
         // Trim off leading bits which aren't part of the requested range
         c >>= (ndx - start * elements_per_byte) * w;
@@ -1093,31 +810,31 @@
         }
     }
 
-    for (; i + ndx < m_size && i < 8; i++)
-        res[i] = get<w>(ndx + i);
+    for (; i + ndx < sz && i < 8; i++)
+        res[i] = get<w>(arr, ndx + i);
     for (; i < 8; i++)
         res[i] = 0;
 
#ifdef REALM_DEBUG
-    for (int j = 0; j + ndx < m_size && j < 8; j++) {
-        int64_t expected = get<w>(ndx + j);
+    for (int j = 0; j + ndx < sz && j < 8; j++) {
+        int64_t expected = Array::get_universal<w>(arr.m_data, ndx + j);
         REALM_ASSERT(res[j] == expected);
     }
#endif
 }
 
 template <>
-void Array::get_chunk<0>(size_t ndx, int64_t res[8]) const noexcept
+void Array::get_chunk<0>(const Array& arr, size_t ndx, int64_t res[8]) noexcept
 {
-    REALM_ASSERT_3(ndx, <, m_size);
+    REALM_ASSERT_3(ndx, <, arr.m_size);
     memset(res, 0, sizeof(int64_t) * 8);
 }
 
 template <size_t w>
-void Array::set(size_t ndx, int64_t value)
+void Array::set(Array& arr, size_t ndx, int64_t value)
 {
-    set_direct<w>(m_data, ndx, value);
+    realm::set_direct<w>(arr.m_data, ndx, value);
 }
 
 void Array::_mem_usage(size_t& mem) const noexcept
@@ -1222,10 +939,15 @@ void Array::report_memory_usage_2(MemUsageHandler& handler) const
 void Array::verify() const
 {
#ifdef REALM_DEBUG
-    REALM_ASSERT(is_attached());
-    REALM_ASSERT(m_width == 0 || m_width == 1 || m_width == 2 || m_width == 4 || m_width == 8 || m_width == 16 ||
-                 m_width == 32 || m_width == 64);
+    REALM_ASSERT(is_attached());
+    if (!wtype_is_extended(get_header())) {
+        REALM_ASSERT(m_width == 0 || m_width == 1 || m_width == 2 || m_width == 4 || m_width == 8 || m_width == 16 ||
+                     m_width == 32 || m_width == 64);
+    }
+    else {
+        REALM_ASSERT(m_width <= 64);
+    }
 
     if (!get_parent())
         return;
@@ -1238,35 +960,60 @@ void Array::verify() const
 
 size_t Array::lower_bound_int(int64_t value) const noexcept
 {
+    if (is_compressed())
+        return lower_bound_int_compressed(value);
     REALM_TEMPEX(return lower_bound, m_width, (m_data, m_size, value));
 }
 
 size_t Array::upper_bound_int(int64_t value) const noexcept
 {
+    if (is_compressed())
+        return upper_bound_int_compressed(value);
     REALM_TEMPEX(return upper_bound, m_width, (m_data, m_size, value));
 }
 
-
-size_t Array::find_first(int64_t value, size_t start, size_t end) const
+size_t Array::lower_bound_int_compressed(int64_t value) const noexcept
 {
-    return find_first<Equal>(value, start, end);
+    static impl::CompressedDataFetcher encoder;
+    encoder.ptr = &m_integer_compressor;
+    return lower_bound(m_data, m_size, value, encoder);
 }
 
+size_t Array::upper_bound_int_compressed(int64_t value) const noexcept
+{
+    static impl::CompressedDataFetcher encoder;
+    encoder.ptr = &m_integer_compressor;
+    return upper_bound(m_data, m_size, value, encoder);
+}
 
 int_fast64_t Array::get(const char* header, size_t ndx) noexcept
 {
-    const char* data = get_data_from_header(header);
-    uint_least8_t width = get_width_from_header(header);
-    return get_direct(data, width, ndx);
+    // This is very important. Most of the time we end up here because we are traversing
+    // the cluster. The keys/refs in the cluster are not compressed (there is almost no
+    // gain), so the intent is to pollute cluster traversal as little as possible.
+    // We need to check the header wtype and only initialise the integer compressor if
+    // needed; otherwise we should just call get_direct. On average this costs one extra
+    // access to the header while traversing the cluster tree.
+    if (REALM_LIKELY(!NodeHeader::wtype_is_extended(header))) {
+        const char* data = get_data_from_header(header);
+        uint_least8_t width = get_width_from_header(header);
+        return get_direct(data, width, ndx);
+    }
+    // Ideally, we would not construct a compressor every time we end up here, but
+    // compressor initialisation should be fast enough. Creating an Array, which owns a
+    // compressor internally, is the better approach if we intend to access the same data
+    // over and over again, since the compressor caches the most important information
+    // about the layout of the data.
+    IntegerCompressor s_compressor;
+    s_compressor.init(header);
+    return s_compressor.get(ndx);
 }
 
-
 std::pair<int64_t, int64_t> Array::get_two(const char* header, size_t ndx) noexcept
 {
-    const char* data = get_data_from_header(header);
-    uint_least8_t width = get_width_from_header(header);
-    std::pair<int64_t, int64_t> p = ::get_two(data, width, ndx);
-    return std::make_pair(p.first, p.second);
+    return std::make_pair(get(header, ndx), get(header, ndx + 1));
 }
 
 bool QueryStateCount::match(size_t, Mixed) noexcept
@@ -1312,7 +1059,6 @@ bool QueryStateFindAll<std::vector<ObjKey>>::match(size_t index) noexcept
     ++m_match_count;
     int64_t key_value = (m_key_values ? m_key_values->get(index) : index) + m_key_offset;
     m_keys.push_back(ObjKey(key_value));
-
     return (m_limit > m_match_count);
 }
 
diff --git a/src/realm/array.hpp b/src/realm/array.hpp
index 1df0aa2b992..6b9569ebd82 100644
--- a/src/realm/array.hpp
+++ b/src/realm/array.hpp
@@ -21,8 +21,10 @@
 
 #include
 #include
+#include
 #include
 #include
+#include
 
 namespace realm {
 
@@ -90,12 +92,8 @@ class QueryStateFindFirst : public QueryStateBase {
 class Array : public Node, public ArrayParent {
 public:
     /// Create an array accessor in the unattached state.
-    explicit Array(Allocator& allocator) noexcept
-        : Node(allocator)
-    {
-    }
-
-    ~Array() noexcept override {}
+    explicit Array(Allocator& allocator) noexcept;
+    virtual ~Array() noexcept = default;
 
     /// Create a new integer array of the specified type and size, and filled
     /// with the specified value, and attach this accessor to it. This does not
@@ -126,6 +124,8 @@ class Array : public Node, public ArrayParent {
         init_from_ref(ref);
     }
 
+    MemRef get_mem() const noexcept;
+
     /// Called in the context of Group::commit() to ensure that attached
     /// accessors stay valid across a commit. Please note that this works only
     /// for non-transactional commits. Accessors obtained during a transaction
@@ -174,21 +174,23 @@ class Array : public Node, public ArrayParent {
     void set_as_ref(size_t ndx, ref_type ref);
 
     template <size_t w>
-    void set(size_t ndx, int64_t value);
+    static void set(Array&, size_t ndx, int64_t value);
 
     int64_t get(size_t ndx) const noexcept;
+    std::vector<int64_t> get_all(size_t b, size_t e) const;
+
     template <size_t w>
-    int64_t get(size_t ndx) const noexcept;
+    static int64_t get(const Array& arr, size_t ndx) noexcept;
 
     void get_chunk(size_t ndx, int64_t res[8]) const noexcept;
     template <size_t w>
-    void get_chunk(size_t ndx, int64_t res[8]) const noexcept;
+    static void get_chunk(const Array&, size_t ndx, int64_t res[8]) noexcept;
 
     ref_type get_as_ref(size_t ndx) const noexcept;
-    RefOrTagged get_as_ref_or_tagged(size_t ndx) const noexcept;
+
     void set(size_t ndx, RefOrTagged);
     void add(RefOrTagged);
     void ensure_minimum_width(RefOrTagged);
@@ -198,12 +200,21 @@ class Array : public Node, public ArrayParent {
 
     void alloc(size_t init_size, size_t new_width)
     {
-        REALM_ASSERT_3(m_width, ==, get_width_from_header(get_header()));
-        REALM_ASSERT_3(m_size, ==, get_size_from_header(get_header()));
+        // Node::alloc is the one that triggers copy-on-write. If we call alloc for a
+        // B (compressed) array, we have a bug in our machinery: the array should have
+        // been decompressed well before alloc is called.
+        const auto header = get_header();
+        REALM_ASSERT_3(m_width, ==, get_width_from_header(header));
+        REALM_ASSERT_3(m_size, ==, get_size_from_header(header));
         Node::alloc(init_size, new_width);
         update_width_cache_from_header();
     }
 
+    bool is_empty() const noexcept
+    {
+        return size() == 0;
+    }
+
     /// Remove the element at the specified index, and move elements at higher
     /// indexes to the next lower index.
     ///
@@ -322,6 +333,8 @@ class Array : public Node, public ArrayParent {
     /// by doing a linear search for short sequences.
     size_t lower_bound_int(int64_t value) const noexcept;
     size_t upper_bound_int(int64_t value) const noexcept;
+    size_t lower_bound_int_compressed(int64_t value) const noexcept;
+    size_t upper_bound_int_compressed(int64_t value) const noexcept;
     //@}
 
     int64_t get_sum(size_t start = 0, size_t end = size_t(-1)) const
@@ -351,6 +364,18 @@ class Array : public Node, public ArrayParent {
     /// (idempotency).
     void destroy_deep() noexcept;
 
+    /// Check whether the array is encoded (in B format).
+    inline bool is_compressed() const;
+
+    inline const IntegerCompressor& integer_compressor() const;
+
+    /// Used only for testing: encode the array passed as argument.
+    bool try_compress(Array&) const;
+
+    /// Used only for testing: decode the array on which this method is invoked.
+    /// If the array is not encoded, this is a no-op.
+    bool try_decompress();
+
     /// Shorthand for `destroy_deep(MemRef(ref, alloc), alloc)`.
     static void destroy_deep(ref_type ref, Allocator& alloc) noexcept;
 
@@ -383,25 +408,35 @@ class Array : public Node, public ArrayParent {
 
     /// Same as non-static write() with `deep` set to true. This is for the
     /// cases where you do not already have an array accessor available.
+    /// Compression may be attempted if `compress_in_flight` is true.
+    /// This should be avoided if you rely on the size of the array being unchanged.
     static ref_type write(ref_type, Allocator&, _impl::ArrayWriterBase&, bool only_if_modified,
                           bool compress_in_flight);
 
-    size_t find_first(int64_t value, size_t begin = 0, size_t end = size_t(-1)) const;
+    inline size_t find_first(int64_t value, size_t begin = 0, size_t end = size_t(-1)) const
+    {
+        return find_first<Equal>(value, begin, end);
+    }
 
     // Wrappers for backwards compatibility and for simple use without
     // setting up state initialization etc
     template <class cond>
     size_t find_first(int64_t value, size_t start = 0, size_t end = size_t(-1)) const
     {
-        REALM_ASSERT(start <= m_size && (end <= m_size || end == size_t(-1)) && start <= end);
-        // todo, would be nice to avoid this in order to speed up find_first loops
         QueryStateFindFirst state;
         Finder finder = m_vtable->finder[cond::condition];
-        (this->*finder)(value, start, end, 0, &state);
+        finder(*this, value, start, end, 0, &state);
+        return state.m_state;
+    }
 
-        return static_cast<size_t>(state.m_state);
+    template <class cond>
+    bool find(int64_t value, size_t start, size_t end, size_t baseIndex, QueryStateBase* state) const
+    {
+        Finder finder = m_vtable->finder[cond::condition];
+        return finder(*this, value, start, end, baseIndex, state);
     }
 
+
     /// Get the specified element without the cost of constructing an
     /// array instance. If an array instance is already available, or
     /// you need to get multiple values, then this method will be
@@ -463,11 +498,15 @@ class Array : public Node, public ArrayParent {
     /// Takes a 64-bit value and returns the minimum number of bits needed
     /// to fit the value. For alignment this is rounded up to nearest
     /// log2. Possible results {0, 1, 2, 4, 8, 16, 32, 64}
-    static size_t bit_width(int64_t value);
+    static uint8_t bit_width(int64_t value);
 
     void typed_print(std::string prefix) const;
 
 protected:
+    friend class NodeTree;
+    void copy_on_write();
+    void copy_on_write(size_t min_size);
+
     // This returns the minimum value ("lower bound") of the representable values
     // for the given bit width. Valid widths are 0, 1, 2, 4, 8, 16, 32, and 64.
     static constexpr int_fast64_t lbound_for_width(size_t width) noexcept;
@@ -505,14 +544,17 @@ class Array : public Node, public ArrayParent {
 
 protected:
     // Getters and Setters for adaptive-packed arrays
-    typedef int64_t (Array::*Getter)(size_t) const; // Note: getters must not throw
-    typedef void (Array::*Setter)(size_t, int64_t);
-    typedef bool (Array::*Finder)(int64_t, size_t, size_t, size_t, QueryStateBase*) const;
-    typedef void (Array::*ChunkGetter)(size_t, int64_t res[8]) const; // Note: getters must not throw
+    typedef int64_t (*Getter)(const Array&, size_t); // Note: getters must not throw
+    typedef void (*Setter)(Array&, size_t, int64_t);
+    typedef bool (*Finder)(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*);
+    typedef void (*ChunkGetter)(const Array&, size_t, int64_t res[8]); // Note: getters must not throw
+
+    typedef std::vector<int64_t> (*GetterAll)(const Array&, size_t, size_t); // Note: getters must not throw
 
     struct VTable {
         Getter getter;
         ChunkGetter chunk_getter;
+        GetterAll getter_all;
         Setter setter;
         Finder finder[cond_VTABLE_FINDER_COUNT]; // one for each active function pointer
     };
+
     template <size_t w>
     struct VTableForWidth;
 
     // This is the one installed into the m_vtable->finder slots.
-    template <class cond, size_t bitwidth>
-    bool find_vtable(int64_t value, size_t start, size_t end, size_t baseindex, QueryStateBase* state) const;
+    template <class cond>
+    static bool find_vtable(const Array&, int64_t value, size_t start, size_t end, size_t baseindex,
+                            QueryStateBase* state);
 
     template <size_t w>
-    int64_t get_universal(const char* const data, const size_t ndx) const;
+    static int64_t get_universal(const char* const data, const size_t ndx);
 
 protected:
     Getter m_getter = nullptr; // cached to avoid indirection
@@ -538,6 +581,11 @@ class Array : public Node, public ArrayParent {
     bool m_has_refs;     // Elements whose first bit is zero are refs to subarrays.
     bool m_context_flag; // Meaning depends on context.
 
+    IntegerCompressor m_integer_compressor;
+    // compress/decompress this array
+    bool compress_array(Array&) const;
+    bool decompress_array(Array& arr) const;
+
 private:
     ref_type do_write_shallow(_impl::ArrayWriterBase&) const;
     ref_type do_write_deep(_impl::ArrayWriterBase&, bool only_if_modified, bool compress) const;
@@ -548,10 +596,15 @@ class Array : public Node, public ArrayParent {
     void report_memory_usage_2(MemUsageHandler&) const;
#endif
 
+
+private:
     friend class Allocator;
     friend class SlabAlloc;
     friend class GroupWriter;
     friend class ArrayWithFind;
+    friend class IntegerCompressor;
+    friend class PackedCompressor;
+    friend class FlexCompressor;
 };
 
 class TempArray : public Array {
@@ -573,6 +626,57 @@ class TempArray : public Array {
 
 // Implementation:
 
+inline Array::Array(Allocator& allocator) noexcept
+    : Node(allocator)
+{
+}
+
+inline bool Array::is_compressed() const
+{
+    const auto enc = m_integer_compressor.get_encoding();
+    return enc == NodeHeader::Encoding::Flex || enc == NodeHeader::Encoding::Packed;
+}
+
+inline const IntegerCompressor& Array::integer_compressor() const
+{
+    return m_integer_compressor;
+}
+
+inline int64_t Array::get(size_t ndx) const noexcept
+{
+    REALM_ASSERT_DEBUG(is_attached());
+    REALM_ASSERT_DEBUG_EX(ndx < m_size, ndx, m_size);
+    return m_getter(*this, ndx);
+
+    // Two ideas that are not efficient but may be worth looking into again:
+    /*
+    // Assume correct width is found early in REALM_TEMPEX, which is the case for B tree offsets that
+    // are probably either 2^16 long. Turns out to be 25% faster if found immediately, but 50-300% slower
+    // if found later
+    REALM_TEMPEX(return get, (ndx));
+    */
+    /*
+    // Slightly slower in both of the if-cases. Also needs a matchcount m_size check too, to avoid
+    // reading beyond the array.
+    if (m_width >= 8 && m_size > ndx + 7)
+        return get<64>(ndx >> m_shift) & m_widthmask;
+    else
+        return (this->*(m_vtable->getter))(ndx);
+    */
+}
+
+inline std::vector<int64_t> Array::get_all(size_t b, size_t e) const
+{
+    REALM_ASSERT_DEBUG(is_compressed());
+    return m_vtable->getter_all(*this, b, e);
+}
+
+template <size_t w>
+inline int64_t Array::get(const Array& arr, size_t ndx) noexcept
+{
+    REALM_ASSERT_DEBUG(arr.is_attached());
+    return get_universal<w>(arr.m_data, ndx);
+}
 
 constexpr inline int_fast64_t Array::lbound_for_width(size_t width) noexcept
 {
@@ -673,7 +777,6 @@ inline void Array::create(Type type, bool context_flag, size_t length, int_fast6
     init_from_mem(mem);
 }
 
-
 inline Array::Type Array::get_type() const noexcept
 {
     if (m_is_inner_bptree_node) {
@@ -689,41 +792,44 @@ inline Array::Type Array::get_type() const noexcept
 inline void Array::get_chunk(size_t ndx, int64_t res[8]) const noexcept
 {
     REALM_ASSERT_DEBUG(ndx < m_size);
-    (this->*(m_vtable->chunk_getter))(ndx, res);
+    m_vtable->chunk_getter(*this, ndx, res);
 }
 
 template <size_t w>
-int64_t Array::get_universal(const char* data, size_t ndx) const
+inline int64_t Array::get_universal(const char* data, size_t ndx)
 {
-    if (w == 0) {
-        return 0;
-    }
-    else if (w == 1) {
-        size_t offset = ndx >> 3;
-        return (data[offset] >> (ndx & 7)) & 0x01;
+    if (w == 64) {
+        size_t offset = ndx << 3;
+        return *reinterpret_cast<const int64_t*>(data + offset);
     }
-    else if (w == 2) {
-        size_t offset = ndx >> 2;
-        return (data[offset] >> ((ndx & 3) << 1)) & 0x03;
+    else if (w == 32) {
+        size_t offset = ndx << 2;
+        return *reinterpret_cast<const int32_t*>(data + offset);
     }
-    else if (w == 4) {
-        size_t offset = ndx >> 1;
-        return (data[offset] >> ((ndx & 1) << 2)) & 0x0F;
+    else if (w == 16) {
+        size_t offset = ndx << 1;
+        return *reinterpret_cast<const int16_t*>(data + offset);
     }
     else if (w == 8) {
        return *reinterpret_cast<const int8_t*>(data + ndx);
    }
-    else if (w == 16) {
-        size_t offset = ndx * 2;
-        return *reinterpret_cast<const int16_t*>(data + offset);
+    else if (w == 4) {
+        size_t offset = ndx >> 1;
+        auto d = data[offset];
+        return (d >> ((ndx & 1) << 2)) & 0x0F;
     }
-    else if (w == 32) {
-        size_t offset = ndx * 4;
-        return *reinterpret_cast<const int32_t*>(data + offset);
+    else if (w == 2) {
+        size_t offset = ndx >> 2;
+        auto d = data[offset];
+        return (d >> ((ndx & 3) << 1)) & 0x03;
    }
-    else if (w == 64) {
-        size_t offset = ndx * 8;
-        return *reinterpret_cast<const int64_t*>(data + offset);
+    else if (w == 1) {
+        size_t offset = ndx >> 3;
+        auto d = data[offset];
+        return (d >> (ndx & 7)) & 0x01;
+    }
+    else if (w == 0) {
+        return 0;
    }
    else {
        REALM_ASSERT_DEBUG(false);
@@ -731,35 +837,6 @@ int64_t Array::get_universal(const char* data, size_t ndx) const
     }
 }
 
-template <size_t w>
-int64_t Array::get(size_t ndx) const noexcept
-{
-    return get_universal<w>(m_data, ndx);
-}
-
-inline int64_t Array::get(size_t ndx) const noexcept
-{
-    REALM_ASSERT_DEBUG(is_attached());
-    REALM_ASSERT_DEBUG_EX(ndx < m_size, ndx, m_size);
-    return (this->*m_getter)(ndx);
-
-    // Two ideas that are not efficient but may be worth looking into again:
-    /*
-    // Assume correct width is found early in REALM_TEMPEX, which is the case for B tree offsets that
-    // are probably either 2^16 long. Turns out to be 25% faster if found immediately, but 50-300% slower
-    // if found later
-    REALM_TEMPEX(return get, (ndx));
-    */
-    /*
-    // Slightly slower in both of the if-cases. Also needs an matchcount m_size check too, to avoid
-    // reading beyond array.
-    if (m_width >= 8 && m_size > ndx + 7)
-        return get<64>(ndx >> m_shift) & m_widthmask;
-    else
-        return (this->*(m_vtable->getter))(ndx);
-    */
-}
-
 inline int64_t Array::front() const noexcept
 {
     return get(0);
@@ -848,34 +925,6 @@ inline void Array::destroy_deep() noexcept
     m_data = nullptr;
 }
 
-inline ref_type Array::write(_impl::ArrayWriterBase& out, bool deep, bool only_if_modified, bool compress) const
-{
-    REALM_ASSERT(is_attached());
-
-    if (only_if_modified && m_alloc.is_read_only(m_ref))
-        return m_ref;
-
-    if (!deep || !m_has_refs)
-        return do_write_shallow(out); // Throws
-
-    return do_write_deep(out, only_if_modified, compress); // Throws
-}
-
-inline ref_type Array::write(ref_type ref, Allocator& alloc, _impl::ArrayWriterBase& out, bool only_if_modified,
-                             bool compress)
-{
-    if (only_if_modified && alloc.is_read_only(ref))
-        return ref;
-
-    Array array(alloc);
-    array.init_from_ref(ref);
-
-    if (!array.m_has_refs)
-        return array.do_write_shallow(out); // Throws
-
-    return array.do_write_deep(out, only_if_modified, compress); // Throws
-}
-
 inline void Array::add(int_fast64_t value)
 {
     insert(m_size, value);
@@ -986,7 +1035,6 @@ inline size_t Array::get_max_byte_size(size_t num_elems) noexcept
     return header_size + num_elems * max_bytes_per_elem;
 }
 
-
 inline void Array::update_child_ref(size_t child_ndx, ref_type new_ref)
 {
     set(child_ndx, new_ref);
@@ -1004,6 +1052,73 @@ inline void Array::ensure_minimum_width(int_fast64_t value)
         do_ensure_minimum_width(value);
 }
 
+inline ref_type Array::write(_impl::ArrayWriterBase& out, bool deep, bool only_if_modified,
+                             bool compress_in_flight) const
+{
+    REALM_ASSERT_DEBUG(is_attached());
+    // The default allocator cannot be trusted wrt is_read_only():
+    REALM_ASSERT_DEBUG(!only_if_modified || &m_alloc != &Allocator::get_default());
+    if (only_if_modified && m_alloc.is_read_only(m_ref))
+        return m_ref;
+
+    if (!deep || !m_has_refs) {
+        // However, creating an array using ANYTHING BUT the default allocator during
+        // commit is also wrong: it only works by accident, because the whole slab area
+        // is reinitialized after commit. Hence the default allocator below.
+        Array compressed_array{Allocator::get_default()};
+        if (compress_in_flight && compress_array(compressed_array)) {
+#ifdef REALM_DEBUG
+            const auto encoding = compressed_array.m_integer_compressor.get_encoding();
+            REALM_ASSERT_DEBUG(encoding == Encoding::Flex || encoding == Encoding::Packed);
+            REALM_ASSERT_DEBUG(size() == compressed_array.size());
+            for (size_t i = 0; i < compressed_array.size(); ++i) {
+                REALM_ASSERT_DEBUG(get(i) == compressed_array.get(i));
+            }
+#endif
+            auto ref = compressed_array.do_write_shallow(out);
+            compressed_array.destroy();
+            return ref;
+        }
+        return do_write_shallow(out); // Throws
+    }
+
+    return do_write_deep(out, only_if_modified, compress_in_flight); // Throws
+}
+
+inline ref_type Array::write(ref_type ref, Allocator& alloc, _impl::ArrayWriterBase& out, bool only_if_modified,
+                             bool compress_in_flight)
+{
+    // The default allocator cannot be trusted wrt is_read_only():
+    REALM_ASSERT_DEBUG(!only_if_modified || &alloc != &Allocator::get_default());
+    if (only_if_modified && alloc.is_read_only(ref))
+        return ref;
+
+    Array array(alloc);
+    array.init_from_ref(ref);
+    REALM_ASSERT_DEBUG(array.is_attached());
+
+    if (!array.m_has_refs) {
+        Array compressed_array{Allocator::get_default()};
+        if (compress_in_flight && array.compress_array(compressed_array)) {
+#ifdef REALM_DEBUG
+            const auto encoding = compressed_array.m_integer_compressor.get_encoding();
+            REALM_ASSERT_DEBUG(encoding == Encoding::Flex || encoding == Encoding::Packed);
+            REALM_ASSERT_DEBUG(array.size() == compressed_array.size());
+            for (size_t i = 0; i < compressed_array.size(); ++i) {
+                REALM_ASSERT_DEBUG(array.get(i) == compressed_array.get(i));
+            }
+#endif
+            auto ref = compressed_array.do_write_shallow(out);
+            compressed_array.destroy();
+            return ref;
+        }
+        else {
+            return array.do_write_shallow(out); // Throws
+        }
+    }
+    return array.do_write_deep(out, only_if_modified, compress_in_flight); // Throws
+}
+
 } // namespace realm
diff --git a/src/realm/array_aggregate_optimizations.cpp b/src/realm/array_aggregate_optimizations.cpp
new file mode 100644
index 00000000000..6242e6853dd
--- /dev/null
+++ b/src/realm/array_aggregate_optimizations.cpp
@@ -0,0 +1,369 @@
+/*************************************************************************
+ *
+ * Copyright 2023 Realm Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + **************************************************************************/ + +#include +#include + +using namespace realm; + +int64_t Array::sum(size_t start, size_t end) const +{ + REALM_TEMPEX(return sum, m_width, (start, end)); +} + +template +int64_t Array::sum(size_t start, size_t end) const +{ + if (end == size_t(-1)) + end = m_size; + + REALM_ASSERT_EX(end <= m_size && start <= end, start, end, m_size); + + if (start == end) + return 0; + + int64_t s = 0; + + // Sum manually until 128 bit aligned + for (; (start < end) && (((size_t(m_data) & 0xf) * 8 + start * w) % 128 != 0); start++) { + s += get(*this, start); + } + + if (w == 1 || w == 2 || w == 4) { + // Sum of bitwidths less than a byte (which are always positive) + // uses a divide and conquer algorithm that is a variation of popolation count: + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + + // static values needed for fast sums + const uint64_t m2 = 0x3333333333333333ULL; + const uint64_t m4 = 0x0f0f0f0f0f0f0f0fULL; + const uint64_t h01 = 0x0101010101010101ULL; + + int64_t* data = reinterpret_cast(m_data + start * w / 8); + size_t chunks = (end - start) * w / 8 / sizeof(int64_t); + + for (size_t t = 0; t < chunks; t++) { + if (w == 1) { +#if 0 +#if defined(USE_SSE42) && defined(_MSC_VER) && defined(REALM_PTR_64) + s += __popcnt64(data[t]); +#elif !defined(_MSC_VER) && defined(USE_SSE42) && defined(REALM_PTR_64) + s += __builtin_popcountll(data[t]); +#else + uint64_t a = data[t]; + const uint64_t m1 = 0x5555555555555555ULL; + a -= (a >> 1) & m1; + a = (a & m2) + ((a >> 2) & m2); + a = (a + (a >> 4)) & m4; + a = (a * h01) >> 56; + s += a; +#endif +#endif + s += fast_popcount64(data[t]); + } + else if (w == 2) { + uint64_t a = data[t]; + a = (a & m2) + ((a >> 2) & m2); + a = (a + (a >> 4)) & m4; + a = (a * h01) >> 56; + + s += a; + } + else if (w == 4) { + uint64_t a = data[t]; + a = (a & m4) + ((a >> 4) & m4); + a = (a * h01) >> 56; + s += a; + } + } + start += sizeof(int64_t) * 8 / no0(w) * chunks; + } + +#ifdef REALM_COMPILER_SSE + if (sseavx<42>()) { + // 2000 items summed 500000 times, 8/16/32 bits, miliseconds: + // Naive, templated get<>: 391 371 374 + // SSE: 97 148 282 + + if ((w == 8 || w == 16 || w == 32) && end - start > sizeof(__m128i) * 8 / no0(w)) { + __m128i* data = reinterpret_cast<__m128i*>(m_data + start * w / 8); + __m128i sum_result = {0}; + __m128i sum2; + + size_t chunks = (end - start) * w / 8 / sizeof(__m128i); + + for (size_t t = 0; t < chunks; t++) { + if (w == 8) { + /* + // 469 ms AND disadvantage of handling max 64k elements before overflow + __m128i vl = _mm_cvtepi8_epi16(data[t]); + __m128i vh = data[t]; + vh.m128i_i64[0] = vh.m128i_i64[1]; + vh = _mm_cvtepi8_epi16(vh); + sum_result = _mm_add_epi16(sum_result, vl); + sum_result = _mm_add_epi16(sum_result, vh); + */ + + /* + // 424 ms + __m128i vl = _mm_unpacklo_epi8(data[t], _mm_set1_epi8(0)); + __m128i vh = _mm_unpackhi_epi8(data[t], _mm_set1_epi8(0)); + sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vl, _mm_set1_epi16(1))); + sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vh, _mm_set1_epi16(1))); + */ + + __m128i vl = _mm_cvtepi8_epi16(data[t]); // sign extend lower words 8->16 + __m128i vh = data[t]; + vh = _mm_srli_si128(vh, 8); // v >>= 64 + vh = _mm_cvtepi8_epi16(vh); // sign extend lower words 8->16 + __m128i sum1 = _mm_add_epi16(vl, vh); + __m128i sumH = _mm_cvtepi16_epi32(sum1); + __m128i sumL = _mm_srli_si128(sum1, 8); // v >>= 64 + sumL = _mm_cvtepi16_epi32(sumL); + sum_result = 
_mm_add_epi32(sum_result, sumL); + sum_result = _mm_add_epi32(sum_result, sumH); + } + else if (w == 16) { + // todo, can overflow for array size > 2^32 + __m128i vl = _mm_cvtepi16_epi32(data[t]); // sign extend lower words 16->32 + __m128i vh = data[t]; + vh = _mm_srli_si128(vh, 8); // v >>= 64 + vh = _mm_cvtepi16_epi32(vh); // sign extend lower words 16->32 + sum_result = _mm_add_epi32(sum_result, vl); + sum_result = _mm_add_epi32(sum_result, vh); + } + else if (w == 32) { + __m128i v = data[t]; + __m128i v0 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64 + v = _mm_srli_si128(v, 8); // v >>= 64 + __m128i v1 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64 + sum_result = _mm_add_epi64(sum_result, v0); + sum_result = _mm_add_epi64(sum_result, v1); + + /* + __m128i m = _mm_set1_epi32(0xc000); // test if overflow could happen (still need + underflow test). + __m128i mm = _mm_and_si128(data[t], m); + zz = _mm_or_si128(mm, zz); + sum_result = _mm_add_epi32(sum_result, data[t]); + */ + } + } + start += sizeof(__m128i) * 8 / no0(w) * chunks; + + // prevent taking address of 'state' to make the compiler keep it in SSE register in above loop + // (vc2010/gcc4.6) + sum2 = sum_result; + + // Avoid aliasing bug where sum2 might not yet be initialized when accessed by get_universal + char sum3[sizeof sum2]; + memcpy(&sum3, &sum2, sizeof sum2); + + // Sum elements of sum + for (size_t t = 0; t < sizeof(__m128i) * 8 / ((w == 8 || w == 16) ? 32 : 64); ++t) { + int64_t v = get_universal < (w == 8 || w == 16) ? 32 : 64 > (reinterpret_cast(&sum3), t); + s += v; + } + } + } +#endif + + // Sum remaining elements + for (; start < end; ++start) + s += get(*this, start); + + return s; +} + +size_t Array::count(int64_t value) const noexcept +{ + // This is not used anywhere in the code, I believe we can delete this + // since the query logic does not use this + const uint64_t* next = reinterpret_cast(m_data); + size_t value_count = 0; + const size_t end = m_size; + size_t i = 0; + + // static values needed for fast population count + const uint64_t m1 = 0x5555555555555555ULL; + const uint64_t m2 = 0x3333333333333333ULL; + const uint64_t m4 = 0x0f0f0f0f0f0f0f0fULL; + const uint64_t h01 = 0x0101010101010101ULL; + + if (m_width == 0) { + if (value == 0) + return m_size; + return 0; + } + if (m_width == 1) { + if (uint64_t(value) > 1) + return 0; + + const size_t chunkvals = 64; + for (; i + chunkvals <= end; i += chunkvals) { + uint64_t a = next[i / chunkvals]; + if (value == 0) + a = ~a; // reverse + + a -= (a >> 1) & m1; + a = (a & m2) + ((a >> 2) & m2); + a = (a + (a >> 4)) & m4; + a = (a * h01) >> 56; + + // Could use intrinsic instead: + // a = __builtin_popcountll(a); // gcc intrinsic + + value_count += to_size_t(a); + } + } + else if (m_width == 2) { + if (uint64_t(value) > 3) + return 0; + + const uint64_t v = ~0ULL / 0x3 * value; + + // Masks to avoid spillover between segments in cascades + const uint64_t c1 = ~0ULL / 0x3 * 0x1; + + const size_t chunkvals = 32; + for (; i + chunkvals <= end; i += chunkvals) { + uint64_t a = next[i / chunkvals]; + a ^= v; // zero matching bit segments + a |= (a >> 1) & c1; // cascade ones in non-zeroed segments + a &= m1; // isolate single bit in each segment + a ^= m1; // reverse isolated bits + // if (!a) continue; + + // Population count + a = (a & m2) + ((a >> 2) & m2); + a = (a + (a >> 4)) & m4; + a = (a * h01) >> 56; + + value_count += to_size_t(a); + } + } + else if (m_width == 4) { + if (uint64_t(value) > 15) + return 0; + + const uint64_t v = 
~0ULL / 0xF * value; + const uint64_t m = ~0ULL / 0xF * 0x1; + + // Masks to avoid spillover between segments in cascades + const uint64_t c1 = ~0ULL / 0xF * 0x7; + const uint64_t c2 = ~0ULL / 0xF * 0x3; + + const size_t chunkvals = 16; + for (; i + chunkvals <= end; i += chunkvals) { + uint64_t a = next[i / chunkvals]; + a ^= v; // zero matching bit segments + a |= (a >> 1) & c1; // cascade ones in non-zeroed segments + a |= (a >> 2) & c2; + a &= m; // isolate single bit in each segment + a ^= m; // reverse isolated bits + + // Population count + a = (a + (a >> 4)) & m4; + a = (a * h01) >> 56; + + value_count += to_size_t(a); + } + } + else if (m_width == 8) { + if (value > 0x7FLL || value < -0x80LL) + return 0; // by casting? + + const uint64_t v = ~0ULL / 0xFF * value; + const uint64_t m = ~0ULL / 0xFF * 0x1; + + // Masks to avoid spillover between segments in cascades + const uint64_t c1 = ~0ULL / 0xFF * 0x7F; + const uint64_t c2 = ~0ULL / 0xFF * 0x3F; + const uint64_t c3 = ~0ULL / 0xFF * 0x0F; + + const size_t chunkvals = 8; + for (; i + chunkvals <= end; i += chunkvals) { + uint64_t a = next[i / chunkvals]; + a ^= v; // zero matching bit segments + a |= (a >> 1) & c1; // cascade ones in non-zeroed segments + a |= (a >> 2) & c2; + a |= (a >> 4) & c3; + a &= m; // isolate single bit in each segment + a ^= m; // reverse isolated bits + + // Population count + a = (a * h01) >> 56; + + value_count += to_size_t(a); + } + } + else if (m_width == 16) { + if (value > 0x7FFFLL || value < -0x8000LL) + return 0; // by casting? + + const uint64_t v = ~0ULL / 0xFFFF * value; + const uint64_t m = ~0ULL / 0xFFFF * 0x1; + + // Masks to avoid spillover between segments in cascades + const uint64_t c1 = ~0ULL / 0xFFFF * 0x7FFF; + const uint64_t c2 = ~0ULL / 0xFFFF * 0x3FFF; + const uint64_t c3 = ~0ULL / 0xFFFF * 0x0FFF; + const uint64_t c4 = ~0ULL / 0xFFFF * 0x00FF; + + const size_t chunkvals = 4; + for (; i + chunkvals <= end; i += chunkvals) { + uint64_t a = next[i / chunkvals]; + a ^= v; // zero matching bit segments + a |= (a >> 1) & c1; // cascade ones in non-zeroed segments + a |= (a >> 2) & c2; + a |= (a >> 4) & c3; + a |= (a >> 8) & c4; + a &= m; // isolate single bit in each segment + a ^= m; // reverse isolated bits + + // Population count + a = (a * h01) >> 56; + + value_count += to_size_t(a); + } + } + else if (m_width == 32) { + int32_t v = int32_t(value); + const int32_t* d = reinterpret_cast(m_data); + for (; i < end; ++i) { + if (d[i] == v) + ++value_count; + } + return value_count; + } + else if (m_width == 64) { + const int64_t* d = reinterpret_cast(m_data); + for (; i < end; ++i) { + if (d[i] == value) + ++value_count; + } + return value_count; + } + + // Check remaining elements + for (; i < end; ++i) + if (value == get(i)) + ++value_count; + + return value_count; +} diff --git a/src/realm/array_blobs_small.cpp b/src/realm/array_blobs_small.cpp index bca4d012a1f..4e93f40c5f4 100644 --- a/src/realm/array_blobs_small.cpp +++ b/src/realm/array_blobs_small.cpp @@ -91,7 +91,8 @@ void ArraySmallBlobs::erase(size_t ndx) REALM_ASSERT_3(ndx, <, m_offsets.size()); size_t start = ndx ? 
to_size_t(m_offsets.get(ndx - 1)) : 0;
-    size_t end = to_size_t(m_offsets.get(ndx));
+    auto offset = m_offsets.get(ndx);
+    size_t end = to_size_t(offset);
     m_blob.erase(start, end);
     m_offsets.erase(ndx);
diff --git a/src/realm/array_blobs_small.hpp b/src/realm/array_blobs_small.hpp
index 8db3467a209..e1a08e43e4f 100644
--- a/src/realm/array_blobs_small.hpp
+++ b/src/realm/array_blobs_small.hpp
@@ -176,7 +176,8 @@ inline BinaryData ArraySmallBlobs::get(size_t ndx) const noexcept
     }
     else {
         size_t begin = ndx ? to_size_t(m_offsets.get(ndx - 1)) : 0;
-        size_t end = to_size_t(m_offsets.get(ndx));
+        auto offset = m_offsets.get(ndx);
+        size_t end = to_size_t(offset);
         BinaryData bd = BinaryData(m_blob.get(begin), end - begin);
         // Old database file (non-nullable column should never return null)
diff --git a/src/realm/array_direct.hpp b/src/realm/array_direct.hpp
index 5380876700f..4b92141bf55 100644
--- a/src/realm/array_direct.hpp
+++ b/src/realm/array_direct.hpp
@@ -26,48 +26,48 @@
// clang-format off
/* wid == 16/32 likely when accessing offsets in B tree */
#define REALM_TEMPEX(fun, wid, arg) \
-    if (wid == 16) {fun<16> arg;} \
-    else if (wid == 32) {fun<32> arg;} \
-    else if (wid == 0) {fun<0> arg;} \
-    else if (wid == 1) {fun<1> arg;} \
-    else if (wid == 2) {fun<2> arg;} \
-    else if (wid == 4) {fun<4> arg;} \
-    else if (wid == 8) {fun<8> arg;} \
-    else if (wid == 64) {fun<64> arg;} \
-    else {REALM_ASSERT_DEBUG(false); fun<0> arg;}
+if (wid == 16) {fun<16> arg;} \
+else if (wid == 32) {fun<32> arg;} \
+else if (wid == 0) {fun<0> arg;} \
+else if (wid == 1) {fun<1> arg;} \
+else if (wid == 2) {fun<2> arg;} \
+else if (wid == 4) {fun<4> arg;} \
+else if (wid == 8) {fun<8> arg;} \
+else if (wid == 64) {fun<64> arg;} \
+else {REALM_ASSERT_DEBUG(false); fun<0> arg;}

#define REALM_TEMPEX2(fun, targ, wid, arg) \
-    if (wid == 16) {fun<targ, 16> arg;} \
-    else if (wid == 32) {fun<targ, 32> arg;} \
-    else if (wid == 0) {fun<targ, 0> arg;} \
-    else if (wid == 1) {fun<targ, 1> arg;} \
-    else if (wid == 2) {fun<targ, 2> arg;} \
-    else if (wid == 4) {fun<targ, 4> arg;} \
-    else if (wid == 8) {fun<targ, 8> arg;} \
-    else if (wid == 64) {fun<targ, 64> arg;} \
-    else {REALM_ASSERT_DEBUG(false); fun<targ, 0> arg;}
+if (wid == 16) {fun<targ, 16> arg;} \
+else if (wid == 32) {fun<targ, 32> arg;} \
+else if (wid == 0) {fun<targ, 0> arg;} \
+else if (wid == 1) {fun<targ, 1> arg;} \
+else if (wid == 2) {fun<targ, 2> arg;} \
+else if (wid == 4) {fun<targ, 4> arg;} \
+else if (wid == 8) {fun<targ, 8> arg;} \
+else if (wid == 64) {fun<targ, 64> arg;} \
+else {REALM_ASSERT_DEBUG(false); fun<targ, 0> arg;}

#define REALM_TEMPEX3(fun, targ1, wid, targ3, arg) \
-    if (wid == 16) {fun<targ1, 16, targ3> arg;} \
-    else if (wid == 32) {fun<targ1, 32, targ3> arg;} \
-    else if (wid == 0) {fun<targ1, 0, targ3> arg;} \
-    else if (wid == 1) {fun<targ1, 1, targ3> arg;} \
-    else if (wid == 2) {fun<targ1, 2, targ3> arg;} \
-    else if (wid == 4) {fun<targ1, 4, targ3> arg;} \
-    else if (wid == 8) {fun<targ1, 8, targ3> arg;} \
-    else if (wid == 64) {fun<targ1, 64, targ3> arg;} \
-    else {REALM_ASSERT_DEBUG(false); fun<targ1, 0, targ3> arg;}
+if (wid == 16) {fun<targ1, 16, targ3> arg;} \
+else if (wid == 32) {fun<targ1, 32, targ3> arg;} \
+else if (wid == 0) {fun<targ1, 0, targ3> arg;} \
+else if (wid == 1) {fun<targ1, 1, targ3> arg;} \
+else if (wid == 2) {fun<targ1, 2, targ3> arg;} \
+else if (wid == 4) {fun<targ1, 4, targ3> arg;} \
+else if (wid == 8) {fun<targ1, 8, targ3> arg;} \
+else if (wid == 64) {fun<targ1, 64, targ3> arg;} \
+else {REALM_ASSERT_DEBUG(false); fun<targ1, 0, targ3> arg;}

#define REALM_TEMPEX4(fun, targ1, targ3, targ4, wid, arg) \
-    if (wid == 16) {fun<targ1, targ3, targ4, 16> arg;} \
-    else if (wid == 32) {fun<targ1, targ3, targ4, 32> arg;} \
-    else if (wid == 0) {fun<targ1, targ3, targ4, 0> arg;} \
-    else if (wid == 1) {fun<targ1, targ3, targ4, 1> arg;} \
-    else if (wid == 2) {fun<targ1, targ3, targ4, 2> arg;} \
-    else if (wid == 4) {fun<targ1, targ3, targ4, 4> arg;} \
-    else if (wid == 8) {fun<targ1, targ3, targ4, 8> arg;} \
-    else if (wid == 64) {fun<targ1, targ3, targ4, 64> arg;} \
-    else {REALM_ASSERT_DEBUG(false); fun<targ1, targ3, targ4, 0> arg;}
+if (wid == 16) {fun<targ1, targ3, targ4, 16> arg;} \
+else if (wid == 32) {fun<targ1, targ3, targ4, 32> arg;} \
+else if (wid == 0) {fun<targ1, targ3, targ4, 0> arg;} \
+else if (wid == 1) {fun<targ1, targ3, targ4, 1> arg;} \
+else if (wid == 2) {fun<targ1, targ3, targ4, 2> arg;} \
+else if (wid == 4) {fun<targ1, targ3, targ4, 4> arg;} \
+else if (wid == 8) {fun<targ1, targ3, targ4, 8> arg;} \
+else if (wid == 64) {fun<targ1, targ3, targ4, 64> arg;} \
+else {REALM_ASSERT_DEBUG(false); fun<targ1, targ3, targ4, 0> arg;}
// clang-format on

namespace realm {
@@ -194,21 +194,22 @@ class UnalignedWordIter {
     }
     // 'num_bits' number of bits which must be read
     // WARNING returned word may be garbage above the first 'num_bits' bits.
-    uint64_t get(size_t num_bits)
+    uint64_t consume(size_t num_bits)
     {
         auto first_word = m_word_ptr[0];
         uint64_t result = first_word >> m_in_word_offset;
         // note: above shifts in zeroes
-        if (m_in_word_offset + num_bits <= 64)
-            return result;
-        // if we're here, in_word_offset > 0
-        auto first_word_size = 64 - m_in_word_offset;
-        auto second_word = m_word_ptr[1];
-        result |= second_word << first_word_size;
-        // note: above shifts in zeroes below the bits we want
+        if (m_in_word_offset + num_bits > 64) {
+            // if we're here, in_word_offset > 0
+            auto first_word_size = 64 - m_in_word_offset;
+            auto second_word = m_word_ptr[1];
+            result |= second_word << first_word_size;
+            // note: above shifts in zeroes below the bits we want
+        }
+        _bump(num_bits);
         return result;
     }
-    uint64_t get_with_unsafe_prefetch(size_t num_bits)
+    uint64_t consume_with_unsafe_prefetch(size_t num_bits)
    {
        auto first_word = m_word_ptr[0];
        uint64_t result = first_word >> m_in_word_offset;
@@ -216,21 +217,24 @@ class UnalignedWordIter {
        auto first_word_size = 64 - m_in_word_offset;
        auto second_word = m_word_ptr[1];
        REALM_ASSERT_DEBUG(num_bits <= 64);
-        result |= (m_in_word_offset + num_bits > 64) ? (second_word << first_word_size) : 0;
+        if (num_bits > first_word_size)
+            result |= second_word << first_word_size;
        // note: above shifts in zeroes below the bits we want
+        _bump(num_bits);
        return result;
    }
+
+private:
+    const uint64_t* m_word_ptr;
+    unsigned m_in_word_offset;
+
    // bump the iterator the specified number of bits
-    void bump(size_t num_bits)
+    void _bump(size_t num_bits)
    {
        auto total_offset = m_in_word_offset + num_bits;
        m_word_ptr += total_offset >> 6;
        m_in_word_offset = total_offset & 0x3F;
    }
-
-private:
-    const uint64_t* m_word_ptr;
-    unsigned m_in_word_offset;
};

// Read a bit field of up to 64 bits.
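Fusing the old get/bump pair into consume() means a scan can no longer read a word and forget to advance. A minimal sketch of how a caller might stream fixed-width fields with it, assuming only the two-argument constructor and consume() used elsewhere in this patch (the helper itself is illustrative, not part of the change; the mask is needed because consume() may return garbage above the requested bits):

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical helper: sum 'count' fields of 'width' bits (1..64) starting at
// bit 'offset' in 'data'.
uint64_t sum_fields(const uint64_t* data, size_t offset, unsigned width, size_t count)
{
    realm::UnalignedWordIter it(data, offset);
    const uint64_t mask = ~0ULL >> (64 - width); // keep only the low 'width' bits
    uint64_t sum = 0;
    for (size_t i = 0; i < count; ++i)
        sum += it.consume(width) & mask; // consume() reads and bumps in one call
    return sum;
}
```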
@@ -241,16 +245,19 @@ class UnalignedWordIter {
// iterator useful for scanning arrays faster than by indexing each element
// supports arrays of pairs by differentiating field size and step size.
class BfIterator {
+    friend class FlexCompressor;
+    friend class PackedCompressor;
+
public:
    BfIterator() = default;
    BfIterator(const BfIterator&) = default;
    BfIterator(BfIterator&&) = default;
    BfIterator& operator=(const BfIterator&) = default;
    BfIterator& operator=(BfIterator&&) = default;
-    BfIterator(uint64_t* data_area, size_t initial_offset, size_t field_size, size_t step_size, size_t index)
+    BfIterator(uint64_t* data_area, size_t initial_offset, uint8_t field_size, uint8_t step_size, size_t index)
        : data_area(data_area)
-        , field_size(static_cast<uint8_t>(field_size))
-        , step_size(static_cast<uint8_t>(step_size))
+        , field_size(field_size)
+        , step_size(step_size)
        , offset(initial_offset)
    {
        if (field_size < 64)
@@ -376,13 +383,13 @@ inline bool operator<(const BfIterator& a, const BfIterator& b)
    return a.field_position < b.field_position;
}

-inline uint64_t read_bitfield(uint64_t* data_area, size_t field_position, size_t width)
+inline uint64_t read_bitfield(uint64_t* data_area, size_t field_position, uint8_t width)
{
    BfIterator it(data_area, field_position, width, width, 0);
    return *it;
}

-inline void write_bitfield(uint64_t* data_area, size_t field_position, size_t width, uint64_t value)
+inline void write_bitfield(uint64_t* data_area, size_t field_position, uint8_t width, uint64_t value)
{
    BfIterator it(data_area, field_position, width, width, 0);
    it.set_value(value);
@@ -414,26 +421,26 @@ inline std::pair<int64_t, int64_t> get_two(const char* data, size_t width, size_

/* Subword parallel search

-   The following provides facilities for subword parallel search for bitfields of any size.
-   To simplify, the first bitfield must be aligned within the word: it must occupy the lowest
-   bits of the word.
+   The following provides facilities for subword parallel search for bitfields of any size.
+   To simplify, the first bitfield must be aligned within the word: it must occupy the lowest
+   bits of the word.

-   In general the metods here return a vector with the most significant bit in each field
-   marking that a condition was met when comparing the corresponding pair of fields in two
-   vectors. Checking if any field meets a condition is as simple as comparing the return
-   vector against 0. Finding the first to meet a condition is also supported.
+   In general the methods here return a vector with the most significant bit in each field
+   marking that a condition was met when comparing the corresponding pair of fields in two
+   vectors. Checking if any field meets a condition is as simple as comparing the return
+   vector against 0. Finding the first to meet a condition is also supported.

-   Vectors are "split" into fields according to a MSB vector, wich indicates the most
-   significant bit of each field. The MSB must be passed in as an argument to most
-   bit field comparison functions. It can be generated by the field_sign_bit template.
+   Vectors are "split" into fields according to an MSB vector, which indicates the most
+   significant bit of each field. The MSB must be passed in as an argument to most
+   bit field comparison functions. It can be generated by the field_sign_bit template.

-   The simplest condition to test is any_field_NE(A,B), where A and B are words.
-   This condition should be true if any bitfield in A is not equal to the corresponding
-   field in B.
+   The simplest condition to test is any_field_NE(A,B), where A and B are words.
+   This condition should be true if any bitfield in A is not equal to the corresponding
+   field in B.
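The any_field_NE idea described above can be demonstrated in isolation. A self-contained sketch for 8-bit fields, using the trial-subtraction trick this comment block refers to (the helper names are mine, not the header's):

```cpp
#include <cassert>
#include <cstdint>

constexpr uint64_t rep8(uint64_t v) { return ~0ULL / 0xFF * v; } // value in every byte
constexpr uint64_t MSBs = rep8(0x80);                            // sign bit of each 8-bit field

// MSB set in every byte where the fields of A and B differ:
// 0 != (A ^ B) is the same as asking whether 0 - (A ^ B) borrows out of the MSB.
uint64_t any_field_ne(uint64_t A, uint64_t B)
{
    uint64_t X = A ^ B;                  // zero exactly where fields are equal
    uint64_t left_iso = MSBs;            // left operand is 0, MSBs clamped to 1
    uint64_t right_iso = X & ~MSBs;      // right operand with MSBs clamped to 0
    uint64_t borrows = ~(left_iso - right_iso); // borrows into each sign bit
    // overflow formula with A == 0: (~A & B) | (~A & borrows) simplifies to X | borrows
    return (X | borrows) & MSBs;
}

int main()
{
    uint64_t a = 0x1122334455667788ULL;
    uint64_t b = 0x1122334455667789ULL; // differs only in the lowest byte
    assert(any_field_ne(a, b) == 0x80); // only field 0 is flagged
    assert(any_field_ne(a, a) == 0);    // identical words: no field flagged
}
```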
- This is almost as simple as a direct word compare, but needs to take into account that - we may want to have part of the words undefined. -*/ + This is almost as simple as a direct word compare, but needs to take into account that + we may want to have part of the words undefined. + */ constexpr uint8_t num_fields_table[65] = {0, 64, 32, 21, 16, 12, 10, 9, // 0-7 8, 7, 6, 5, 5, 4, 4, 4, // 8-15 4, 3, 3, 3, 3, 3, 2, 2, // 16-23 @@ -521,127 +528,6 @@ constexpr uint64_t field_sign_bit(int width) return populate(width, 1ULL << (width - 1)); } -/* Unsigned LT. - - This can be determined by trial subtaction. However, some care must be exercised - since simply subtracting one vector from another will allow carries from one - bitfield to flow into the next one. To avoid this, we isolate bitfields by clamping - the MSBs to 1 in A and 0 in B before subtraction. After the subtraction the MSBs in - the result indicate borrows from the MSB. We then compute overflow (borrow OUT of MSB) - using boolean logic as described below. - - Unsigned LT is also used to find all zero fields or all non-zero fields, so it is - the backbone of all comparisons returning vectors. -*/ - -// compute the overflows in unsigned trial subtraction A-B. The overflows -// will be marked by 1 in the sign bit of each field in the result. Other -// bits in the result are zero. -// Overflow are detected for each field pair where A is less than B. -inline uint64_t unsigned_LT_vector(uint64_t MSBs, uint64_t A, uint64_t B) -{ - // 1. compute borrow from most significant bit - // Isolate bitfields inside A and B before subtraction (prevent carries from spilling over) - // do this by clamping most significant bit in A to 1, and msb in B to 0 - auto A_isolated = A | MSBs; // 1 op - auto B_isolated = B & ~MSBs; // 2 ops - auto borrows_into_sign_bit = ~(A_isolated - B_isolated); // 2 ops (total latency 4) - - // 2. determine what subtraction against most significant bit would give: - // A B borrow-in: (A-B-borrow-in) - // 0 0 0 (0-0-0) = 0 - // 0 0 1 (0-0-1) = 1 + borrow-out - // 0 1 0 (0-1-0) = 1 + borrow-out - // 0 1 1 (0-1-1) = 0 + borrow-out - // 1 0 0 (1-0-0) = 1 - // 1 0 1 (1-0-1) = 0 - // 1 1 0 (1-1-0) = 0 - // 1 1 1 (1-1-1) = 1 + borrow-out - // borrow-out = (~A & B) | (~A & borrow-in) | (A & B & borrow-in) - // The overflows are simply the borrow-out, now encoded into the sign bits of each field. - auto overflows = (~A & B) | (~A & borrows_into_sign_bit) | (A & B & borrows_into_sign_bit); - // ^ 6 ops, total latency 6 (4+2) - return overflows & MSBs; // 1 op, total latency 7 - // total of 12 ops and a latency of 7. On a beefy CPU 3-4 of those can run in parallel - // and still reach a combined latency of 10 or less. -} - -inline uint64_t find_all_fields_unsigned_LT(uint64_t MSBs, uint64_t A, uint64_t B) -{ - return unsigned_LT_vector(MSBs, A, B); -} - -inline uint64_t find_all_fields_NE(uint64_t MSBs, uint64_t A, uint64_t B) -{ - // 0 != A^B, same as asking 0 - (A^B) overflows. - return unsigned_LT_vector(MSBs, 0, A ^ B); -} - -inline uint64_t find_all_fields_EQ(uint64_t MSBs, uint64_t A, uint64_t B) -{ - // get the fields which are EQ and negate the result - auto all_fields_NE = find_all_fields_NE(MSBs, A, B); - auto all_fields_NE_negated = ~all_fields_NE; - // must filter the negated vector so only MSB are left. - return MSBs & all_fields_NE_negated; -} - -inline uint64_t find_all_fields_unsigned_LE(uint64_t MSBs, uint64_t A, uint64_t B) -{ - // Now A <= B is the same as !(A > B) so... 
- // reverse A and B to turn (A>B) --> (B B is the same as B < A - return find_all_fields_signed_LT(MSBs, B, A); -} - -inline uint64_t find_all_fields_signed_GE(uint64_t MSBs, uint64_t A, uint64_t B) -{ - // A >= B is the same as B <= A - return find_all_fields_signed_LE(MSBs, B, A); -} - constexpr uint32_t inverse_width[65] = { 65536 * 64 / 1, // never used 65536 * 64 / 1, 65536 * 64 / 2, 65536 * 64 / 3, 65536 * 64 / 4, 65536 * 64 / 5, 65536 * 64 / 6, @@ -706,12 +592,10 @@ size_t parallel_subword_find(VectorCompare vector_compare, const uint64_t* data, uint64_t found_vector = 0; while (total_bit_count_left >= fast_scan_limit) { // unrolling 2x - const auto word0 = it.get_with_unsafe_prefetch(bit_count_pr_iteration); - it.bump(bit_count_pr_iteration); - const auto word1 = it.get_with_unsafe_prefetch(bit_count_pr_iteration); + const auto word0 = it.consume_with_unsafe_prefetch(bit_count_pr_iteration); + const auto word1 = it.consume_with_unsafe_prefetch(bit_count_pr_iteration); auto found_vector0 = vector_compare(MSBs, word0, search_vector); auto found_vector1 = vector_compare(MSBs, word1, search_vector); - it.bump(bit_count_pr_iteration); if (found_vector0) { const auto sub_word_index = first_field_marked(width, found_vector0); return start + sub_word_index; @@ -723,8 +607,10 @@ size_t parallel_subword_find(VectorCompare vector_compare, const uint64_t* data, total_bit_count_left -= 2 * bit_count_pr_iteration; start += 2 * field_count; } + + // One word at a time while (total_bit_count_left >= bit_count_pr_iteration) { - const auto word = it.get(bit_count_pr_iteration); + const auto word = it.consume(bit_count_pr_iteration); found_vector = vector_compare(MSBs, word, search_vector); if (found_vector) { const auto sub_word_index = first_field_marked(width, found_vector); @@ -732,10 +618,12 @@ size_t parallel_subword_find(VectorCompare vector_compare, const uint64_t* data, } total_bit_count_left -= bit_count_pr_iteration; start += field_count; - it.bump(bit_count_pr_iteration); } - if (total_bit_count_left) { // final subword, may be partial - const auto word = it.get(total_bit_count_left); // <-- limit lookahead to avoid touching memory beyond array + + // final subword, may be partial + if (total_bit_count_left) { + // limit lookahead to avoid touching memory beyond array + const auto word = it.consume(total_bit_count_left); found_vector = vector_compare(MSBs, word, search_vector); auto last_word_mask = 0xFFFFFFFFFFFFFFFFULL >> (64 - total_bit_count_left); found_vector &= last_word_mask; diff --git a/src/realm/array_integer.cpp b/src/realm/array_integer.cpp index 8cf854c671f..f86871c3225 100644 --- a/src/realm/array_integer.cpp +++ b/src/realm/array_integer.cpp @@ -24,6 +24,12 @@ using namespace realm; +ArrayInteger::ArrayInteger(Allocator& allocator) noexcept + : Array(allocator) +{ + m_is_inner_bptree_node = false; +} + Mixed ArrayInteger::get_any(size_t ndx) const { return Mixed(get(ndx)); @@ -112,7 +118,6 @@ void ArrayIntNull::replace_nulls_with(int64_t new_null) } } - void ArrayIntNull::avoid_null_collision(int64_t value) { if (m_width == 64) { diff --git a/src/realm/array_integer.hpp b/src/realm/array_integer.hpp index 3b50d3757d1..b8739414091 100644 --- a/src/realm/array_integer.hpp +++ b/src/realm/array_integer.hpp @@ -29,16 +29,10 @@ namespace realm { class ArrayInteger : public Array, public ArrayPayload { public: using value_type = int64_t; - - using Array::add; using Array::find_first; - using Array::get; - using Array::insert; - using Array::move; - using Array::set; explicit 
ArrayInteger(Allocator&) noexcept; - ~ArrayInteger() noexcept override {} + ~ArrayInteger() noexcept override = default; static value_type default_value(bool) { @@ -171,12 +165,6 @@ class ArrayIntNull : public Array, public ArrayPayload { // Implementation: -inline ArrayInteger::ArrayInteger(Allocator& allocator) noexcept - : Array(allocator) -{ - m_is_inner_bptree_node = false; -} - inline ArrayIntNull::ArrayIntNull(Allocator& allocator) noexcept : Array(allocator) { diff --git a/src/realm/array_integer_tpl.hpp b/src/realm/array_integer_tpl.hpp index 9d96584ab3c..0914b1bae65 100644 --- a/src/realm/array_integer_tpl.hpp +++ b/src/realm/array_integer_tpl.hpp @@ -27,9 +27,10 @@ namespace realm { template bool ArrayInteger::find(value_type value, size_t start, size_t end, QueryStateBase* state) const { - return ArrayWithFind(*this).find(value, start, end, 0, state); + return Array::find(value, start, end, 0, state); } + inline bool ArrayIntNull::find_impl(int cond, value_type value, size_t start, size_t end, QueryStateBase* state) const { switch (cond) { @@ -74,9 +75,7 @@ bool ArrayIntNull::find_impl(value_type opt_value, size_t start, size_t end, Que value = *opt_value; } } - - // Fall back to plain Array find. - return ArrayWithFind(*this).find(value, start2, end2, baseindex2, state); + return Array::find(value, start2, end2, baseindex2, state); } else { cond c; diff --git a/src/realm/array_mixed.cpp b/src/realm/array_mixed.cpp index b0542da93b0..7d00991ad5b 100644 --- a/src/realm/array_mixed.cpp +++ b/src/realm/array_mixed.cpp @@ -274,6 +274,7 @@ size_t ArrayMixed::find_first(Mixed value, size_t begin, size_t end) const noexc DataType type = value.get_type(); if (end == realm::npos) end = size(); + for (size_t i = begin; i < end; i++) { if (Mixed::data_types_are_comparable(this->get_type(i), type) && get(i) == value) { return i; diff --git a/src/realm/array_unsigned.cpp b/src/realm/array_unsigned.cpp index e1aac8dbf80..938fe5aece8 100644 --- a/src/realm/array_unsigned.cpp +++ b/src/realm/array_unsigned.cpp @@ -71,6 +71,7 @@ inline uint64_t ArrayUnsigned::_get(size_t ndx, uint8_t width) const return reinterpret_cast(m_data)[ndx]; } return get_direct(m_data, width, ndx); + REALM_UNREACHABLE(); } void ArrayUnsigned::create(size_t initial_size, uint64_t ubound_value) @@ -168,7 +169,8 @@ size_t ArrayUnsigned::upper_bound(uint64_t value) const noexcept void ArrayUnsigned::insert(size_t ndx, uint64_t value) { REALM_ASSERT_DEBUG(m_width >= 8); - bool do_expand = value > m_ubound; + + bool do_expand = value > (uint64_t)m_ubound; const uint8_t old_width = m_width; const uint8_t new_width = do_expand ? bit_width(value) : m_width; const auto old_size = m_size; @@ -215,6 +217,7 @@ void ArrayUnsigned::insert(size_t ndx, uint64_t value) void ArrayUnsigned::erase(size_t ndx) { REALM_ASSERT_DEBUG(m_width >= 8); + copy_on_write(); // Throws size_t w = m_width >> 3; diff --git a/src/realm/array_unsigned.hpp b/src/realm/array_unsigned.hpp index f1926ec7fc0..3e13b35e8dd 100644 --- a/src/realm/array_unsigned.hpp +++ b/src/realm/array_unsigned.hpp @@ -19,7 +19,7 @@ #ifndef REALM_ARRAY_UNSIGNED_HPP #define REALM_ARRAY_UNSIGNED_HPP -#include +#include namespace realm { @@ -81,13 +81,13 @@ class ArrayUnsigned : public Node { } private: - uint_least8_t m_width = 0; // Size of an element (meaning depend on type of array). 
- uint64_t m_ubound; // max number that can be stored with current m_width + uint_least8_t m_width = 0; + uint64_t m_ubound = 0; // max is 0xFFFFFFFFFFFFFFFFLL void init_from_mem(MemRef mem) noexcept { - Node::init_from_mem(mem); - set_width(get_width_from_header(get_header())); + auto header = Node::init_from_mem(mem); + set_width(get_width_from_header(header)); } void adjust(size_t ndx, int64_t diff) diff --git a/src/realm/array_with_find.cpp b/src/realm/array_with_find.cpp index e33513ef28e..2cf528a5c47 100644 --- a/src/realm/array_with_find.cpp +++ b/src/realm/array_with_find.cpp @@ -34,32 +34,6 @@ void ArrayWithFind::find_all(IntegerColumn* result, int64_t value, size_t col_of return; } - -bool ArrayWithFind::find(int cond, int64_t value, size_t start, size_t end, size_t baseindex, - QueryStateBase* state) const -{ - if (cond == cond_Equal) { - return find(value, start, end, baseindex, state); - } - if (cond == cond_NotEqual) { - return find(value, start, end, baseindex, state); - } - if (cond == cond_Greater) { - return find(value, start, end, baseindex, state); - } - if (cond == cond_Less) { - return find(value, start, end, baseindex, state); - } - if (cond == cond_None) { - return find(value, start, end, baseindex, state); - } - else if (cond == cond_LeftNotNull) { - return find(value, start, end, baseindex, state); - } - REALM_ASSERT_DEBUG(false); - return false; -} - size_t ArrayWithFind::first_set_bit(uint32_t v) const { // (v & -v) is UB when v is INT_MIN @@ -79,5 +53,15 @@ size_t ArrayWithFind::first_set_bit64(int64_t v) const return first_set_bit(v1) + 32; } +bool ArrayWithFind::find_all_will_match(size_t start2, size_t end, size_t baseindex, QueryStateBase* state) const +{ + REALM_ASSERT_DEBUG(state->match_count() < state->limit()); + size_t process = state->limit() - state->match_count(); + size_t end2 = end - start2 > process ? start2 + process : end; + for (; start2 < end2; start2++) + if (!state->match(start2 + baseindex)) + return false; + return true; +} } // namespace realm diff --git a/src/realm/array_with_find.hpp b/src/realm/array_with_find.hpp index 81d86d47e44..b35ed85e808 100644 --- a/src/realm/array_with_find.hpp +++ b/src/realm/array_with_find.hpp @@ -89,8 +89,6 @@ class ArrayWithFind { } // Main finding function - used for find_first, find_all, sum, max, min, etc. - bool find(int cond, int64_t value, size_t start, size_t end, size_t baseindex, QueryStateBase* state) const; - template bool find(int64_t value, size_t start, size_t end, size_t baseindex, QueryStateBase* state) const; @@ -161,7 +159,6 @@ class ArrayWithFind { private: const Array& m_array; - template bool find_all_will_match(size_t start, size_t end, size_t baseindex, QueryStateBase* state) const; }; //************************************************************************************* @@ -276,19 +273,6 @@ uint64_t ArrayWithFind::cascade(uint64_t a) const } } -template -REALM_NOINLINE bool ArrayWithFind::find_all_will_match(size_t start2, size_t end, size_t baseindex, - QueryStateBase* state) const -{ - REALM_ASSERT_DEBUG(state->match_count() < state->limit()); - size_t process = state->limit() - state->match_count(); - size_t end2 = end - start2 > process ? start2 + process : end; - for (; start2 < end2; start2++) - if (!state->match(start2 + baseindex)) - return false; - return true; -} - // This is the main finding function for Array. Other finding functions are just // wrappers around this one. 
Search for 'value' using condition cond (Equal, // NotEqual, Less, etc) and call QueryStateBase::match() for each match. Break and @@ -318,7 +302,7 @@ bool ArrayWithFind::find_optimized(int64_t value, size_t start, size_t end, size // optimization if all items are guaranteed to match (such as cond == NotEqual && value == 100 && m_ubound == 15) if (c.will_match(value, lbound, ubound)) { - return find_all_will_match(start2, end, baseindex, state); + return find_all_will_match(start2, end, baseindex, state); } // finder cannot handle this bitwidth @@ -567,14 +551,18 @@ inline bool ArrayWithFind::compare_equality(int64_t value, size_t start, size_t QueryStateBase* state) const { REALM_ASSERT_DEBUG(start <= m_array.m_size && (end <= m_array.m_size || end == size_t(-1)) && start <= end); + REALM_ASSERT_DEBUG(width == m_array.m_width); - size_t ee = round_up(start, 64 / no0(width)); + auto v = 64 / no0(width); + size_t ee = round_up(start, v); ee = ee > end ? end : ee; - for (; start < ee; ++start) - if (eq ? (m_array.get(start) == value) : (m_array.get(start) != value)) { + for (; start < ee; ++start) { + auto v = Array::get(m_array, start); + if (eq ? (v == value) : (v != value)) { if (!state->match(start + baseindex)) return false; } + } if (start >= end) return true; @@ -624,7 +612,7 @@ inline bool ArrayWithFind::compare_equality(int64_t value, size_t start, size_t } while (start < end) { - if (eq ? m_array.get(start) == value : m_array.get(start) != value) { + if (eq ? Array::get(m_array, start) == value : Array::get(m_array, start) != value) { if (!state->match(start + baseindex)) { return false; } @@ -903,8 +891,8 @@ bool ArrayWithFind::compare_relation(int64_t value, size_t start, size_t end, si size_t ee = round_up(start, 64 / no0(bitwidth)); ee = ee > end ? end : ee; for (; start < ee; start++) { - if (gt ? (m_array.get(start) > value) : (m_array.get(start) < value)) { - if (!state->match(start + baseindex, m_array.get(start))) + if (gt ? (Array::get(m_array, start) > value) : (Array::get(m_array, start) < value)) { + if (!state->match(start + baseindex, Array::get(m_array, start))) return false; } } @@ -969,7 +957,7 @@ bool ArrayWithFind::compare_relation(int64_t value, size_t start, size_t end, si // Test unaligned end and/or values of width > 16 manually while (start < end) { - if (gt ? m_array.get(start) > value : m_array.get(start) < value) { + if (gt ? 
Array::get(m_array, start) > value : Array::get(m_array, start) < value) { if (!state->match(start + baseindex)) return false; } diff --git a/src/realm/group.cpp b/src/realm/group.cpp index ab3bef4c68a..eeecbaed4f5 100644 --- a/src/realm/group.cpp +++ b/src/realm/group.cpp @@ -1012,10 +1012,6 @@ ref_type Group::DefaultTableWriter::write_names(_impl::OutputStream& out) } ref_type Group::DefaultTableWriter::write_tables(_impl::OutputStream& out) { - // bool deep = true; // Deep - // bool only_if_modified = false; // Always - // bool compress = false; // true; - // return m_group->m_tables.write(out, deep, only_if_modified, compress); // Throws return m_group->typed_write_tables(out); } @@ -1141,7 +1137,6 @@ void Group::write(std::ostream& out, int file_format_version, TableWriter& table REALM_ASSERT(version_number == 0 || version_number == 1); } else { - // table_writer.typed_print(""); // Because we need to include the total logical file size in the // top-array, we have to start by writing everything except the // top-array, and then finally compute and write a correct version of @@ -1151,7 +1146,8 @@ void Group::write(std::ostream& out, int file_format_version, TableWriter& table // DB to compact the database by writing only the live data // into a separate file. ref_type names_ref = table_writer.write_names(out_2); // Throws - ref_type tables_ref = table_writer.write_tables(out_2); // Throws + ref_type tables_ref = table_writer.write_tables(out_2); + SlabAlloc new_alloc; new_alloc.attach_empty(); // Throws Array top(new_alloc); @@ -1214,8 +1210,8 @@ void Group::write(std::ostream& out, int file_format_version, TableWriter& table top.set(2, RefOrTagged::make_tagged(final_file_size)); // Throws // Write the top array - bool deep = false; // Shallow - bool only_if_modified = false; // Always + bool deep = false; // Shallow + bool only_if_modified = false; // Always bool compress = false; top.write(out_2, deep, only_if_modified, compress); // Throws REALM_ASSERT_3(size_t(out_2.get_ref_of_next_array()), ==, final_file_size); diff --git a/src/realm/group.hpp b/src/realm/group.hpp index 434c0258336..08ddd9acd44 100644 --- a/src/realm/group.hpp +++ b/src/realm/group.hpp @@ -1133,6 +1133,7 @@ class Group::TableWriter { { m_group->typed_print(prefix); } + virtual ~TableWriter() noexcept {} void set_group(const Group* g) diff --git a/src/realm/group_writer.cpp b/src/realm/group_writer.cpp index 2990e010d3a..4ce470fec62 100644 --- a/src/realm/group_writer.cpp +++ b/src/realm/group_writer.cpp @@ -41,15 +41,16 @@ class InMemoryWriter : public _impl::ArrayWriterBase { , m_alloc(owner.m_alloc) { } - ref_type write_array(const char* data, size_t size, uint32_t checksum) override + ref_type write_array(const char* data, size_t size, uint32_t checksum, uint32_t checksum_bytes) override { + REALM_ASSERT(checksum_bytes == 4 || checksum_bytes == 2); size_t pos = m_owner.get_free_space(size); // Write the block char* dest_addr = translate(pos); REALM_ASSERT_RELEASE(dest_addr && (reinterpret_cast(dest_addr) & 7) == 0); - memcpy(dest_addr, &checksum, 4); - memcpy(dest_addr + 4, data + 4, size - 4); + memcpy(dest_addr, &checksum, checksum_bytes); + memcpy(dest_addr + checksum_bytes, data + checksum_bytes, size - checksum_bytes); // return ref of the written array ref_type ref = to_ref(pos); return ref; @@ -1339,8 +1340,9 @@ bool inline is_aligned(char* addr) return (as_binary & 7) == 0; } -ref_type GroupWriter::write_array(const char* data, size_t size, uint32_t checksum) +ref_type GroupWriter::write_array(const 
char* data, size_t size, uint32_t checksum, uint32_t checksum_bytes) { + REALM_ASSERT(checksum_bytes == 4 || checksum_bytes == 2); // Get position of free space to write in (expanding file if needed) size_t pos = get_free_space(size); @@ -1349,8 +1351,8 @@ ref_type GroupWriter::write_array(const char* data, size_t size, uint32_t checks char* dest_addr = window->translate(pos); REALM_ASSERT_RELEASE(is_aligned(dest_addr)); window->encryption_read_barrier(dest_addr, size); - memcpy(dest_addr, &checksum, 4); - memcpy(dest_addr + 4, data + 4, size - 4); + memcpy(dest_addr, &checksum, checksum_bytes); + memcpy(dest_addr + checksum_bytes, data + checksum_bytes, size - checksum_bytes); window->encryption_write_barrier(dest_addr, size); // return ref of the written array ref_type ref = to_ref(pos); diff --git a/src/realm/group_writer.hpp b/src/realm/group_writer.hpp index 438879114c6..b6caed048f6 100644 --- a/src/realm/group_writer.hpp +++ b/src/realm/group_writer.hpp @@ -135,7 +135,7 @@ class GroupWriter : public _impl::ArrayWriterBase { size_t get_file_size() const noexcept; - ref_type write_array(const char*, size_t, uint32_t) override; + ref_type write_array(const char*, size_t, uint32_t, uint32_t) override; #ifdef REALM_DEBUG void dump(); diff --git a/src/realm/impl/array_writer.hpp b/src/realm/impl/array_writer.hpp index 55fd42574bc..4096805e0fa 100644 --- a/src/realm/impl/array_writer.hpp +++ b/src/realm/impl/array_writer.hpp @@ -39,7 +39,7 @@ class ArrayWriterBase { /// /// Returns the ref (position in the target stream) of the written copy of /// the specified array data. - virtual ref_type write_array(const char* data, size_t size, uint32_t checksum) = 0; + virtual ref_type write_array(const char* data, size_t size, uint32_t checksum, uint32_t checksum_bytes) = 0; }; } // namespace _impl diff --git a/src/realm/impl/output_stream.cpp b/src/realm/impl/output_stream.cpp index 04db91235b6..1b0d870aa2f 100644 --- a/src/realm/impl/output_stream.cpp +++ b/src/realm/impl/output_stream.cpp @@ -39,17 +39,18 @@ void OutputStream::write(const char* data, size_t size) } -ref_type OutputStream::write_array(const char* data, size_t size, uint32_t checksum) +ref_type OutputStream::write_array(const char* data, size_t size, uint32_t checksum, uint32_t checksum_bytes) { REALM_ASSERT(size % 8 == 0); + REALM_ASSERT(checksum_bytes == 4 || checksum_bytes == 2); const char* data_1 = data; size_t size_1 = size; const char* cksum_bytes = reinterpret_cast(&checksum); - m_out.write(cksum_bytes, 4); // Throws - data_1 += 4; - size_1 -= 4; + m_out.write(cksum_bytes, checksum_bytes); // Throws + data_1 += checksum_bytes; + size_1 -= checksum_bytes; do_write(data_1, size_1); // Throws diff --git a/src/realm/impl/output_stream.hpp b/src/realm/impl/output_stream.hpp index eb459900485..ba287f92c30 100644 --- a/src/realm/impl/output_stream.hpp +++ b/src/realm/impl/output_stream.hpp @@ -41,7 +41,7 @@ class OutputStream : public ArrayWriterBase { void write(const char* data, size_t size); - ref_type write_array(const char* data, size_t size, uint32_t checksum) override; + ref_type write_array(const char* data, size_t size, uint32_t checksum, uint32_t checksum_bytes) override; private: ref_type m_next_ref; diff --git a/src/realm/integer_compressor.cpp b/src/realm/integer_compressor.cpp new file mode 100644 index 00000000000..5246928e775 --- /dev/null +++ b/src/realm/integer_compressor.cpp @@ -0,0 +1,318 @@ +/************************************************************************* + * + * Copyright 2023 Realm Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace realm; + +namespace { + +template +inline void init_compress_array(Array& arr, size_t byte_size, Arg&&... args) +{ + Allocator& allocator = arr.get_alloc(); + auto mem = allocator.alloc(byte_size); + auto h = mem.get_addr(); + T::init_header(h, std::forward(args)...); + NodeHeader::set_capacity_in_header(byte_size, h); + arr.init_from_mem(mem); +} + +} // namespace + +bool IntegerCompressor::always_compress(const Array& origin, Array& arr, NodeHeader::Encoding encoding) const +{ + using Encoding = NodeHeader::Encoding; + std::vector values; + std::vector indices; + compress_values(origin, values, indices); + if (!values.empty()) { + const uint8_t flags = NodeHeader::get_flags(origin.get_header()); + uint8_t v_width = std::max(Node::signed_to_num_bits(values.front()), Node::signed_to_num_bits(values.back())); + + if (encoding == Encoding::Packed) { + const auto packed_size = NodeHeader::calc_size(indices.size(), v_width, NodeHeader::Encoding::Packed); + init_compress_array(arr, packed_size, flags, v_width, origin.size()); + PackedCompressor::copy_data(origin, arr); + } + else if (encoding == Encoding::Flex) { + uint8_t ndx_width = NodeHeader::unsigned_to_num_bits(values.size()); + const auto flex_size = NodeHeader::calc_size(values.size(), indices.size(), v_width, ndx_width); + init_compress_array(arr, flex_size, flags, v_width, ndx_width, values.size(), + indices.size()); + FlexCompressor::copy_data(arr, values, indices); + } + else { + REALM_UNREACHABLE(); + } + return true; + } + return false; +} + +bool IntegerCompressor::compress(const Array& origin, Array& arr) const +{ + if (origin.m_width < 2 || origin.m_size == 0) + return false; + +#if REALM_COMPRESS + return always_compress(origin, arr, NodeHeader::Encoding::Flex); +#else + std::vector values; + std::vector indices; + compress_values(origin, values, indices); + REALM_ASSERT(!values.empty()); + const auto uncompressed_size = origin.get_byte_size(); + uint8_t ndx_width = NodeHeader::unsigned_to_num_bits(values.size()); + uint8_t v_width = std::max(Node::signed_to_num_bits(values.front()), Node::signed_to_num_bits(values.back())); + const auto packed_size = NodeHeader::calc_size(indices.size(), v_width, NodeHeader::Encoding::Packed); + const auto flex_size = NodeHeader::calc_size(values.size(), indices.size(), v_width, ndx_width); + // heuristic: only compress to packed if gain at least 11.1% + const auto adjusted_packed_size = packed_size + packed_size / 8; + // heuristic: only compress to flex if gain at least 20% + const auto adjusted_flex_size = flex_size + flex_size / 4; + if (adjusted_flex_size < adjusted_packed_size && adjusted_flex_size < uncompressed_size) { + const uint8_t flags = NodeHeader::get_flags(origin.get_header()); + init_compress_array(arr, flex_size, flags, v_width, 
ndx_width, values.size(), indices.size());
+        FlexCompressor::copy_data(arr, values, indices);
+        return true;
+    }
+    else if (adjusted_packed_size < uncompressed_size) {
+        const uint8_t flags = NodeHeader::get_flags(origin.get_header());
+        init_compress_array<PackedCompressor>(arr, packed_size, flags, v_width, origin.size());
+        PackedCompressor::copy_data(origin, arr);
+        return true;
+    }
+    return false;
+#endif
+}
+
+bool IntegerCompressor::decompress(Array& arr) const
+{
+    int64_t min_v = std::numeric_limits<int64_t>::max();
+    int64_t max_v = std::numeric_limits<int64_t>::min();
+    REALM_ASSERT_DEBUG(arr.is_attached());
+    auto values_fetcher = [&]() {
+        const auto sz = arr.size();
+        if (is_packed()) {
+            std::vector<int64_t> res;
+            res.reserve(sz);
+            for (size_t i = 0; i < sz; ++i) {
+                auto val = arr.get(i);
+                if (val > max_v)
+                    max_v = val;
+                if (val < min_v)
+                    min_v = val;
+                res.push_back(val);
+            }
+            return res;
+        }
+        min_v = FlexCompressor::min(*this);
+        max_v = FlexCompressor::max(*this);
+        return FlexCompressor::get_all(*this, 0, sz);
+    };
+    const auto& values = values_fetcher();
+    // do the reverse of compressing the array
+    REALM_ASSERT_DEBUG(!values.empty());
+    using Encoding = NodeHeader::Encoding;
+    const auto flags = NodeHeader::get_flags(arr.get_header());
+    const auto size = values.size();
+    const auto width = std::max(Array::bit_width(min_v), Array::bit_width(max_v));
+    REALM_ASSERT_DEBUG(width == 0 || width == 1 || width == 2 || width == 4 || width == 8 || width == 16 ||
+                       width == 32 || width == 64);
+    // 64 is some slab allocator magic number.
+    // The padding is needed in order to account for bit width expansion.
+    const auto byte_size = 64 + NodeHeader::calc_size(size, width, Encoding::WTypBits);
+    REALM_ASSERT_DEBUG(byte_size % 8 == 0); // nevertheless all the values may be aligned to 8
+
+    // Create new array with the correct width
+    const auto mem = arr.get_alloc().alloc(byte_size);
+    const auto header = mem.get_addr();
+    init_header(header, Encoding::WTypBits, flags, width, size);
+    NodeHeader::set_capacity_in_header(byte_size, header);
+
+    // Destroy old array before initializing
+    arr.destroy();
+    arr.init_from_mem(mem);
+
+    // This copies the bits straight back without doing any COW, since the array is simply being
+    // restored. Decompressing thus plays the same role for a compressed array that copy-on-write
+    // plays for a normal array, and yields the same result: we skip COW and just decompress.
+    auto setter = arr.m_vtable->setter;
+    for (size_t ndx = 0; ndx < size; ++ndx)
+        setter(arr, ndx, values[ndx]);
+
+    // very important: since the ref of the current array has changed, the parent must be informed.
+    // Otherwise we will lose the link between parent array and child array.
+    arr.update_parent();
+    REALM_ASSERT_DEBUG(width == arr.get_width());
+    REALM_ASSERT_DEBUG(arr.size() == values.size());
+
+    return true;
+}
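The encoding choice in compress() boils down to comparing three byte sizes with safety margins. A toy model of that decision, with the thresholds quoted from the code above (the sizes are made-up inputs, not API calls):

```cpp
#include <cstddef>

enum class Choice { None, Packed, Flex };

// Mirror of the compress() heuristic: packed must save at least ~11.1%
// (size + size/8 still below uncompressed), flex at least 20% (size + size/4).
Choice choose_encoding(size_t uncompressed, size_t packed, size_t flex)
{
    const size_t adjusted_packed = packed + packed / 8;
    const size_t adjusted_flex = flex + flex / 4;
    if (adjusted_flex < adjusted_packed && adjusted_flex < uncompressed)
        return Choice::Flex;
    if (adjusted_packed < uncompressed)
        return Choice::Packed;
    return Choice::None; // not worth it: keep the array uncompressed
}

// e.g. choose_encoding(1024, 700, 512) == Choice::Flex,
//      choose_encoding(1024, 700, 900) == Choice::Packed,
//      choose_encoding(1024, 1000, 990) == Choice::None.
```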
+
+bool IntegerCompressor::init(const char* h)
+{
+    m_encoding = NodeHeader::get_encoding(h);
+    // Avoid checking the wtype here; that would be one more access to the header, which we can skip.
+    // We just need to know if the encoding is packed or flex.
+    // This makes Array::init_from_mem faster.
+    if (REALM_LIKELY(!(is_packed() || is_flex())))
+        return false;
+
+    if (is_packed()) {
+        init_packed(h);
+    }
+    else {
+        init_flex(h);
+    }
+    return true;
+}
+int64_t IntegerCompressor::get_packed(const Array& arr, size_t ndx)
+{
+    return PackedCompressor::get(arr.m_integer_compressor, ndx);
+}
+
+int64_t IntegerCompressor::get_flex(const Array& arr, size_t ndx)
+{
+    return FlexCompressor::get(arr.m_integer_compressor, ndx);
+}
+
+std::vector<int64_t> IntegerCompressor::get_all_packed(const Array& arr, size_t begin, size_t end)
+{
+    return PackedCompressor::get_all(arr.m_integer_compressor, begin, end);
+}
+
+std::vector<int64_t> IntegerCompressor::get_all_flex(const Array& arr, size_t begin, size_t end)
+{
+    return FlexCompressor::get_all(arr.m_integer_compressor, begin, end);
+}
+
+void IntegerCompressor::get_chunk_packed(const Array& arr, size_t ndx, int64_t res[8])
+{
+    PackedCompressor::get_chunk(arr.m_integer_compressor, ndx, res);
+}
+
+void IntegerCompressor::get_chunk_flex(const Array& arr, size_t ndx, int64_t res[8])
+{
+    FlexCompressor::get_chunk(arr.m_integer_compressor, ndx, res);
+}
+
+void IntegerCompressor::set_packed(Array& arr, size_t ndx, int64_t val)
+{
+    PackedCompressor::set_direct(arr.m_integer_compressor, ndx, val);
+}
+
+void IntegerCompressor::set_flex(Array& arr, size_t ndx, int64_t val)
+{
+    FlexCompressor::set_direct(arr.m_integer_compressor, ndx, val);
+}
+
+template <typename Cond>
+bool IntegerCompressor::find_packed(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index,
+                                    QueryStateBase* st)
+{
+    return PackedCompressor::find_all<Cond>(arr, val, begin, end, base_index, st);
+}
+
+template <typename Cond>
+bool IntegerCompressor::find_flex(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index,
+                                  QueryStateBase* st)
+{
+    return FlexCompressor::find_all<Cond>(arr, val, begin, end, base_index, st);
+}
+
+void IntegerCompressor::set_vtable(Array& arr)
+{
+    static const Array::VTable vtable_packed = {get_packed,
+                                                get_chunk_packed,
+                                                get_all_packed,
+                                                set_packed,
+                                                {
+                                                    find_packed<Equal>,
+                                                    find_packed<NotEqual>,
+                                                    find_packed<Greater>,
+                                                    find_packed<Less>,
+                                                }};
+    static const Array::VTable vtable_flex = {get_flex,
+                                              get_chunk_flex,
+                                              get_all_flex,
+                                              set_flex,
+                                              {
+                                                  find_flex<Equal>,
+                                                  find_flex<NotEqual>,
+                                                  find_flex<Greater>,
+                                                  find_flex<Less>,
+                                              }};
+    if (is_packed()) {
+        arr.m_vtable = &vtable_packed;
+    }
+    else {
+        arr.m_vtable = &vtable_flex;
+    }
+}
+
+int64_t IntegerCompressor::get(size_t ndx) const
+{
+    if (is_packed()) {
+        return PackedCompressor::get(*this, ndx);
+    }
+    else {
+        return FlexCompressor::get(*this, ndx);
+    }
+}
+
+void IntegerCompressor::compress_values(const Array& arr, std::vector<int64_t>& values,
+                                        std::vector<unsigned>& indices) const
+{
+    // The main idea is to compress the values in flex format. If Packed is better, it will be chosen by
+    // IntegerCompressor::compress. The algorithm is O(n log n); this gives us nice properties, but we could use
+    // an efficient hash table and try to boost performance during insertion, although leaf arrays are relatively
+    // small in general (256 entries). The two compression formats are packed and flex, and the data in the array
+    // is re-arranged in the following ways (if compressed):
+    // Packed: || node header || ..... values ..... ||
+    // Flex:   || node header || ..... values ..... || ..... indices .....
|| + + const auto sz = arr.size(); + REALM_ASSERT_DEBUG(sz > 0); + values.reserve(sz); + indices.reserve(sz); + + for (size_t i = 0; i < sz; ++i) { + auto item = arr.get(i); + values.push_back(item); + } + + std::sort(values.begin(), values.end()); + auto last = std::unique(values.begin(), values.end()); + values.erase(last, values.end()); + + for (size_t i = 0; i < sz; ++i) { + auto pos = std::lower_bound(values.begin(), values.end(), arr.get(i)); + indices.push_back(unsigned(std::distance(values.begin(), pos))); + REALM_ASSERT_DEBUG(values[indices[i]] == arr.get(i)); + } +} diff --git a/src/realm/integer_compressor.hpp b/src/realm/integer_compressor.hpp new file mode 100644 index 00000000000..4e9023cfe18 --- /dev/null +++ b/src/realm/integer_compressor.hpp @@ -0,0 +1,202 @@ +/************************************************************************* + * + * Copyright 2023 Realm Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **************************************************************************/ + +#ifndef REALM_INTEGER_COMPRESSOR_HPP +#define REALM_INTEGER_COMPRESSOR_HPP + +#include +#include +#include +#include +#include +#include + +namespace realm { + +class Array; +class QueryStateBase; +class IntegerCompressor { +public: + // commit => encode, COW/insert => decode + bool compress(const Array&, Array&) const; + bool decompress(Array&) const; + + bool init(const char*); + void set_vtable(Array&); + + // init from mem B + inline uint64_t* data() const; + inline size_t size() const; + inline NodeHeader::Encoding get_encoding() const; + inline uint8_t v_width() const; + inline uint8_t ndx_width() const; + inline size_t v_size() const; + inline size_t ndx_size() const; + + inline uint64_t v_mask() const; + inline uint64_t ndx_mask() const; + inline uint64_t msb() const; + inline uint64_t ndx_msb() const; + inline uint64_t bitmask_v() const; + inline uint64_t bitmask_ndx() const; + + int64_t get(size_t) const; + +private: + // getting and setting interface specifically for encoding formats + inline void init_packed(const char*); + inline void init_flex(const char*); + + static int64_t get_packed(const Array& arr, size_t ndx); + static int64_t get_flex(const Array& arr, size_t ndx); + + static std::vector get_all_packed(const Array& arr, size_t begin, size_t end); + static std::vector get_all_flex(const Array& arr, size_t begin, size_t end); + + static void get_chunk_packed(const Array& arr, size_t ndx, int64_t res[8]); + static void get_chunk_flex(const Array& arr, size_t ndx, int64_t res[8]); + static void set_packed(Array& arr, size_t ndx, int64_t val); + static void set_flex(Array& arr, size_t ndx, int64_t val); + // query interface + template + static bool find_packed(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index, + QueryStateBase* st); + template + static bool find_flex(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index, + QueryStateBase* st); + + // internal impl + void compress_values(const Array&, 
std::vector&, std::vector&) const; + inline bool is_packed() const; + inline bool is_flex() const; + + // for testing + bool always_compress(const Array&, Array&, Node::Encoding) const; + +private: + using Encoding = NodeHeader::Encoding; + Encoding m_encoding{NodeHeader::Encoding::WTypBits}; + uint64_t* m_data; + uint8_t m_v_width = 0, m_ndx_width = 0; + size_t m_v_size = 0, m_ndx_size = 0; +}; + +inline void IntegerCompressor::init_packed(const char* h) +{ + m_data = (uint64_t*)NodeHeader::get_data_from_header(h); + m_v_width = NodeHeader::get_element_size(h, Encoding::Packed); + m_v_size = NodeHeader::get_num_elements(h, Encoding::Packed); +} + +inline void IntegerCompressor::init_flex(const char* h) +{ + m_data = (uint64_t*)NodeHeader::get_data_from_header(h); + m_v_width = NodeHeader::get_elementA_size(h); + m_v_size = NodeHeader::get_arrayA_num_elements(h); + m_ndx_width = NodeHeader::get_elementB_size(h); + m_ndx_size = NodeHeader::get_arrayB_num_elements(h); +} + +inline uint64_t* IntegerCompressor::data() const +{ + return m_data; +} + +inline bool IntegerCompressor::is_packed() const +{ + return m_encoding == NodeHeader::Encoding::Packed; +} + +inline bool IntegerCompressor::is_flex() const +{ + return m_encoding == NodeHeader::Encoding::Flex; +} + +inline size_t IntegerCompressor::size() const +{ + REALM_ASSERT_DEBUG(is_packed() || is_flex()); + return m_encoding == NodeHeader::Encoding::Packed ? v_size() : ndx_size(); +} + +inline size_t IntegerCompressor::v_size() const +{ + REALM_ASSERT_DEBUG(is_packed() || is_flex()); + return m_v_size; +} + +inline size_t IntegerCompressor::ndx_size() const +{ + REALM_ASSERT_DEBUG(is_flex()); + return m_ndx_size; +} + +inline uint8_t IntegerCompressor::v_width() const +{ + REALM_ASSERT_DEBUG(is_packed() || is_flex()); + return m_v_width; +} + +inline uint8_t IntegerCompressor::ndx_width() const +{ + REALM_ASSERT_DEBUG(is_flex()); + return m_ndx_width; +} + +inline NodeHeader::Encoding IntegerCompressor::get_encoding() const +{ + return m_encoding; +} + +inline uint64_t IntegerCompressor::v_mask() const +{ + REALM_ASSERT_DEBUG(is_packed() || is_flex()); + return 1ULL << (m_v_width - 1); +} + +inline uint64_t IntegerCompressor::ndx_mask() const +{ + REALM_ASSERT_DEBUG(is_flex()); + return 1ULL << (m_ndx_width - 1); +} + +inline uint64_t IntegerCompressor::msb() const +{ + REALM_ASSERT_DEBUG(is_packed() || is_flex()); + return populate(m_v_width, v_mask()); +} + +inline uint64_t IntegerCompressor::ndx_msb() const +{ + REALM_ASSERT_DEBUG(is_flex()); + return populate(m_ndx_width, ndx_mask()); +} + +inline uint64_t IntegerCompressor::bitmask_v() const +{ + REALM_ASSERT_DEBUG(is_packed() || is_flex()); + return 0xFFFFFFFFFFFFFFFFULL >> (64 - m_v_width); +} + +inline uint64_t IntegerCompressor::bitmask_ndx() const +{ + REALM_ASSERT_DEBUG(is_flex()); + return 0xFFFFFFFFFFFFFFFFULL >> (64 - m_ndx_width); +} + +} // namespace realm +#endif // REALM_INTEGER_COMPRESSOR_HPP diff --git a/src/realm/integer_flex_compressor.cpp b/src/realm/integer_flex_compressor.cpp new file mode 100644 index 00000000000..ef5e3b2fe6f --- /dev/null +++ b/src/realm/integer_flex_compressor.cpp @@ -0,0 +1,79 @@ +/************************************************************************* + * + * Copyright 2023 Realm Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **************************************************************************/ + +#include +#include +#include + +#include +#include + +#ifdef REALM_DEBUG +#include +#include +#endif + +using namespace realm; + +void FlexCompressor::init_header(char* h, uint8_t flags, uint8_t v_width, uint8_t ndx_width, size_t v_size, + size_t ndx_size) +{ + using Encoding = NodeHeader::Encoding; + ::init_header(h, Encoding::Flex, flags, v_width, ndx_width, v_size, ndx_size); +} + +void FlexCompressor::copy_data(const Array& arr, const std::vector& values, + const std::vector& indices) +{ + using Encoding = NodeHeader::Encoding; + REALM_ASSERT_DEBUG(arr.is_attached()); + const auto& compressor = arr.integer_compressor(); + REALM_ASSERT_DEBUG(compressor.get_encoding() == Encoding::Flex); + const auto v_width = compressor.v_width(); + const auto ndx_width = compressor.ndx_width(); + const auto v_size = values.size(); + const auto data = (uint64_t*)arr.m_data; + const auto offset = static_cast(v_size * v_width); + BfIterator it_value{data, 0, v_width, v_width, 0}; + BfIterator it_index{data, offset, ndx_width, ndx_width, 0}; + for (size_t i = 0; i < v_size; ++i) { + it_value.set_value(values[i]); + REALM_ASSERT_DEBUG(sign_extend_value(v_width, it_value.get_value()) == values[i]); + ++it_value; + } + for (size_t i = 0; i < indices.size(); ++i) { + REALM_ASSERT_DEBUG(values[indices[i]] == + sign_extend_value(v_width, read_bitfield(data, indices[i] * v_width, v_width))); + it_index.set_value(indices[i]); + REALM_ASSERT_DEBUG(indices[i] == it_index.get_value()); + REALM_ASSERT_DEBUG(values[indices[i]] == + sign_extend_value(v_width, read_bitfield(data, indices[i] * v_width, v_width))); + ++it_index; + } +} + +bool FlexCompressor::find_all_match(size_t start, size_t end, size_t baseindex, QueryStateBase* state) +{ + REALM_ASSERT_DEBUG(state->match_count() < state->limit()); + const auto process = state->limit() - state->match_count(); + const auto end2 = end - start > process ? start + process : end; + for (; start < end2; start++) + if (!state->match(start + baseindex)) + return false; + return true; +} diff --git a/src/realm/integer_flex_compressor.hpp b/src/realm/integer_flex_compressor.hpp new file mode 100644 index 00000000000..a7338978af8 --- /dev/null +++ b/src/realm/integer_flex_compressor.hpp @@ -0,0 +1,305 @@ +/************************************************************************* + * + * Copyright 2023 Realm Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + **************************************************************************/ + +#ifndef FLEX_COMPRESSOR_HPP +#define FLEX_COMPRESSOR_HPP + +#include + +#include +#include +#include + +namespace realm { + +// +// Compress array in Flex format +// Decompress array in WTypeBits formats +// +class FlexCompressor { +public: + // encoding/decoding + static void init_header(char*, uint8_t, uint8_t, uint8_t, size_t, size_t); + static void copy_data(const Array&, const std::vector&, const std::vector&); + // getters/setters + static int64_t get(const IntegerCompressor&, size_t); + static std::vector get_all(const IntegerCompressor&, size_t, size_t); + static void get_chunk(const IntegerCompressor&, size_t, int64_t[8]); + static void set_direct(const IntegerCompressor&, size_t, int64_t); + + template + static bool find_all(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*); + + static int64_t min(const IntegerCompressor&); + static int64_t max(const IntegerCompressor&); + +private: + static bool find_all_match(size_t, size_t, size_t, QueryStateBase*); + + template + static bool find_linear(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*); + + template + static bool find_parallel(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*); + + template + static bool do_find_all(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*); + + template + static bool run_parallel_subscan(size_t, size_t, size_t); +}; + +inline int64_t FlexCompressor::get(const IntegerCompressor& c, size_t ndx) +{ + const auto offset = c.v_width() * c.v_size(); + const auto ndx_w = c.ndx_width(); + const auto v_w = c.v_width(); + const auto data = c.data(); + BfIterator ndx_iterator{data, offset, ndx_w, ndx_w, ndx}; + BfIterator data_iterator{data, 0, v_w, v_w, static_cast(*ndx_iterator)}; + return sign_extend_field_by_mask(c.v_mask(), *data_iterator); +} + +inline std::vector FlexCompressor::get_all(const IntegerCompressor& c, size_t b, size_t e) +{ + const auto offset = c.v_width() * c.v_size(); + const auto ndx_w = c.ndx_width(); + const auto v_w = c.v_width(); + const auto data = c.data(); + const auto sign_mask = c.v_mask(); + const auto range = (e - b); + const auto starting_bit = offset + b * ndx_w; + const auto bit_per_it = num_bits_for_width(ndx_w); + const auto ndx_mask = 0xFFFFFFFFFFFFFFFFULL >> (64 - ndx_w); + const auto values_per_word = num_fields_for_width(ndx_w); + + // this is very important, x4 faster pre-allocating the array + std::vector res; + res.reserve(range); + + UnalignedWordIter unaligned_ndx_iterator(data, starting_bit); + BfIterator data_iterator{data, 0, v_w, v_w, 0}; + auto remaining_bits = ndx_w * range; + while (remaining_bits >= bit_per_it) { + auto word = unaligned_ndx_iterator.consume(bit_per_it); + for (int i = 0; i < values_per_word; ++i) { + const auto index = word & ndx_mask; + data_iterator.move(static_cast(index)); + const auto sv = sign_extend_field_by_mask(sign_mask, *data_iterator); + res.push_back(sv); + word >>= ndx_w; + } + remaining_bits -= bit_per_it; + } + if (remaining_bits) { + auto last_word = unaligned_ndx_iterator.consume(remaining_bits); + while (remaining_bits) { + const auto index = last_word & ndx_mask; + data_iterator.move(static_cast(index)); + const auto sv = sign_extend_field_by_mask(sign_mask, *data_iterator); + res.push_back(sv); + remaining_bits -= ndx_w; + last_word >>= ndx_w; + } + } + return res; +} + +inline int64_t FlexCompressor::min(const IntegerCompressor& c) +{ + const auto v_w = c.v_width(); + const auto 
data = c.data();
+    const auto sign_mask = c.v_mask();
+    BfIterator data_iterator{data, 0, v_w, v_w, 0};
+    return sign_extend_field_by_mask(sign_mask, *data_iterator);
+}
+
+inline int64_t FlexCompressor::max(const IntegerCompressor& c)
+{
+    const auto v_w = c.v_width();
+    const auto data = c.data();
+    const auto sign_mask = c.v_mask();
+    BfIterator data_iterator{data, 0, v_w, v_w, c.v_size() - 1};
+    return sign_extend_field_by_mask(sign_mask, *data_iterator);
+}
+
+inline void FlexCompressor::get_chunk(const IntegerCompressor& c, size_t ndx, int64_t res[8])
+{
+    auto sz = 8;
+    std::memset(res, 0, sizeof(int64_t) * sz);
+    auto supposed_end = ndx + sz;
+    size_t i = ndx;
+    size_t index = 0;
+    for (; i < supposed_end; ++i) {
+        res[index++] = get(c, i);
+    }
+    for (; index < 8; ++index) {
+        res[index++] = get(c, i++);
+    }
+}
+
+inline void FlexCompressor::set_direct(const IntegerCompressor& c, size_t ndx, int64_t value)
+{
+    const auto offset = c.v_width() * c.v_size();
+    const auto ndx_w = c.ndx_width();
+    const auto v_w = c.v_width();
+    const auto data = c.data();
+    BfIterator ndx_iterator{data, offset, ndx_w, ndx_w, ndx};
+    BfIterator data_iterator{data, 0, v_w, v_w, static_cast<size_t>(*ndx_iterator)};
+    data_iterator.set_value(value);
+}
+
+template <typename Cond>
+inline bool FlexCompressor::find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
+                                     QueryStateBase* state)
+{
+    REALM_ASSERT_DEBUG(start <= arr.m_size && (end <= arr.m_size || end == size_t(-1)) && start <= end);
+    Cond c;
+
+    if (end == npos)
+        end = arr.m_size;
+
+    if (!(arr.m_size > start && start < end))
+        return true;
+
+    const auto lbound = arr.m_lbound;
+    const auto ubound = arr.m_ubound;
+
+    if (!c.can_match(value, lbound, ubound))
+        return true;
+
+    if (c.will_match(value, lbound, ubound)) {
+        return find_all_match(start, end, baseindex, state);
+    }
+
+    REALM_ASSERT_DEBUG(arr.m_width != 0);
+
+    if constexpr (std::is_same_v<Cond, Equal>) {
+        return do_find_all<Cond, Equal, Equal>(arr, value, start, end, baseindex, state);
+    }
+    else if constexpr (std::is_same_v<Cond, NotEqual>) {
+        return do_find_all<Cond, Equal, NotEqual>(arr, value, start, end, baseindex, state);
+    }
+    else if constexpr (std::is_same_v<Cond, Less>) {
+        return do_find_all<Cond, GreaterEqual, Less>(arr, value, start, end, baseindex, state);
+    }
+    else if constexpr (std::is_same_v<Cond, Greater>) {
+        return do_find_all<Cond, Greater, GreaterEqual>(arr, value, start, end, baseindex, state);
+    }
+    return true;
+}
+
+template <typename Cond, typename VectorCond1, typename VectorCond2>
+inline bool FlexCompressor::do_find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
+                                        QueryStateBase* state)
+{
+    const auto v_width = arr.m_width;
+    const auto v_range = arr.integer_compressor().v_size();
+    const auto ndx_range = end - start;
+    if (!run_parallel_subscan<Cond>(v_width, v_range, ndx_range))
+        return find_linear<Cond>(arr, value, start, end, baseindex, state);
+    return find_parallel<VectorCond1, VectorCond2>(arr, value, start, end, baseindex, state);
+}
+
+template <typename Cond>
+inline bool FlexCompressor::find_linear(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
+                                        QueryStateBase* state)
+{
+    const auto cmp = [](int64_t item, int64_t key) {
+        if constexpr (std::is_same_v<Cond, Equal>)
+            return item == key;
+        if constexpr (std::is_same_v<Cond, NotEqual>)
+            return item != key;
+        if constexpr (std::is_same_v<Cond, Less>)
+            return item < key;
+        if constexpr (std::is_same_v<Cond, Greater>)
+            return item > key;
+        REALM_UNREACHABLE();
+    };
+
+    const auto& c = arr.integer_compressor();
+    const auto offset = c.v_width() * c.v_size();
+    const auto ndx_w = c.ndx_width();
+    const auto v_w = c.v_width();
+    const auto data = c.data();
+    const auto mask = c.v_mask();
+    BfIterator ndx_iterator{data, offset, ndx_w, ndx_w, start};
+    BfIterator data_iterator{data, 0, v_w, v_w, static_cast<size_t>(*ndx_iterator)};
+    while (start < end) {
+        const auto sv = sign_extend_field_by_mask(mask, *data_iterator);
+        if (cmp(sv, value) && !state->match(start + baseindex))
+            return false;
+        ndx_iterator.move(++start);
+        data_iterator.move(static_cast<size_t>(*ndx_iterator));
+    }
+    return true;
+}
+
+template <typename VectorCond1, typename VectorCond2>
+inline bool FlexCompressor::find_parallel(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
+                                          QueryStateBase* state)
+{
+    //
+    // algorithm idea: first try to find in the array of values (should be shorter in size but more bits) using
+    // VectorCond1.
+    // Then match the index found in the array of indices using VectorCond2
+    //
+
+    const auto& compressor = arr.integer_compressor();
+    const auto v_width = compressor.v_width();
+    const auto v_size = compressor.v_size();
+    const auto ndx_width = compressor.ndx_width();
+    const auto offset = v_size * v_width;
+    uint64_t* data = (uint64_t*)arr.m_data;
+
+    auto MSBs = compressor.msb();
+    auto search_vector = populate(v_width, value);
+    auto v_start =
+        parallel_subword_find(find_all_fields<VectorCond1>, data, 0, v_width, MSBs, search_vector, 0, v_size);
+
+    if constexpr (!std::is_same_v<VectorCond2, NotEqual>) {
+        if (v_start == v_size)
+            return true;
+    }
+
+    MSBs = compressor.ndx_msb();
+    search_vector = populate(ndx_width, v_start);
+    while (start < end) {
+        start = parallel_subword_find(find_all_fields_unsigned<VectorCond2>, data, offset, ndx_width, MSBs,
+                                      search_vector, start, end);
+
+        if (start < end && !state->match(start + baseindex))
+            return false;
+
+        ++start;
+    }
+    return true;
+}
+
+template <typename Cond>
+inline bool FlexCompressor::run_parallel_subscan(size_t v_width, size_t v_range, size_t ndx_range)
+{
+    if constexpr (std::is_same_v<Cond, Equal> || std::is_same_v<Cond, NotEqual>) {
+        return v_width < 32 && v_range >= 20 && ndx_range >= 20;
+    }
+    // > and < look slower in a parallel scan for large values
+    return v_width <= 16 && v_range >= 20 && ndx_range >= 20;
+}
+
+} // namespace realm
+#endif // FLEX_COMPRESSOR_HPP
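To make the two-phase find above concrete: a Flex array stores a sorted, deduplicated value table followed by one small index per element, and a query first resolves the key against the value table, then scans only the narrow indices. A minimal sketch of the same idea on plain vectors (illustrative only, no bit packing, names are mine, not from the patch):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Toy "flex" image of the sequence {10, 20, 10, 30, 20}:
    struct Flex {
        std::vector<int64_t> values = {10, 20, 30};    // sorted, deduplicated
        std::vector<size_t> indices = {0, 1, 0, 2, 1}; // one entry per element
    };

    // Phase 1: locate the key among the distinct values; phase 2: every
    // element whose index equals that position matches (the Equal case).
    inline std::vector<size_t> find_all_equal(const Flex& f, int64_t key)
    {
        std::vector<size_t> matches;
        auto it = std::lower_bound(f.values.begin(), f.values.end(), key);
        if (it == f.values.end() || *it != key)
            return matches; // key not present: nothing can match
        const size_t pos = size_t(it - f.values.begin());
        for (size_t i = 0; i < f.indices.size(); ++i)
            if (f.indices[i] == pos)
                matches.push_back(i);
        return matches; // for key 20: {1, 4}
    }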
diff --git a/src/realm/integer_packed_compressor.cpp b/src/realm/integer_packed_compressor.cpp
new file mode 100644
index 00000000000..2f7646b1b0c
--- /dev/null
+++ b/src/realm/integer_packed_compressor.cpp
@@ -0,0 +1,68 @@
+/*************************************************************************
+ *
+ * Copyright 2023 Realm Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **************************************************************************/
+
+#include <realm/integer_packed_compressor.hpp>
+#include <realm/array.hpp>
+#include <realm/array_direct.hpp>
+#include <realm/integer_compressor.hpp>
+#include <realm/node_header.hpp>
+
+#include <cstring>
+#include <vector>
+
+#ifdef REALM_DEBUG
+#include <iostream>
+#include <sstream>
+#endif
+
+using namespace realm;
+
+void PackedCompressor::init_header(char* h, uint8_t flags, uint8_t v_width, size_t v_size)
+{
+    using Encoding = NodeHeader::Encoding;
+    ::init_header((char*)h, Encoding::Packed, flags, static_cast<uint8_t>(v_width), v_size);
+}
+
+void PackedCompressor::copy_data(const Array& origin, Array& arr)
+{
+    // this can be boosted a little bit: width and size should be known at this stage.
+    using Encoding = NodeHeader::Encoding;
+    REALM_ASSERT_DEBUG(arr.is_attached());
+    REALM_ASSERT_DEBUG(arr.integer_compressor().get_encoding() == Encoding::Packed);
+    // we don't need to access the header, init from mem must have been called
+    const auto v_width = arr.m_width;
+    const auto v_size = arr.m_size;
+    auto data = (uint64_t*)arr.m_data;
+    BfIterator it_value{data, 0, v_width, v_width, 0};
+    for (size_t i = 0; i < v_size; ++i) {
+        it_value.set_value(origin.get(i));
+        REALM_ASSERT_DEBUG(sign_extend_value(v_width, it_value.get_value()) == origin.get(i));
+        ++it_value;
+    }
+}
+
+bool PackedCompressor::find_all_match(size_t start, size_t end, size_t baseindex, QueryStateBase* state)
+{
+    REALM_ASSERT_DEBUG(state->match_count() < state->limit());
+    const auto process = state->limit() - state->match_count();
+    const auto end2 = end - start > process ? start + process : end;
+    for (; start < end2; start++)
+        if (!state->match(start + baseindex))
+            return false;
+    return true;
+}
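The Packed payload written by copy_data above is simply m_size sign-extended fields of m_width bits laid out back to back in 64-bit words. A self-contained sketch of that round trip, with hypothetical helpers standing in for BfIterator and sign_extend_value:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Write the w-bit field at position ndx (fields may straddle word boundaries).
    inline void put_field(uint64_t* data, unsigned w, size_t ndx, int64_t v)
    {
        const size_t bit = ndx * w;
        const uint64_t mask = (w == 64) ? ~0ULL : ((1ULL << w) - 1);
        data[bit / 64] &= ~(mask << (bit % 64));
        data[bit / 64] |= (uint64_t(v) & mask) << (bit % 64);
        if (bit % 64 + w > 64) { // spill into the next word
            const unsigned lo = unsigned(64 - bit % 64);
            data[bit / 64 + 1] &= ~(mask >> lo);
            data[bit / 64 + 1] |= (uint64_t(v) & mask) >> lo;
        }
    }

    // Read it back, sign-extending from bit w-1 (what sign_extend_value does).
    inline int64_t get_field(const uint64_t* data, unsigned w, size_t ndx)
    {
        const size_t bit = ndx * w;
        uint64_t raw = data[bit / 64] >> (bit % 64);
        if (bit % 64 + w > 64)
            raw |= data[bit / 64 + 1] << (64 - bit % 64);
        raw &= (w == 64) ? ~0ULL : ((1ULL << w) - 1);
        const uint64_t sign = 1ULL << (w - 1);
        return int64_t(raw ^ sign) - int64_t(sign);
    }

    int main()
    {
        uint64_t buf[2];
        std::memset(buf, 0, sizeof buf);
        put_field(buf, 7, 9, -42); // field 9 of width 7 straddles buf[0]/buf[1]
        assert(get_field(buf, 7, 9) == -42);
    }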
diff --git a/src/realm/integer_packed_compressor.hpp b/src/realm/integer_packed_compressor.hpp
new file mode 100644
index 00000000000..91d94fc5eab
--- /dev/null
+++ b/src/realm/integer_packed_compressor.hpp
@@ -0,0 +1,229 @@
+/*************************************************************************
+ *
+ * Copyright 2024 Realm Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **************************************************************************/
+
+#ifndef PACKED_COMPRESSOR_HPP
+#define PACKED_COMPRESSOR_HPP
+
+#include <realm/array.hpp>
+#include <realm/array_direct.hpp>
+
+#include <cstring>
+#include <vector>
+
+namespace realm {
+
+//
+// Compress array in Packed format
+// Decompress array in WTypeBits formats
+//
+class PackedCompressor {
+public:
+    // encoding/decoding
+    static void init_header(char*, uint8_t, uint8_t, size_t);
+    static void copy_data(const Array&, Array&);
+    // get or set
+    static int64_t get(const IntegerCompressor&, size_t);
+    static std::vector<int64_t> get_all(const IntegerCompressor& c, size_t b, size_t e);
+    static void get_chunk(const IntegerCompressor&, size_t, int64_t res[8]);
+    static void set_direct(const IntegerCompressor&, size_t, int64_t);
+
+    template <typename Cond>
+    static bool find_all(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*);
+
+private:
+    static bool find_all_match(size_t start, size_t end, size_t baseindex, QueryStateBase* state);
+
+    template <typename Cond>
+    static bool find_parallel(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*);
+
+    template <typename Cond>
+    static bool find_linear(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*);
+
+    template <typename Cond>
+    static bool run_parallel_scan(size_t, size_t);
+};
+
+inline int64_t PackedCompressor::get(const IntegerCompressor& c, size_t ndx)
+{
+    BfIterator it{c.data(), 0, c.v_width(), c.v_width(), ndx};
+    return sign_extend_field_by_mask(c.v_mask(), *it);
+}
+
+inline std::vector<int64_t> PackedCompressor::get_all(const IntegerCompressor& c, size_t b, size_t e)
+{
+    const auto range = (e - b);
+    const auto v_w = c.v_width();
+    const auto data = c.data();
+    const auto sign_mask = c.v_mask();
+    const auto starting_bit = b * v_w;
+    const auto total_bits = starting_bit + (v_w * range);
+    const auto mask = 0xFFFFFFFFFFFFFFFFULL >> (64 - v_w);
+    const auto bit_per_it = num_bits_for_width(v_w);
+    const auto values_per_word = num_fields_for_width(v_w);
+
+    std::vector<int64_t> res;
+    res.reserve(range);
+
+    UnalignedWordIter unaligned_data_iterator(data, starting_bit);
+    auto cnt_bits = starting_bit;
+    while (cnt_bits + bit_per_it < total_bits) {
+        auto word = unaligned_data_iterator.consume(bit_per_it);
+        for (int i = 0; i < values_per_word; ++i) {
+            res.push_back(sign_extend_field_by_mask(sign_mask, word & mask));
+            word >>= v_w;
+        }
+        cnt_bits += bit_per_it;
+    }
+    if (cnt_bits < total_bits) {
+        auto last_word = unaligned_data_iterator.consume(static_cast<size_t>(total_bits - cnt_bits));
+        while (cnt_bits < total_bits) {
+            res.push_back(sign_extend_field_by_mask(sign_mask, last_word & mask));
+            cnt_bits += v_w;
+            last_word >>= v_w;
+        }
+    }
+    return res;
+}
+
+inline void PackedCompressor::set_direct(const IntegerCompressor& c, size_t ndx, int64_t value)
+{
+    BfIterator it{c.data(), 0, c.v_width(), c.v_width(), ndx};
+    it.set_value(value);
+}
+
+inline void PackedCompressor::get_chunk(const IntegerCompressor& c, size_t ndx, int64_t res[8])
+{
+    auto sz = 8;
+    std::memset(res, 0, sizeof(int64_t) * sz);
+    auto supposed_end = ndx + sz;
+    size_t i = ndx;
+    size_t index = 0;
+    // this can be done better, in one go, retrieve both!!!
+    for (; i < supposed_end; ++i) {
+        res[index++] = get(c, i);
+    }
+    for (; index < 8; ++index) {
+        res[index++] = get(c, i++);
+    }
+}
+
+
+template <typename Cond>
+inline bool PackedCompressor::find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
+                                       QueryStateBase* state)
+{
+    REALM_ASSERT_DEBUG(start <= arr.m_size && (end <= arr.m_size || end == size_t(-1)) && start <= end);
+    Cond c;
+
+    if (end == npos)
+        end = arr.m_size;
+
+    if (!(arr.m_size > start && start < end))
+        return true;
+
+    const auto lbound = arr.m_lbound;
+    const auto ubound = arr.m_ubound;
+
+    if (!c.can_match(value, lbound, ubound))
+        return true;
+
+    if (c.will_match(value, lbound, ubound)) {
+        return find_all_match(start, end, baseindex, state);
+    }
+
+    REALM_ASSERT_DEBUG(arr.m_width != 0);
+
+    if (!run_parallel_scan<Cond>(arr.m_width, end - start))
+        return find_linear<Cond>(arr, value, start, end, baseindex, state);
+
+    return find_parallel<Cond>(arr, value, start, end, baseindex, state);
+}
+
+template <typename Cond>
+inline bool PackedCompressor::find_parallel(const Array& arr, int64_t value, size_t start, size_t end,
+                                            size_t baseindex, QueryStateBase* state)
+{
+    //
+    // Main idea around find parallel (applicable to flex arrays too).
+    // Try to find the starting point where the condition can be met, comparing as many values as a single 64bit can
+    // contain in parallel. Once we have found the starting point, keep matching values as much as we can between
+    // start and end.
+    //
+    // EG: let's store 6, it gets stored in 4 bits (0110). 6 is 4 bits because 110 (6) + sign bit 0.
+    // Inside 64bits we can fit max 16 times 6. If we go from index 0 to 15 throughout the same 64 bits, we need to
+    // apply a mask and a shift bits every time, then compare the extracted values.
+    // This is not the cheapest thing to do. Instead we can compare all values contained within 64 bits in one go and
+    // see if there is a match with what we are looking for, cutting the number of comparisons by a factor of roughly
+    // 64/K, where K is the width of each single value within a 64 bit word.
+
+    const auto data = (const uint64_t*)arr.m_data;
+    const auto width = arr.m_width;
+    const auto MSBs = arr.integer_compressor().msb();
+    const auto search_vector = populate(arr.m_width, value);
+    while (start < end) {
+        start = parallel_subword_find(find_all_fields<Cond>, data, 0, width, MSBs, search_vector, start, end);
+        if (start < end && !state->match(start + baseindex))
+            return false;
+        ++start;
+    }
+    return true;
+}
+
+template <typename Cond>
+inline bool PackedCompressor::find_linear(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
+                                          QueryStateBase* state)
+{
+    auto compare = [](int64_t a, int64_t b) {
+        if constexpr (std::is_same_v<Cond, Equal>)
+            return a == b;
+        if constexpr (std::is_same_v<Cond, NotEqual>)
+            return a != b;
+        if constexpr (std::is_same_v<Cond, Greater>)
+            return a > b;
+        if constexpr (std::is_same_v<Cond, Less>)
+            return a < b;
+    };
+    const auto& c = arr.integer_compressor();
+    BfIterator it{c.data(), 0, c.v_width(), c.v_width(), start};
+    for (; start < end; ++start) {
+        it.move(start);
+        const auto sv = sign_extend_field_by_mask(c.v_mask(), *it);
+        if (compare(sv, value) && !state->match(start + baseindex))
+            return false;
+    }
+    return true;
+}
+
+template <typename Cond>
+inline bool PackedCompressor::run_parallel_scan(size_t width, size_t range)
+{
+    if constexpr (std::is_same_v<Cond, NotEqual>) {
+        // we seem to be particularly slow doing parallel scan in packed for NotEqual.
+        // we are much better with a linear scan. TODO: investigate this.
+        return false;
+    }
+    if constexpr (std::is_same_v<Cond, Equal>) {
+        return width < 32 && range >= 20;
+    }
+    // > and < need a different heuristic
+    return width <= 20 && range >= 20;
+}
+
+} // namespace realm
+
+#endif // PACKED_COMPRESSOR_HPP
diff --git a/src/realm/node.cpp b/src/realm/node.cpp
index f23cff4316b..63ef4d3962c 100644
--- a/src/realm/node.cpp
+++ b/src/realm/node.cpp
@@ -26,7 +26,8 @@
 using namespace realm;
 
-MemRef Node::create_node(size_t size, Allocator& alloc, bool context_flag, Type type, WidthType width_type, int width)
+MemRef Node::create_node(size_t size, Allocator& alloc, bool context_flag, Type type, WidthType width_type,
+                         uint8_t width)
 {
     size_t byte_size_0 = calc_byte_size(width_type, size, width);
     size_t byte_size = std::max(byte_size_0, size_t(initial_capacity));
@@ -81,9 +82,9 @@ size_t Node::calc_item_count(size_t bytes, size_t width) const noexcept
 
 void Node::alloc(size_t init_size, size_t new_width)
 {
-    REALM_ASSERT(is_attached());
+    REALM_ASSERT_DEBUG(is_attached());
     char* header = get_header_from_data(m_data);
-    REALM_ASSERT(!wtype_is_extended(header));
+    REALM_ASSERT_DEBUG(!wtype_is_extended(header));
     size_t needed_bytes = calc_byte_len(init_size, new_width);
     // this method is not public and callers must (and currently do) ensure that
     // needed_bytes are never larger than max_array_payload.
@@ -132,7 +133,7 @@ void Node::alloc(size_t init_size, size_t new_width)
     }
     // update width (important when we convert from normal uncompressed array into compressed format)
     if (new_width != orig_width) {
-        set_width_in_header(int(new_width), header);
+        set_width_in_header(new_width, header);
     }
     set_size_in_header(init_size, header);
     m_size = init_size;
diff --git a/src/realm/node.hpp b/src/realm/node.hpp
index 5cb637ab7d1..8a4b862a701 100644
--- a/src/realm/node.hpp
+++ b/src/realm/node.hpp
@@ -323,7 +323,7 @@ class Node : public NodeHeader {
     }
 
     static MemRef create_node(size_t size, Allocator& alloc, bool context_flag = false, Type type = type_Normal,
-                              WidthType width_type = wtype_Ignore, int width = 1);
+                              WidthType width_type = wtype_Ignore, uint8_t width = 1);
 
     void set_header_size(size_t value) noexcept
     {
diff --git a/src/realm/node_header.hpp b/src/realm/node_header.hpp
index 2ffe073b721..ca7d5638025 100644
--- a/src/realm/node_header.hpp
+++ b/src/realm/node_header.hpp
@@ -205,7 +205,7 @@ class NodeHeader {
         h[4] = h4;
     }
 
-    static size_t unsigned_to_num_bits(uint64_t value)
+    static uint8_t unsigned_to_num_bits(uint64_t value)
     {
         if constexpr (sizeof(size_t) == sizeof(uint64_t))
             return 1 + log2(static_cast<size_t>(value));
@@ -218,7 +218,7 @@ class NodeHeader {
         return 0;
     }
 
-    static inline size_t signed_to_num_bits(int64_t value)
+    static inline uint8_t signed_to_num_bits(int64_t value)
     {
         if (value >= 0)
             return 1 + unsigned_to_num_bits(value);
@@ -292,7 +292,6 @@ class NodeHeader {
             (reinterpret_cast<uint8_t*>(header))[0] = static_cast<uint8_t>(value >> 3);
         }
     }
-    static size_t get_byte_size_from_header(const char* header) noexcept;
 
     // ^ First 3 must overlap numerically with corresponding wtype_X enum.
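The two width helpers changed above return the minimum number of bits needed to store a value. A reference model of their semantics (hypothetical names; the expected values match the Array_Bits test added further down):

    #include <cstdint>

    // An unsigned value needs floor(log2(v)) + 1 bits; a signed value needs
    // one extra sign bit, with negative values measured on ~v.
    inline uint8_t unsigned_bits_ref(uint64_t v)
    {
        uint8_t n = 0;
        while (v) {
            ++n;
            v >>= 1;
        }
        return n; // 0 -> 0, 1 -> 1, 2..3 -> 2, 4..7 -> 3, 8 -> 4
    }

    inline uint8_t signed_bits_ref(int64_t v)
    {
        return v >= 0 ? 1 + unsigned_bits_ref(uint64_t(v)) : 1 + unsigned_bits_ref(uint64_t(~v));
    }
    // signed_bits_ref(0) == 1, signed_bits_ref(-1) == 1,
    // signed_bits_ref(-4) == 3, signed_bits_ref(7) == 4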
@@ -343,17 +342,18 @@ class NodeHeader {
 
 private:
     friend class Node;
+    friend class IntegerCompressor;
     // Setting element size for encodings with a single element size:
-    static void inline set_element_size(char* header, size_t bits_per_element, Encoding);
+    static void inline set_element_size(char* header, uint8_t bits_per_element, Encoding);
     // Getting element size for encodings with a single element size:
-    static inline size_t get_element_size(const char* header, Encoding);
+    static inline uint8_t get_element_size(const char* header, Encoding);
     // Used only by flex at this stage.
     // Setting element sizes for encodings with two element sizes (called A and B)
-    static inline void set_elementA_size(char* header, size_t bits_per_element);
-    static inline void set_elementB_size(char* header, size_t bits_per_element);
+    static inline void set_elementA_size(char* header, uint8_t bits_per_element);
+    static inline void set_elementB_size(char* header, uint8_t bits_per_element);
     // Getting element sizes for encodings with two element sizes (called A and B)
-    static inline size_t get_elementA_size(const char* header);
-    static inline size_t get_elementB_size(const char* header);
+    static inline uint8_t get_elementA_size(const char* header);
+    static inline uint8_t get_elementB_size(const char* header);
     // Setting num of elements for encodings with two element sizes (called A and B)
     static inline void set_arrayA_num_elements(char* header, size_t num_elements);
     static inline void set_arrayB_num_elements(char* header, size_t num_elements);
@@ -366,9 +366,9 @@ class NodeHeader {
     static inline void set_num_elements(char* header, size_t num_elements, Encoding);
 
     static inline size_t calc_size(size_t num_elements);
-    static inline size_t calc_size(size_t num_elements, size_t element_size, Encoding);
-    static inline size_t calc_size(size_t arrayA_num_elements, size_t arrayB_num_elements, size_t elementA_size,
-                                   size_t elementB_size);
+    static inline size_t calc_size(size_t num_elements, uint8_t element_size, Encoding);
+    static inline size_t calc_size(size_t arrayA_num_elements, size_t arrayB_num_elements, uint8_t elementA_size,
+                                   uint8_t elementB_size);
 
     static size_t calc_byte_size(WidthType wtype, size_t size, uint_least8_t width) noexcept
     {
@@ -441,7 +441,7 @@ class NodeHeader {
     }
 };
 
-inline void NodeHeader::set_element_size(char* header, size_t bits_per_element, Encoding encoding)
+inline void NodeHeader::set_element_size(char* header, uint8_t bits_per_element, Encoding encoding)
 {
     switch (encoding) {
         case NodeHeader::Encoding::Packed: {
@@ -469,7 +469,7 @@ inline void NodeHeader::set_element_size(char* header, size_t bits_per_element,
     }
 }
 
-inline size_t NodeHeader::get_element_size(const char* header, Encoding encoding)
+inline uint8_t NodeHeader::get_element_size(const char* header, Encoding encoding)
 {
     switch (encoding) {
         case NodeHeader::Encoding::Packed: {
@@ -496,7 +496,7 @@ inline size_t NodeHeader::get_element_size(const char* header, Encoding encoding
     }
 }
 
-inline void NodeHeader::set_elementA_size(char* header, size_t bits_per_element)
+inline void NodeHeader::set_elementA_size(char* header, uint8_t bits_per_element)
 {
     // we're a bit low on bits for the Flex encoding, so we need to squeeze stuff
     REALM_ASSERT_DEBUG(get_encoding(header) == Encoding::Flex);
@@ -509,7 +509,7 @@ inline void NodeHeader::set_elementA_size(char* header, size_t bits_per_element)
     (reinterpret_cast<uint16_t*>(header))[1] = word;
 }
 
-inline void NodeHeader::set_elementB_size(char* header, size_t bits_per_element)
+inline void NodeHeader::set_elementB_size(char* header, uint8_t bits_per_element)
 {
     // we're a bit low on bits for the Flex encoding, so we need to squeeze stuff
     REALM_ASSERT_DEBUG(get_encoding(header) == Encoding::Flex);
@@ -522,7 +522,7 @@ inline void NodeHeader::set_elementB_size(char* header, size_t bits_per_element)
     (reinterpret_cast<uint16_t*>(header))[3] = word;
 }
 
-inline size_t NodeHeader::get_elementA_size(const char* header)
+inline uint8_t NodeHeader::get_elementA_size(const char* header)
 {
     const auto encoding = get_encoding(header);
     REALM_ASSERT_DEBUG(encoding == Encoding::Flex);
@@ -536,7 +536,7 @@ inline size_t NodeHeader::get_elementA_size(const char* header)
     return bits_per_element;
 }
 
-inline size_t NodeHeader::get_elementB_size(const char* header)
+inline uint8_t NodeHeader::get_elementB_size(const char* header)
 {
     REALM_ASSERT_DEBUG(get_encoding(header) == Encoding::Flex);
     uint16_t word = (reinterpret_cast<const uint16_t*>(header))[3];
@@ -643,7 +643,7 @@ inline size_t NodeHeader::calc_size(size_t num_elements)
     return calc_byte_size(wtype_Ignore, num_elements, 0);
 }
 
-inline size_t NodeHeader::calc_size(size_t num_elements, size_t element_size, Encoding encoding)
+inline size_t NodeHeader::calc_size(size_t num_elements, uint8_t element_size, Encoding encoding)
 {
     using Encoding = NodeHeader::Encoding;
     switch (encoding) {
@@ -660,8 +660,8 @@ inline size_t NodeHeader::calc_size(size_t num_elements, size_t element_size, En
     }
 }
 
-inline size_t NodeHeader::calc_size(size_t arrayA_num_elements, size_t arrayB_num_elements, size_t elementA_size,
-                                    size_t elementB_size)
+inline size_t NodeHeader::calc_size(size_t arrayA_num_elements, size_t arrayB_num_elements, uint8_t elementA_size,
+                                    uint8_t elementB_size)
 {
     return NodeHeader::header_size +
            align_bits_to8(arrayA_num_elements * elementA_size + arrayB_num_elements * elementB_size);
@@ -757,6 +757,7 @@ static inline void init_header(char* header, realm::NodeHeader::Encoding enc, ui
     REALM_ASSERT_DEBUG(num_elemsB < 1024);
     hw[1] = static_cast<uint16_t>(((bits_pr_elemA - 1) << 10) | num_elemsA);
     hw[3] = static_cast<uint16_t>(((bits_pr_elemB - 1) << 10) | num_elemsB);
+    REALM_ASSERT_DEBUG(realm::NodeHeader::get_encoding(header) == realm::NodeHeader::Encoding::Flex);
 }
 } // namespace
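The Flex header squeezes a 6-bit element width and a 10-bit element count into each of two 16-bit halfwords, which is what set_elementA_size/set_elementB_size and init_header above are doing. A standalone sketch of the packing (helper names are mine):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Width is biased by 1 so 1..64 fits in 6 bits; the count uses the low
    // 10 bits, hence the num_elems < 1024 assertions in init_header.
    inline uint16_t pack_flex(uint8_t bits_per_element, size_t num_elements)
    {
        assert(bits_per_element >= 1 && bits_per_element <= 64);
        assert(num_elements < 1024);
        return static_cast<uint16_t>(((bits_per_element - 1) << 10) | num_elements);
    }

    inline uint8_t unpack_width(uint16_t word)
    {
        return static_cast<uint8_t>((word >> 10) + 1);
    }

    inline size_t unpack_size(uint16_t word)
    {
        return word & 0x03FF;
    }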
diff --git a/src/realm/obj.cpp b/src/realm/obj.cpp
index 8a1267029b9..eb8138dd8f5 100644
--- a/src/realm/obj.cpp
+++ b/src/realm/obj.cpp
@@ -549,12 +549,9 @@ int64_t Obj::_get<int64_t>(ColKey::Idx col_ndx) const
     if (current_version != m_storage_version) {
         update();
     }
-
     ref_type ref = to_ref(Array::get(m_mem.get_addr(), col_ndx.val + 1));
     char* header = alloc.translate(ref);
-    int width = Array::get_width_from_header(header);
-    char* data = Array::get_data_from_header(header);
-    REALM_TEMPEX(return get_direct, width, (data, m_row_ndx));
+    return Array::get(header, m_row_ndx);
 }
 
 template <>
diff --git a/src/realm/query_conditions.hpp b/src/realm/query_conditions.hpp
index cf3cf9e73d8..ea16fb4a736 100644
--- a/src/realm/query_conditions.hpp
+++ b/src/realm/query_conditions.hpp
@@ -1002,6 +1002,155 @@ struct GreaterEqual : public HackClass {
     static const int condition = -1;
 };
 
+/* Unsigned LT.
+
+   This can be determined by trial subtraction. However, some care must be exercised
+   since simply subtracting one vector from another will allow carries from one
+   bitfield to flow into the next one. To avoid this, we isolate bitfields by clamping
+   the MSBs to 1 in A and 0 in B before subtraction. After the subtraction the MSBs in
+   the result indicate borrows from the MSB. We then compute overflow (borrow OUT of MSB)
+   using boolean logic as described below.
+
+   Unsigned LT is also used to find all zero fields or all non-zero fields, so it is
+   the backbone of all comparisons returning vectors.
+ */
+
+// compute the overflows in unsigned trial subtraction A-B. The overflows
+// will be marked by 1 in the sign bit of each field in the result. Other
+// bits in the result are zero.
+// Overflows are detected for each field pair where A is less than B.
+inline uint64_t unsigned_LT_vector(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    // 1. compute borrow from most significant bit
+    // Isolate bitfields inside A and B before subtraction (prevent carries from spilling over)
+    // do this by clamping most significant bit in A to 1, and msb in B to 0
+    auto A_isolated = A | MSBs;                              // 1 op
+    auto B_isolated = B & ~MSBs;                             // 2 ops
+    auto borrows_into_sign_bit = ~(A_isolated - B_isolated); // 2 ops (total latency 4)
+
+    // 2. determine what subtraction against most significant bit would give:
+    // A B borrow-in:  (A - B - borrow-in)
+    // 0 0 0           (0-0-0) = 0
+    // 0 0 1           (0-0-1) = 1 + borrow-out
+    // 0 1 0           (0-1-0) = 1 + borrow-out
+    // 0 1 1           (0-1-1) = 0 + borrow-out
+    // 1 0 0           (1-0-0) = 1
+    // 1 0 1           (1-0-1) = 0
+    // 1 1 0           (1-1-0) = 0
+    // 1 1 1           (1-1-1) = 1 + borrow-out
+    // borrow-out = (~A & B) | (~A & borrow-in) | (A & B & borrow-in)
+    // The overflows are simply the borrow-out, now encoded into the sign bits of each field.
+    auto overflows = (~A & B) | (~A & borrows_into_sign_bit) | (A & B & borrows_into_sign_bit);
+    // ^ 6 ops, total latency 6 (4+2)
+    return overflows & MSBs; // 1 op, total latency 7
+    // total of 12 ops and a latency of 7. On a beefy CPU 3-4 of those can run in parallel
+    // and still reach a combined latency of 10 or less.
+}
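A worked instance of the trial subtraction above, for two 8-bit lanes (a sketch, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint64_t MSBs = 0x8080; // MSB of each 8-bit lane (two lanes used here)
        // lane 0: A=3,  B=9  -> 3 < 9,   expect the lane's sign bit set
        // lane 1: A=12, B=5  -> 12 >= 5, expect the lane's sign bit clear
        const uint64_t A = (12 << 8) | 3;
        const uint64_t B = (5 << 8) | 9;
        auto A_isolated = A | MSBs;
        auto B_isolated = B & ~MSBs;
        auto borrows = ~(A_isolated - B_isolated);
        auto overflows = ((~A & B) | (~A & borrows) | (A & B & borrows)) & MSBs;
        std::printf("%llx\n", (unsigned long long)overflows); // prints 80: only lane 0 matched
        // Signed comparison reuses the same machinery after biasing both sides
        // with the sign bit (A ^ MSBs, B ^ MSBs), as done in the specializations below.
    }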
+template <typename Cond>
+uint64_t find_all_fields_unsigned(uint64_t MSBs, uint64_t A, uint64_t B);
+
+template <typename Cond>
+uint64_t find_all_fields(uint64_t MSBs, uint64_t A, uint64_t B);
+
+template <>
+inline uint64_t find_all_fields<NotEqual>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    // 0 != A^B, same as asking 0 - (A^B) overflows.
+    return unsigned_LT_vector(MSBs, 0, A ^ B);
+}
+
+template <>
+inline uint64_t find_all_fields<Equal>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    // get the fields which are NE and negate the result
+    auto all_fields_NE = find_all_fields<NotEqual>(MSBs, A, B);
+    auto all_fields_NE_negated = ~all_fields_NE;
+    // must filter the negated vector so only MSB are left.
+    return MSBs & all_fields_NE_negated;
+}
+
+template <>
+inline uint64_t find_all_fields_unsigned<Equal>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    return find_all_fields<Equal>(MSBs, A, B);
+}
+
+template <>
+inline uint64_t find_all_fields_unsigned<NotEqual>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    return find_all_fields<NotEqual>(MSBs, A, B);
+}
+
+template <>
+inline uint64_t find_all_fields_unsigned<Less>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    return unsigned_LT_vector(MSBs, A, B);
+}
+
+template <>
+inline uint64_t find_all_fields_unsigned<LessEqual>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    // Now A <= B is the same as !(A > B) so...
+    // reverse A and B to turn (A>B) --> (B<A), then negate and keep only the MSBs
+    return ~unsigned_LT_vector(MSBs, B, A) & MSBs;
+}
+
+template <>
+inline uint64_t find_all_fields_unsigned<Greater>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    return find_all_fields_unsigned<Less>(MSBs, B, A);
+}
+
+template <>
+inline uint64_t find_all_fields_unsigned<GreaterEqual>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    return find_all_fields_unsigned<LessEqual>(MSBs, B, A);
+}
+
+/*
+   Handling signed values
+
+   Trial subtraction only works as-is for unsigned. We simply transform signed into unsigned
+   by pushing all values up by 1<<(field_width-1). This makes all negative values positive and positive
+   values remain positive, although larger. Any overflow during the push can be ignored.
+   After that transformation trial subtraction should correctly detect the LT condition.
+ */
+
+
+template <>
+inline uint64_t find_all_fields<Less>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    auto sign_bits = MSBs;
+    return unsigned_LT_vector(MSBs, A ^ sign_bits, B ^ sign_bits);
+}
+
+template <>
+inline uint64_t find_all_fields<LessEqual>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    auto sign_bits = MSBs;
+    return find_all_fields_unsigned<LessEqual>(MSBs, A ^ sign_bits, B ^ sign_bits);
+}
+
+template <>
+inline uint64_t find_all_fields<Greater>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    // A > B is the same as B < A
+    return find_all_fields<Less>(MSBs, B, A);
+}
+
+template <>
+inline uint64_t find_all_fields<GreaterEqual>(uint64_t MSBs, uint64_t A, uint64_t B)
+{
+    // A >= B is the same as B <= A
+    return find_all_fields<LessEqual>(MSBs, B, A);
+}
+
 } // namespace realm
 
 #endif // REALM_QUERY_CONDITIONS_HPP
diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp
index 8b7ecf2d1e8..26a07377536 100644
--- a/src/realm/query_engine.hpp
+++ b/src/realm/query_engine.hpp
@@ -449,6 +449,7 @@ static size_t find_first_haystack(LeafType& leaf, NeedleContainer& needles, size
 {
     // for a small number of conditions, it is faster to do a linear search than to compute the hash
     // the exact thresholds were found experimentally
+
     if (needles.size() < linear_search_threshold) {
         for (size_t i = start; i < end; ++i) {
             auto element = leaf.get(i);
diff --git a/src/realm/query_state.hpp b/src/realm/query_state.hpp
index b2812276539..ac0480d7166 100644
--- a/src/realm/query_state.hpp
+++ b/src/realm/query_state.hpp
@@ -22,8 +22,6 @@
 #include <cstddef> // size_t
 #include <cstdint> // uint8_t etc
 
-#include
-
 namespace realm {
 
 enum Action { act_ReturnFirst, act_Sum, act_Max, act_Min, act_Count, act_FindAll, act_Average };
@@ -34,6 +32,7 @@ enum { cond_Equal, cond_NotEqual, cond_Greater, cond_Less, cond_VTABLE_FINDER_CO
 
 class ArrayUnsigned;
 class Mixed;
+class ArrayPayload;
 
 class QueryStateBase {
 public:
diff --git a/src/realm/table.hpp b/src/realm/table.hpp
index 3709669400d..0830d7c733f 100644
--- a/src/realm/table.hpp
+++ b/src/realm/table.hpp
@@ -544,6 +544,10 @@ class Table {
         return false;
     }
 
+    ref_type typed_write(ref_type ref, _impl::ArrayWriterBase& out, bool deep, bool only_modified,
+                         bool compress) const;
+    void typed_print(std::string prefix, ref_type ref) const;
+
 private:
     template <class T>
     TableView find_all(ColKey col_key, T value);
@@ -689,7 +693,6 @@ class Table {
     };
 
     ref_type typed_write(ref_type ref, _impl::ArrayWriterBase& out) const;
-    void typed_print(std::string prefix, ref_type ref) const;
 
 private:
     enum LifeCycleCookie {
diff --git a/test/benchmark-common-tasks/main.cpp b/test/benchmark-common-tasks/main.cpp
index 5333e464dfc..b837834796b 100644
--- a/test/benchmark-common-tasks/main.cpp
+++ b/test/benchmark-common-tasks/main.cpp
@@ -1413,7 +1413,6 @@ struct BenchmarkQueryChainedOrIntsIndexed : BenchmarkQueryChainedOrInts {
     }
 };
 
-
 struct BenchmarkQueryIntEquality : BenchmarkQueryChainedOrInts {
     const char* name() const
     {
diff --git a/test/object-store/results.cpp b/test/object-store/results.cpp
index 5815d258e84..3fedacfeaec 100644
--- a/test/object-store/results.cpp
+++ b/test/object-store/results.cpp
@@ -103,7 +103,6 @@ struct TestContext : CppContext {
     }
 };
 
-
 TEST_CASE("notifications: async delivery", "[notifications]") {
     _impl::RealmCoordinator::assert_no_open_realms();
     TestFile config;
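The linear_search_threshold dispatch referenced in the query_engine.hpp hunk above is easy to see in isolation. A sketch with an assumed threshold of 22 (the patch does not show realm's actual constant):

    #include <algorithm>
    #include <cstdint>
    #include <unordered_set>
    #include <vector>

    // Return the first position in `haystack` whose value is in `needles`,
    // switching strategy on the needle count as described above.
    inline size_t find_first_in(const std::vector<int64_t>& haystack, const std::vector<int64_t>& needles)
    {
        const size_t linear_search_threshold = 22; // assumed; realm's value was found experimentally
        if (needles.size() < linear_search_threshold) {
            // few needles: a linear scan beats computing hashes
            for (size_t i = 0; i < haystack.size(); ++i)
                if (std::find(needles.begin(), needles.end(), haystack[i]) != needles.end())
                    return i;
        }
        else {
            std::unordered_set<int64_t> set(needles.begin(), needles.end());
            for (size_t i = 0; i < haystack.size(); ++i)
                if (set.count(haystack[i]))
                    return i;
        }
        return size_t(-1); // i.e. realm::not_found
    }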
diff --git a/test/test_array.cpp b/test/test_array.cpp
index 8a86ac15718..a77c698b7fa 100644
--- a/test/test_array.cpp
+++ b/test/test_array.cpp
@@ -96,6 +96,27 @@ void has_zero_byte(TestContext& test_context, int64_t value, size_t reps)
 
 } // anonymous namespace
 
+TEST(Array_Bits)
+{
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(0), 0);
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(1), 1);
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(2), 2);
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(3), 2);
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(4), 3);
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(5), 3);
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(7), 3);
+    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(8), 4);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(0), 1);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(1), 2);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-1), 1);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-2), 2);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-3), 3);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-4), 3);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(3), 3);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(4), 4);
+    CHECK_EQUAL(NodeHeader::signed_to_num_bits(7), 4);
+}
+
 TEST(Array_General)
 {
     Array c(Allocator::get_default());
@@ -1560,25 +1581,56 @@ NONCONCURRENT_TEST(Array_count)
     c.destroy();
 }
 
-TEST(Array_Bits)
+TEST(DirectBitFields)
 {
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(0), 0);
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(1), 1);
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(2), 2);
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(3), 2);
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(4), 3);
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(5), 3);
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(7), 3);
-    CHECK_EQUAL(NodeHeader::unsigned_to_num_bits(8), 4);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(0), 1);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(1), 2);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-1), 1);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-2), 2);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-3), 3);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(-4), 3);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(3), 3);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(4), 4);
-    CHECK_EQUAL(NodeHeader::signed_to_num_bits(7), 4);
+    uint64_t a[2];
+    a[0] = a[1] = 0;
+    {
+        BfIterator it(a, 0, 7, 7, 8);
+        REALM_ASSERT(*it == 0);
+        auto it2(it);
+        ++it2;
+        it2.set_value(127 + 128);
+        REALM_ASSERT(*it == 0);
+        ++it;
+        REALM_ASSERT(*it == 127);
+        ++it;
+        REALM_ASSERT(*it == 0);
+    }
+    // reverse polarity
+    a[0] = a[1] = -1ULL;
+    {
+        BfIterator it(a, 0, 7, 7, 8);
+        REALM_ASSERT(*it == 127);
+        auto it2(it);
+        ++it2;
+        it2.set_value(42 + 128);
+        REALM_ASSERT(*it == 127);
+        ++it;
+        REALM_ASSERT(*it == 42);
+        ++it;
+        REALM_ASSERT(*it == 127);
+    }
+}
+
+TEST(Extended_Array_encoding)
+{
+    using Encoding = NodeHeader::Encoding;
+    Array array(Allocator::get_default());
+    auto mem = array.get_alloc().alloc(10);
+    init_header(mem.get_addr(), Encoding::Flex, 7, 1, 1, 1, 1);
+    array.init_from_mem(mem);
+    auto array_header = array.get_header();
+    auto encoding = array.get_encoding(array_header);
+    CHECK(encoding == Encoding::Flex);
+
+    Array another_array(Allocator::get_default());
+    another_array.init_from_ref(array.get_ref());
+    auto another_header = another_array.get_header();
+    auto another_encoding = another_array.get_encoding(another_header);
+    CHECK(encoding == another_encoding);
+
+    array.get_alloc().free_(mem);
 }
 
 TEST(Array_cares_about)
@@ -1710,9 +1762,8 @@ TEST(VerifyIterationAcrossWords)
unaligned iterator UnalignedWordIter u_it(a, 0); for (size_t i = 0; i < 51; ++i) { - const auto v = sign_extend_value(5, u_it.get(5) & 0x1F); + const auto v = sign_extend_value(5, u_it.consume(5) & 0x1F); CHECK_EQUAL(v, values[i]); - u_it.bump(5); } } } @@ -1859,7 +1910,7 @@ TEST(ParallelSearchEqualMatch) // Now use the optimized version static auto vector_compare_eq = [](auto msb, auto a, auto b) { - return find_all_fields_EQ(msb, a, b); + return find_all_fields(msb, a, b); }; start = 0; @@ -1901,7 +1952,7 @@ TEST(ParallelSearchEqualNoMatch) const auto search_vector = populate(width, key); static auto vector_compare_eq = [](auto msb, auto a, auto b) { - return find_all_fields_EQ(msb, a, b); + return find_all_fields(msb, a, b); }; size_t start = 0; @@ -1951,7 +2002,7 @@ TEST(ParallelSearchNotEqual) const auto search_vector = populate(width, key); static auto vector_compare_neq = [](auto msb, auto a, auto b) { - return find_all_fields_NE(msb, a, b); + return find_all_fields(msb, a, b); }; size_t start = 0; @@ -2002,7 +2053,7 @@ TEST(ParallelSearchLessThan) const auto search_vector = populate(width, key); static auto vector_compare_lt = [](auto msb, auto a, auto b) { - return find_all_fields_signed_LT(msb, a, b); + return find_all_fields(msb, a, b); }; size_t start = 0; @@ -2052,7 +2103,7 @@ TEST(ParallelSearchGreaterThan) const auto search_vector = populate(width, key); static auto vector_compare_gt = [](auto msb, auto a, auto b) { - return find_all_fields_signed_GT(msb, a, b); + return find_all_fields(msb, a, b); }; size_t start = 0; diff --git a/test/test_array_integer.cpp b/test/test_array_integer.cpp index a26cecf52b2..9ccdf25653e 100644 --- a/test/test_array_integer.cpp +++ b/test/test_array_integer.cpp @@ -19,6 +19,7 @@ #include "testsettings.hpp" #include +#include #include #include @@ -31,6 +32,1575 @@ using namespace realm; using namespace realm::test_util; +// #define ARRAY_PERFORMANCE_TESTING +#if !defined(REALM_DEBUG) && defined(ARRAY_PERFORMANCE_TESTING) +NONCONCURRENT_TEST(perf_array_encode_get_vs_array_get_less_32bit) +{ + using namespace std; + using namespace std::chrono; + size_t n_values = 1000; + size_t n_runs = 100; + std::cout << " < 32 bit values " << std::endl; + std::cout << " N values = " << n_values << std::endl; + std::cout << " N runs = " << n_runs << std::endl; + + std::vector input_array; + ArrayInteger a(Allocator::get_default()); + ArrayInteger compressed_a(Allocator::get_default()); + a.create(); + + for (size_t i = 0; i < n_values; i++) + input_array.push_back(i); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(input_array.begin(), input_array.end(), g); + for (const auto& v : input_array) + a.add(v); + + auto t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) + REALM_ASSERT(a.get(i) == input_array[i]); + } + auto t2 = high_resolution_clock::now(); + + std::cout << " Positive values - Array::get(): " << duration_cast(t2 - t1).count() << " ns" + << std::endl; + std::cout << " Positive values - Array::get(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + t1 = high_resolution_clock::now(); + + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + REALM_ASSERT(compressed_a.get(i) == a.get(i)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Positive values - 
ArrayCompress::get(): " << duration_cast(t2 - t1).count() << " ns" + << std::endl; + std::cout << " Positive values - ArrayCompress::get(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.destroy(); + compressed_a.destroy(); + a.create(); + input_array.clear(); + for (size_t i = 0; i < n_values; i++) + input_array.push_back(-i); + std::random_device rd1; + std::mt19937 g1(rd1()); + std::shuffle(input_array.begin(), input_array.end(), g1); + for (const auto& v : input_array) + a.add(v); + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) + REALM_ASSERT(a.get(i) == input_array[i]); + } + t2 = high_resolution_clock::now(); + + std::cout << std::endl; + + std::cout << " Negative values - Array::get(): " << duration_cast(t2 - t1).count() << " ns" + << std::endl; + std::cout << " Negative values - Array::get(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + REALM_ASSERT(compressed_a.get(i) == a.get(i)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Negative values - ArrayCompress::get(): " << duration_cast(t2 - t1).count() << " ns" + << std::endl; + std::cout << " Negative values - ArrayCompress::get(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.destroy(); + compressed_a.destroy(); +} + + +NONCONCURRENT_TEST(Test_basic_find_EQ_less_32bit) +{ + using namespace std; + using namespace std::chrono; + size_t n_values = 1000; + size_t n_runs = 100; + std::cout << " Value with bitwidth < 32 " << std::endl; + std::cout << " N values = " << n_values << std::endl; + std::cout << " N runs = " << n_runs << std::endl; + + std::vector input_array; + ArrayInteger a(Allocator::get_default()); + ArrayInteger compressed_a(Allocator::get_default()); + a.create(); + + for (size_t i = 0; i < n_values; i++) + input_array.push_back(i); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(input_array.begin(), input_array.end(), g); + for (const auto& v : input_array) + a.add(v); + + auto t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + auto ndx = a.find_first(input_array[i]); + REALM_ASSERT(ndx != realm::not_found); + REALM_ASSERT(a.get(ndx) == input_array[ndx]); + } + } + auto t2 = high_resolution_clock::now(); + + std::cout << " Positive values - Array::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Positive values - Array::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // verify that both find the same thing + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + auto v = a.find_first(input_array[i]); + auto v1 = compressed_a.find_first(input_array[i]); + REALM_ASSERT(v == v1); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + auto ndx = compressed_a.find_first(input_array[i]); + REALM_ASSERT(ndx != realm::not_found); + 
REALM_ASSERT(compressed_a.get(ndx) == input_array[ndx]); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Positive values - ArrayCompress::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Positive values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + std::cout << std::endl; + + a.destroy(); + compressed_a.destroy(); + a.create(); + input_array.clear(); + for (size_t i = 0; i < n_values; i++) + input_array.push_back(-i); + std::random_device rd1; + std::mt19937 g1(rd1()); + std::shuffle(input_array.begin(), input_array.end(), g1); + for (const auto& v : input_array) + a.add(v); + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // verify that both find the same thing + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + auto v = a.find_first(input_array[i]); + auto v1 = compressed_a.find_first(input_array[i]); + REALM_ASSERT(v == v1); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + auto ndx = a.find_first(input_array[i]); + REALM_ASSERT(ndx != realm::not_found); + REALM_ASSERT(a.get(ndx) == input_array[ndx]); + } + } + t2 = high_resolution_clock::now(); + + std::cout << " Negative values - Array::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Negative values - Array::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + auto ndx = compressed_a.find_first(input_array[i]); + REALM_ASSERT(ndx != realm::not_found); + REALM_ASSERT(compressed_a.get(ndx) == a.get(ndx)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Negative values - ArrayCompress::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Negative values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.destroy(); + compressed_a.destroy(); +} + +NONCONCURRENT_TEST(Test_basic_find_NEQ_value_less_32bit) +{ + using namespace std; + using namespace std::chrono; + size_t n_values = 1000; + size_t n_runs = 100; + std::cout << " Value with bitwidth < 32 " << std::endl; + std::cout << " N values = " << n_values << std::endl; + std::cout << " N runs = " << n_runs << std::endl; + + std::vector input_array; + ArrayInteger a(Allocator::get_default()); + ArrayInteger compressed_a(Allocator::get_default()); + a.create(); + + for (size_t i = 0; i < n_values; i++) + input_array.push_back(i); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(input_array.begin(), input_array.end(), g); + for (const auto& v : input_array) + a.add(v); + + QueryStateFindFirst state1; + QueryStateFindFirst state2; + auto t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(i, 0, a.size(), &state1); + REALM_ASSERT(state1.m_state != realm::not_found); + REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]); + } + } + auto t2 = high_resolution_clock::now(); + + std::cout << " Positive values - Array::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Positive values - Array::find(): " + << (double)duration_cast(t2 
- t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // verify that both find the same thing + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(i, 0, a.size(), &state1); + compressed_a.find(i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state1.m_state == state2.m_state); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + compressed_a.find(i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state2.m_state != realm::not_found); + REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Positive values - ArrayCompress::find(): " + << duration_cast(t2 - t1).count() << " ms" << std::endl; + std::cout << " Positive values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + std::cout << std::endl; + + a.destroy(); + compressed_a.destroy(); + a.create(); + input_array.clear(); + for (size_t i = 0; i < n_values; i++) + input_array.push_back(-i); + std::random_device rd1; + std::mt19937 g1(rd1()); + std::shuffle(input_array.begin(), input_array.end(), g1); + for (const auto& v : input_array) + a.add(v); + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // NEQ for signed integers is not working. TODO: investigate this. + // verify that both find the same thing + + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(-i, 0, a.size(), &state1); + compressed_a.find(-i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state1.m_state == state2.m_state); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(-i, 0, a.size(), &state1); + REALM_ASSERT(state1.m_state != realm::not_found); + REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]); + } + } + t2 = high_resolution_clock::now(); + + std::cout << " Negative values - Array::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Negative values - Array::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + compressed_a.find(-i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state2.m_state != realm::not_found); + REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Negative values - ArrayCompress::find(): " + << duration_cast(t2 - t1).count() << " ms" << std::endl; + std::cout << " Negative values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.destroy(); + compressed_a.destroy(); +} + +NONCONCURRENT_TEST(Test_basic_find_LT_value_less_32bit) +{ + using namespace std; + using namespace std::chrono; + size_t n_values = 1000; + size_t n_runs = 100; + std::cout << " Value with bitwidth < 32 " << std::endl; + std::cout << " N values = " << n_values << std::endl; + std::cout << " N runs = " << n_runs << std::endl; + + std::vector input_array; + ArrayInteger a(Allocator::get_default()); + ArrayInteger 
compressed_a(Allocator::get_default()); + a.create(); + + for (size_t i = 0; i < n_values; i++) + input_array.push_back(i); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(input_array.begin(), input_array.end(), g); + for (const auto& v : input_array) + a.add(v); + + QueryStateFindFirst state1{}; + QueryStateFindFirst state2{}; + auto t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 1; i < n_values; ++i) { // there is nothing less than 0 + a.find(i, 0, a.size(), &state1); + REALM_ASSERT(state1.m_state != realm::not_found); + REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]); + } + } + auto t2 = high_resolution_clock::now(); + + std::cout << " Positive values - Array::find(): " << duration_cast(t2 - t1).count() << " ms" + << std::endl; + std::cout << " Positive values - Array::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // verify that both find the same thing + state1 = {}; + state2 = {}; + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(i, 0, a.size(), &state1); + compressed_a.find(i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state1.m_state == state2.m_state); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 1; i < n_values; ++i) { // there is nothing less than 0 + compressed_a.find(i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state2.m_state != realm::not_found); + REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Positive values - ArrayCompress::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Positive values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + std::cout << std::endl; + + a.destroy(); + compressed_a.destroy(); + a.create(); + input_array.clear(); + for (size_t i = 0; i < n_values; i++) + input_array.push_back(-i); + std::random_device rd1; + std::mt19937 g1(rd1()); + std::shuffle(input_array.begin(), input_array.end(), g1); + for (const auto& v : input_array) + a.add(v); + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // verify that both find the same thing + state1 = {}; + state2 = {}; + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(-i, 0, a.size(), &state1); + compressed_a.find(-i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state1.m_state == state2.m_state); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values - 1; ++i) { // nothing less than the biggest negative number + a.find(-i, 0, a.size(), &state1); + REALM_ASSERT(state1.m_state != realm::not_found); + REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]); + } + } + t2 = high_resolution_clock::now(); + + std::cout << " Negative values - Array::find(): " << duration_cast(t2 - t1).count() << " ms" + << std::endl; + std::cout << " Negative values - Array::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values - 1; 
++i) { // nothing less than the biggest negative number + compressed_a.find(-i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state2.m_state != realm::not_found); + REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Negative values - ArrayCompress::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Negative values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.destroy(); + compressed_a.destroy(); +} + +NONCONCURRENT_TEST(Test_basic_find_GT_value_less_32bit) +{ + // GT subword parallel search is not working... TODO : investigate + using namespace std; + using namespace std::chrono; + size_t n_values = 1000; + size_t n_runs = 100; + std::cout << " Value with bitwidth < 32 " << std::endl; + std::cout << " N values = " << n_values << std::endl; + std::cout << " N runs = " << n_runs << std::endl; + + std::vector input_array; + ArrayInteger a(Allocator::get_default()); + ArrayInteger compressed_a(Allocator::get_default()); + a.create(); + + for (size_t i = 0; i < n_values; i++) + input_array.push_back(i); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(input_array.begin(), input_array.end(), g); + for (const auto& v : input_array) + a.add(v); + + QueryStateFindFirst state1; + QueryStateFindFirst state2; + auto t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values - 1; ++i) { // nothing greatest than the last number + a.find(i, 0, a.size(), &state1); + REALM_ASSERT(state1.m_state != realm::not_found); + REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]); + } + } + auto t2 = high_resolution_clock::now(); + + std::cout << " Positive values - Array::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Positive values - Array::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // verify that both find the same thing + state1 = {}; + state2 = {}; + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(i, 0, a.size(), &state1); + compressed_a.find(i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state1.m_state == state2.m_state); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values - 1; ++i) { // nothing bigger than the last val + compressed_a.find(i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state2.m_state != realm::not_found); + REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Positive values - ArrayCompress::find(): " + << duration_cast(t2 - t1).count() << " ms" << std::endl; + std::cout << " Positive values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + std::cout << std::endl; + + a.destroy(); + compressed_a.destroy(); + a.create(); + input_array.clear(); + for (size_t i = 0; i < n_values; i++) + input_array.push_back(-i); + std::random_device rd1; + std::mt19937 g1(rd1()); + std::shuffle(input_array.begin(), input_array.end(), g1); + for (const auto& v : input_array) + a.add(v); + + a.try_compress(compressed_a); + 
CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + + // verify that both find the same thing + state1 = {}; + state2 = {}; + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + a.find(-i, 0, a.size(), &state1); + compressed_a.find(-i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state1.m_state == state2.m_state); + } + } + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 1; i < n_values; ++i) { // nothing bigger than 0 + a.find(-i, 0, a.size(), &state1); + REALM_ASSERT(state1.m_state != realm::not_found); + REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]); + } + } + t2 = high_resolution_clock::now(); + + std::cout << " Negative values - Array::find(): " << duration_cast(t2 - t1).count() + << " ms" << std::endl; + std::cout << " Negative values - Array::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 1; i < n_values; ++i) { // nothing bigger than 0 + compressed_a.find(-i, 0, compressed_a.size(), &state2); + REALM_ASSERT(state2.m_state != realm::not_found); + REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Negative values - ArrayCompress::find(): " + << duration_cast(t2 - t1).count() << " ms" << std::endl; + std::cout << " Negative values - ArrayCompress::find(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.destroy(); + compressed_a.destroy(); +} + +NONCONCURRENT_TEST(perf_array_encode_get_vs_array_get_greater_32bit) +{ + using namespace std; + using namespace std::chrono; + size_t start_value = 0x0000000100000000; // 32 bit val + size_t n_values = 1000; + size_t n_runs = 100; + std::cout << " >= 32 bit values " << std::endl; + std::cout << " N values = " << n_values << std::endl; + std::cout << " N runs = " << n_runs << std::endl; + + std::vector input_array; + ArrayInteger a(Allocator::get_default()); + ArrayInteger compressed_a(Allocator::get_default()); + a.create(); + + for (size_t i = 0; i < n_values; i++) + input_array.push_back(start_value + i); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(input_array.begin(), input_array.end(), g); + for (const auto& v : input_array) + a.add(v); + + auto t1 = high_resolution_clock::now(); + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) + REALM_ASSERT(a.get(i) == input_array[i]); + } + auto t2 = high_resolution_clock::now(); + + std::cout << " Positive values - Array::get(): " << duration_cast(t2 - t1).count() << " ns" + << std::endl; + std::cout << " Positive values - Array::get(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.try_compress(compressed_a); + CHECK(compressed_a.is_compressed()); + CHECK(compressed_a.size() == a.size()); + t1 = high_resolution_clock::now(); + + for (size_t j = 0; j < n_runs; ++j) { + for (size_t i = 0; i < n_values; ++i) { + REALM_ASSERT(compressed_a.get(i) == a.get(i)); + } + } + t2 = high_resolution_clock::now(); + std::cout << " Positive values - ArrayCompress::get(): " << duration_cast(t2 - t1).count() << " ns" + << std::endl; + std::cout << " Positive values - ArrayCompress::get(): " + << (double)duration_cast(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl; + + a.destroy(); + 
+    compressed_a.destroy();
+    a.create();
+    input_array.clear();
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(-int64_t(i));
+    std::random_device rd1;
+    std::mt19937 g1(rd1());
+    std::shuffle(input_array.begin(), input_array.end(), g1);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i)
+            REALM_ASSERT(a.get(i) == input_array[i]);
+    }
+    t2 = high_resolution_clock::now();
+
+    std::cout << std::endl;
+
+    std::cout << "   Negative values - Array::get(): " << duration_cast<nanoseconds>(t2 - t1).count() << " ns"
+              << std::endl;
+    std::cout << "   Negative values - Array::get(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            REALM_ASSERT(compressed_a.get(i) == a.get(i));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Negative values - ArrayCompress::get(): " << duration_cast<nanoseconds>(t2 - t1).count() << " ns"
+              << std::endl;
+    std::cout << "   Negative values - ArrayCompress::get(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+}
+
+NONCONCURRENT_TEST(Test_basic_find_EQ_greater_32bit)
+{
+    using namespace std;
+    using namespace std::chrono;
+    size_t start_value = 0x000001000000000; // a value well past the 32-bit range
+    size_t n_values = 1000;
+    size_t n_runs = 100;
+    std::cout << "   Value with bitwidth >= 32 " << std::endl;
+    std::cout << "   N values = " << n_values << std::endl;
+    std::cout << "   N runs = " << n_runs << std::endl;
+
+    std::vector<int64_t> input_array;
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger compressed_a(Allocator::get_default());
+    a.create();
+
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(start_value + i);
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(input_array.begin(), input_array.end(), g);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    auto t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            auto ndx = a.find_first(start_value + i);
+            REALM_ASSERT(ndx != realm::not_found);
+            REALM_ASSERT(a.get(ndx) == input_array[ndx]);
+        }
+    }
+    auto t2 = high_resolution_clock::now();
+
+    std::cout << "   Positive values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Positive values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            REALM_ASSERT(a.find_first(start_value + i) == compressed_a.find_first(start_value + i));
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            auto ndx = compressed_a.find_first(start_value + i);
+            REALM_ASSERT(ndx != realm::not_found);
+            REALM_ASSERT(compressed_a.get(ndx) == a.get(ndx));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Positive values - ArrayCompress::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Positive values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    std::cout << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+    a.create();
+    input_array.clear();
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(-int64_t(start_value + i));
+    std::random_device rd1;
+    std::mt19937 g1(rd1());
+    std::shuffle(input_array.begin(), input_array.end(), g1);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            const auto k = -int64_t(start_value + i);
+            const auto v1 = a.find_first(k);
+            const auto v2 = compressed_a.find_first(k);
+            REALM_ASSERT(v1 == v2);
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            auto ndx = a.find_first(-int64_t(start_value + i));
+            REALM_ASSERT(ndx != realm::not_found);
+            REALM_ASSERT(a.get(ndx) == input_array[ndx]);
+        }
+    }
+    t2 = high_resolution_clock::now();
+
+    std::cout << "   Negative values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Negative values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            auto ndx = compressed_a.find_first(-int64_t(start_value + i));
+            REALM_ASSERT(ndx != realm::not_found);
+            REALM_ASSERT(compressed_a.get(ndx) == a.get(ndx));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Negative values - ArrayCompress::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Negative values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+}
+
+NONCONCURRENT_TEST(Test_basic_find_NEQ_value_greater_32bit)
+{
+    using namespace std;
+    using namespace std::chrono;
+    size_t start_value = 0x0000000100000000; // first value past the 32-bit range
+    size_t n_values = 1000;
+    size_t n_runs = 100;
+    std::cout << "   Value with bitwidth >= 32 " << std::endl;
+    std::cout << "   N values = " << n_values << std::endl;
+    std::cout << "   N runs = " << n_runs << std::endl;
+
+    std::vector<int64_t> input_array;
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger compressed_a(Allocator::get_default());
+    a.create();
+
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(start_value + i);
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(input_array.begin(), input_array.end(), g);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    QueryStateFindFirst state1;
+    QueryStateFindFirst state2;
+    auto t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            a.find<NotEqual>(start_value + i, 0, a.size(), &state1);
+            REALM_ASSERT(state1.m_state != realm::not_found);
+            REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]);
+        }
+    }
+    auto t2 = high_resolution_clock::now();
+
+    std::cout << "   Positive values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Positive values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.try_compress(compressed_a);
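+    // (Comment added for clarity: with 1000 distinct values, find<NotEqual>(v) should
+    // match at index 0, or at index 1 when v itself happens to sit at index 0, so
+    // every probe below is expected to succeed on both representations.)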
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            a.find<NotEqual>(start_value + i, 0, a.size(), &state1);
+            compressed_a.find<NotEqual>(start_value + i, 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state1.m_state == state2.m_state);
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            compressed_a.find<NotEqual>(start_value + i, 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state2.m_state != realm::not_found);
+            REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Positive values - ArrayCompress::find(): "
+              << duration_cast<milliseconds>(t2 - t1).count() << " ms" << std::endl;
+    std::cout << "   Positive values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    std::cout << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+    a.create();
+    input_array.clear();
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(-int64_t(start_value + i));
+    std::random_device rd1;
+    std::mt19937 g1(rd1());
+    std::shuffle(input_array.begin(), input_array.end(), g1);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            a.find<NotEqual>(-int64_t(start_value + i), 0, a.size(), &state1);
+            compressed_a.find<NotEqual>(-int64_t(start_value + i), 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state1.m_state == state2.m_state);
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            a.find<NotEqual>(-int64_t(start_value + i), 0, a.size(), &state1);
+            REALM_ASSERT(state1.m_state != realm::not_found);
+            REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]);
+        }
+    }
+    t2 = high_resolution_clock::now();
+
+    std::cout << "   Negative values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Negative values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            compressed_a.find<NotEqual>(-int64_t(start_value + i), 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state2.m_state != realm::not_found);
+            REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Negative values - ArrayCompress::find(): "
+              << duration_cast<milliseconds>(t2 - t1).count() << " ms" << std::endl;
+    std::cout << "   Negative values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+}
+
+NONCONCURRENT_TEST(Test_basic_find_LT_value_greater_32bit)
+{
+    using namespace std;
+    using namespace std::chrono;
+    size_t start_value = 0x0000000100000000; // first value past the 32-bit range
+    size_t n_values = 1000;
+    size_t n_runs = 100;
+    std::cout << "   Value with bitwidth >= 32 " << std::endl;
+    std::cout << "   N values = " << n_values << std::endl;
+    std::cout << "   N runs = " << n_runs << std::endl;
+
+    std::vector<int64_t> input_array;
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger compressed_a(Allocator::get_default());
+    a.create();
+
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(start_value + i);
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(input_array.begin(), input_array.end(), g);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    QueryStateFindFirst state1;
+    QueryStateFindFirst state2;
+    auto t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 1; i < n_values; ++i) { // nothing less than the smallest value
+            a.find<Less>(start_value + i, 0, a.size(), &state1);
+            REALM_ASSERT(state1.m_state != realm::not_found);
+            REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]);
+        }
+    }
+    auto t2 = high_resolution_clock::now();
+
+    std::cout << "   Positive values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count() << " ms"
+              << std::endl;
+    std::cout << "   Positive values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    state1 = {};
+    state2 = {};
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            a.find<Less>(start_value + i, 0, a.size(), &state1);
+            compressed_a.find<Less>(start_value + i, 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state1.m_state == state2.m_state);
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 1; i < n_values; ++i) {
+            compressed_a.find<Less>(start_value + i, 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state2.m_state != realm::not_found);
+            REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Positive values - ArrayCompress::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Positive values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    std::cout << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+    a.create();
+    input_array.clear();
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(-int64_t(start_value + i));
+    std::random_device rd1;
+    std::mt19937 g1(rd1());
+    std::shuffle(input_array.begin(), input_array.end(), g1);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            a.find<Less>(-int64_t(start_value + i), 0, a.size(), &state1);
+            compressed_a.find<Less>(-int64_t(start_value + i), 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state1.m_state == state2.m_state);
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values - 1; ++i) { // nothing less than the most negative value
+            a.find<Less>(-int64_t(start_value + i), 0, a.size(), &state1);
+            REALM_ASSERT(state1.m_state != realm::not_found);
+            REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]);
+        }
+    }
+    t2 = high_resolution_clock::now();
+
+    std::cout << "   Negative values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count() << " ms"
+              << std::endl;
+    std::cout << "   Negative values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values - 1; ++i) { // nothing less than the most negative value
+            compressed_a.find<Less>(-int64_t(start_value + i), 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state2.m_state != realm::not_found);
+            REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Negative values - ArrayCompress::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Negative values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+}
+
+NONCONCURRENT_TEST(Test_basic_find_GT_value_greater_32bit)
+{
+    using namespace std;
+    using namespace std::chrono;
+    size_t start_value = 0x0000100000000; // first value past the 32-bit range
+    size_t n_values = 1000;
+    size_t n_runs = 100;
+    std::cout << "   Value with bitwidth >= 32 " << std::endl;
+    std::cout << "   N values = " << n_values << std::endl;
+    std::cout << "   N runs = " << n_runs << std::endl;
+
+    std::vector<int64_t> input_array;
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger compressed_a(Allocator::get_default());
+    a.create();
+
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(start_value + i);
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(input_array.begin(), input_array.end(), g);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    QueryStateFindFirst state1;
+    QueryStateFindFirst state2;
+    auto t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values - 1; ++i) { // nothing greater than the last value
+            a.find<Greater>(start_value + i, 0, a.size(), &state1);
+            REALM_ASSERT(state1.m_state != realm::not_found);
+            REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]);
+        }
+    }
+    auto t2 = high_resolution_clock::now();
+
+    std::cout << "   Positive values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Positive values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    state1 = {};
+    state2 = {};
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values; ++i) {
+            const auto k = start_value + i;
+            a.find<Greater>(k, 0, a.size(), &state1);
+            compressed_a.find<Greater>(k, 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state1.m_state == state2.m_state);
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 0; i < n_values - 1; ++i) {
+            compressed_a.find<Greater>(start_value + i, 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state2.m_state != realm::not_found);
+            REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Positive values - ArrayCompress::find(): "
+              << duration_cast<milliseconds>(t2 - t1).count() << " ms" << std::endl;
+    std::cout << "   Positive values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    std::cout << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+    a.create();
+    input_array.clear();
+    for (size_t i = 0; i < n_values; i++)
+        input_array.push_back(-int64_t(start_value + i));
+    std::random_device rd1;
+    std::mt19937 g1(rd1());
+    std::shuffle(input_array.begin(), input_array.end(), g1);
+    for (const auto& v : input_array)
+        a.add(v);
+
+    a.try_compress(compressed_a);
+    CHECK(compressed_a.is_compressed());
+    CHECK(compressed_a.size() == a.size());
+
+    // verify that both find the same thing
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 1; i < n_values; ++i) {
+            a.find<Greater>(-int64_t(start_value + i), 0, a.size(), &state1);
+            compressed_a.find<Greater>(-int64_t(start_value + i), 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state1.m_state == state2.m_state);
+        }
+    }
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 1; i < n_values; ++i) { // nothing greater than the largest value
+            a.find<Greater>(-int64_t(start_value + i), 0, a.size(), &state1);
+            REALM_ASSERT(state1.m_state != realm::not_found);
+            REALM_ASSERT(a.get(state1.m_state) == input_array[state1.m_state]);
+        }
+    }
+    t2 = high_resolution_clock::now();
+
+    std::cout << "   Negative values - Array::find(): " << duration_cast<milliseconds>(t2 - t1).count()
+              << " ms" << std::endl;
+    std::cout << "   Negative values - Array::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    t1 = high_resolution_clock::now();
+    for (size_t j = 0; j < n_runs; ++j) {
+        for (size_t i = 1; i < n_values; ++i) {
+            compressed_a.find<Greater>(-int64_t(start_value + i), 0, compressed_a.size(), &state2);
+            REALM_ASSERT(state2.m_state != realm::not_found);
+            REALM_ASSERT(compressed_a.get(state2.m_state) == a.get(state2.m_state));
+        }
+    }
+    t2 = high_resolution_clock::now();
+    std::cout << "   Negative values - ArrayCompress::find(): "
+              << duration_cast<milliseconds>(t2 - t1).count() << " ms" << std::endl;
+    std::cout << "   Negative values - ArrayCompress::find(): "
+              << (double)duration_cast<nanoseconds>(t2 - t1).count() / n_values / n_runs << " ns/value" << std::endl;
+
+    a.destroy();
+    compressed_a.destroy();
+}
+
+#endif
+
+// disable this test if forcing compression to Packed.
+#if !REALM_COMPRESS
+TEST(Test_ArrayInt_no_compress)
+{
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+    a.create();
+    a.add(10);
+    a.add(11);
+    a.add(12);
+    // the original array is never encoded; a1 is the array to be written to disk.
+    // In this case compression is not needed.
+    CHECK_NOT(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a.get(0) == 10);
+    CHECK(a.get(1) == 11);
+    CHECK(a.get(2) == 12);
+    a.destroy();
+    a1.destroy();
+}
+
+TEST(Test_ArrayInt_compress_decompress_needed)
+{
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+    a.create();
+    a.add(10);
+    a.add(5);
+    a.add(5);
+    // uncompressed requires 3 x 4 bits, compressed takes 2 x 5 bits + 3 x 2 bits;
+    // with 8 byte alignment this is 16 bytes either way.
+    CHECK_NOT(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    a.add(10);
+    a.add(15);
+    // uncompressed is 5x4 bits, compressed is 3x5 bits + 5x2 bits;
+    // with 8 byte alignment this is 16 bytes either way.
+    CHECK_NOT(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    a.add(10);
+    a.add(15);
+    a.add(10);
+    a.add(15);
+    // uncompressed is 9x4 bits, compressed is 3x5 bits + 9x2 bits;
+    // with 8 byte alignment this is 16 bytes either way.
+    CHECK_NOT(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    a.add(-1);
+    // the addition of -1 forces the array from unsigned to signed form,
+    // changing from 4 bits per element to 8 bits
+    // (1, 2 and 4 bit elements are unsigned, larger elements are signed)
+    // uncompressed is 10x8 bits, compressed is 3x5 bits + 10x2 bits;
+    // with alignment, this is 24 bytes uncompressed and 16 bytes compressed
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a.get(0) == 10);
+    CHECK(a.get(1) == 5);
+    CHECK(a.get(2) == 5);
+    CHECK(a.get(3) == 10);
+    CHECK(a.get(4) == 15);
+    CHECK(a1.is_compressed());
+    auto v = a1.get(0);
+    CHECK(v == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    CHECK(a1.get(3) == a.get(3));
+    CHECK(a1.get(4) == a.get(4));
+    a.destroy();
+    a1.destroy();
+}
+#endif
+
+TEST(Test_ArrayInt_get_all)
+{
+    std::vector<int64_t> vs = {3656152302, 2814021986, 4195757081, 3272933168, 3466127978, 2777289082,
+                               4247467684, 3825361855, 2496524560, 4052938301, 3765455798, 2527633011,
+                               3448934593, 3699340964, 4057735040, 3294068800};
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+    a.create();
+    for (const auto i : vs)
+        a.add(i);
+    CHECK(a.try_compress(a1));
+    CHECK(a1.is_compressed());
+    auto res = a1.get_all(0, a1.size());
+    CHECK(res == vs);
+    a.destroy();
+    a1.destroy();
+}
+
+TEST(Test_array_same_size_less_bits)
+{
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+    a.create();
+    a.add(1000000);
+    a.add(1000000);
+    a.add(1000000);
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a.get_any(0) == 1000000);
+    CHECK(a.get_any(1) == 1000000);
+    CHECK(a.get_any(2) == 1000000);
+    CHECK(a1.is_compressed());
+    CHECK(a1.get_any(0) == 1000000);
+    CHECK(a1.get_any(1) == 1000000);
+    CHECK(a1.get_any(2) == 1000000);
+    a.destroy();
+    a1.destroy();
+}
+
+TEST(Test_ArrayInt_negative_nums)
+{
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+    a.create();
+    a.add(-1000000);
+    a.add(0);
+    a.add(1000000);
+    CHECK_NOT(a.is_compressed());
+    CHECK(a.try_compress(a1));
+    a1.destroy();
+    CHECK(a.get(0) == -1000000);
+    CHECK(a.get(1) == 0);
+    CHECK(a.get(2) == 1000000);
+    a.add(-1000000);
+    a.add(-1000000);
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a.get(0) == -1000000);
+    CHECK(a.get(1) == 0);
+    CHECK(a.get(2) == 1000000);
+    CHECK(a.get(3) == -1000000);
+    CHECK(a.get(4) == -1000000);
+    a.add(0);
+    a1.destroy();
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a1.is_compressed());
+
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    CHECK(a1.get(3) == a.get(3));
+    CHECK(a1.get(4) == a.get(4));
+    CHECK(a1.get(5) == a.get(5));
+
+    a.add(1000000);
+    a1.destroy(); // drop the previous compressed copy before compressing again
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a1.is_compressed());
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    CHECK(a1.try_decompress());
+    a.add(-1000000);
+    a1.destroy();
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a1.is_compressed());
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    a.add(0);
+    a1.destroy();
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a1.is_compressed());
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    a.add(1000000);
+    a1.destroy();
+    CHECK(a.try_compress(a1));
+    CHECK_NOT(a.is_compressed());
+    CHECK(a1.is_compressed());
+    CHECK(a.size() == 10);
+    CHECK(a.size() == a1.size());
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    CHECK(a1.get(3) == a.get(3));
+    CHECK(a1.get(4) == a.get(4));
+    CHECK(a1.get(5) == a.get(5));
+    CHECK(a1.get(6) == a.get(6));
+    CHECK(a1.get(7) == a.get(7));
+    CHECK(a1.get(8) == a.get(8));
+    a.destroy();
+    a1.destroy();
+}
+
+TEST(Test_ArrayInt_compress_data)
+{
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+
+    a.create();
+    a.add(-4427957085475570907);
+    a.add(-4427957085475570907);
+    a.add(-4427957085475570907);
+    a.add(-4427957085475570907);
+    a.add(4);
+    a.add(5);
+    a.add(6);
+    a.add(7);
+    a.add(8);
+    a.add(4);
+    a.try_compress(a1);
+    CHECK(a1.is_compressed());
+    CHECK(a1.is_attached());
+    CHECK(a.is_attached());
+    for (size_t i = 0; i < a.size(); ++i) {
+        auto v0 = a1.get(i);
+        auto v1 = a.get(i);
+        CHECK(v0 == v1);
+    }
+    a.destroy();
+    a1.destroy();
+
+    a.create();
+    a.add(-4427957085475570907);
+    a.add(-4427957085475570907);
+    a.add(-4427957085475570907);
+    a.add(-4427957085475570907);
+    a.try_compress(a1);
+    for (size_t i = 0; i < a.size(); ++i)
+        CHECK(a1.get(i) == a.get(i));
+    a.destroy();
+    a1.destroy();
+
+    a.create();
+
+    a.add(16388);
+    a.add(409);
+    a.add(16388);
+    a.add(16388);
+    a.add(409);
+    a.add(16388);
+    CHECK(a.size() == 6);
+    // Current: [16388:16, 409:16, 16388:16, 16388:16, 409:16, 16388:16],
+    // space needed: 6*16 bits = 96 bits + header.
+    // Compressing the array is a good option.
+    CHECK(a.try_compress(a1));
+    CHECK(a1.is_compressed());
+    // Compressed: [409:16, 16388:16][1:1,0:1,1:1,1:1,0:1,1:1],
+    // space needed: 2*16 bits + 6*1 bit = 38 bits + header.
+    CHECK(a1.size() == a.size());
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    CHECK(a1.get(3) == a.get(3));
+    CHECK(a1.get(4) == a.get(4));
+    CHECK(a1.get(5) == a.get(5));
+    // decompress
+    CHECK(a1.try_decompress());
+    a.add(20);
+    // compress again, it should still be a viable option
+    a1.destroy();
+    CHECK(a.try_compress(a1));
+    CHECK(a1.is_compressed());
+    CHECK(a1.size() == 7);
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    CHECK(a1.get(3) == a.get(3));
+    CHECK(a1.get(4) == a.get(4));
+    CHECK(a1.get(5) == a.get(5));
+    CHECK(a1.get(6) == a.get(6));
+    CHECK(a1.try_decompress());
+    CHECK_NOT(a1.is_compressed());
+    CHECK(a1.get(0) == a.get(0));
+    CHECK(a1.get(1) == a.get(1));
+    CHECK(a1.get(2) == a.get(2));
+    CHECK(a1.get(3) == a.get(3));
+    CHECK(a1.get(4) == a.get(4));
+    CHECK(a1.get(5) == a.get(5));
+    CHECK(a1.get(6) == a.get(6));
+    a.destroy();
+    a1.destroy();
+}
+
+TEST(Test_ArrayInt_compress_data_init_from_mem)
+{
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+    a.create();
+    a.add(16388);
+    a.add(409);
+    a.add(16388);
+    a.add(16388);
+    a.add(409);
+    a.add(16388);
+    const auto sz = a.size();
+    CHECK(sz == 6);
+    // Current: [16388:16, 409:16, 16388:16, 16388:16, 409:16, 16388:16],
+    // space needed: 6*16 bits = 96 bits + header.
+    // Compressing the array is a good option (the copy placed in a1 comes out compressed).
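+    // (Sketch of the expected flex layout, mirroring the arithmetic in
+    // Test_ArrayInt_compress_data above: a table of the two distinct values
+    // [409:16, 16388:16] plus one 1-bit index per element [1,0,1,1,0,1]
+    // -> 2*16 + 6*1 = 38 bits + header, versus 96 bits + header uncompressed.)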
+    CHECK(a.try_compress(a1));
+    CHECK(a1.is_compressed());
+    // Array should be in compressed form now
+    auto mem = a1.get_mem();
+    ArrayInteger a2(Allocator::get_default());
+    a2.init_from_mem(mem); // initialise a2 from a1's compressed memory
+    // check a2
+    CHECK(a2.is_compressed());
+    const auto sz2 = a2.size();
+    CHECK(sz2 == 6);
+    CHECK(a2.get(0) == 16388);
+    CHECK(a2.get(1) == 409);
+    CHECK(a2.get(2) == 16388);
+    CHECK(a2.get(3) == 16388);
+    CHECK(a2.get(4) == 409);
+    CHECK(a2.get(5) == 16388);
+    // decompress a2 and compress it again
+    CHECK(a2.is_compressed());
+    CHECK(a2.try_decompress());
+    CHECK_NOT(a2.is_compressed());
+    a2.add(20);
+    CHECK(a2.try_compress(a1));
+    CHECK(a1.is_compressed());
+    CHECK(a1.size() == 7);
+    CHECK(a1.get(0) == 16388);
+    CHECK(a1.get(1) == 409);
+    CHECK(a1.get(2) == 16388);
+    CHECK(a1.get(3) == 16388);
+    CHECK(a1.get(4) == 409);
+    CHECK(a1.get(5) == 16388);
+    CHECK(a1.get(6) == 20);
+    CHECK(a1.try_decompress());
+    a.destroy();
+    a1.destroy();
+    a2.destroy();
+    CHECK_NOT(a.is_attached());
+    CHECK_NOT(a1.is_attached());
+    CHECK_NOT(a2.is_attached());
+}
 
 TEST(ArrayIntNull_SetNull)
 {
@@ -244,3 +1814,114 @@ TEST(ArrayRef_Basic)
 
     a.destroy();
 }
+
+TEST_TYPES(ArrayInt_comparison, Equal, NotEqual, Less, Greater)
+{
+    using Cond = TEST_TYPE;
+    ArrayInteger a(Allocator::get_default());
+    ArrayInteger a1(Allocator::get_default());
+    a.create();
+
+    // check first positive values < 32 bits
+    constexpr auto N = 300;
+    constexpr auto M = 3;
+    for (size_t i = 0; i < N; i++)
+        for (size_t j = 0; j < M; ++j)
+            a.add(i);
+
+    auto sz = a.size();
+    CHECK(sz == M * N);
+
+    CHECK(a.try_compress(a1));
+    CHECK(a1.is_compressed());
+
+    // Array should be in compressed form now and values should match
+    for (size_t i = 0; i < sz; ++i)
+        CHECK(a.get(i) == a1.get(i));
+
+    for (int i = (int)(sz)-1; i >= 0; --i) {
+        QueryStateFindFirst m_first1, m_first2;
+        CHECK(a.find<Cond>(i, 0, sz, &m_first1) == a1.find<Cond>(i, 0, sz, &m_first2));
+        CHECK(m_first1.m_state == m_first2.m_state);
+    }
+
+    IntegerColumn accu1(Allocator::get_default());
+    IntegerColumn accu2(Allocator::get_default());
+    accu1.create();
+    accu2.create();
+    for (int i = (int)(sz)-1; i >= 0; --i) {
+        QueryStateFindAll m1{accu1}, m2{accu2};
+        CHECK(a.find<Cond>(i, 0, sz, &m1) == a1.find<Cond>(i, 0, sz, &m2));
+        CHECK(m1.match_count() == m2.match_count());
+    }
+
+    // check negative numbers now.
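+    // (Note added for clarity: the negative pass below re-runs the same
+    // Cond-parameterised comparison -- Equal, NotEqual, Less, Greater -- over -i,
+    // which is assumed to force the signed encoding paths of the compressor.)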
+    a1.destroy();
+    a.clear();
+
+    for (size_t i = 0; i < N; i++)
+        for (size_t j = 0; j < M; ++j)
+            a.add(-int64_t(i));
+
+    sz = a.size();
+    CHECK(sz == M * N);
+
+    CHECK(a.try_compress(a1));
+    CHECK(a1.is_compressed());
+
+    // Array should be in compressed form now and values should match
+    for (size_t i = 0; i < sz; ++i)
+        CHECK(a.get(i) == a1.get(i));
+
+    for (int64_t i = (int64_t)(sz)-1; i >= 0; --i) {
+        QueryStateFindFirst m_first1, m_first2;
+        CHECK(a.find<Cond>(-i, 0, sz, &m_first1) == a1.find<Cond>(-i, 0, sz, &m_first2));
+        CHECK(m_first1.m_state == m_first2.m_state);
+    }
+
+    accu1.clear();
+    accu2.clear();
+    for (int i = (int)(sz)-1; i >= 0; --i) {
+        QueryStateFindAll m1{accu1}, m2{accu2};
+        CHECK(a.find<Cond>(-i, 0, sz, &m1) == a1.find<Cond>(-i, 0, sz, &m2));
+        CHECK(m1.match_count() == m2.match_count());
+    }
+
+    accu1.destroy();
+    accu2.destroy();
+    a.destroy();
+    a1.destroy();
+
+#if REALM_COMPRESS
+    a.create();
+    std::random_device dev;
+    std::mt19937 rng(dev());
+    const auto min_range_t = (size_t)std::numeric_limits<int>::min();
+    const auto max_range_t = (size_t)std::numeric_limits<int>::max();
+    std::uniform_int_distribution<size_t> dist(min_range_t, max_range_t);
+    sz = 100;
+    for (size_t i = 0; i < sz; ++i) {
+        auto v = (int)dist(rng);
+        a.add(v);
+    }
+    a.try_compress(a1);
+
+    for (size_t i = 0; i < sz; ++i)
+        CHECK(a.get(i) == a1.get(i));
+
+    CHECK(a1.is_compressed());
+    for (size_t i = 0; i < sz; ++i) {
+        QueryStateFindFirst m_first1, m_first2;
+        CHECK(a.find<Cond>(a.get(i), 0, sz, &m_first1) == a1.find<Cond>(a1.get(i), 0, sz, &m_first2));
+        CHECK(m_first1.m_state == m_first2.m_state);
+        if (m_first1.m_state != realm::not_found)
+            CHECK(a.get(m_first1.m_state) == a1.get(m_first2.m_state));
+    }
+
+    a.destroy();
+    a1.destroy();
+#endif
+
+    CHECK_NOT(a.is_attached());
+    CHECK_NOT(a1.is_attached());
+}
diff --git a/test/test_group.cpp b/test/test_group.cpp
index 54cd141485b..651a582463c 100644
--- a/test/test_group.cpp
+++ b/test/test_group.cpp
@@ -2315,4 +2315,198 @@ TEST(Group_UniqueColumnKeys)
     CHECK_NOT_EQUAL(col_foo, col_bar);
 }
 
+TEST(Group_ArrayCompression_Correctness)
+{
+    GROUP_TEST_PATH(path);
+
+    // Create group with one list which maps to array_integer
+    Group to_disk;
+    TableRef table = to_disk.add_table("test");
+    auto col_key = table->add_column_list(type_Int, "lint");
+    auto obj = table->create_object();
+    auto array = obj.get_list<Int>(col_key);
+    array.add(16388);
+    array.add(409);
+    array.add(16388);
+    array.add(16388);
+    array.add(409);
+    array.add(16388);
+    CHECK_EQUAL(array.size(), 6);
+    CHECK_EQUAL(array.get_any(0).get_int(), 16388);
+    CHECK_EQUAL(array.get_any(1).get_int(), 409);
+    CHECK_EQUAL(array.get_any(2).get_int(), 16388);
+    CHECK_EQUAL(array.get_any(3).get_int(), 16388);
+    CHECK_EQUAL(array.get_any(4).get_int(), 409);
+    CHECK_EQUAL(array.get_any(5).get_int(), 16388);
+
+    // Serialize to disk (compression should happen when the proper leaf array is serialized to disk)
+    to_disk.write(path, crypt_key());
+
+#ifdef REALM_DEBUG
+    to_disk.verify();
+#endif
+
+    // Load the tables
+    Group from_disk(path, crypt_key());
+    TableRef read_table = from_disk.get_table("test");
+    auto col_key1 = read_table->get_column_key("lint");
+    auto obj1 = read_table->get_object(0);
+    auto l1 = obj1.get_list<Int>(col_key1);
+    CHECK(l1.size() == array.size());
+    CHECK(*read_table == *table);
+    for (size_t i = 0; i < l1.size(); ++i) {
+        CHECK_EQUAL(l1.get_any(i), array.get_any(i));
+    }
+
+#ifdef REALM_DEBUG
+    from_disk.verify();
+#endif
+}
+
+TEST(Group_ArrayCompression_Correctness_Negative)
+{
+    GROUP_TEST_PATH(path);
+
+    // Create group with one list which maps to array_integer
+    Group to_disk;
+    TableRef table = to_disk.add_table("test");
+    auto col_key = table->add_column_list(type_Int, "lint");
+    auto obj = table->create_object();
+    auto array = obj.get_list<Int>(col_key);
+
+    array.add(-1);
+    array.add(-1);
+    array.add(-1);
+    array.add(-1);
+    array.add(std::numeric_limits<int64_t>::max());
+    array.add(std::numeric_limits<int64_t>::max());
+
+    CHECK_EQUAL(array.size(), 6);
+    CHECK_EQUAL(array.get_any(0).get_int(), -1);
+    CHECK_EQUAL(array.get_any(1).get_int(), -1);
+    CHECK_EQUAL(array.get_any(2).get_int(), -1);
+    CHECK_EQUAL(array.get_any(3).get_int(), -1);
+    CHECK_EQUAL(array.get_any(4).get_int(), std::numeric_limits<int64_t>::max());
+    CHECK_EQUAL(array.get_any(5).get_int(), std::numeric_limits<int64_t>::max());
+
+    // Serialize to disk (compression should happen when the proper leaf array is serialized to disk)
+    to_disk.write(path, crypt_key());
+
+#ifdef REALM_DEBUG
+    to_disk.verify();
+#endif
+
+    // Load the tables
+    Group from_disk(path, crypt_key());
+    TableRef read_table = from_disk.get_table("test");
+    auto col_key1 = read_table->get_column_key("lint");
+    auto obj1 = read_table->get_object(0);
+    auto l1 = obj1.get_list<Int>(col_key1);
+    CHECK(l1.size() == array.size());
+    CHECK(*read_table == *table);
+    for (size_t i = 0; i < l1.size(); ++i) {
+        CHECK_EQUAL(l1.get_any(i), array.get_any(i));
+    }
+
+#ifdef REALM_DEBUG
+    from_disk.verify();
+#endif
+}
+
+TEST(Group_ArrayCompression_Correctness_Funny_Values)
+{
+    GROUP_TEST_PATH(path);
+
+    // Create group with one list which maps to array_integer
+    Group to_disk;
+    TableRef table = to_disk.add_table("test");
+    auto col_key = table->add_column_list(type_Int, "lint");
+    auto obj = table->create_object();
+    auto array = obj.get_list<Int>(col_key);
+
+    std::vector<int64_t> vs = {3656152302, 2814021986, 4195757081, 3272933168, 3466127978, 2777289082,
+                               4247467684, 3825361855, 2496524560, 4052938301, 3765455798, 2527633011,
+                               3448934593, 3699340964, 4057735040, 3294068800};
+
+    size_t ndx = 0;
+    for (const auto v : vs) {
+        array.add(v);
+        CHECK_EQUAL(v, array.get(ndx++));
+    }
+    CHECK_EQUAL(array.size(), vs.size());
+
+    // Serialize to disk (compression should happen when the proper leaf array is serialized to disk)
+    to_disk.write(path, crypt_key());
+
+#ifdef REALM_DEBUG
+    to_disk.verify();
+#endif
+
+    // Load the tables
+    Group from_disk(path, crypt_key());
+    TableRef read_table = from_disk.get_table("test");
+    auto col_key1 = read_table->get_column_key("lint");
+    auto obj1 = read_table->get_object(0);
+    auto l1 = obj1.get_list<Int>(col_key1);
+    CHECK(l1.size() == array.size());
+    CHECK(*read_table == *table);
+    for (size_t i = 0; i < l1.size(); ++i) {
+        CHECK_EQUAL(l1.get_any(i), array.get_any(i));
+    }
+
+#ifdef REALM_DEBUG
+    from_disk.verify();
+#endif
+}
+
+
+TEST(Group_ArrayCompression_Correctness_Random_Input)
+{
+    GROUP_TEST_PATH(path);
+
+    // Create group with one list which maps to array_integer
+    Group to_disk;
+    TableRef table = to_disk.add_table("test");
+    auto col_key = table->add_column_list(type_Int, "lint");
+    auto obj = table->create_object();
+    auto array = obj.get_list<Int>(col_key);
+
+    std::random_device dev;
+    std::mt19937 rng(dev());
+    constexpr auto min = std::numeric_limits<int64_t>::min();
+    constexpr auto max = std::numeric_limits<int64_t>::max();
+    std::uniform_int_distribution<int64_t> dist6(static_cast<int64_t>(min),
+                                                 static_cast<int64_t>(max));
+    for (size_t i = 0; i < 1000; ++i) {
+        const auto v = dist6(rng);
+        array.add(v);
+        const auto stored_v = array.get_any(i).get_int();
+        CHECK_EQUAL(stored_v, v);
+    }
+
+    // Serialize to disk (compression should happen when the proper leaf array is serialized to disk)
+    to_disk.write(path, crypt_key());
+
+#ifdef REALM_DEBUG
+    to_disk.verify();
+#endif
+
+    // Load the tables
+    Group from_disk(path, crypt_key());
+    TableRef read_table = from_disk.get_table("test");
+    auto col_key1 = read_table->get_column_key("lint");
+    auto obj1 = read_table->get_object(0);
+    auto l1 = obj1.get_list<Int>(col_key1);
+    CHECK(l1.size() == array.size());
+    CHECK(*read_table == *table);
+    for (size_t i = 0; i < l1.size(); ++i) {
+        CHECK_EQUAL(l1.get_any(i), array.get_any(i));
+    }
+
+#ifdef REALM_DEBUG
+    from_disk.verify();
+#endif
+}
+
+
 #endif // TEST_GROUP
diff --git a/test/test_links.cpp b/test/test_links.cpp
index be08d2c7392..7561364089b 100644
--- a/test/test_links.cpp
+++ b/test/test_links.cpp
@@ -1167,11 +1167,13 @@ TEST(Links_FormerMemLeakCase)
         auto col = origin->add_column(*target, "link");
         origin->create_object().set(col, k);
         origin->create_object().set(col, k);
+        wt.get_group().verify();
         wt.commit();
     }
     {
         WriteTransaction wt(sg_w);
         TableRef target = wt.get_table("target");
+        wt.get_group().verify();
         target->begin()->remove();
         wt.get_group().verify();
         wt.commit();
diff --git a/test/test_list.cpp b/test/test_list.cpp
index b29935981b1..d8e3f1fc1de 100644
--- a/test/test_list.cpp
+++ b/test/test_list.cpp
@@ -633,6 +633,41 @@ TEST(List_AggOps)
     test_lists_numeric_agg(test_context, sg, type_Decimal, Decimal128(realm::null()), true);
 }
 
+TEST(Test_Write_List_Nested_In_Mixed)
+{
+    SHARED_GROUP_TEST_PATH(path);
+    std::string message;
+    DBOptions options;
+    options.logger = test_context.logger;
+    DBRef db = DB::create(make_in_realm_history(), path, options);
+    auto tr = db->start_write();
+    auto table = tr->add_table("table");
+    auto col_any = table->add_column(type_Mixed, "something");
+
+    Obj obj = table->create_object();
+    obj.set_any(col_any, Mixed{20});
+    tr->verify();
+    tr->commit_and_continue_writing(); // commit simple mixed
+    tr->verify();
+
+    obj.set_collection(col_any, CollectionType::List);
+    auto list = obj.get_list_ptr<Mixed>(col_any);
+    list->add(Mixed{10});
+    list->add(Mixed{11});
+    tr->verify();
+    tr->commit_and_continue_writing(); // commit nested list in mixed
+    tr->verify();
+
+    // spice it up a little bit...
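+    // (Comment added for clarity, an assumption rather than part of the original
+    // test: nested collections inside a Mixed list go through the same leaf-write
+    // path on commit, so this should also exercise compression of the nested
+    // integer leaves.)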
+    list->insert_collection(2, CollectionType::List);
+    list->insert_collection(3, CollectionType::List);
+    list->get_list(2)->add(Mixed{20});
+    list->get_list(3)->add(Mixed{21});
+    tr->commit_and_continue_writing();
+    tr->verify();
+    tr->close();
+}
+
 TEST(List_Nested_InMixed)
 {
     SHARED_GROUP_TEST_PATH(path);
diff --git a/test/test_query.cpp b/test/test_query.cpp
index 6df86fb2b1f..c2d6b196b32 100644
--- a/test/test_query.cpp
+++ b/test/test_query.cpp
@@ -5772,4 +5772,38 @@ TEST(Query_NestedLinkCount)
     CHECK_EQUAL(q.count(), 3);
 }
 
+TEST_TYPES(Query_IntCompressed, Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual)
+{
+    TEST_TYPE c;
+    SHARED_GROUP_TEST_PATH(path);
+    int ints[] = {-120, -111, -70, -61, -55, -45, -22, -15, -3, 2, 7, 18, 25, 33, 55, 56, 66, 78, 104, 125};
+    std::vector<int> values;
+    for (int j = 1; j < 21; j++) {
+        for (int i = 0; i < j; i++) {
+            values.push_back(ints[i]);
+        }
+    }
+
+    auto db = DB::create(path);
+    auto wt = db->start_write();
+    auto t = wt->add_table("table");
+    auto col = t->add_column(type_Int, "id");
+    for (auto val : values) {
+        t->create_object().set(col, val);
+    }
+    wt->commit_and_continue_as_read();
+
+    for (int val : {-1000, -125, 2, 3, 6, 126, 1000}) {
+        size_t num_matches = 0;
+        for (auto i : values) {
+            if (c(i, val))
+                num_matches++;
+        }
+
+        char query_str[20];
+        snprintf(query_str, 20, "id %s %d", c.description().c_str(), val);
+        CHECK_EQUAL(t->query(query_str).count(), num_matches);
+    }
+}
+
 #endif // TEST_QUERY
diff --git a/test/test_shared.cpp b/test/test_shared.cpp
index 78ede3b4a0c..85c3de4f8ab 100644
--- a/test/test_shared.cpp
+++ b/test/test_shared.cpp
@@ -95,34 +95,32 @@ using unit_test::TestContext;
 // `experiments/testcase.cpp` and then run `sh build.sh
 // check-testcase` (or one of its friends) from the command line.
-#if 0
+
 // Sorting benchmark
-ONLY(Query_QuickSort2)
+TEST(Query_QuickSort2)
 {
     Random random(random_int()); // Seed from slow global generator
 
     // Triggers QuickSort because range > len
     Table ttt;
-    auto ints = ttt.add_column(type_Int, "1");
+    // auto ints = ttt.add_column(type_Int, "1");
     auto strings = ttt.add_column(type_String, "2");
 
     for (size_t t = 0; t < 10000; t++) {
         Obj o = ttt.create_object();
-        // o.set(ints, random.draw_int_mod(1100));
+        // o.set(ints, random.draw_int_mod(1100));
         o.set(strings, "a");
     }
 
     Query q = ttt.where();
-    std::cerr << "GO";
-
     for (size_t t = 0; t < 1000; t++) {
         TableView tv = q.find_all();
         tv.sort(strings);
-        // tv.ints(strings);
+        // tv.ints(strings);
     }
 }
-#endif
+
 
 #if REALM_WINDOWS
 namespace {
diff --git a/test/test_table.cpp b/test/test_table.cpp
index 80df42e1824..52e06fb2659 100644
--- a/test/test_table.cpp
+++ b/test/test_table.cpp
@@ -46,7 +46,7 @@ using namespace std::chrono;
 #include "test_types_helper.hpp"
 
 // #include <valgrind/callgrind.h>
-// #define PERFORMACE_TESTING
+// #define PERFORMANCE_TESTING
 
 using namespace realm;
 using namespace realm::util;
@@ -2954,9 +2954,122 @@ NONCONCURRENT_TEST(Table_QuickSort2)
     std::cout << "   time: " << duration_cast<nanoseconds>(t2 - t1).count() / nb_reps << " ns/rep" << std::endl;
 }
 
+NONCONCURRENT_TEST(Table_object_timestamp)
+{
+#if !defined(REALM_DEBUG) && defined(PERFORMANCE_TESTING)
+    int nb_rows = 10'000'000;
+    int num_runs = 100;
+#else
+    int nb_rows = 100'000;
+    int num_runs = 1;
+#endif
+    SHARED_GROUP_TEST_PATH(path);
+    std::unique_ptr<Replication> hist(make_in_realm_history());
+    DBRef sg = DB::create(*hist, path, DBOptions(crypt_key()));
+    ColKey c0;
+
+    CALLGRIND_START_INSTRUMENTATION;
+
+    std::cout << nb_rows << " rows - timestamps" << std::endl;
+
+    {
+        WriteTransaction wt(sg);
+        auto table = wt.add_table("test");
+
+        c0 = table->add_column(type_Timestamp, "ts");
+
+        auto t1 = steady_clock::now();
+
+        for (int i = 0; i < nb_rows; i++) {
+            Timestamp t(i, i);
+            table->create_object(ObjKey(i)).set_all(t);
+        }
+
+        auto t2 = steady_clock::now();
+        std::cout << "   insertion time: " << duration_cast<nanoseconds>(t2 - t1).count() / nb_rows << " ns/key"
+                  << std::endl;
+
+        CHECK_EQUAL(table->size(), nb_rows);
+        wt.commit();
+    }
+    {
+        ReadTransaction rt(sg);
+        auto table = rt.get_table("test");
+
+        auto t1 = steady_clock::now();
+        Timestamp t(nb_rows / 2, nb_rows / 2);
+        for (int j = 0; j < num_runs; ++j) {
+            auto result = table->where().equal(c0, t).find_all();
+        }
+
+        auto t2 = steady_clock::now();
+
+        std::cout << "   find all : " << duration_cast<milliseconds>(t2 - t1).count() / num_runs << " ms"
+                  << std::endl;
+    }
+}
+
+NONCONCURRENT_TEST(Table_object_search)
+{
+#if !defined(REALM_DEBUG) && defined(PERFORMANCE_TESTING)
+    int nb_rows = 10'000'000;
+    int num_runs = 100;
+#else
+    int nb_rows = 100'000;
+    int num_runs = 1;
+#endif
+    SHARED_GROUP_TEST_PATH(path);
+    std::unique_ptr<Replication> hist(make_in_realm_history());
+    DBRef sg = DB::create(*hist, path, DBOptions(crypt_key()));
+    ColKey c0;
+    ColKey c1;
+
+    CALLGRIND_START_INSTRUMENTATION;
+
+    std::cout << nb_rows << " rows - sequential" << std::endl;
+
+    {
+        WriteTransaction wt(sg);
+        auto table = wt.add_table("test");
+
+        c0 = table->add_column(type_Int, "int1");
+        c1 = table->add_column(type_Int, "int2", true);
+
+        auto t1 = steady_clock::now();
+
+        for (int i = 0; i < nb_rows; i++) {
+            table->create_object(ObjKey(i)).set_all(i << 1, i << 2);
+        }
+
+        auto t2 = steady_clock::now();
+        std::cout << "   insertion time: " << duration_cast<nanoseconds>(t2 - t1).count() / nb_rows << " ns/key"
+                  << std::endl;
+
+        CHECK_EQUAL(table->size(), nb_rows);
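+        // (Comment added for clarity: commit() is where the leaves get written out
+        // and, per the Group_ArrayCompression tests above, where integer leaves may
+        // be compressed on their way to disk.)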
+        wt.commit();
+    }
+    {
+        ReadTransaction rt(sg);
+        auto table = rt.get_table("test");
+
+        auto t1 = steady_clock::now();
+
+        for (int j = 0; j < num_runs; ++j) {
+            auto result = table->find_all_int(c0, nb_rows / 2);
+        }
+
+        auto t2 = steady_clock::now();
+
+        std::cout << "   find all : " << duration_cast<milliseconds>(t2 - t1).count() / num_runs << " ms"
+                  << std::endl;
+    }
+}
+
 NONCONCURRENT_TEST(Table_object_sequential)
 {
-#ifdef PERFORMACE_TESTING
+#if !defined(REALM_DEBUG) && defined(PERFORMANCE_TESTING)
     int nb_rows = 10'000'000;
     int num_runs = 1;
 #else
@@ -3106,7 +3219,7 @@ NONCONCURRENT_TEST(Table_object_sequential)
 
 NONCONCURRENT_TEST(Table_object_seq_rnd)
 {
-#ifdef PERFORMACE_TESTING
+#if !defined(REALM_DEBUG) && defined(PERFORMANCE_TESTING)
     size_t rows = 1'000'000;
     int runs = 100; // runs for building scenario
 #else
@@ -3149,7 +3262,7 @@ NONCONCURRENT_TEST(Table_object_seq_rnd)
     }
     // scenario established!
    int nb_rows = int(key_values.size());
-#ifdef PERFORMACE_TESTING
+#if !defined(REALM_DEBUG) && defined(PERFORMANCE_TESTING)
     int num_runs = 10; // runs for timing access
 #else
     int num_runs = 1; // runs for timing access
@@ -3221,7 +3334,7 @@ NONCONCURRENT_TEST(Table_object_seq_rnd)
 
 NONCONCURRENT_TEST(Table_object_random)
 {
-#ifdef PERFORMACE_TESTING
+#if !defined(REALM_DEBUG) && defined(PERFORMANCE_TESTING)
     int nb_rows = 1'000'000;
     int num_runs = 10;
 #else
diff --git a/test/test_unresolved_links.cpp b/test/test_unresolved_links.cpp
index adaf6981130..60f50ee3488 100644
--- a/test/test_unresolved_links.cpp
+++ b/test/test_unresolved_links.cpp
@@ -837,35 +837,6 @@ TEST(Links_ManyObjects)
     tr->commit();
 }
 
-TEST(Unresolved_PerformanceLinks)
-{
-    constexpr int nb_objects = 1000;
-    using namespace std::chrono;
-
-    SHARED_GROUP_TEST_PATH(path);
-    auto hist = make_in_realm_history();
-    DBRef db = DB::create(*hist, path);
-
-    auto tr = db->start_write();
-    auto table = tr->add_table_with_primary_key("table", type_Int, "id");
-    auto origin = tr->add_table("origin");
-    auto col = origin->add_column(*table, "link");
-    auto key = table->get_objkey_from_primary_key(1);
-    for (int i = 0; i < nb_objects; i++) {
-        origin->create_object().set(col, key);
-    }
-    tr->commit_and_continue_as_read();
-    tr->promote_to_write();
-    auto t1 = steady_clock::now();
-    table->create_object_with_primary_key(1);
-    auto t2 = steady_clock::now();
-    tr->commit_and_continue_as_read();
-    CHECK(t2 > t1);
-    // std::cout << "Time: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
-    tr->promote_to_write();
-    tr->verify();
-}
-
 TEST(Unresolved_PerformanceLinkList)
 {
     constexpr int nb_objects = 1000;
@@ -889,6 +860,7 @@ TEST(Unresolved_PerformanceLinkList)
         ll.add(key3);
     }
     tr->commit_and_continue_as_read();
+    // compresses
     tr->promote_to_write();
     auto t1 = steady_clock::now();
     table->create_object_with_primary_key(1);
@@ -897,7 +869,6 @@ TEST(Unresolved_PerformanceLinkList)
     auto t2 = steady_clock::now();
     tr->commit_and_continue_as_read();
     CHECK(t2 > t1);
-    // std::cout << "Time: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
     tr->promote_to_write();
     tr->verify();
 }