diff --git a/src/realm/CMakeLists.txt b/src/realm/CMakeLists.txt
index 18583f3549a..5a67cdabc15 100644
--- a/src/realm/CMakeLists.txt
+++ b/src/realm/CMakeLists.txt
@@ -62,6 +62,8 @@ set(REALM_SOURCES
     table.cpp
     table_ref.cpp
     obj_list.cpp
+    string_interner.cpp
+    string_compressor.cpp
     object_id.cpp
     table_view.cpp
     tokenizer.cpp
@@ -178,6 +180,8 @@ set(REALM_INSTALL_HEADERS
     null.hpp
     obj.hpp
     obj_list.hpp
+    string_interner.hpp
+    string_compressor.hpp
     object_id.hpp
     path.hpp
     owned_data.hpp
diff --git a/src/realm/array.cpp b/src/realm/array.cpp
index be70388bb2b..b95d081f4d5 100644
--- a/src/realm/array.cpp
+++ b/src/realm/array.cpp
@@ -294,7 +294,7 @@ void Array::set_type(Type type)
     set_hasrefs_in_header(init_has_refs, header);
 }
 
-void Array::destroy_children(size_t offset) noexcept
+void Array::destroy_children(size_t offset, bool ro_only) noexcept
 {
     for (size_t i = offset; i != m_size; ++i) {
         int64_t value = get(i);
@@ -310,7 +310,7 @@ void Array::destroy_children(size_t offset) noexcept
             continue;
 
         ref_type ref = to_ref(value);
-        destroy_deep(ref, m_alloc);
+        destroy_deep(ref, m_alloc, ro_only);
     }
 }
 
@@ -607,6 +607,14 @@ void Array::do_ensure_minimum_width(int_fast64_t value)
     }
 }
 
+size_t Array::size() const noexcept
+{
+    // In case the array is in compressed format, never read the size directly
+    // from the header, since that would very likely cause a cache miss.
+    // For compressed arrays, m_size is always kept up to date by init_from_mem().
+    return m_size;
+}
+
 bool Array::compress_array(Array& arr) const
 {
     if (m_integer_compressor.get_encoding() == NodeHeader::Encoding::WTypBits) {
diff --git a/src/realm/array.hpp b/src/realm/array.hpp
index 47984bfe959..0611068bd12 100644
--- a/src/realm/array.hpp
+++ b/src/realm/array.hpp
@@ -117,7 +117,7 @@ class Array : public Node, public ArrayParent {
     /// pointer.
     void init_from_mem(MemRef) noexcept;
 
-    /// Same as `init_from_ref(get_ref_from_parent())`.
+    /// Same as `init_from_ref(ref_from_parent())`.
     void init_from_parent() noexcept
     {
         ref_type ref = get_ref_from_parent();
@@ -210,6 +210,8 @@ class Array : public Node, public ArrayParent {
         update_width_cache_from_header();
     }
 
+    size_t size() const noexcept;
+
     bool is_empty() const noexcept
     {
         return size() == 0;
@@ -362,7 +364,8 @@ class Array : public Node, public ArrayParent {
     /// state (as if calling detach()), then free the allocated memory. If this
     /// accessor is already in the detached state, this function has no effect
     /// (idempotency).
-    void destroy_deep() noexcept;
+    /// If 'ro_only', only free space in read-only memory (the file).
+    void destroy_deep(bool ro_only = false) noexcept;
 
     /// check if the array is encoded (in B format)
     inline bool is_compressed() const;
@@ -377,13 +380,13 @@ class Array : public Node, public ArrayParent {
     bool try_decompress();
 
     /// Shorthand for `destroy_deep(MemRef(ref, alloc), alloc)`.
-    static void destroy_deep(ref_type ref, Allocator& alloc) noexcept;
+    static void destroy_deep(ref_type ref, Allocator& alloc, bool ro_only = false) noexcept;
 
     /// Destroy the specified array node and all of its children, recursively.
     ///
     /// This is done by freeing the specified array node after calling
     /// destroy_deep() for every contained 'ref' element.
-    static void destroy_deep(MemRef, Allocator&) noexcept;
+    static void destroy_deep(MemRef, Allocator&, bool ro_only = false) noexcept;
 
     // Clone deep
     static MemRef clone(MemRef, Allocator& from_alloc, Allocator& target_alloc);
@@ -546,7 +549,7 @@ class Array : public Node, public ArrayParent {
     // Overriding method in ArrayParent
     ref_type get_child_ref(size_t) const noexcept override;
 
-    void destroy_children(size_t offset = 0) noexcept;
+    void destroy_children(size_t offset = 0, bool ro_only = false) noexcept;
 
 protected:
     // Getters and Setters for adaptive-packed arrays
@@ -918,16 +921,17 @@ inline void Array::set_context_flag(bool value) noexcept
     }
 }
 
-inline void Array::destroy_deep() noexcept
+inline void Array::destroy_deep(bool ro_only) noexcept
 {
     if (!is_attached())
         return;
 
     if (m_has_refs)
-        destroy_children();
+        destroy_children(0, ro_only);
 
     char* header = get_header_from_data(m_data);
-    m_alloc.free_(m_ref, header);
+    if (!ro_only || is_read_only())
+        m_alloc.free_(m_ref, header);
     m_data = nullptr;
 }
 
@@ -970,20 +974,21 @@ inline void Array::clear_and_destroy_children()
     truncate_and_destroy_children(0);
 }
 
-inline void Array::destroy_deep(ref_type ref, Allocator& alloc) noexcept
+inline void Array::destroy_deep(ref_type ref, Allocator& alloc, bool ro_only) noexcept
 {
-    destroy_deep(MemRef(ref, alloc), alloc);
+    destroy_deep(MemRef(ref, alloc), alloc, ro_only);
 }
 
-inline void Array::destroy_deep(MemRef mem, Allocator& alloc) noexcept
+inline void Array::destroy_deep(MemRef mem, Allocator& alloc, bool ro_only) noexcept
 {
     if (!get_hasrefs_from_header(mem.get_addr())) {
-        alloc.free_(mem);
+        if (!ro_only || alloc.is_read_only(mem.get_ref()))
+            alloc.free_(mem);
         return;
     }
 
     Array array(alloc);
     array.init_from_mem(mem);
-    array.destroy_deep();
+    array.destroy_deep(ro_only);
 }
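// [Editor's sketch - not part of the patch] The `ro_only` flavour of
// destroy_deep() above supports commit-time cleanup: nodes that still live
// in read-only (file) memory must have their file space released, while
// writable (COW'ed) nodes were already freed when they were copied. A
// minimal stand-alone illustration; NodeSketch and FileAllocatorSketch are
// hypothetical stand-ins for Realm's array nodes and allocator.
#include <vector>

struct NodeSketch {
    bool read_only = true;              // still lives in the mmap'ed file
    std::vector<NodeSketch*> children;
};

struct FileAllocatorSketch {
    void free_node(NodeSketch*) {}      // would return the node's space to the freelist
};

inline void destroy_deep_sketch(NodeSketch* n, FileAllocatorSketch& alloc, bool ro_only)
{
    for (NodeSketch* child : n->children)
        destroy_deep_sketch(child, alloc, ro_only);
    if (!ro_only || n->read_only)       // mirrors `!ro_only || is_read_only()` above
        alloc.free_node(n);
}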
diff --git a/src/realm/array_integer.cpp b/src/realm/array_integer.cpp
index f86871c3225..b39ade6e940 100644
--- a/src/realm/array_integer.cpp
+++ b/src/realm/array_integer.cpp
@@ -22,6 +22,8 @@
 #include
 #include
 
+#include
+
 using namespace realm;
 
 ArrayInteger::ArrayInteger(Allocator& allocator) noexcept
diff --git a/src/realm/array_integer.hpp b/src/realm/array_integer.hpp
index b8739414091..22d729e2e29 100644
--- a/src/realm/array_integer.hpp
+++ b/src/realm/array_integer.hpp
@@ -174,6 +174,7 @@ inline ArrayIntNull::~ArrayIntNull() noexcept {}
 
 inline size_t ArrayIntNull::size() const noexcept
 {
+    // this cannot be right, what if size is 0
     return Array::size() - 1;
 }
 
diff --git a/src/realm/array_string.cpp b/src/realm/array_string.cpp
index 636a60a2865..cb2aa6fb3f5 100644
--- a/src/realm/array_string.cpp
+++ b/src/realm/array_string.cpp
@@ -17,6 +17,7 @@
 **************************************************************************/
 
 #include
+#include
 
 #include
 #include
 
@@ -52,14 +53,24 @@ void ArrayString::init_from_mem(MemRef mem) noexcept
     else {
         auto arr = new (&m_storage) Array(m_alloc);
         arr->init_from_mem(mem);
-        m_string_enum_values = std::make_unique(m_alloc);
-        ArrayParent* p;
-        REALM_ASSERT(m_spec != nullptr);
-        REALM_ASSERT(m_col_ndx != realm::npos);
-        ref_type r = m_spec->get_enumkeys_ref(m_col_ndx, p);
-        m_string_enum_values->init_from_ref(r);
-        m_string_enum_values->set_parent(p, m_col_ndx);
-        m_type = Type::enum_strings;
+        // The context flag is used to indicate interned strings vs old enum strings
+        // (in conjunction with has_refs() == false)
+        if (arr->get_context_flag_from_header(arr->get_header())) {
+            // init for new interned strings (replacing old enum strings)
+            m_type = Type::interned_strings;
+            // consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
+        }
+        else {
+            // init for old enum strings
+            m_string_enum_values = std::make_unique(m_alloc);
+            ArrayParent* p;
+            REALM_ASSERT(m_spec != nullptr);
+            REALM_ASSERT(m_col_ndx != realm::npos);
+            ref_type r = m_spec->get_enumkeys_ref(m_col_ndx, p);
+            m_string_enum_values->init_from_ref(r);
+            m_string_enum_values->set_parent(p, m_col_ndx);
+            m_type = Type::enum_strings;
+        }
         }
     }
     else {
@@ -111,6 +122,7 @@ size_t ArrayString::size() const
         case Type::big_strings:
             return static_cast(m_arr)->size();
         case Type::enum_strings:
+        case Type::interned_strings:
            return static_cast(m_arr)->size();
     }
     return {};
@@ -128,7 +140,8 @@ void ArrayString::add(StringData value)
         case Type::big_strings:
             static_cast(m_arr)->add_string(value);
             break;
-        case Type::enum_strings: {
+        case Type::enum_strings:
+        case Type::interned_strings: {
             auto a = static_cast(m_arr);
             size_t ndx = a->size();
             a->add(0);
@@ -150,6 +163,11 @@ void ArrayString::set(size_t ndx, StringData value)
         case Type::big_strings:
             static_cast(m_arr)->set_string(ndx, value);
             break;
+        case Type::interned_strings: {
+            auto id = m_string_interner->intern(value);
+            static_cast(m_arr)->set(ndx, id);
+            break;
+        }
         case Type::enum_strings: {
             size_t sz = m_string_enum_values->size();
             size_t res = m_string_enum_values->find_first(value, 0, sz);
@@ -178,6 +196,12 @@ void ArrayString::insert(size_t ndx, StringData value)
         case Type::enum_strings: {
             static_cast(m_arr)->insert(ndx, 0);
             set(ndx, value);
+            break;
+        }
+        case Type::interned_strings: {
+            static_cast(m_arr)->insert(ndx, 0);
+            set(ndx, value);
+            break;
         }
     }
 }
@@ -195,6 +219,10 @@ StringData ArrayString::get(size_t ndx) const
             size_t index = size_t(static_cast(m_arr)->get(ndx));
             return m_string_enum_values->get(index);
         }
+        case Type::interned_strings: {
+            size_t id = size_t(static_cast(m_arr)->get(ndx));
+            return m_string_interner->get(id);
+        }
     }
     return {};
 }
@@ -212,6 +240,10 @@ StringData ArrayString::get_legacy(size_t ndx) const
             size_t index = size_t(static_cast(m_arr)->get(ndx));
             return m_string_enum_values->get(index);
         }
+        case Type::interned_strings: {
+            size_t id = size_t(static_cast(m_arr)->get(ndx));
+            return m_string_interner->get(id);
+        }
     }
     return {};
 }
@@ -231,8 +263,12 @@ bool ArrayString::is_null(size_t ndx) const
         case Type::big_strings:
             return static_cast(m_arr)->is_null(ndx);
         case Type::enum_strings: {
-            size_t index = size_t(static_cast(m_arr)->get(ndx));
-            return m_string_enum_values->is_null(index);
+            size_t id = size_t(static_cast(m_arr)->get(ndx));
+            return m_string_enum_values->is_null(id);
+        }
+        case Type::interned_strings: {
+            size_t id = size_t(static_cast(m_arr)->get(ndx));
+            return id == 0;
         }
     }
     return {};
@@ -250,6 +286,7 @@ void ArrayString::erase(size_t ndx)
         case Type::big_strings:
             static_cast(m_arr)->erase(ndx);
             break;
+        case Type::interned_strings:
         case Type::enum_strings:
             static_cast(m_arr)->erase(ndx);
             break;
@@ -277,6 +314,9 @@ void ArrayString::move(ArrayString& dst, size_t ndx)
             // this operation will never be called for enumerated columns
             REALM_UNREACHABLE();
             break;
+        case Type::interned_strings:
+            m_arr->truncate(ndx);
+            break;
     }
 }
 
@@ -293,6 +333,7 @@ void ArrayString::clear()
             static_cast(m_arr)->clear();
             break;
         case Type::enum_strings:
+        case Type::interned_strings:
             static_cast(m_arr)->clear();
             break;
     }
@@ -321,6 +362,15 @@ size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const
             }
             break;
         }
+        case Type::interned_strings: {
+            // We need a way to avoid this lookup for each leaf array. The lookup must
+            // be made higher up the call stack and the result passed down.
+            auto id = m_string_interner->lookup(value);
+            if (id) {
+                return static_cast(m_arr)->find_first(*id, begin, end);
+            }
+            break;
+        }
     }
     return not_found;
 }
@@ -371,6 +421,9 @@ size_t ArrayString::lower_bound(StringData value)
             return lower_bound_string(static_cast(m_arr), value);
         case Type::enum_strings:
             break;
+        case Type::interned_strings:
+            REALM_UNREACHABLE();
+            break;
     }
     return realm::npos;
 }
@@ -383,6 +436,9 @@ ArrayString::Type ArrayString::upgrade_leaf(size_t value_size)
     if (m_type == Type::enum_strings)
         return Type::enum_strings;
 
+    if (m_type == Type::interned_strings)
+        return Type::interned_strings;
+
     if (m_type == Type::medium_strings) {
         if (value_size <= medium_string_max_size)
             return Type::medium_strings;
@@ -473,8 +529,25 @@ void ArrayString::verify() const
             static_cast(m_arr)->verify();
             break;
         case Type::enum_strings:
+        case Type::interned_strings:
             static_cast(m_arr)->verify();
             break;
     }
 #endif
 }
+
+ref_type ArrayString::write(_impl::ArrayWriterBase& out, StringInterner* interner)
+{
+    REALM_ASSERT(interner);
+    // We have to write out all entries, modified or not, to match the total cleanup.
+    Array interned(Allocator::get_default());
+    auto sz = size();
+    interned.create(NodeHeader::type_Normal, true, sz);
+    for (size_t i = 0; i < sz; ++i) {
+        interned.set(i, interner->intern(get(i)));
+    }
+    auto retval = interned.write(out, false, false, out.compress);
+    interned.destroy();
+    return retval;
+    // return m_arr->write(out, true, false, false);
+}
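// [Editor's sketch - not part of the patch] What the new ArrayString::write()
// above boils down to, with Realm types replaced by standard ones: every
// string in the leaf is swapped for its interned ID, so on disk the column
// becomes an integer array (ID 0 is reserved for null) and the interner owns
// the actual bytes. The linear-scan interner below is a hypothetical toy.
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct ToyInterner {
    std::vector<std::string> strings;            // ID-1 -> string
    uint64_t intern(const std::string& s)
    {
        for (size_t i = 0; i < strings.size(); ++i)
            if (strings[i] == s)
                return i + 1;                    // IDs start at 1; 0 means null
        strings.push_back(s);
        return strings.size();
    }
};

inline std::vector<uint64_t> write_leaf_sketch(const std::vector<std::optional<std::string>>& leaf,
                                               ToyInterner& interner)
{
    std::vector<uint64_t> out;
    out.reserve(leaf.size());
    for (const auto& v : leaf)
        out.push_back(v ? interner.intern(*v) : 0);
    return out;                                  // this is what ends up on disk
}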
diff --git a/src/realm/array_string.hpp b/src/realm/array_string.hpp
index 4dc96646378..df121c50b2c 100644
--- a/src/realm/array_string.hpp
+++ b/src/realm/array_string.hpp
@@ -66,6 +66,14 @@ class ArrayString : public ArrayPayload {
     {
         m_arr->set_parent(p, n);
     }
+    bool need_string_interner() const override
+    {
+        return true;
+    }
+    void set_string_interner(StringInterner* string_interner) const override
+    {
+        m_string_interner = string_interner;
+    }
     bool need_spec() const override
     {
         return true;
@@ -118,6 +126,10 @@ class ArrayString : public ArrayPayload {
     static StringData get(const char* header, size_t ndx, Allocator& alloc) noexcept;
 
     void verify() const;
+    // Write to 'out', if needed using 'interner' to intern any strings.
+    // A null interner disables interning. Interned values may be further
+    // compressed using leaf compression for integer arrays.
+    ref_type write(_impl::ArrayWriterBase& out, StringInterner* interner);
 
 private:
     static constexpr size_t small_string_max_size = 15; // ArrayStringShort
@@ -127,18 +139,18 @@ class ArrayString : public ArrayPayload {
     static constexpr size_t storage_size =
         std::max({sizeof(ArrayStringShort), sizeof(ArraySmallBlobs), sizeof(ArrayBigBlobs), sizeof(Array)});
 
-    enum class Type { small_strings, medium_strings, big_strings, enum_strings };
+    enum class Type { small_strings, medium_strings, big_strings, enum_strings, interned_strings };
 
     Type m_type = Type::small_strings;
 
     Allocator& m_alloc;
     alignas(storage_alignment) std::byte m_storage[storage_size];
     Array* m_arr;
+    bool m_nullable = true;
     mutable Spec* m_spec = nullptr;
     mutable size_t m_col_ndx = realm::npos;
-    bool m_nullable = true;
 
     std::unique_ptr m_string_enum_values;
+    mutable StringInterner* m_string_interner = nullptr;
 
     Type upgrade_leaf(size_t value_size);
 };
diff --git a/src/realm/array_timestamp.hpp b/src/realm/array_timestamp.hpp
index 1fad36144f0..cfa4268cd11 100644
--- a/src/realm/array_timestamp.hpp
+++ b/src/realm/array_timestamp.hpp
@@ -76,7 +76,8 @@ class ArrayTimestamp : public ArrayPayload, private Array {
     Timestamp get(size_t ndx) const
     {
         util::Optional seconds = m_seconds.get(ndx);
-        return seconds ? Timestamp(*seconds, int32_t(m_nanoseconds.get(ndx))) : Timestamp{};
+        int32_t nano = (int32_t)m_nanoseconds.get(ndx);
+        return seconds ? Timestamp(*seconds, nano) : Timestamp{};
     }
     Mixed get_any(size_t ndx) const final
     {
diff --git a/src/realm/array_unsigned.cpp b/src/realm/array_unsigned.cpp
index 938fe5aece8..55f030522b9 100644
--- a/src/realm/array_unsigned.cpp
+++ b/src/realm/array_unsigned.cpp
@@ -92,23 +92,25 @@ void ArrayUnsigned::update_from_parent() noexcept
 
 size_t ArrayUnsigned::lower_bound(uint64_t value) const noexcept
 {
-    if (m_width == 8) {
+    auto width = get_width_from_header(get_header());
+
+    if (width == 8) {
         uint8_t* arr = reinterpret_cast<uint8_t*>(m_data);
         uint8_t* pos = std::lower_bound(arr, arr + m_size, value);
         return pos - arr;
     }
-    else if (m_width == 16) {
+    else if (width == 16) {
         uint16_t* arr = reinterpret_cast<uint16_t*>(m_data);
         uint16_t* pos = std::lower_bound(arr, arr + m_size, value);
         return pos - arr;
     }
-    else if (m_width == 32) {
+    else if (width == 32) {
         uint32_t* arr = reinterpret_cast<uint32_t*>(m_data);
         uint32_t* pos = std::lower_bound(arr, arr + m_size, value);
         return pos - arr;
     }
-    else if (m_width < 8) {
-        switch (m_width) {
+    else if (width < 8) {
+        switch (width) {
             case 0:
                 return realm::lower_bound<0>(m_data, m_size, value);
             case 1:
@@ -130,23 +132,25 @@ size_t ArrayUnsigned::lower_bound(uint64_t value) const noexcept
 
 size_t ArrayUnsigned::upper_bound(uint64_t value) const noexcept
 {
-    if (m_width == 8) {
+    auto width = get_width_from_header(get_header());
+
+    if (width == 8) {
         uint8_t* arr = reinterpret_cast<uint8_t*>(m_data);
         uint8_t* pos = std::upper_bound(arr, arr + m_size, value);
         return pos - arr;
     }
-    else if (m_width == 16) {
+    else if (width == 16) {
         uint16_t* arr = reinterpret_cast<uint16_t*>(m_data);
         uint16_t* pos = std::upper_bound(arr, arr + m_size, value);
         return pos - arr;
     }
-    else if (m_width == 32) {
+    else if (width == 32) {
         uint32_t* arr = reinterpret_cast<uint32_t*>(m_data);
         uint32_t* pos = std::upper_bound(arr, arr + m_size, value);
         return pos - arr;
     }
-    else if (m_width < 8) {
-        switch (m_width) {
+    else if (width < 8) {
+        switch (width) {
             case 0:
                 return realm::upper_bound<0>(m_data, m_size, value);
             case 1:
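// [Editor's sketch - not part of the patch] The width dispatch used by
// lower_bound()/upper_bound() above, reduced to standard C++: a packed leaf
// stores fixed-width unsigned integers, and the search casts the payload to
// the matching element type. The patch re-reads the width from the header on
// every call because a compressed leaf may not keep the cached m_width in sync.
#include <algorithm>
#include <cstddef>
#include <cstdint>

inline size_t lower_bound_sketch(const char* data, size_t size, int width_in_bits, uint64_t value)
{
    switch (width_in_bits) {
        case 8: {
            auto p = reinterpret_cast<const uint8_t*>(data);
            return std::lower_bound(p, p + size, value) - p;
        }
        case 16: {
            auto p = reinterpret_cast<const uint16_t*>(data);
            return std::lower_bound(p, p + size, value) - p;
        }
        case 32: {
            auto p = reinterpret_cast<const uint32_t*>(data);
            return std::lower_bound(p, p + size, value) - p;
        }
        default:
            return size; // sub-byte widths need bit-level access, omitted here
    }
}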
diff --git a/src/realm/cluster.cpp b/src/realm/cluster.cpp
index 4922c54f9b2..75deb0707c2 100644
--- a/src/realm/cluster.cpp
+++ b/src/realm/cluster.cpp
@@ -250,6 +250,17 @@ size_t Cluster::node_size_from_header(Allocator& alloc, const char* header)
     }
 }
 
+template <class T>
+inline void Cluster::set_string_interner(T&, ColKey) const
+{
+}
+
+template <>
+inline void Cluster::set_string_interner(ArrayString& arr, ColKey col_key) const
+{
+    m_tree_top.set_string_interner(arr, col_key);
+}
+
 template <class T>
 inline void Cluster::set_spec(T&, ColKey::Idx) const
 {
@@ -270,6 +281,7 @@ inline void Cluster::do_insert_row(size_t ndx, ColKey col, Mixed init_val, bool
     auto col_ndx = col.get_index();
     arr.set_parent(this, col_ndx.val + s_first_col_index);
     set_spec(arr, col_ndx);
+    set_string_interner(arr, col);
     arr.init_from_parent();
     if (init_val.is_null()) {
         arr.insert(ndx, T::default_value(nullable));
@@ -446,10 +458,12 @@ inline void Cluster::do_move(size_t ndx, ColKey col_key, Cluster* to)
     T src(m_alloc);
     src.set_parent(this, col_ndx);
     src.init_from_parent();
+    set_string_interner(src, col_key);
 
     T dst(m_alloc);
     dst.set_parent(to, col_ndx);
     dst.init_from_parent();
+    set_string_interner(dst, col_key);
 
     src.move(dst, ndx);
 }
@@ -760,6 +774,7 @@ inline void Cluster::do_erase(size_t ndx, ColKey col_key)
     T values(m_alloc);
     values.set_parent(this, col_ndx.val + s_first_col_index);
     set_spec(values, col_ndx);
+    set_string_interner(values, col_key);
     values.init_from_parent();
     if constexpr (std::is_same_v) {
         if (ObjLink link = values.get(ndx)) {
@@ -1031,6 +1046,7 @@ void Cluster::upgrade_string_to_enum(ColKey col_key, ArrayString& keys)
     indexes.create(Array::type_Normal, false);
     ArrayString values(m_alloc);
     ref_type ref = Array::get_as_ref(col_ndx.val + s_first_col_index);
+    set_string_interner(values, col_key);
     values.init_from_ref(ref);
     size_t sz = values.size();
     for (size_t i = 0; i < sz; i++) {
@@ -1052,6 +1068,9 @@ void Cluster::init_leaf(ColKey col_key, ArrayPayload* leaf) const
     if (auto t = m_tree_top.get_owning_table())
         t->check_column(col_key);
     ref_type ref = to_ref(Array::get(col_ndx.val + 1));
+    if (leaf->need_string_interner()) {
+        m_tree_top.set_string_interner(*leaf, col_key);
+    }
     if (leaf->need_spec()) {
         m_tree_top.set_spec(*leaf, col_ndx);
     }
@@ -1071,6 +1090,10 @@ void Cluster::verify(ref_type ref, size_t index, util::Optional& sz) const
 {
     ArrayType arr(get_alloc());
     set_spec(arr, ColKey::Idx{unsigned(index) - 1});
+    auto table = get_owning_table();
+    REALM_ASSERT(index <= table->m_leaf_ndx2colkey.size());
+    auto col_key = table->m_leaf_ndx2colkey[index - 1];
+    set_string_interner(arr, col_key);
     arr.set_parent(const_cast(this), index);
     arr.init_from_ref(ref);
     arr.verify();
@@ -1409,6 +1432,7 @@ void Cluster::dump_objects(int64_t key_offset, std::string lead) const
                 case col_type_String: {
                     ArrayString arr(m_alloc);
                     set_spec(arr, col.get_index());
+                    set_string_interner(arr, col);
                     ref_type ref = Array::get_as_ref(j);
                     arr.init_from_ref(ref);
                     std::cout << ", " << arr.get(i);
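// [Editor's sketch - not part of the patch] The set_string_interner<T>()
// helpers above use a common Realm pattern: a no-op primary template plus a
// specialization for the one leaf type that cares, so generic column code can
// call the hook unconditionally and every other leaf type compiles to
// nothing. Reduced to stand-alone form (the stub types are hypothetical):
struct StringInternerStub {};

struct StringLeafStub {
    const StringInternerStub* interner = nullptr;
};

template <class T>
inline void set_interner_stub(T&, const StringInternerStub*)
{
    // default: this leaf type does not use an interner
}

template <>
inline void set_interner_stub<StringLeafStub>(StringLeafStub& leaf, const StringInternerStub* interner)
{
    leaf.interner = interner;
}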
@@ -1628,6 +1652,31 @@ ref_type Cluster::typed_write(ref_type ref, _impl::ArrayWriterBase& out) const
             // Columns
             auto col_key = out.table->m_leaf_ndx2colkey[j - 1];
             auto col_type = col_key.get_type();
+            // String columns are interned at this point
+            if (out.compress && col_type == col_type_String && !col_key.is_collection()) {
+                ArrayRef leaf(m_alloc);
+                leaf.init_from_ref(ref);
+                auto header = leaf.get_header();
+                if (NodeHeader::get_hasrefs_from_header(header) ||
+                    NodeHeader::get_wtype_from_header(header) == wtype_Multiply) {
+                    // We're interning these strings
+                    ArrayString as(m_alloc);
+                    as.init_from_ref(leaf_rot.get_as_ref());
+                    written_cluster.set_as_ref(j, as.write(out, out.table->get_string_interner(col_key)));
+                    // In a transactional setting:
+                    // Destroy all sub-arrays if present, in order to release memory in the file.
+                    // This is contrary to the rest of the handling in this function, but needed
+                    // here since sub-arrays may not have been COW'ed and therefore not freed in the file.
+                    // We rely on 'only_modified' to indicate that we're in a transactional setting.
+                    if (only_modified)
+                        leaf.destroy_deep(true);
+                    continue;
+                }
+                // Whether it's the old enum strings or the new interned strings,
+                // just write out the array using integer leaf compression.
+                written_cluster.set_as_ref(j, leaf.write(out, false, false, false));
+                continue;
+            }
             if (col_key.is_collection()) {
                 ArrayRef arr_ref(m_alloc);
                 arr_ref.init_from_ref(ref);
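// [Editor's sketch - not part of the patch] The header test in typed_write()
// above decides which string leaves take the interning path: leaves holding
// actual string payload (has_refs set for long strings, or a 'Multiply'
// wtype for short strings) are rewritten as interned IDs, while leaves that
// are already integer arrays (enum offsets or interned IDs) are written out
// unchanged. A stand-alone restatement, with hypothetical names:
struct HeaderBitsSketch {
    bool has_refs; // cf. NodeHeader::get_hasrefs_from_header()
    int wtype;     // cf. NodeHeader::get_wtype_from_header()
};

constexpr int wtype_multiply_sketch = 1; // assumed value, for illustration only

inline bool needs_interning_sketch(const HeaderBitsSketch& h)
{
    return h.has_refs || h.wtype == wtype_multiply_sketch;
}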
diff --git a/src/realm/cluster.hpp b/src/realm/cluster.hpp
index 9b106f436ea..365ad3a8634 100644
--- a/src/realm/cluster.hpp
+++ b/src/realm/cluster.hpp
@@ -365,6 +365,8 @@ class Cluster : public ClusterNode {
     void do_insert_mixed(size_t ndx, ColKey col_key, Mixed init_value, ObjKey origin_key);
     template <class T>
     void set_spec(T&, ColKey::Idx) const;
+    template <class T>
+    void set_string_interner(T&, ColKey) const;
     template <class ArrayType>
     void verify(ref_type ref, size_t index, util::Optional& sz) const;
 };
diff --git a/src/realm/cluster_tree.cpp b/src/realm/cluster_tree.cpp
index 29d5f52ce84..3021f684911 100644
--- a/src/realm/cluster_tree.cpp
+++ b/src/realm/cluster_tree.cpp
@@ -1135,6 +1135,15 @@ void ClusterTree::update(UpdateFunction func)
     }
 }
 
+void ClusterTree::set_string_interner(ArrayPayload& arr, ColKey col_key) const
+{
+    // Check for owner. This function may be called in context of DictionaryClusterTree
+    // in which case m_owner is null (and spec never needed).
+    if (m_owner) {
+        arr.set_string_interner(_impl::TableFriend::get_string_interner(*m_owner, col_key));
+    }
+}
+
 void ClusterTree::set_spec(ArrayPayload& arr, ColKey::Idx col_ndx) const
 {
     // Check for owner. This function may be called in context of DictionaryClusterTree
diff --git a/src/realm/cluster_tree.hpp b/src/realm/cluster_tree.hpp
index 43d796c995e..15829f991bc 100644
--- a/src/realm/cluster_tree.hpp
+++ b/src/realm/cluster_tree.hpp
@@ -181,6 +181,7 @@ class ClusterTree {
     void update(UpdateFunction func);
 
     void set_spec(ArrayPayload& arr, ColKey::Idx col_ndx) const;
+    void set_string_interner(ArrayPayload& arr, ColKey col_key) const;
 
     virtual std::unique_ptr get_root_from_parent();
 
diff --git a/src/realm/db.hpp b/src/realm/db.hpp
index e46ba6742c3..7613a4c367b 100644
--- a/src/realm/db.hpp
+++ b/src/realm/db.hpp
@@ -510,6 +510,8 @@ class DB : public std::enable_shared_from_this {
     std::shared_ptr m_logger;
    std::mutex m_commit_listener_mutex;
     std::vector m_commit_listeners;
+    std::unordered_map*> m_string_interners;
+    std::mutex m_string_interners_mutex;
     bool m_is_sync_agent = false;
     // Id for this DB to be used in logging. We will just use some bits from the pointer.
     // The path cannot be used as this would not allow us to distinguish between two DBs opening
diff --git a/src/realm/group.cpp b/src/realm/group.cpp
index 90b7d690b26..70de9a71ae2 100644
--- a/src/realm/group.cpp
+++ b/src/realm/group.cpp
@@ -1368,7 +1368,7 @@ void Group::flush_accessors_for_commit()
         acc->flush_for_commit();
 }
 
-void Group::refresh_dirty_accessors()
+void Group::refresh_dirty_accessors(bool writable)
 {
     if (!m_tables.is_attached()) {
         m_table_accessors.clear();
@@ -1398,7 +1398,7 @@ void Group::refresh_dirty_accessors()
             same_table = true;
         }
         if (same_table) {
-            table_accessor->refresh_accessor_tree();
+            table_accessor->refresh_accessor_tree(writable);
         }
         else {
             table_accessor->detach(Table::cookie_removed);
@@ -1456,7 +1456,7 @@ void Group::advance_transact(ref_type new_top_ref, util::InputStream* in, bool w
     m_top.detach(); // Soft detach
     bool create_group_when_missing = false;                   // See Group::attach_shared().
     attach(new_top_ref, writable, create_group_when_missing); // Throws
-    refresh_dirty_accessors();                                // Throws
+    refresh_dirty_accessors(writable);                        // Throws
 
     if (schema_changed)
         send_schema_change_notification();
diff --git a/src/realm/group.hpp b/src/realm/group.hpp
index 08ddd9acd44..7204f26b258 100644
--- a/src/realm/group.hpp
+++ b/src/realm/group.hpp
@@ -681,7 +681,7 @@ class Group : public ArrayParent {
     /// Memory mappings must have been updated to reflect any growth in filesize before
     /// calling advance_transact()
     void advance_transact(ref_type new_top_ref, util::InputStream*, bool writable);
-    void refresh_dirty_accessors();
+    void refresh_dirty_accessors(bool writable);
     void flush_accessors_for_commit();
 
     /// \brief The version of the format of the node structure (in file or in
diff --git a/src/realm/group_writer.cpp b/src/realm/group_writer.cpp
index 22ce7db93ac..533565f39d2 100644
--- a/src/realm/group_writer.cpp
+++ b/src/realm/group_writer.cpp
@@ -647,6 +647,7 @@ ref_type GroupWriter::write_group()
 {
     ALLOC_DBG_COUT("Commit nr " << m_current_version << " ( from " << m_oldest_reachable_version << " )"
                                << std::endl);
+    // m_group.typed_print("");
     read_in_freelist();
 
     // Now, 'm_size_map' holds all free elements candidate for recycling
@@ -710,7 +711,7 @@ ref_type GroupWriter::write_group()
             top.set_as_ref(Group::s_evacuation_point_ndx, ref);
         }
         else if (ref) {
-            Array::destroy(ref, m_alloc);
+            Array::destroy(ref_type(ref), m_alloc);
             top.set(Group::s_evacuation_point_ndx, 0);
         }
     }
@@ -788,7 +789,9 @@ ref_type GroupWriter::write_group()
         top.set(Group::s_file_size_ndx, RefOrTagged::make_tagged(m_logical_size));
         auto ref = top.get_as_ref(Group::s_evacuation_point_ndx);
         REALM_ASSERT(ref);
-        Array::destroy(ref, m_alloc);
+        Array destroy_array(m_alloc);
+        destroy_array.init_from_ref(ref);
+        destroy_array.destroy();
         top.set(Group::s_evacuation_point_ndx, 0);
         m_evacuation_limit = 0;
diff --git a/src/realm/node.hpp b/src/realm/node.hpp
index 8a4b862a701..21ee61eddde 100644
--- a/src/realm/node.hpp
+++ b/src/realm/node.hpp
@@ -21,6 +21,7 @@
 #include
 #include
+#include
 
 #include
 
@@ -357,6 +358,11 @@ class ArrayPayload {
     virtual void init_from_ref(ref_type) noexcept = 0;
     virtual void set_parent(ArrayParent* parent, size_t ndx_in_parent) noexcept = 0;
     virtual Mixed get_any(size_t ndx) const = 0;
+    virtual bool need_string_interner() const
+    {
+        return false;
+    }
+    virtual void set_string_interner(StringInterner*) const {}
     virtual bool need_spec() const
     {
         return false;
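// [Editor's sketch - not part of the patch] Why refresh_dirty_accessors()
// gains a `writable` flag above: accessor refresh may now have to (re)build
// interner backing state, and creating the backing arrays is only legal
// inside a write transaction, so the flag is threaded from advance_transact()
// down to the table accessors. A hypothetical restatement of the contract:
#include <memory>

struct InternerStateSketch { /* top array, data leaves, hash map ... */ };

struct ColumnAccessorSketch {
    std::unique_ptr<InternerStateSketch> state;
    bool backing_exists = false; // does the file already contain the structure?

    void refresh(bool writable)
    {
        if (backing_exists)
            state = std::make_unique<InternerStateSketch>(); // attach to existing data
        else if (writable)
            state = std::make_unique<InternerStateSketch>(); // create it on demand
        else
            state.reset(); // "dead" mode: reads must tolerate a missing structure
    }
};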
diff --git a/src/realm/obj.cpp b/src/realm/obj.cpp
index eb8138dd8f5..fc34b755d57 100644
--- a/src/realm/obj.cpp
+++ b/src/realm/obj.cpp
@@ -613,7 +613,11 @@ StringData Obj::_get(ColKey::Idx col_ndx) const
         return values.get(m_row_ndx);
     }
     else {
-        return ArrayString::get(alloc.translate(ref), m_row_ndx, alloc);
+        ArrayString values(get_alloc());
+        auto col_key = m_table->leaf_ndx2colkey(col_ndx);
+        values.set_string_interner(m_table->get_string_interner(col_key));
+        values.init_from_ref(ref);
+        return values.get(m_row_ndx);
     }
 }
 
@@ -738,9 +742,12 @@ inline bool Obj::do_is_null(ColKey::Idx col_ndx) const
 template <>
 inline bool Obj::do_is_null(ColKey::Idx col_ndx) const
 {
+    REALM_ASSERT(false); // Don't come here, you're falling from a cliff....
     ArrayString values(get_alloc());
     ref_type ref = to_ref(Array::get(m_mem.get_addr(), col_ndx.val + 1));
     values.set_spec(const_cast(&get_spec()), m_table->leaf_ndx2spec_ndx(col_ndx));
+    // TODO: Set string interner if needed
+    // values.set_string_interner(m_table->get_string_interner(col_key));
     values.init_from_ref(ref);
     return values.is_null(m_row_ndx);
 }
@@ -765,8 +772,16 @@ bool Obj::is_null(ColKey col_key) const
             return do_is_null(col_ndx);
         case col_type_Double:
             return do_is_null(col_ndx);
-        case col_type_String:
-            return do_is_null(col_ndx);
+        case col_type_String: {
+            ArrayString values(get_alloc());
+            ref_type ref = to_ref(Array::get(m_mem.get_addr(), col_ndx.val + 1));
+            values.set_spec(const_cast(&get_spec()), m_table->leaf_ndx2spec_ndx(col_ndx));
+            // TODO: Set string interner if needed
+            values.set_string_interner(m_table->get_string_interner(col_key));
+            values.init_from_ref(ref);
+            return values.is_null(m_row_ndx);
+        }
+            // return do_is_null(col_ndx);
         case col_type_Binary:
             return do_is_null(col_ndx);
         case col_type_Mixed:
@@ -1588,6 +1603,17 @@ inline void check_range(const BinaryData& val)
 }
 } // namespace
 
+// helper functions for filtering out calls to set_string_interner()
+template <class T>
+inline void Obj::set_string_interner(T&, ColKey)
+{
+}
+template <>
+inline void Obj::set_string_interner(ArrayString& values, ColKey col_key)
+{
+    values.set_string_interner(m_table->get_string_interner(col_key));
+}
+
 // helper functions for filtering out calls to set_spec()
 template <class T>
 inline void Obj::set_spec(T&, ColKey)
@@ -1685,6 +1711,7 @@ Obj& Obj::set(ColKey col_key, T value, bool is_default)
     LeafType values(alloc);
     values.set_parent(&fields, col_ndx.val + 1);
     set_spec(values, col_key);
+    set_string_interner(values, col_key);
     values.init_from_parent();
     values.set(m_row_ndx, value);
 
@@ -2296,6 +2323,7 @@ inline void Obj::do_set_null(ColKey col_key)
     ArrayString values(alloc);
     values.set_parent(&fields, col_ndx.val + 1);
     values.set_spec(const_cast(&get_spec()), spec_ndx);
+    values.set_string_interner(m_table->get_string_interner(col_key));
     values.init_from_parent();
 
     values.set_null(m_row_ndx);
diff --git a/src/realm/obj.hpp b/src/realm/obj.hpp
index 67c82a0cada..8711e590dac 100644
--- a/src/realm/obj.hpp
+++ b/src/realm/obj.hpp
@@ -392,6 +392,8 @@ class Obj {
     void nullify_link(ColKey origin_col, ObjLink target_key) &&;
     template <class T>
     inline void set_spec(T&, ColKey);
+    template <class T>
+    inline void set_string_interner(T&, ColKey);
 
     template <class ValueType>
     inline void nullify_single_link(ColKey col, ValueType target);
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **************************************************************************/ + +#include +#include + +#include + +#include +namespace realm { + +StringCompressor::StringCompressor(Allocator& alloc, Array& parent, size_t index, bool writable) +{ + m_compression_map.resize(16); // start with a very small compression map + m_symbols.reserve(65536); + m_data = std::make_unique(alloc); + m_data->set_parent(&parent, index); + refresh(writable); +} + +void StringCompressor::refresh(bool writable) +{ + // we assume that compressors are only created from a valid parent. + // String interners in 'dead' mode should never instantiate a string compressor. + if (m_data->get_ref_from_parent() == 0) { + REALM_ASSERT(writable); + m_data->create(0, 65535); + m_data->update_parent(); + } + else { + if (m_data->is_attached()) + m_data->update_from_parent(); + else + m_data->init_from_ref(m_data->get_ref_from_parent()); + } + rebuild_internal(); +} + +static size_t symbol_pair_hash(CompressionSymbol a, CompressionSymbol b) +{ + // range of return value must match size of encoding table + uint32_t tmp = a + 3; + tmp *= b + 7; + return (tmp ^ (tmp >> 16)) & 0xFFFF; +} + +void StringCompressor::add_expansion(SymbolDef def) +{ + // compute expansion size: + size_t exp_size = 0; + if (def.expansion_a < 256) + exp_size = 1; + else + exp_size = m_symbols[def.expansion_a - 256].expansion.size(); + if (def.expansion_b < 256) + exp_size += 1; + else + exp_size += m_symbols[def.expansion_b - 256].expansion.size(); + // make sure there is room in active storage chunk: + if (m_expansion_storage.size() == 0 || m_expansion_storage.back().size() + exp_size + 1 >= storage_chunk_size) { + m_expansion_storage.push_back({}); + m_expansion_storage.back().reserve(storage_chunk_size); + } + // construct expansion at end of chunk: + auto& chunk = m_expansion_storage.back(); + auto start_index = (uint32_t)chunk.size(); + if (def.expansion_a < 256) + chunk.push_back((char)def.expansion_a); + else + chunk.append(m_symbols[def.expansion_a - 256].expansion); + if (def.expansion_b < 256) + chunk.push_back((char)def.expansion_b); + else + chunk.append(m_symbols[def.expansion_b - 256].expansion); + std::string_view expansion(chunk.data() + start_index, exp_size); + m_symbols.push_back({def, expansion, (uint32_t)m_expansion_storage.size() - 1, start_index}); +} + +void StringCompressor::expand_compression_map() +{ + size_t old_size = m_compression_map.size(); + REALM_ASSERT(old_size <= 16384); + size_t new_size = 4 * old_size; + std::vector map(new_size); + for (size_t i = 0; i < m_compression_map.size(); ++i) { + auto& entry = m_compression_map[i]; + if (entry.id == 0) + continue; + auto hash = symbol_pair_hash(entry.expansion_a, entry.expansion_b); + auto new_hash = hash & (new_size - 1); + REALM_ASSERT(map[new_hash].id == 0); + map[new_hash] = entry; + } + m_compression_map.swap(map); +} + +void StringCompressor::rebuild_internal() +{ + auto num_symbols = m_data->size(); + if 
+
+void StringCompressor::rebuild_internal()
+{
+    auto num_symbols = m_data->size();
+    if (num_symbols == m_symbols.size())
+        return;
+    if (num_symbols < m_symbols.size()) {
+        // fewer symbols (likely a rollback) -- remove last ones added
+        while (num_symbols < m_symbols.size()) {
+            auto& symbol = m_symbols.back();
+            auto hash = symbol_pair_hash(symbol.def.expansion_a, symbol.def.expansion_b);
+            hash &= m_compression_map.size() - 1;
+            REALM_ASSERT(m_compression_map[hash].id == symbol.def.id);
+            m_compression_map[hash] = {0, 0, 0};
+            if (symbol.storage_index < m_expansion_storage.size() - 1) {
+                m_expansion_storage.resize(symbol.storage_index + 1);
+            }
+            m_expansion_storage[symbol.storage_index].resize(symbol.storage_offset);
+            m_symbols.pop_back();
+        }
+        return;
+    }
+    // we have new symbols to add
+    for (size_t i = m_symbols.size(); i < num_symbols; ++i) {
+        auto pair = m_data->get(i);
+        SymbolDef def;
+        def.id = (CompressionSymbol)(i + 256);
+        def.expansion_a = 0xFFFF & (pair >> 16);
+        def.expansion_b = 0xFFFF & pair;
+        auto hash = symbol_pair_hash(def.expansion_a, def.expansion_b);
+        while (m_compression_map[hash & (m_compression_map.size() - 1)].id) {
+            expand_compression_map();
+        }
+        // REALM_ASSERT_DEBUG(m_compression_map[hash].id == 0);
+        m_compression_map[hash & (m_compression_map.size() - 1)] = def;
+        add_expansion(def);
+    }
+}
+
+StringCompressor::~StringCompressor() {}
+
+CompressedString StringCompressor::compress(StringData sd, bool learn)
+{
+    CompressedString result(sd.size());
+    // expand string into array of symbols
+    const char* d = sd.data();
+    const size_t limit = sd.size();
+    if (limit == 0)
+        return {};
+    size_t i = 0;
+    while (i < limit) {
+        result[i++] = 0xFF & *d++;
+    }
+    // Iteratively compress the array of symbols. Each run compresses pairs into single symbols.
+    // 6 runs give a max compression of 64x - on average it will be much less :-)
+    constexpr int run_limit = 6;
+    CompressionSymbol* to;
+    for (int run = 0; run < run_limit; ++run) {
+        CompressionSymbol* from = to = result.data();
+        CompressionSymbol* limit = from + result.size() - 1;
+        while (from < limit) {
+            auto hash = symbol_pair_hash(from[0], from[1]);
+            hash &= m_compression_map.size() - 1;
+            auto& def = m_compression_map[hash];
+            if (def.id) {
+                // existing symbol
+                if (def.expansion_a == from[0] && def.expansion_b == from[1]) {
+                    // matching symbol
+                    *to++ = def.id;
+                    from += 2;
+                }
+                else if (m_compression_map.size() < 65536) {
+                    // Conflict: some other symbol is defined here - but we can expand the compression map
+                    // and hope to find room!
+                    expand_compression_map();
+                    // simply retry:
+                    continue;
+                }
+                else {
+                    // also a conflict: some other symbol is defined here, so we can't compress.
+                    *to++ = *from++;
+                    // In a normal hash table we'd have buckets and add a translation
+                    // to a bucket. This is slower generally, but yields better compression.
+                }
+            }
+            else {
+                // free entry we can use for a new symbol (and we're learning)
+                if (m_symbols.size() < (65536 - 256) && learn) {
+                    // define a new symbol for this entry and use it.
+                    REALM_ASSERT_DEBUG(m_compression_map[hash].id == 0);
+                    REALM_ASSERT_DEBUG(m_symbols.size() == m_data->size());
+                    REALM_ASSERT_DEBUG(m_data->is_attached());
+                    CompressionSymbol id = (CompressionSymbol)(256 + m_symbols.size());
+                    SymbolDef def{id, from[0], from[1]};
+                    m_compression_map[hash] = def;
+                    add_expansion(def);
+                    m_data->add(((uint64_t)from[0]) << 16 | from[1]);
+                    // std::cerr << id << " = {" << from[0] << ", " << from[1] << "}" << std::endl;
+                    *to++ = id;
+                    from += 2;
+                }
+                else {
+                    // no more symbol space, so we can't compress
+                    *to++ = *from++;
+                }
+            }
+        }
+        if (from == limit) {
+            // copy over trailing symbol
+            *to++ = *from++;
+        }
+        REALM_ASSERT_DEBUG(to > result.data());
+        size_t sz = to - result.data();
+        REALM_ASSERT_DEBUG(sz <= sd.size());
+        result.resize(sz);
+        if (from == to) // no compression took place in last iteration
+            break;
+    }
+    return result;
+}
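// [Editor's note - worked example, not part of the patch] compress() above is
// a byte-pair-encoding variant: each pass replaces known adjacent symbol
// pairs with a single 16-bit symbol, and with `learn` set, unknown pairs can
// be added to the dictionary on the fly. A toy trace with learning disabled
// and a dictionary containing 256 = {'a','b'} and 257 = {256, 256}:
//
//   input:   'a' 'b' 'a' 'b' 'a' 'b'      (6 symbols)
//   pass 1:  256 256 256                  (every ('a','b') pair -> 256)
//   pass 2:  257 256                      ((256,256) -> 257; trailing 256 copied)
//   pass 3:  257 256                      (no pair matches; from == to, loop exits)
//
// The run_limit of 6 bounds the work per string and caps the best case at the
// 64x compression mentioned in the comment above.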
+
+std::string StringCompressor::decompress(CompressedStringView& c_str)
+{
+    CompressionSymbol* ptr = c_str.data;
+    CompressionSymbol* limit = ptr + c_str.size;
+    // compute size of decompressed string first to avoid allocations as the string grows
+    size_t result_size = 0;
+    while (ptr < limit) {
+        if (*ptr < 256)
+            result_size += 1;
+        else
+            result_size += m_symbols[*ptr - 256].expansion.size();
+        ++ptr;
+    }
+    std::string result2;
+    result2.reserve(result_size);
+    // generate result
+    ptr = c_str.data;
+    while (ptr < limit) {
+        if (*ptr < 256)
+            result2.push_back((char)*ptr);
+        else
+            result2.append(m_symbols[*ptr - 256].expansion);
+        ptr++;
+    }
+#ifdef REALM_DEBUG
+    std::string result;
+    {
+        auto decompress = [&](CompressionSymbol symbol, auto& decompress) -> void {
+            if (symbol < 256) {
+                result.push_back((char)symbol);
+            }
+            else {
+                auto& s = m_symbols[symbol - 256];
+                decompress(s.def.expansion_a, decompress);
+                decompress(s.def.expansion_b, decompress);
+            }
+        };
+
+        CompressionSymbol* ptr = c_str.data;
+        CompressionSymbol* limit = ptr + c_str.size;
+        while (ptr < limit) {
+            decompress(*ptr, decompress);
+            ++ptr;
+        }
+    }
+    REALM_ASSERT_DEBUG(result == result2);
+#endif
+    return result2;
+}
+
+int StringCompressor::compare(CompressedStringView& A, CompressedStringView& B)
+{
+    auto A_ptr = A.data;
+    auto A_limit = A_ptr + A.size;
+    auto B_ptr = B.data;
+    auto B_limit = B_ptr + B.size;
+    while (A_ptr < A_limit && B_ptr < B_limit) {
+        auto code_A = *A_ptr++;
+        auto code_B = *B_ptr++;
+        if (code_A == code_B)
+            continue;
+        // symbols did not match:
+        // 1. both symbols are single characters
+        if (code_A < 256 && code_B < 256)
+            return code_B - code_A;
+        std::string a_str(1, (char)code_A);
+        auto str_A = std::string_view(code_A < 256 ? a_str : m_symbols[code_A - 256].expansion);
+        std::string b_str(1, (char)code_B);
+        auto str_B = std::string_view(code_B < 256 ? b_str : m_symbols[code_B - 256].expansion);
+        // to ensure comparison as StringData we need to convert the string views
+        StringData sd_a(str_A.data(), str_A.size());
+        StringData sd_b(str_B.data(), str_B.size());
+        REALM_ASSERT_DEBUG(sd_a != sd_b);
+        if (sd_a < sd_b)
+            return 1;
+        else
+            return -1;
+    }
+    // The compressed strings are identical or one is a prefix of the other
+    return B.size - A.size;
+    // ^ a faster way of producing the same positive / negative / zero as:
+    // if (A.size() < B.size())
+    //     return 1;
+    // if (A.size() > B.size())
+    //     return -1;
+    // return 0;
+}
+
+int StringCompressor::compare(StringData sd, CompressedStringView& B)
+{
+    auto B_size = B.size;
+    // make sure comparisons are unsigned, even though StringData does not specify signedness
+    const unsigned char* A_ptr = reinterpret_cast<const unsigned char*>(sd.data());
+    auto A_limit = A_ptr + sd.size();
+    for (size_t i = 0; i < B_size; ++i) {
+        if (A_ptr == A_limit) {
+            // sd ended first, so B is bigger
+            return -1;
+        }
+        auto code = B.data[i];
+        if (code < 256) {
+            if (code < *A_ptr)
+                return 1;
+            if (code > *A_ptr)
+                return -1;
+            ++A_ptr;
+            continue;
+        }
+        auto& expansion = m_symbols[code - 256];
+        for (size_t disp = 0; disp < expansion.expansion.size(); ++disp) {
+            uint8_t c = expansion.expansion[disp];
+            if (c < *A_ptr)
+                return 1;
+            if (c > *A_ptr)
+                return -1;
+            ++A_ptr;
+        }
+    }
+    // if sd is longer than B, sd is the biggest string
+    if (A_ptr < A_limit)
+        return 1;
+    return 0;
+}
+
+
+} // namespace realm
diff --git a/src/realm/string_compressor.hpp b/src/realm/string_compressor.hpp
new file mode 100644
index 00000000000..2c866ecb781
--- /dev/null
+++ b/src/realm/string_compressor.hpp
@@ -0,0 +1,100 @@
+/*************************************************************************
+ *
+ * Copyright 2016 Realm Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **************************************************************************/
+
+#ifndef REALM_STRING_COMPRESSOR_HPP
+#define REALM_STRING_COMPRESSOR_HPP
+
+#include
+#include
+
+using CompressionSymbol = uint16_t;
+using CompressedString = std::vector<CompressionSymbol>;
+struct CompressedStringView {
+    CompressionSymbol* data = 0;
+    uint32_t size = 0;
+    CompressedStringView() = default;
+    CompressedStringView(CompressionSymbol* c_ptr, size_t s)
+        : data(c_ptr)
+        , size(uint32_t(s))
+    {
+    }
+    explicit CompressedStringView(CompressedString& cs)
+        : data(cs.data())
+        , size(uint32_t(cs.size()))
+    {
+    }
+    bool operator==(CompressedStringView& other)
+    {
+        if (size != other.size)
+            return false;
+        for (size_t i = 0; i < size; ++i) {
+            if (data[i] != other.data[i])
+                return false;
+        }
+        return true;
+    }
+};
+
+namespace realm {
+
+class ArrayUnsigned;
+class Array;
+class Allocator;
+
+ * + **************************************************************************/ + +#ifndef REALM_STRING_COMPRESSOR_HPP +#define REALM_STRING_COMPRESSOR_HPP + +#include +#include + +using CompressionSymbol = uint16_t; +using CompressedString = std::vector; +struct CompressedStringView { + CompressionSymbol* data = 0; + uint32_t size = 0; + CompressedStringView() = default; + CompressedStringView(CompressionSymbol* c_ptr, size_t s) + : data(c_ptr) + , size(uint32_t(s)) + { + } + explicit CompressedStringView(CompressedString& cs) + : data(cs.data()) + , size(uint32_t(cs.size())) + { + } + bool operator==(CompressedStringView& other) + { + if (size != other.size) + return false; + for (size_t i = 0; i < size; ++i) { + if (data[i] != other.data[i]) + return false; + } + return true; + } +}; + +namespace realm { + +class ArrayUnsigned; +class Array; +class Allocator; + +class StringCompressor { +public: + StringCompressor(Allocator& alloc, Array& parent, size_t index, bool writable); + void refresh(bool writable); + ~StringCompressor(); + + int compare(CompressedStringView& A, CompressedStringView& B); + int compare(StringData sd, CompressedStringView& B); + + CompressedString compress(StringData, bool learn); + std::string decompress(CompressedStringView& c_str); + +private: + struct SymbolDef { + CompressionSymbol id = 0; + CompressionSymbol expansion_a = 0; + CompressionSymbol expansion_b = 0; + }; + + struct ExpandedSymbolDef { + SymbolDef def; + std::string_view expansion; + // ^ points into storage managed by m_expansion_storage + // we need the following 2 values to facilitate rollback of allocated storage + uint32_t storage_index; // index into m_expansion_storage + uint32_t storage_offset; // offset into block. + }; + + void rebuild_internal(); + void expand_compression_map(); + void add_expansion(SymbolDef def); + std::vector m_symbols; // map from symbol -> symbolpair, 2 elements pr entry + std::vector m_compression_map; // perfect hash from symbolpair to its symbol + + std::unique_ptr m_data; + constexpr static size_t storage_chunk_size = 4096; + std::vector m_expansion_storage; +}; + +} // namespace realm + +#endif diff --git a/src/realm/string_interner.cpp b/src/realm/string_interner.cpp new file mode 100644 index 00000000000..fb801b1fd6a --- /dev/null +++ b/src/realm/string_interner.cpp @@ -0,0 +1,681 @@ +/************************************************************************* + * + * Copyright 2016 Realm Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **************************************************************************/ + +#include +#include + +#include +#include + +namespace realm { + +// Fast mapping of strings (or rather hash of strings) to string IDs. +// +// We use a tree where: +// * All interior nodes are radix nodes with a fan-out of 256. +// * Leaf nodes with up to 16 entries are just lists, searched linearly +// * Leaf nodes with more than 16 entries and less than 1K are hash tables. 
diff --git a/src/realm/string_interner.cpp b/src/realm/string_interner.cpp
new file mode 100644
index 00000000000..fb801b1fd6a
--- /dev/null
+++ b/src/realm/string_interner.cpp
@@ -0,0 +1,681 @@
+/*************************************************************************
+ *
+ * Copyright 2016 Realm Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **************************************************************************/
+
+#include
+#include
+
+#include
+#include
+
+namespace realm {
+
+// Fast mapping of strings (or rather hash of strings) to string IDs.
+//
+// We use a tree where:
+// * All interior nodes are radix nodes with a fan-out of 256.
+// * Leaf nodes with up to 16 entries are just lists, searched linearly.
+// * Leaf nodes with more than 16 entries and less than 1K are hash tables.
+//   Hash tables use linear search starting from the entry found by hashing.
+//
+constexpr static size_t linear_search_limit = 16;
+constexpr static size_t hash_node_min_size = 32;
+constexpr static size_t hash_node_max_size = 1024;
+constexpr static size_t radix_node_consumes_bits = 8;
+constexpr static size_t radix_node_size = 1ULL << radix_node_consumes_bits;
+
+// helpers
+struct HashMapIter {
+    Array& m_array;
+    uint32_t hash_filter;
+    uint16_t index;
+    uint16_t left_to_search;
+    uint8_t hash_size;
+    HashMapIter(Array& array, uint32_t hash, uint8_t hash_size)
+        : m_array(array)
+        , hash_filter(hash)
+        , hash_size(hash_size)
+    {
+        set_index(0);
+    }
+    HashMapIter(Array& dummy)
+        : m_array(dummy)
+    {
+        left_to_search = 0;
+    }
+    inline uint32_t get()
+    {
+        return (uint32_t)(m_array.get(index) >> hash_size);
+    }
+    inline bool empty()
+    {
+        auto element = m_array.get(index);
+        return (element >> hash_size) == 0;
+    }
+    inline void set(uint64_t element)
+    {
+        m_array.set(index, element);
+    }
+    inline bool matches()
+    {
+        auto mask = 0xFFFFFFFFUL >> (32 - hash_size);
+        auto element = m_array.get(index);
+        return ((element & mask) == hash_filter) && (element >> hash_size);
+    }
+    inline bool is_valid()
+    {
+        return left_to_search != 0;
+    }
+    inline void set_index(size_t i, size_t search_limit = linear_search_limit)
+    {
+        index = (uint16_t)i;
+        left_to_search = (uint16_t)std::min(m_array.size(), (size_t)search_limit);
+    }
+    void operator++()
+    {
+        if (is_valid()) {
+            left_to_search--;
+            index++;
+            if (index == m_array.size()) {
+                index = 0;
+            }
+        }
+    }
+};
+
+// Attempt to build a hash leaf from a smaller hash leaf or a non-hash leaf.
+static bool rehash(Array& from, Array& to, uint8_t hash_size)
+{
+    REALM_ASSERT_DEBUG(from.size() * 2 == to.size());
+
+    for (size_t i = 0; i < from.size(); ++i) {
+        auto entry = (size_t)from.get(i);
+        if ((entry >> hash_size) == 0)
+            continue;
+        size_t starting_index = entry & (to.size() - 1);
+        HashMapIter it(to, 0, hash_size);
+        it.set_index(starting_index);
+        while (it.is_valid() && !it.empty()) {
+            ++it;
+        }
+        if (!it.is_valid()) {
+            // abort rehashing, we need a larger to-space
+            return false;
+        }
+        REALM_ASSERT(it.empty());
+        it.set(entry);
+    }
+    return true;
+}
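// [Editor's sketch - not part of the patch] How one leaf entry packs a
// binding, per the code above: the low `hash_size` bits hold (what is left
// of) the string's hash, the high bits hold the string ID, and ID 0 marks an
// empty slot. Each radix level consumes 8 bits of hash before recursing.
#include <cstdint>

inline uint64_t pack_entry(uint64_t id, uint32_t hash, uint8_t hash_size)
{
    return (id << hash_size) | hash;
}
inline uint64_t entry_id(uint64_t entry, uint8_t hash_size)
{
    return entry >> hash_size; // 0 means "empty slot"
}
inline uint32_t entry_hash(uint64_t entry, uint8_t hash_size)
{
    return uint32_t(entry & (0xFFFFFFFFull >> (32 - hash_size)));
}
inline uint32_t hash_for_subtree(uint32_t hash)
{
    return hash >> 8; // radix_node_consumes_bits
}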
+
+// Add a binding from hash value to id.
+static void add_to_hash_map(Array& node, uint64_t hash, uint64_t id, uint8_t hash_size)
+{
+    REALM_ASSERT(node.is_attached());
+    if (!node.has_refs()) {
+        // it's a leaf.
+        if (node.size() < linear_search_limit) {
+            // it's a list with room to grow
+            node.add(((uint64_t)id << hash_size) | hash);
+            return;
+        }
+        if (node.size() == linear_search_limit) {
+            // it's a full list, must be converted to a hash table
+            Array new_node(node.get_alloc());
+            new_node.create(NodeHeader::type_Normal, false, hash_node_min_size, 0);
+            new_node.set_parent(node.get_parent(), node.get_ndx_in_parent());
+            new_node.update_parent();
+            // transform existing list into hash table
+            rehash(node, new_node, hash_size);
+            node.destroy();
+            node.init_from_parent();
+        }
+        // it's a hash table. Grow if needed, up to 'hash_node_max_size' entries.
+        while (node.size() < hash_node_max_size) {
+            auto size = node.size();
+            size_t start_index = hash & (size - 1);
+            HashMapIter it(node, 0, hash_size);
+            it.set_index(start_index);
+            while (it.is_valid() && !it.empty()) {
+                ++it;
+            }
+            if (it.is_valid()) {
+                // found an empty spot within search range
+                it.set(((uint64_t)id << hash_size) | hash);
+                return;
+            }
+            if (node.size() >= hash_node_max_size)
+                break;
+            // No free spot found - rehash into bigger and bigger tables
+            auto new_size = node.size();
+            bool need_to_rehash = true;
+            Array new_node(node.get_alloc());
+            while (need_to_rehash && new_size < hash_node_max_size) {
+                new_size *= 2;
+                new_node.create(NodeHeader::type_Normal, false, new_size, 0);
+                need_to_rehash = !rehash(node, new_node, hash_size);
+                if (need_to_rehash) { // we failed, try again - or shift to radix
+                    // It may seem counter-intuitive, but it CAN happen.
+                    new_node.destroy();
+                }
+            }
+            if (need_to_rehash)
+                break;
+            new_node.set_parent(node.get_parent(), node.get_ndx_in_parent());
+            new_node.update_parent();
+            node.destroy();
+            node.init_from_parent();
+        }
+        // we ran out of space. Rewrite as a radix node with subtrees
+        Array new_node(node.get_alloc());
+        new_node.create(NodeHeader::type_HasRefs, false, radix_node_size, 0);
+        new_node.set_parent(node.get_parent(), node.get_ndx_in_parent());
+        new_node.update_parent();
+        for (size_t index = 0; index < node.size(); ++index) {
+            auto element = node.get(index);
+            auto hash = element & (0xFFFFFFFF >> (32 - hash_size));
+            auto string_id = element >> hash_size;
+            if (string_id == 0)
+                continue;
+            auto remaining_hash = hash >> radix_node_consumes_bits;
+            add_to_hash_map(new_node, remaining_hash, string_id, hash_size - 8);
+        }
+        node.destroy();
+        node.init_from_parent();
+    }
+    // We have a radix node and need to insert the new binding into the proper subtree
+    size_t index = hash & (radix_node_size - 1);
+    auto rot = node.get_as_ref_or_tagged(index);
+    REALM_ASSERT(!rot.is_tagged());
+    Array subtree(node.get_alloc());
+    if (rot.get_as_ref() == 0) {
+        // no subtree present, create an empty one
+        subtree.set_parent(&node, index);
+        subtree.create(NodeHeader::type_Normal);
+        subtree.update_parent();
+    }
+    else {
+        // subtree already present
+        subtree.set_parent(&node, index);
+        subtree.init_from_parent();
+    }
+    // recurse into subtree
+    add_to_hash_map(subtree, hash >> radix_node_consumes_bits, id, hash_size - radix_node_consumes_bits);
+}
+
+static std::vector hash_to_id(Array& node, uint32_t hash, uint8_t hash_size)
+{
+    std::vector result;
+    REALM_ASSERT(node.is_attached());
+    if (!node.has_refs()) {
+        // it's a leaf - default is a list, search starts from index 0.
+        HashMapIter it(node, hash, hash_size);
+        if (node.size() > hash_node_min_size) {
+            // it is a hash table, so use the hash to select the index to start searching
+            // (table size must be a power of two!)
+            size_t index = hash & (node.size() - 1);
+            it.set_index(index);
+        }
+        // collect all matching values within allowed range
+        while (it.is_valid()) {
+            if (it.matches()) {
+                result.push_back(it.get());
+            }
+            ++it;
+        }
+        return result;
+    }
+    else {
+        // it's a radix node
+        size_t index = hash & (node.size() - 1);
+        auto rot = node.get_as_ref_or_tagged(index);
+        REALM_ASSERT(rot.is_ref());
+        if (rot.get_as_ref() == 0) {
+            // no subtree, return empty vector
+            return result;
+        }
+        // descend into subtree
+        Array subtree(node.get_alloc());
+        subtree.set_parent(&node, index);
+        subtree.init_from_parent();
+        return hash_to_id(subtree, hash >> radix_node_consumes_bits, hash_size - radix_node_consumes_bits);
+    }
+}
+
+
+enum positions { Pos_Version, Pos_ColKey, Pos_Size, Pos_Compressor, Pos_Data, Pos_Map, Top_Size };
+
+struct StringInterner::DataLeaf {
+    std::vector<CompressedStringView> m_compressed;
+    ref_type m_leaf_ref = 0;
+    bool m_is_loaded = false;
+    DataLeaf() {}
+    DataLeaf(ref_type ref)
+        : m_leaf_ref(ref)
+    {
+    }
+};
+
+StringInterner::StringInterner(Allocator& alloc, Array& parent, ColKey col_key, bool writable)
+    : m_parent(parent)
+{
+    REALM_ASSERT_DEBUG(col_key != ColKey());
+    size_t index = col_key.get_index().val;
+    // ensure that m_top and m_data are well defined and reflect any existing data
+    // We'll have to extend this to handle no defined backing
+    m_top = std::make_unique(alloc);
+    m_top->set_parent(&parent, index);
+    m_data = std::make_unique(alloc);
+    m_data->set_parent(m_top.get(), Pos_Data);
+    m_hash_map = std::make_unique(alloc);
+    m_hash_map->set_parent(m_top.get(), Pos_Map);
+    m_current_string_leaf = std::make_unique(alloc);
+    m_col_key = col_key;
+    update_from_parent(writable);
+}
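// [Editor's sketch - not part of the patch] The interner's top array layout
// (see `enum positions` above) uses Realm's ref-or-tagged convention when
// update_from_parent() below fills the scalar slots: a slot stores
// (value << 1) | 1, so the low bit distinguishes tagged integers from refs,
// which are always even. Hence `(1 << 1) + 1` for version 1, and the
// adjust-by-2 in intern() when the size grows by one.
#include <cstdint>

inline int64_t make_tagged(int64_t value)
{
    return (value << 1) | 1;
}
inline int64_t tagged_value(int64_t slot)
{
    return slot >> 1;
}
inline bool is_ref(int64_t slot)
{
    return (slot & 1) == 0;
}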
+
+void StringInterner::update_from_parent(bool writable)
+{
+    auto parent_idx = m_top->get_ndx_in_parent();
+    bool valid_top_ref_spot = m_parent.is_attached() && parent_idx < m_parent.size();
+    bool valid_top = valid_top_ref_spot && m_parent.get_as_ref(parent_idx);
+    if (valid_top) {
+        m_top->update_from_parent();
+        m_data->update_from_parent();
+        m_hash_map->update_from_parent();
+    }
+    else if (writable && valid_top_ref_spot) {
+        m_top->create(NodeHeader::type_HasRefs, false, Top_Size, 0);
+        m_top->set(Pos_Version, (1 << 1) + 1); // version number 1.
+        m_top->set(Pos_Size, (0 << 1) + 1);    // total size 0
+        m_top->set(Pos_ColKey, (m_col_key.value << 1) + 1);
+        m_top->set(Pos_Compressor, 0);
+        // create first level of data tree here (to simplify other stuff)
+        m_data = std::make_unique(m_parent.get_alloc());
+        m_data->set_parent(m_top.get(), Pos_Data);
+        m_data->create(NodeHeader::type_HasRefs, false, 0);
+        m_data->update_parent();
+        m_hash_map = std::make_unique(m_parent.get_alloc());
+        m_hash_map->set_parent(m_top.get(), Pos_Map);
+        m_hash_map->create(NodeHeader::type_Normal);
+        m_hash_map->update_parent();
+        m_top->update_parent();
+        valid_top = true;
+    }
+    if (!valid_top) {
+        // We're lacking part of underlying data and not allowed to create it, so enter "dead" mode
+        m_compressor.reset();
+        m_compressed_leafs.clear();
+        // m_compressed_string_map.clear();
+        m_top->detach(); // <-- indicates "dead" mode
+        m_data->detach();
+        m_hash_map->detach();
+        m_compressor.reset();
+        return;
+    }
+    // validate we're accessing data for the correct column. A combination of column erase
+    // and insert could lead to an interner being paired with wrong data in the file.
+    // If so, we clear internal data, forcing rebuild_internal() to rebuild from scratch.
+    int64_t data_colkey = m_top->get_as_ref_or_tagged(Pos_ColKey).get_as_int();
+    if (m_col_key.value != data_colkey) {
+        // new column, new data
+        m_compressor.reset();
+        m_decompressed_strings.clear();
+    }
+    if (!m_compressor)
+        m_compressor = std::make_unique(m_top->get_alloc(), *m_top, Pos_Compressor, writable);
+    else
+        m_compressor->refresh(writable);
+    if (m_data->size()) {
+        auto ref_to_write_buffer = m_data->get_as_ref(m_data->size() - 1);
+        const char* header = m_top->get_alloc().translate(ref_to_write_buffer);
+        bool is_array_of_cprs = NodeHeader::get_hasrefs_from_header(header);
+        if (is_array_of_cprs) {
+            m_current_long_string_node = std::make_unique(m_top->get_alloc());
+            m_current_long_string_node->set_parent(m_data.get(), m_data->size() - 1);
+            m_current_long_string_node->update_from_parent();
+        }
+        else {
+            m_current_long_string_node.reset();
+        }
+    }
+    else
+        m_current_long_string_node.reset(); // just in case...
+
+    // rebuild internal structures......
+    rebuild_internal();
+    m_current_string_leaf->detach();
+}
+
+void StringInterner::rebuild_internal()
+{
+    std::lock_guard lock(m_mutex);
+    // release old decompressed strings
+    for (size_t idx = 0; idx < m_in_memory_strings.size(); ++idx) {
+        StringID id = m_in_memory_strings[idx];
+        if (id > m_decompressed_strings.size()) {
+            m_in_memory_strings[idx] = m_in_memory_strings.back();
+            m_in_memory_strings.pop_back();
+            continue;
+        }
+        if (auto& w = m_decompressed_strings[id - 1].m_weight) {
+            w >>= 1;
+        }
+        else {
+            m_decompressed_strings[id - 1].m_decompressed.reset();
+            m_in_memory_strings[idx] = m_in_memory_strings.back();
+            m_in_memory_strings.pop_back();
+            continue;
+        }
+    }
+
+    size_t target_size = (size_t)m_top->get_as_ref_or_tagged(Pos_Size).get_as_int();
+    m_decompressed_strings.resize(target_size);
+    if (m_data->size() != m_compressed_leafs.size()) {
+        m_compressed_leafs.resize(m_data->size());
+    }
+    // always force new setup of all leaves:
+    // update m_compressed_leafs to reflect m_data
+    for (size_t idx = 0; idx < m_compressed_leafs.size(); ++idx) {
+        auto ref = m_data->get_as_ref(idx);
+        auto& leaf_meta = m_compressed_leafs[idx];
+        // if (ref != leaf_meta.m_leaf_ref) {
+        leaf_meta.m_is_loaded = false;
+        leaf_meta.m_compressed.clear();
+        leaf_meta.m_leaf_ref = ref;
+        //}
+    }
+}
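// [Editor's sketch - not part of the patch] The cache-eviction policy in
// rebuild_internal() above: every in-memory decompressed string carries a
// weight (intern() starts it at 64); each rebuild halves it, and a string
// whose weight has reached zero is dropped. A lookup (not shown in this
// excerpt) bumps the weight back up, so frequently used strings stay resident.
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

struct CachedStringSketch {
    uint32_t weight = 0;
    std::unique_ptr<std::string> decompressed;
};

inline void age_cache_sketch(std::vector<CachedStringSketch>& cache)
{
    for (auto& entry : cache) {
        if (entry.weight)
            entry.weight >>= 1;         // exponential decay per rebuild
        else
            entry.decompressed.reset(); // cold: drop the bytes, keep the slot
    }
}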
+            m_current_long_string_node = std::make_unique<Array>(m_top->get_alloc());
+            m_current_long_string_node->set_parent(m_data.get(), m_data->size() - 1);
+            m_current_long_string_node->create(NodeHeader::type_HasRefs);
+            m_current_long_string_node->update_parent();
+            REALM_ASSERT_DEBUG(!m_current_string_leaf->is_attached() || m_current_string_leaf->size() == 0);
+            m_current_string_leaf->detach();
+        }
+        else {
+            // we have been building an existing leaf and need to shift representation,
+            // but first we need to update the accessor for the existing leaf
+            if (m_current_string_leaf->is_attached()) {
+                m_current_string_leaf->update_from_parent();
+            }
+            else {
+                m_current_string_leaf->init_from_ref(m_current_string_leaf->get_ref_from_parent());
+            }
+            REALM_ASSERT_DEBUG(m_current_string_leaf->size() > 0);
+            m_current_long_string_node = std::make_unique<Array>(m_top->get_alloc());
+            m_current_long_string_node->set_parent(m_data.get(), m_data->size() - 1);
+            m_current_long_string_node->create(NodeHeader::type_HasRefs);
+            m_current_long_string_node->update_parent();
+            // convert the current leaf into a long string node (each compressed string
+            // placed in its own separate array)
+            for (auto s : m_compressed_leafs.back().m_compressed) {
+                ArrayUnsigned arr(m_top->get_alloc());
+                arr.create(s.size, 65535);
+                unsigned short* dest = reinterpret_cast<unsigned short*>(arr.m_data);
+                std::copy_n(s.data, s.size, dest);
+                m_current_long_string_node->add(arr.get_ref());
+            }
+            m_current_string_leaf->destroy();
+            m_current_string_leaf->detach();
+            // force later reload of leaf
+            m_compressed_leafs.back().m_is_loaded = false;
+        }
+    }
+    if (m_current_long_string_node) {
+        ArrayUnsigned arr(m_top->get_alloc());
+        arr.create(c_str.size(), 65535);
+        unsigned short* begin = c_str.data();
+        if (begin) {
+            // if the compressed string is empty, 'begin' is zero and we don't copy
+            size_t n = c_str.size();
+            unsigned short* dest = reinterpret_cast<unsigned short*>(arr.m_data);
+            std::copy_n(begin, n, dest);
+        }
+        m_current_long_string_node->add(arr.get_ref());
+        m_current_long_string_node->update_parent();
+        if (m_current_long_string_node->size() == 256) {
+            // exit from "long string mode"
+            m_current_long_string_node.reset();
+        }
+        CompressionSymbol* p_start = reinterpret_cast<CompressionSymbol*>(arr.m_data);
+        m_compressed_leafs.back().m_compressed.push_back({p_start, arr.size()});
+    }
+    else {
+        // Append to a leaf with up to 256 entries.
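+        // Layout aside: within such a leaf the compressed strings sit back to back as
+        // 16-bit symbols, each string prefixed by its length (assuming CompressionSymbol
+        // is a 16-bit integer, as the width-16 asserts elsewhere in this file suggest):
+        //
+        //     [len0][sym..sym][len1][sym..sym]...    // at most 256 strings per leaf
+        //
+        // so load_leaf_if_needed() can scan it by alternately reading a length and
+        // skipping that many symbols.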
+        // First create a new leaf if needed (limit the number of entries to 256 per leaf)
+        bool need_leaf_update = !m_current_string_leaf->is_attached() || (index & 0xFF) == 0;
+        if (need_leaf_update) {
+            m_current_string_leaf->set_parent(m_data.get(), index >> 8);
+            if ((index & 0xFF) == 0) {
+                // create new leaf
+                m_current_string_leaf->create(0, 65535);
+                m_data->add(m_current_string_leaf->get_ref());
+                m_compressed_leafs.push_back({});
+            }
+            else {
+                // just set up the leaf accessor
+                if (m_current_string_leaf->is_attached()) {
+                    m_current_string_leaf->update_from_parent();
+                }
+                else {
+                    m_current_string_leaf->init_from_ref(m_current_string_leaf->get_ref_from_parent());
+                }
+            }
+        }
+        REALM_ASSERT(c_str.size() < 65536); // anything larger goes in a long string node
+        // Add compressed string at end of leaf
+        m_current_string_leaf->add(c_str.size());
+        for (auto c : c_str) {
+            m_current_string_leaf->add(c);
+        }
+        REALM_ASSERT_DEBUG(m_compressed_leafs.size());
+        CompressionSymbol* p = reinterpret_cast<CompressionSymbol*>(m_current_string_leaf->m_data);
+        auto p_limit = p + m_current_string_leaf->size();
+        auto p_start = p_limit - c_str.size();
+        m_compressed_leafs.back().m_compressed.push_back({p_start, c_str.size()});
+        REALM_ASSERT(m_compressed_leafs.back().m_compressed.size() <= 256);
+    }
+    m_top->adjust(Pos_Size, 2); // Pos_Size is a tagged integer, so adding 2 increments the logical size by 1
+    load_leaf_if_new_ref(m_compressed_leafs.back(), m_data->get_as_ref(m_data->size() - 1));
+#ifdef REALM_DEBUG
+    auto csv = get_compressed(id);
+    CompressedStringView csv2(c_str);
+    REALM_ASSERT(csv == csv2);
+#endif
+    return id;
+}
+
+bool StringInterner::load_leaf_if_needed(DataLeaf& leaf)
+{
+    if (!leaf.m_is_loaded) {
+        // start with an empty leaf:
+        leaf.m_compressed.clear();
+        leaf.m_compressed.reserve(256);
+
+        // must interpret the leaf first - it is either a single array holding all strings,
+        // or an array with each (compressed) string placed in its own array.
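+        // The two representations are told apart by the has_refs bit in the node header:
+        // clear for a single packed 16-bit array, set for an array of refs with one
+        // subarray per compressed string (used once a string reaches 64k symbols).
+        // Sketch of the discrimination performed below:
+        //
+        //     bool refs = NodeHeader::get_hasrefs_from_header(alloc.translate(leaf_ref));
+        //     // refs == false -> [len][syms...][len][syms...] in one array
+        //     // refs == true  -> one ref per string, each to its own 16-bit array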
+        const char* header = m_top->get_alloc().translate(leaf.m_leaf_ref);
+        bool is_single_array = !NodeHeader::get_hasrefs_from_header(header);
+        if (is_single_array) {
+            size_t leaf_offset = 0;
+            ArrayUnsigned leaf_array(m_top->get_alloc());
+            leaf_array.init_from_ref(leaf.m_leaf_ref);
+            REALM_ASSERT(NodeHeader::get_encoding(leaf_array.get_header()) == NodeHeader::Encoding::WTypBits);
+            REALM_ASSERT(NodeHeader::get_width_from_header(leaf_array.get_header()) == 16);
+            // This is dangerous if the leaf is for some reason not in the assumed format
+            CompressionSymbol* c = reinterpret_cast<CompressionSymbol*>(leaf_array.m_data);
+            auto leaf_size = leaf_array.size();
+            while (leaf_offset < leaf_size) {
+                size_t length = c[leaf_offset];
+                REALM_ASSERT_DEBUG(length == leaf_array.get(leaf_offset));
+                leaf_offset++;
+                leaf.m_compressed.push_back({c + leaf_offset, length});
+                REALM_ASSERT_DEBUG(leaf.m_compressed.size() <= 256);
+                leaf_offset += length;
+            }
+        }
+        else {
+            // Not a single leaf - instead an array of strings
+            Array arr(m_top->get_alloc());
+            arr.init_from_ref(leaf.m_leaf_ref);
+            for (size_t idx = 0; idx < arr.size(); ++idx) {
+                ArrayUnsigned str_array(m_top->get_alloc());
+                ref_type ref = arr.get_as_ref(idx);
+                str_array.init_from_ref(ref);
+                REALM_ASSERT(NodeHeader::get_encoding(str_array.get_header()) == NodeHeader::Encoding::WTypBits);
+                REALM_ASSERT(NodeHeader::get_width_from_header(str_array.get_header()) == 16);
+                CompressionSymbol* c = reinterpret_cast<CompressionSymbol*>(str_array.m_data);
+                leaf.m_compressed.push_back({c, str_array.size()});
+            }
+        }
+        leaf.m_is_loaded = true;
+        return true;
+    }
+    return false;
+}
+
+// Danger: Only to be used if you know that a change in content ==> different ref
+bool StringInterner::load_leaf_if_new_ref(DataLeaf& leaf, ref_type new_ref)
+{
+    if (leaf.m_leaf_ref != new_ref) {
+        leaf.m_leaf_ref = new_ref;
+        leaf.m_is_loaded = false;
+        leaf.m_compressed.resize(0);
+    }
+    return load_leaf_if_needed(leaf);
+}
+
+CompressedStringView& StringInterner::get_compressed(StringID id)
+{
+    auto index = id - 1; // 0 represents null
+    auto hi = index >> 8;
+    auto lo = index & 0xFFUL;
+    DataLeaf& leaf = m_compressed_leafs[hi];
+    load_leaf_if_needed(leaf);
+    REALM_ASSERT_DEBUG(lo < leaf.m_compressed.size());
+    return leaf.m_compressed[lo];
+}
+
+std::optional<StringID> StringInterner::lookup(StringData sd)
+{
+    if (!m_top->is_attached()) {
+        // "dead" mode
+        return {};
+    }
+    std::lock_guard lock(m_mutex);
+    if (sd.data() == nullptr)
+        return 0;
+    uint32_t h = (uint32_t)sd.hash();
+    auto candidates = hash_to_id(*m_hash_map.get(), h, 32);
+    for (auto& candidate : candidates) {
+        auto candidate_cpr = get_compressed(candidate);
+        if (m_compressor->compare(sd, candidate_cpr) == 0)
+            return candidate;
+    }
+    return {};
+}
+
+int StringInterner::compare(StringID A, StringID B)
+{
+    std::lock_guard lock(m_mutex);
+    // ids are 1-based, so the largest valid id equals the vector size
+    REALM_ASSERT_DEBUG(A <= m_decompressed_strings.size());
+    REALM_ASSERT_DEBUG(B <= m_decompressed_strings.size());
+    // comparisons against null
+    if (A == B && A == 0)
+        return 0;
+    if (A == 0)
+        return -1;
+    if (B == 0)
+        return 1;
+    // ok, no nulls
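+    // Both ids refer to interned strings, so the comparison below runs directly on the
+    // compressed symbol streams - no decompression involved. This presumes (as the
+    // compressor's contract appears to be) that symbol-stream ordering is consistent
+    // with the ordering of the original strings, e.g. compare(id("abc"), id("abd")) < 0.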
+    REALM_ASSERT(m_compressor);
+    return m_compressor->compare(get_compressed(A), get_compressed(B));
+}
+
+int StringInterner::compare(StringData s, StringID A)
+{
+    std::lock_guard lock(m_mutex);
+    REALM_ASSERT_DEBUG(A <= m_decompressed_strings.size());
+    // comparisons against null
+    if (s.data() == nullptr && A == 0)
+        return 0;
+    if (s.data() == nullptr)
+        return 1;
+    if (A == 0)
+        return -1;
+    // ok, no nulls
+    REALM_ASSERT(m_compressor);
+    return m_compressor->compare(s, get_compressed(A));
+}
+
+
+StringData StringInterner::get(StringID id)
+{
+    REALM_ASSERT(m_compressor);
+    std::lock_guard lock(m_mutex);
+    if (id == 0)
+        return StringData{nullptr};
+    REALM_ASSERT_DEBUG(id <= m_decompressed_strings.size());
+    CachedString& cs = m_decompressed_strings[id - 1];
+    if (cs.m_decompressed) {
+        std::string* ref_str = cs.m_decompressed.get();
+        if (cs.m_weight < 128)
+            cs.m_weight += 64;
+        return {ref_str->c_str(), ref_str->size()};
+    }
+    cs.m_weight = 64;
+    cs.m_decompressed = std::make_unique<std::string>(m_compressor->decompress(get_compressed(id)));
+    m_in_memory_strings.push_back(id);
+    return {cs.m_decompressed->c_str(), cs.m_decompressed->size()};
+}
+
+} // namespace realm
diff --git a/src/realm/string_interner.hpp b/src/realm/string_interner.hpp
new file mode 100644
index 00000000000..2a36c9e38dc
--- /dev/null
+++ b/src/realm/string_interner.hpp
@@ -0,0 +1,96 @@
+/*************************************************************************
+ *
+ * Copyright 2016 Realm Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **************************************************************************/
+
+#ifndef REALM_STRING_INTERNER_HPP
+#define REALM_STRING_INTERNER_HPP
+
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <vector>
+
+#include <realm/keys.hpp>
+#include <realm/string_data.hpp>
+#include <realm/string_compressor.hpp>
+
+
+namespace realm {
+
+
+using StringID = size_t;
+
+class Array;
+class ArrayUnsigned;
+class Allocator;
+struct CachedString {
+    uint8_t m_weight = 0;
+    std::unique_ptr<std::string> m_decompressed;
+};
+
+class StringInterner {
+public:
+    // To be used exclusively from Table
+    StringInterner(Allocator& alloc, Array& parent, ColKey col_key, bool writable);
+    void update_from_parent(bool writable);
+    ~StringInterner();
+
+    // To be used from Obj and for searching
+    StringID intern(StringData);
+    std::optional<StringID> lookup(StringData);
+    int compare(StringID A, StringID B);
+    int compare(StringData, StringID A);
+    StringData get(StringID);
+
+private:
+    Array& m_parent; // need to be able to check if this is attached or not
+    std::unique_ptr<Array> m_top;
+    // Compressed strings are stored in blocks of 256.
+    // One array holds refs to all blocks:
+    std::unique_ptr<Array> m_data;
+    // In-memory representation of a block. Either only the ref to it,
+    // or a full vector of views into the block.
+    struct DataLeaf;
+    // in-memory metadata for faster access to compressed strings. Mirrors m_data.
+    std::vector<DataLeaf> m_compressed_leafs;
+    // 'm_hash_map' is used for mapping the hash of an uncompressed string to its string id.
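+    // As implied by hash_to_id()/add_to_hash_map() in the .cpp file, the map is a small
+    // radix tree over the 32-bit hash: interior nodes hold refs to subtrees, leaves hold
+    // tagged string ids, and each level consumes radix_node_consumes_bits of the hash.
+    // Hash collisions are tolerated: a probe returns all candidate ids, and the callers
+    // (intern()/lookup()) verify each candidate by an actual string comparison.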
+    std::unique_ptr<Array> m_hash_map;
+    // the block of compressed strings we're currently appending to:
+    std::unique_ptr<ArrayUnsigned> m_current_string_leaf;
+    // an array of strings we're currently appending to. This is used instead
+    // whenever we meet a string too large to be placed inline.
+    std::unique_ptr<Array> m_current_long_string_node;
+    void rebuild_internal();
+    CompressedStringView& get_compressed(StringID id);
+    // return true if the leaf was reloaded
+    bool load_leaf_if_needed(DataLeaf& leaf);
+    // return 'true' if the new ref was different and forced a reload
+    bool load_leaf_if_new_ref(DataLeaf& leaf, ref_type new_ref);
+    ColKey m_col_key; // for validation
+    std::unique_ptr<StringCompressor> m_compressor;
+    // At the moment we need to keep decompressed strings around if they've been
+    // returned to the caller, since we're handing out StringData references to
+    // their storage. This is a temporary solution.
+    std::vector<CachedString> m_decompressed_strings;
+    std::vector<StringID> m_in_memory_strings;
+    // Mutual exclusion is needed for frozen transactions only. Live objects are
+    // only used in single-threaded contexts, so they don't need it. For now, just lock always.
+    std::mutex m_mutex;
+};
+} // namespace realm
+
+#endif
diff --git a/src/realm/table.cpp b/src/realm/table.cpp
index ad9435f45ca..977339ade0d 100644
--- a/src/realm/table.cpp
+++ b/src/realm/table.cpp
@@ -26,6 +26,7 @@
 #include
 #include
 #include
+#include <realm/string_interner.hpp>
 #include
 #include
 #include
@@ -356,6 +357,7 @@ Table::Table(Allocator& alloc)
     , m_index_refs(m_alloc)
     , m_opposite_table(m_alloc)
     , m_opposite_column(m_alloc)
+    , m_interner_data(m_alloc)
     , m_repl(&g_dummy_replication)
     , m_own_ref(this, alloc.get_instance_version())
 {
@@ -363,7 +365,7 @@ Table::Table(Allocator& alloc)
     m_index_refs.set_parent(&m_top, top_position_for_search_indexes);
     m_opposite_table.set_parent(&m_top, top_position_for_opposite_table);
     m_opposite_column.set_parent(&m_top, top_position_for_opposite_column);
-
+    m_interner_data.set_parent(&m_top, top_position_for_interners);
     ref_type ref = create_empty_table(m_alloc); // Throws
     ArrayParent* parent = nullptr;
     size_t ndx_in_parent = 0;
@@ -378,6 +380,7 @@ Table::Table(Replication* const* repl, Allocator& alloc)
     , m_index_refs(m_alloc)
     , m_opposite_table(m_alloc)
     , m_opposite_column(m_alloc)
+    , m_interner_data(m_alloc)
    , m_repl(repl)
     , m_own_ref(this, alloc.get_instance_version())
 {
@@ -385,6 +388,7 @@ Table::Table(Replication* const* repl, Allocator& alloc)
     m_index_refs.set_parent(&m_top, top_position_for_search_indexes);
     m_opposite_table.set_parent(&m_top, top_position_for_opposite_table);
     m_opposite_column.set_parent(&m_top, top_position_for_opposite_column);
+    m_interner_data.set_parent(&m_top, top_position_for_interners);
     m_cookie = cookie_created;
 }
@@ -535,6 +540,9 @@ void Table::remove_column(ColKey col_key)
     erase_root_column(col_key); // Throws
     m_has_any_embedded_objects.reset();
+    auto i = col_key.get_index().val;
+    if (i < m_string_interners.size() && m_string_interners[i])
+        m_string_interners[i].reset();
 }
 
@@ -653,6 +661,14 @@ void Table::init(ref_type top_ref, ArrayParent* parent, size_t ndx_in_parent, bo
     else {
         m_tombstones = nullptr;
     }
+    if (m_top.size() > top_position_for_interners && m_top.get_as_ref(top_position_for_interners)) {
+        // Interner data exists
+        m_interner_data.init_from_parent();
+    }
+    else {
+        REALM_ASSERT_DEBUG(!m_interner_data.is_attached());
+    }
+    refresh_string_interners(is_writable);
     m_cookie = cookie_initialized;
 }
@@ -1054,7 +1070,19 @@ ColKey Table::do_insert_root_column(ColKey col_key, ColumnType type, StringData
     if (m_tombstones) {
         m_tombstones->insert_column(col_key);
     }
-
+    // create the string interner's internal rep as well as its data area
+    REALM_ASSERT_DEBUG(m_interner_data.is_attached());
+    while (col_ndx >= m_string_interners.size()) {
+        m_string_interners.push_back({});
+    }
+    while (col_ndx >= m_interner_data.size()) {
+        m_interner_data.add(0);
+    }
+    REALM_ASSERT(!m_string_interners[col_ndx]);
+    // FIXME: Limit creation of interners to exactly the columns where they can be relevant:
+    // if (col_key.get_type() == col_type_String)
+    m_string_interners[col_ndx] = std::make_unique<StringInterner>(m_alloc, m_interner_data, col_key, true);
     bump_storage_version();
 
     return col_key;
@@ -1086,6 +1114,17 @@ void Table::do_erase_root_column(ColKey col_key)
         REALM_ASSERT(m_index_accessors.back() == nullptr);
         m_index_accessors.pop_back();
     }
+    REALM_ASSERT_DEBUG(col_ndx < m_string_interners.size());
+    if (m_string_interners[col_ndx]) {
+        REALM_ASSERT_DEBUG(m_interner_data.is_attached());
+        REALM_ASSERT_DEBUG(col_ndx < m_interner_data.size());
+        auto data_ref = m_interner_data.get_as_ref(col_ndx);
+        if (data_ref)
+            Array::destroy_deep(data_ref, m_alloc);
+        m_interner_data.set(col_ndx, 0);
+        m_string_interners[col_ndx].reset();
+    }
     bump_content_version();
     bump_storage_version();
 }
@@ -1239,6 +1278,9 @@ void Table::detach(LifeCycleCookie cookie) noexcept
 {
     m_cookie = cookie;
     m_alloc.bump_instance_version();
+    // release string interners
+    m_string_interners.clear();
+    m_interner_data.detach();
 }
 
 void Table::fully_detach() noexcept
@@ -1249,6 +1291,7 @@ void Table::fully_detach() noexcept
     m_opposite_table.detach();
     m_opposite_column.detach();
     m_index_accessors.clear();
+    m_string_interners.clear();
 }
 
@@ -1465,6 +1508,7 @@ ref_type Table::create_empty_table(Allocator& alloc, TableKey key)
     top.add(0); // pk col key
     top.add(0); // flags
     top.add(0); // tombstones
+    top.add(0); // string interners
 
     REALM_ASSERT(top.size() == top_array_size);
@@ -1976,6 +2020,13 @@ void Table::update_from_parent() noexcept
         refresh_content_version();
         m_has_any_embedded_objects.reset();
+        if (m_top.size() > top_position_for_interners) {
+            if (m_top.get_as_ref(top_position_for_interners))
+                m_interner_data.update_from_parent();
+            else
+                m_interner_data.detach();
+        }
+        refresh_string_interners(false);
     }
     m_alloc.bump_storage_version();
 }
@@ -2104,7 +2155,7 @@ void Table::refresh_content_version()
 
 // Called when Group is moved to another version - either a rollback or an advance.
 // The content of the table is potentially different, so make no assumptions.
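+// Note: refresh_accessor_tree() now takes a 'writable' flag. String interners may have to
+// create their backing arrays on first use, and that is only legal inside a write
+// transaction; a read-only refresh must instead leave missing interner data detached.
+// Condensed form of the pattern introduced below (sketch, not verbatim):
+//
+//     if (writable && !m_interner_data.is_attached()) {
+//         m_interner_data.create(NodeHeader::type_HasRefs); // write transactions only
+//         m_interner_data.update_parent();
+//     }
+//     // read-only path: leave m_interner_data detached when the slot is absent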
-void Table::refresh_accessor_tree()
+void Table::refresh_accessor_tree(bool writable)
 {
     REALM_ASSERT(m_cookie == cookie_initialized);
     REALM_ASSERT(m_top.is_attached());
@@ -2134,12 +2185,78 @@
     else {
         m_tombstones = nullptr;
     }
+    if (writable) {
+        while (m_top.size() < top_position_for_interners)
+            m_top.add(0);
+    }
+    if (m_top.size() > top_position_for_interners) {
+        if (m_top.get_as_ref(top_position_for_interners))
+            m_interner_data.init_from_parent();
+        else
+            m_interner_data.detach();
+    }
     refresh_content_version();
     bump_storage_version();
     build_column_mapping();
+    refresh_string_interners(writable);
     refresh_index_accessors();
 }
 
+void Table::refresh_string_interners(bool writable)
+{
+    if (writable) {
+        // in a write transaction, make sure the interner arrays exist; this allows the
+        // string interners to extend their own backing data when "learning"
+        while (m_top.size() <= top_position_for_interners) {
+            m_top.add(0);
+        }
+    }
+    if (m_top.size() > top_position_for_interners && m_top.get_as_ref(top_position_for_interners))
+        m_interner_data.update_from_parent();
+    else
+        m_interner_data.detach();
+    if (writable) {
+        if (!m_interner_data.is_attached()) {
+            m_interner_data.create(NodeHeader::type_HasRefs);
+            m_interner_data.update_parent();
+        }
+    }
+    // bring the string interners in line with the underlying data.
+    // Precondition: we rely on the col keys in m_leaf_ndx2colkey[] being up to date.
+    for (size_t idx = 0; idx < m_leaf_ndx2colkey.size(); ++idx) {
+        auto col_key = m_leaf_ndx2colkey[idx];
+        if (col_key == ColKey()) {
+            // deleted column - we really don't want a string interner for this
+            if (idx < m_string_interners.size() && m_string_interners[idx])
+                m_string_interners[idx].reset();
+            continue;
+        }
+        REALM_ASSERT_DEBUG(col_key.get_index().val == idx);
+        // maintain sufficient size of the interner arrays to cover all columns
+        while (idx >= m_string_interners.size()) {
+            m_string_interners.push_back({});
+        }
+        while (writable && idx >= m_interner_data.size()) { // m_interner_data.is_attached() per above
+            m_interner_data.add(0);
+        }
+        if (m_string_interners[idx]) {
+            // existing interner
+            m_string_interners[idx]->update_from_parent(writable);
+        }
+        else {
+            // new interner. Note: if not in a writable state, the interner will not have a valid
+            // underlying data array. The interner is then set in a state where it cannot "learn",
+            // and searches will not find any matching interned strings.
+            m_string_interners[idx] = std::make_unique<StringInterner>(m_alloc, m_interner_data, col_key, writable);
+        }
+    }
+    if (m_string_interners.size() > m_leaf_ndx2colkey.size()) {
+        // remove any string interners which are no longer reachable, e.g. after a rollback
+        m_string_interners.resize(m_leaf_ndx2colkey.size());
+    }
+}
+
 void Table::refresh_index_accessors()
 {
     // Refresh search index accessors
@@ -3407,3 +3524,12 @@ void Table::typed_print(std::string prefix, ref_type ref) const
     }
     std::cout << prefix << "}" << std::endl;
 }
+
+StringInterner* Table::get_string_interner(ColKey col_key) const
+{
+    auto idx = col_key.get_index().val;
+    REALM_ASSERT(idx < m_string_interners.size());
+    auto interner = m_string_interners[idx].get();
+    REALM_ASSERT(interner);
+    return interner;
+}
diff --git a/src/realm/table.hpp b/src/realm/table.hpp
index 0830d7c733f..1f02e0540ac 100644
--- a/src/realm/table.hpp
+++ b/src/realm/table.hpp
@@ -573,7 +573,7 @@ class Table {
     ColKey::Idx spec_ndx2leaf_ndx(size_t idx) const;
     ColKey leaf_ndx2colkey(ColKey::Idx idx) const;
     ColKey spec_ndx2colkey(size_t ndx) const;
-
+    StringInterner* get_string_interner(ColKey col_key) const;
     // Queries
     // Using where(tv) is the new method to perform queries on TableView. The 'tv' can have any order; it does not
     // need to be sorted, and, resulting view retains its order.
@@ -737,6 +737,7 @@ class Table {
     Array m_index_refs;      // 5th slot in m_top
     Array m_opposite_table;  // 7th slot in m_top
     Array m_opposite_column; // 8th slot in m_top
+    Array m_interner_data;   // 14th slot in m_top
     std::vector<std::unique_ptr<SearchIndex>> m_index_accessors;
     ColKey m_primary_key_col;
     Replication* const* m_repl;
@@ -848,8 +849,9 @@ class Table {
     /// Refresh the part of the accessor tree that is rooted at this
     /// table.
-    void refresh_accessor_tree();
+    void refresh_accessor_tree(bool writable);
     void refresh_index_accessors();
+    void refresh_string_interners(bool writable);
     void refresh_content_version();
     void flush_for_commit();
@@ -861,6 +863,7 @@ class Table {
     std::vector<ColKey> m_leaf_ndx2colkey;
     std::vector<ColKey::Idx> m_spec_ndx2leaf_ndx;
     std::vector<size_t> m_leaf_ndx2spec_ndx;
+    mutable std::vector<std::unique_ptr<StringInterner>> m_string_interners;
     Type m_table_type = Type::TopLevel;
     uint64_t m_in_file_version_at_transaction_boundary = 0;
     AtomicLifeCycleCookie m_cookie;
@@ -880,7 +883,8 @@ class Table {
     static constexpr int top_position_for_flags = 12;
     // flags contents: bit 0-1 - table type
     static constexpr int top_position_for_tombstones = 13;
-    static constexpr int top_array_size = 14;
+    static constexpr int top_position_for_interners = 14;
+    static constexpr int top_array_size = 15;
 
     enum { s_collision_map_lo = 0, s_collision_map_hi = 1, s_collision_map_local_id = 2, s_collision_map_num_slots };
@@ -1413,6 +1417,11 @@ class _impl::TableFriend {
         return table.m_spec;
     }
 
+    static StringInterner* get_string_interner(const Table& table, ColKey col_key)
+    {
+        return table.get_string_interner(col_key);
+    }
+
     static TableRef get_opposite_link_table(const Table& table, ColKey col_key);
 
     static Group* get_parent_group(const Table& table) noexcept
diff --git a/src/realm/transaction.hpp b/src/realm/transaction.hpp
index 4da316c0d2e..e4db3c8a586 100644
--- a/src/realm/transaction.hpp
+++ b/src/realm/transaction.hpp
@@ -217,6 +217,7 @@ class Transaction : public Group {
 
     friend class DB;
     friend class DisableReplication;
+    friend class Table;
 };
 
 /*
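TableFriend::get_string_interner() above is the hook the object layer is expected to use when reading or writing interned strings. A minimal usage sketch under that assumption (the call site and the `table`/`col` variables are hypothetical, not part of this patch):

    // hypothetical call site in the object layer
    StringInterner* interner = _impl::TableFriend::get_string_interner(*table, col);
    StringID id = interner->intern("some value");    // same string => same id
    REALM_ASSERT(interner->get(id) == "some value"); // round-trips the bytes
    if (auto hit = interner->lookup("some value"))   // read-only probe, never "learns"
        REALM_ASSERT(*hit == id);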
diff --git a/src/realm/utilities.hpp b/src/realm/utilities.hpp
index 2125fe2c2fa..badc4d772b6 100644
--- a/src/realm/utilities.hpp
+++ b/src/realm/utilities.hpp
@@ -69,8 +69,8 @@ typedef SSIZE_T ssize_t;
 #endif
 
 #if defined(REALM_PTR_64) && defined(REALM_X86_OR_X64) && !REALM_WATCHOS
-#define REALM_COMPILER_SSE // Compiler supports SSE 4.2 through __builtin_ accessors or back-end assembler
-#define REALM_COMPILER_AVX
+// #define REALM_COMPILER_SSE // Compiler supports SSE 4.2 through __builtin_ accessors or back-end assembler
+// #define REALM_COMPILER_AVX
 #endif
 
 namespace realm {
diff --git a/test/test_shared.cpp b/test/test_shared.cpp
index 261a0cc70fc..5fe1f91a194 100644
--- a/test/test_shared.cpp
+++ b/test/test_shared.cpp
@@ -2288,6 +2288,86 @@
 #endif // REALM_ENABLE_ENCRYPTION
 
+TEST(Shared_MaxStrings)
+{
+    SHARED_GROUP_TEST_PATH(path);
+    DBRef sg = get_test_db(path);
+    auto trans = sg->start_write();
+    auto t = trans->add_table("MyTable");
+    ColKey ck = t->add_column(type_String, "MyStrings");
+    std::string str_a(16 * 1024 * 1024 - 257, 'a');
+    std::string str_b(16 * 1024 * 1024 - 257, 'b');
+    // make the strings harder to compress:
+    for (auto& e : str_a) {
+        e = std::rand() % 256;
+    }
+    for (auto& e : str_b) {
+        e = std::rand() % 256;
+    }
+    auto o = t->create_object();
+    o.set(ck, str_a);
+    trans->commit_and_continue_as_read();
+    auto v = o.get<StringData>(ck);
+    CHECK_EQUAL(str_a, v);
+    trans->promote_to_write();
+    auto o2 = t->create_object();
+    o2.set(ck, str_b);
+    trans->commit_and_continue_as_read();
+    v = o.get<StringData>(ck);
+    auto v2 = o2.get<StringData>(ck);
+    CHECK_EQUAL(v, str_a);
+    CHECK_EQUAL(v2, str_b);
+    trans->close();
+    sg.reset();
+}
+
+TEST(Shared_RandomMaxStrings)
+{
+    SHARED_GROUP_TEST_PATH(path);
+    DBRef sg = get_test_db(path);
+    auto trans = sg->start_write();
+    auto t = trans->add_table("MyTable");
+    ColKey ck = t->add_column(type_String, "MyStrings");
+    trans->commit_and_continue_as_read();
+    for (int run = 0; run < 10; ++run) {
+        trans->promote_to_write();
+        size_t str_length = std::rand() % (16 * 1024 * 1024 - 257);
+        std::string str(str_length, 'X');
+        for (auto& e : str) {
+            e = std::rand() % 256;
+        }
+        auto o = t->create_object();
+        o.set(ck, str);
+        trans->commit_and_continue_as_read();
+    }
+    trans->close();
+}
+
+TEST(Shared_RandomSmallStrings)
+{
+    SHARED_GROUP_TEST_PATH(path);
+    DBRef sg = get_test_db(path);
+    auto trans = sg->start_write();
+    auto t = trans->add_table("MyTable");
+    ColKey ck = t->add_column(type_String, "MyStrings");
+    trans->commit_and_continue_as_read();
+    std::string str(500, 'X');
+    // insert 100,000 objects, mutating a single character of the base string for
+    // each, so the column holds many long, highly similar strings
+    for (int run = 0; run < 100; ++run) {
+        trans->promote_to_write();
+        for (int i = 0; i < 1000; ++i) {
+            size_t offset = std::rand() % str.size();
+            str[offset] = 'a' + (std::rand() & 0x7);
+            auto o = t->create_object();
+            o.set(ck, str);
+        }
+        trans->commit_and_continue_as_read();
+    }
+    trans->close();
+}
+
 TEST(Shared_VersionCount)
 {
     SHARED_GROUP_TEST_PATH(path);
@@ -2468,6 +2551,7 @@ TEST(Shared_MovingSearchIndex)
     // Remove the padding column to shift the indexed columns
     {
         WriteTransaction wt(sg);
+        wt.get_group().verify();
         TableRef table = wt.get_table("foo");
 
         CHECK(table->has_search_index(int_col));
diff --git a/test/test_unresolved_links.cpp b/test/test_unresolved_links.cpp
index 60f50ee3488..b47c68fa313 100644
--- a/test/test_unresolved_links.cpp
+++ b/test/test_unresolved_links.cpp
@@ -870,6 +870,7 @@ TEST(Unresolved_PerformanceLinkList)
     tr->commit_and_continue_as_read();
     CHECK(t2 > t1);
     tr->promote_to_write();
+    // fails in compressed format because of unsigned/signed interpretation:
     tr->verify();
 }
diff --git a/test/test_upgrade_database.cpp b/test/test_upgrade_database.cpp
index ae95d1a02da..04bf2e533b4 100644
--- a/test/test_upgrade_database.cpp
+++ b/test/test_upgrade_database.cpp
@@ -166,6 +166,7 @@ TEST(Upgrade_Disabled)
 
 TEST(Upgrade_DatabaseWithUnsupportedOldFileFormat)
 {
+    // Not core 6, thus 'kind' is not set, and an assertion is triggered.
     std::string path = test_util::get_test_resource_path() + "test_upgrade_database_1000_1.realm";
     CHECK_OR_RETURN(File::exists(path));
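The Shared_*Strings tests above exercise both interner storage layouts (packed 16-bit leaves for ordinary strings, long-string nodes for multi-megabyte ones) across commit boundaries, but none of them pins down the dedup guarantee directly. A sketch of a complementary check in the same test idiom; this test is illustrative only and assumes the get_test_db() helper used by the tests above:

    TEST(Shared_InternedStringRoundTrip)
    {
        SHARED_GROUP_TEST_PATH(path);
        DBRef sg = get_test_db(path);
        auto trans = sg->start_write();
        auto t = trans->add_table("MyTable");
        ColKey ck = t->add_column(type_String, "MyStrings");
        // two objects sharing one value: the interner should hand out a single id,
        // and reads after commit must still observe the original bytes
        auto o1 = t->create_object();
        auto o2 = t->create_object();
        o1.set(ck, "shared value");
        o2.set(ck, "shared value");
        trans->commit_and_continue_as_read();
        CHECK_EQUAL(o1.get<StringData>(ck), "shared value");
        CHECK_EQUAL(o1.get<StringData>(ck), o2.get<StringData>(ck));
        trans->close();
    }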