From 7aa972def9f53fd9ed2f708aa87ce83362765cbf Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Mon, 18 Mar 2024 10:54:24 +0800 Subject: [PATCH 1/3] crimson/os/seastore/btree: check for reserved ptrs when determining children stability Fixes: https://tracker.ceph.com/issues/64957 Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/btree/fixed_kv_btree.h | 8 ++++++-- src/crimson/os/seastore/btree/fixed_kv_node.cc | 6 +++++- src/crimson/os/seastore/btree/fixed_kv_node.h | 11 ++++++++--- src/crimson/os/seastore/cached_extent.h | 2 ++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index 2d758ef4be2d8..f15682621fb77 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -15,8 +15,6 @@ #include "crimson/os/seastore/btree/btree_range_pin.h" #include "crimson/os/seastore/root_block.h" -#define RESERVATION_PTR reinterpret_cast(0x1) - namespace crimson::os::seastore::lba_manager::btree { struct lba_map_val_t; } @@ -25,6 +23,12 @@ namespace crimson::os::seastore { bool is_valid_child_ptr(ChildableCachedExtent* child); +bool is_reserved_ptr(ChildableCachedExtent* child); + +inline ChildableCachedExtent* get_reserved_ptr() { + return (ChildableCachedExtent*)0x1; +} + template phy_tree_root_t& get_phy_tree_root(root_t& r); diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.cc b/src/crimson/os/seastore/btree/fixed_kv_node.cc index 00aceab92b382..94783a0109105 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.cc +++ b/src/crimson/os/seastore/btree/fixed_kv_node.cc @@ -6,7 +6,11 @@ namespace crimson::os::seastore { bool is_valid_child_ptr(ChildableCachedExtent* child) { - return child != nullptr && child != RESERVATION_PTR; + return child != nullptr && child != get_reserved_ptr(); +} + +bool is_reserved_ptr(ChildableCachedExtent* child) { + return child == get_reserved_ptr(); } } // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index 544727f30aac8..de3569c0c5bcf 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -134,7 +134,7 @@ struct FixedKVNode : ChildableCachedExtent { // copy sources when committing this lba node, because // we rely on pointers' "nullness" to avoid copying // pointers for updated values - children[offset] = RESERVATION_PTR; + children[offset] = get_reserved_ptr(); } } @@ -431,7 +431,7 @@ struct FixedKVNode : ChildableCachedExtent { if (!child) { child = source.children[foreign_it.get_offset()]; // child can be either valid if present, nullptr if absent, - // or RESERVATION_PTR. + // or reserved ptr. } foreign_it++; local_it++; @@ -972,6 +972,7 @@ struct FixedKVLeafNode get_child_ret_t get_logical_child(op_context_t c, uint16_t pos) final { auto child = this->children[pos]; + ceph_assert(!is_reserved_ptr(child)); if (is_valid_child_ptr(child)) { ceph_assert(child->is_logical()); return c.cache.template get_extent_viewable_by_trans< @@ -996,9 +997,13 @@ struct FixedKVLeafNode // children are considered stable if any of the following case is true: // 1. The child extent is absent in cache // 2. The child extent is stable + // + // For reserved mappings, the return values are undefined. bool is_child_stable(uint16_t pos) const final { auto child = this->children[pos]; - if (is_valid_child_ptr(child)) { + if (is_reserved_ptr(child)) { + return true; + } else if (is_valid_child_ptr(child)) { ceph_assert(child->is_logical()); return child->is_stable(); } else if (this->is_pending()) { diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 35753d101bd25..18ddbd95796c3 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -1062,6 +1062,8 @@ class PhysicalNodeMapping { child_pos->link_child(c); } + // For reserved mappings, the return values are + // undefined although it won't crash virtual bool is_stable() const = 0; virtual bool is_clone() const = 0; bool is_zero_reserved() const { From b1c59ca077597e14a053d47d4961b12ba7a2a1b0 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Mon, 18 Mar 2024 17:24:02 +0800 Subject: [PATCH 2/3] crimson/os/seastore/cached_extent: add comments to elaborate why MUTATION_PENDING and under-io extents are "stable" Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/cached_extent.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 18ddbd95796c3..730a0ace9a0b7 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -426,6 +426,9 @@ class CachedExtent /// Returns true if extent is stable and shared among transactions bool is_stable() const { return is_stable_written() || + // MUTATION_PENDING and under-io extents are to-be-stable extents, + // for the sake of caveats that checks the correctness of extents + // states, we consider them stable. (is_mutation_pending() && is_pending_io()); } From 7dcd74ac098201d2ea6f9d1c723f9944ade6bf33 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Thu, 21 Mar 2024 09:54:42 +0800 Subject: [PATCH 3/3] crimson/os/seastore/btree: always check the stability of extents within the current transaction's view Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/btree/btree_range_pin.cc | 2 +- src/crimson/os/seastore/btree/fixed_kv_node.h | 14 +++++++++----- src/crimson/os/seastore/cache.h | 9 +++++++++ src/crimson/os/seastore/cached_extent.h | 10 +++++----- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/crimson/os/seastore/btree/btree_range_pin.cc b/src/crimson/os/seastore/btree/btree_range_pin.cc index 1fe79eafa8177..3da1cac970ae7 100644 --- a/src/crimson/os/seastore/btree/btree_range_pin.cc +++ b/src/crimson/os/seastore/btree/btree_range_pin.cc @@ -29,7 +29,7 @@ bool BtreeNodeMapping::is_stable() const assert(parent->is_valid()); assert(pos != std::numeric_limits::max()); auto &p = (FixedKVNode&)*parent; - return p.is_child_stable(pos); + return p.is_child_stable(ctx, pos); } template class BtreeNodeMapping; diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index de3569c0c5bcf..ab8775ad8aa83 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -229,13 +229,14 @@ struct FixedKVNode : ChildableCachedExtent { virtual get_child_ret_t get_logical_child(op_context_t c, uint16_t pos) = 0; - virtual bool is_child_stable(uint16_t pos) const = 0; + virtual bool is_child_stable(op_context_t, uint16_t pos) const = 0; template get_child_ret_t get_child(op_context_t c, iter_t iter) { auto pos = iter.get_offset(); assert(children.capacity()); auto child = children[pos]; + ceph_assert(!is_reserved_ptr(child)); if (is_valid_child_ptr(child)) { ceph_assert(child->get_type() == T::TYPE); return c.cache.template get_extent_viewable_by_trans(c.trans, (T*)child); @@ -596,7 +597,7 @@ struct FixedKVInternalNode return get_child_ret_t(child_pos_t(nullptr, 0)); } - bool is_child_stable(uint16_t pos) const final { + bool is_child_stable(op_context_t, uint16_t pos) const final { ceph_abort("impossible"); return false; } @@ -999,13 +1000,16 @@ struct FixedKVLeafNode // 2. The child extent is stable // // For reserved mappings, the return values are undefined. - bool is_child_stable(uint16_t pos) const final { + bool is_child_stable(op_context_t c, uint16_t pos) const final { auto child = this->children[pos]; if (is_reserved_ptr(child)) { return true; } else if (is_valid_child_ptr(child)) { ceph_assert(child->is_logical()); - return child->is_stable(); + ceph_assert( + child->is_pending_in_trans(c.trans.get_trans_id()) + || this->is_stable_written()); + return c.cache.is_viewable_extent_stable(c.trans, child); } else if (this->is_pending()) { auto key = this->iter_idx(pos).get_key(); auto &sparent = this->get_stable_for_key(key); @@ -1013,7 +1017,7 @@ struct FixedKVLeafNode auto child = sparent.children[spos]; if (is_valid_child_ptr(child)) { ceph_assert(child->is_logical()); - return child->is_stable(); + return c.cache.is_viewable_extent_stable(c.trans, child); } else { return true; } diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 28471bbcd7ea3..c3dbda1dd8ada 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -443,6 +443,15 @@ class Cache { return get_absent_extent(t, offset, length, [](T &){}); } + bool is_viewable_extent_stable( + Transaction &t, + CachedExtentRef extent) + { + assert(extent); + auto view = extent->get_transactional_view(t); + return view->is_stable(); + } + using get_extent_ertr = base_ertr; get_extent_ertr::future get_extent_viewable_by_trans( diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 730a0ace9a0b7..fee7ca74514cd 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -618,6 +618,11 @@ class CachedExtent return last_committed_crc; } + /// Returns true if the extent part of the open transaction + bool is_pending_in_trans(transaction_id_t id) const { + return is_pending() && pending_for_transaction == id; + } + private: template friend class read_set_item_t; @@ -648,11 +653,6 @@ class CachedExtent ptr = nptr; } - /// Returns true if the extent part of the open transaction - bool is_pending_in_trans(transaction_id_t id) const { - return is_pending() && pending_for_transaction == id; - } - /// hook for intrusive ref list (mainly dirty or lru list) boost::intrusive::list_member_hook<> primary_ref_list_hook; using primary_ref_list_member_options = boost::intrusive::member_hook<