Skip to content

Commit

Permalink
Merge pull request ceph#42731 from liu-chunmei/seastore-alloc-extent-hint
Browse files Browse the repository at this point in the history

crimson/seastore alloc extent hint

Replace L_ADDR_MIN with an object-hash-based hint in onode, omap, and object_data when calling alloc_extent.

 Reviewed-by: Sam Just <[email protected]> and Cheng, Yingxin <[email protected]>
  • Loading branch information
Liu-Chunmei authored Aug 17, 2021
2 parents f74b748 + 1c0e6e7 commit a4558a2
Show file tree
Hide file tree
Showing 27 changed files with 153 additions and 105 deletions.
2 changes: 1 addition & 1 deletion src/crimson/os/seastore/object_data_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
} else {
return ctx.tm.reserve_region(
ctx.t,
0 /* TODO -- pass hint based on object hash */,
ctx.onode.get_hint(),
MAX_OBJECT_SIZE
).si_then([&object_data](auto pin) {
ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion src/crimson/os/seastore/omap_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class OMapManager {
*/
using initialize_omap_iertr = base_iertr;
using initialize_omap_ret = initialize_omap_iertr::future<omap_root_t>;
virtual initialize_omap_ret initialize_omap(Transaction &t) = 0;
virtual initialize_omap_ret initialize_omap(Transaction &t, laddr_t hint) = 0;

/**
* get value(string) by key(string)
Expand Down
53 changes: 27 additions & 26 deletions src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ BtreeOMapManager::BtreeOMapManager(
: tm(tm) {}

BtreeOMapManager::initialize_omap_ret
BtreeOMapManager::initialize_omap(Transaction &t)
BtreeOMapManager::initialize_omap(Transaction &t, laddr_t hint)
{

logger().debug("{}", __func__);
return tm.alloc_extent<OMapLeafNode>(t, L_ADDR_MIN, OMAP_BLOCK_SIZE)
.si_then([](auto&& root_extent) {
return tm.alloc_extent<OMapLeafNode>(t, hint, OMAP_BLOCK_SIZE)
.si_then([hint](auto&& root_extent) {
root_extent->set_size(0);
omap_node_meta_t meta{1};
root_extent->set_meta(meta);
omap_root_t omap_root;
omap_root.update(root_extent->get_laddr(), 1);
omap_root.update(root_extent->get_laddr(), 1, hint);
return initialize_omap_iertr::make_ready_future<omap_root_t>(omap_root);
});
}
Expand All @@ -51,7 +51,7 @@ BtreeOMapManager::handle_root_split(
omap_root_t &omap_root,
const OMapNode::mutation_result_t& mresult)
{
return oc.tm.alloc_extent<OMapInnerNode>(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE)
return oc.tm.alloc_extent<OMapInnerNode>(oc.t, omap_root.hint, OMAP_BLOCK_SIZE)
.si_then([&omap_root, mresult](auto&& nroot) -> handle_root_split_ret {
auto [left, right, pivot] = *(mresult.split_tuple);
omap_node_meta_t meta{omap_root.depth + 1};
Expand All @@ -60,7 +60,7 @@ BtreeOMapManager::handle_root_split(
"", nroot->maybe_get_delta_buffer());
nroot->journal_inner_insert(nroot->iter_begin() + 1, right->get_laddr(),
pivot, nroot->maybe_get_delta_buffer());
omap_root.update(nroot->get_laddr(), omap_root.get_depth() + 1);
omap_root.update(nroot->get_laddr(), omap_root.get_depth() + 1, omap_root.hint);
return seastar::now();
});
}
Expand All @@ -75,7 +75,8 @@ BtreeOMapManager::handle_root_merge(
auto iter = root->cast<OMapInnerNode>()->iter_begin();
omap_root.update(
iter->get_val(),
omap_root.depth -= 1);
omap_root.depth -= 1,
omap_root.hint);
return oc.tm.dec_ref(oc.t, root->get_laddr()
).si_then([](auto &&ret) -> handle_root_merge_ret {
return seastar::now();
Expand All @@ -95,10 +96,10 @@ BtreeOMapManager::omap_get_value(
{
logger().debug("{}: {}", __func__, key);
return get_omap_root(
get_omap_context(t),
get_omap_context(t, omap_root.hint),
omap_root
).si_then([this, &t, &key](auto&& extent) {
return extent->get_value(get_omap_context(t), key);
).si_then([this, &t, &key, &omap_root](auto&& extent) {
return extent->get_value(get_omap_context(t, omap_root.hint), key);
}).si_then([](auto &&e) {
return omap_get_value_ret(
interruptible::ready_future_marker{},
Expand Down Expand Up @@ -131,15 +132,15 @@ BtreeOMapManager::omap_set_key(
{
logger().debug("{}: {} -> {}", __func__, key, value);
return get_omap_root(
get_omap_context(t),
get_omap_context(t, omap_root.hint),
omap_root
).si_then([this, &t, &key, &value](auto root) {
return root->insert(get_omap_context(t), key, value);
).si_then([this, &t, &key, &value, &omap_root](auto root) {
return root->insert(get_omap_context(t, omap_root.hint), key, value);
}).si_then([this, &omap_root, &t](auto mresult) -> omap_set_key_ret {
if (mresult.status == mutation_status_t::SUCCESS)
return seastar::now();
else if (mresult.status == mutation_status_t::WAS_SPLIT)
return handle_root_split(get_omap_context(t), omap_root, mresult);
return handle_root_split(get_omap_context(t, omap_root.hint), omap_root, mresult);
else
return seastar::now();
});
Expand All @@ -153,19 +154,19 @@ BtreeOMapManager::omap_rm_key(
{
logger().debug("{}: {}", __func__, key);
return get_omap_root(
get_omap_context(t),
get_omap_context(t, omap_root.hint),
omap_root
).si_then([this, &t, &key](auto root) {
return root->rm_key(get_omap_context(t), key);
).si_then([this, &t, &key, &omap_root](auto root) {
return root->rm_key(get_omap_context(t, omap_root.hint), key);
}).si_then([this, &omap_root, &t](auto mresult) -> omap_rm_key_ret {
if (mresult.status == mutation_status_t::SUCCESS) {
return seastar::now();
} else if (mresult.status == mutation_status_t::WAS_SPLIT) {
return handle_root_split(get_omap_context(t), omap_root, mresult);
return handle_root_split(get_omap_context(t, omap_root.hint), omap_root, mresult);
} else if (mresult.status == mutation_status_t::NEED_MERGE) {
auto root = *(mresult.need_merge);
if (root->get_node_size() == 1 && omap_root.depth != 1) {
return handle_root_merge(get_omap_context(t), omap_root, mresult);
return handle_root_merge(get_omap_context(t, omap_root.hint), omap_root, mresult);
} else {
return seastar::now();
}
Expand All @@ -185,11 +186,11 @@ BtreeOMapManager::omap_list(
{
logger().debug("{}", __func__);
return get_omap_root(
get_omap_context(t),
get_omap_context(t, omap_root.hint),
omap_root
).si_then([this, config, &t, &start](auto extent) {
).si_then([this, config, &t, &start, &omap_root](auto extent) {
return extent->list(
get_omap_context(t),
get_omap_context(t, omap_root.hint),
start,
config);
});
Expand All @@ -202,17 +203,17 @@ BtreeOMapManager::omap_clear(
{
logger().debug("{}", __func__);
return get_omap_root(
get_omap_context(t),
get_omap_context(t, omap_root.hint),
omap_root
).si_then([this, &t](auto extent) {
return extent->clear(get_omap_context(t));
).si_then([this, &t, &omap_root](auto extent) {
return extent->clear(get_omap_context(t, omap_root.hint));
}).si_then([this, &omap_root, &t] {
return tm.dec_ref(
t, omap_root.get_location()
).si_then([&omap_root] (auto ret) {
omap_root.update(
L_ADDR_NULL,
0);
0, L_ADDR_MIN);
return omap_clear_iertr::now();
});
}).handle_error_interruptible(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ class BtreeOMapManager : public OMapManager {
TransactionManager &tm;

omap_context_t get_omap_context(
Transaction &t) {
return omap_context_t{tm, t};
Transaction &t, laddr_t addr_min) {
return omap_context_t{tm, t, addr_min};
}

/* get_omap_root
Expand Down Expand Up @@ -65,7 +65,7 @@ class BtreeOMapManager : public OMapManager {
public:
explicit BtreeOMapManager(TransactionManager &tm);

initialize_omap_ret initialize_omap(Transaction &t) final;
initialize_omap_ret initialize_omap(Transaction &t, laddr_t hint) final;

omap_get_value_ret omap_get_value(
const omap_root_t &omap_root,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace crimson::os::seastore::omap_manager{
// Per-operation context passed by value to omap node operations.
// Built by BtreeOMapManager::get_omap_context() as {tm, t, hint}, so the
// member order below is part of the aggregate-initialization contract.
struct omap_context_t {
// Transaction manager used by omap code for alloc_extent() and dec_ref().
TransactionManager &tm;
// Transaction the current omap operation runs in.
Transaction &t;
// Logical-address hint forwarded to alloc_extent() when omap nodes are
// allocated (e.g. in make_full_merge); callers fill it from omap_root.hint.
laddr_t hint;
};

enum class mutation_status_t : uint8_t {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ OMapInnerNode::full_merge_ret
OMapInnerNode::make_full_merge(omap_context_t oc, OMapNodeRef right)
{
logger().debug("OMapInnerNode: {}", __func__);
return oc.tm.alloc_extent<OMapInnerNode>(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE)
return oc.tm.alloc_extent<OMapInnerNode>(oc.t, oc.hint, OMAP_BLOCK_SIZE)
.si_then([this, right] (auto &&replacement) {
replacement->merge_from(*this, *right->cast<OMapInnerNode>());
return full_merge_ret(
Expand Down Expand Up @@ -569,7 +569,7 @@ OMapLeafNode::make_full_merge(omap_context_t oc, OMapNodeRef right)
{
ceph_assert(right->get_type() == TYPE);
logger().debug("OMapLeafNode: {}", __func__);
return oc.tm.alloc_extent<OMapLeafNode>(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE)
return oc.tm.alloc_extent<OMapLeafNode>(oc.t, oc.hint, OMAP_BLOCK_SIZE)
.si_then([this, right] (auto &&replacement) {
replacement->merge_from(*this, *right->cast<OMapLeafNode>());
return full_merge_ret(
Expand Down
1 change: 1 addition & 0 deletions src/crimson/os/seastore/onode.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class Onode : public boost::intrusive_ref_counter<
virtual const onode_layout_t &get_layout() const = 0;
virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0;
virtual ~Onode() = default;
virtual laddr_t get_hint() const = 0;
};


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ struct FLTreeOnode final : Onode, Value {
status = status_t::DELETED;
}

// Implements the pure-virtual Onode::get_hint() by forwarding to the Value
// base class's get_hint(). Callers such as ObjectDataHandler pass the result
// as the address hint when reserving a region for object data.
// NOTE(review): Value::get_hint() is not visible in this view; per the commit
// message the hint is presumably derived from the object hash -- confirm.
laddr_t get_hint() const final {
return Value::get_hint();
}
~FLTreeOnode() final {}
};

Expand Down
46 changes: 24 additions & 22 deletions src/crimson/os/seastore/onode_manager/staged-fltree/node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ void Node::test_make_destructable(
eagain_ifuture<> Node::mkfs(context_t c, RootNodeTracker& root_tracker)
{
LOG_PREFIX(OTree::Node::mkfs);
return LeafNode::allocate_root(c, root_tracker
return LeafNode::allocate_root(c, L_ADDR_MIN, root_tracker
).si_then([c, FNAME](auto ret) {
INFOT("allocated root {}", c.t, ret->get_name());
});
Expand Down Expand Up @@ -462,13 +462,13 @@ Super::URef Node::deref_super()
return ret;
}

eagain_ifuture<> Node::upgrade_root(context_t c)
eagain_ifuture<> Node::upgrade_root(context_t c, laddr_t hint)
{
LOG_PREFIX(OTree::Node::upgrade_root);
assert(impl->field_type() == field_type_t::N0);
auto super_to_move = deref_super();
return InternalNode::allocate_root(
c, impl->level(), impl->laddr(), std::move(super_to_move)
c, hint, impl->level(), impl->laddr(), std::move(super_to_move)
).si_then([this, c, FNAME](auto new_root) {
as_child(search_position_t::end(), new_root);
INFOT("upgraded from {} to {}",
Expand Down Expand Up @@ -604,7 +604,7 @@ Node::try_merge_adjacent(
// so use rebuild_extent() as a workaround to rebuild the node from a
// fresh extent, thus no need to generate delta.
auto left_addr = left_for_merge->impl->laddr();
return left_for_merge->rebuild_extent(c
return left_for_merge->rebuild_extent(c, L_ADDR_MIN
).si_then([c, update_index_after_merge,
left_addr,
merge_stage = merge_stage,
Expand Down Expand Up @@ -743,7 +743,7 @@ eagain_ifuture<Ref<Node>> Node::load(
});
}

eagain_ifuture<NodeExtentMutable> Node::rebuild_extent(context_t c)
eagain_ifuture<NodeExtentMutable> Node::rebuild_extent(context_t c, laddr_t hint)
{
LOG_PREFIX(OTree::Node::rebuild_extent);
DEBUGT("{} ...", c.t, get_name());
Expand All @@ -752,7 +752,7 @@ eagain_ifuture<NodeExtentMutable> Node::rebuild_extent(context_t c)

// note: laddr can be changed after rebuild, but we don't fix the parent
// mapping as it is part of the merge process.
return impl->rebuild_extent(c);
return impl->rebuild_extent(c, hint);
}

eagain_ifuture<> Node::retire(context_t c, Ref<Node>&& this_ref)
Expand Down Expand Up @@ -1214,12 +1214,12 @@ eagain_ifuture<std::pair<Ref<Node>, Ref<Node>>> InternalNode::get_child_peers(
}

eagain_ifuture<Ref<InternalNode>> InternalNode::allocate_root(
context_t c, level_t old_root_level,
context_t c, laddr_t hint, level_t old_root_level,
laddr_t old_root_addr, Super::URef&& super)
{
// support tree height up to 256
ceph_assert(old_root_level < MAX_LEVEL);
return InternalNode::allocate(c, field_type_t::N0, true, old_root_level + 1
return InternalNode::allocate(c, hint, field_type_t::N0, true, old_root_level + 1
).si_then([c, old_root_addr,
super = std::move(super)](auto fresh_node) mutable {
auto root = fresh_node.node;
Expand Down Expand Up @@ -1379,7 +1379,7 @@ eagain_ifuture<> InternalNode::test_clone_root(
assert(impl->is_level_tail());
assert(impl->field_type() == field_type_t::N0);
Ref<const Node> this_ref = this;
return InternalNode::allocate(c_other, field_type_t::N0, true, impl->level()
return InternalNode::allocate(c_other, L_ADDR_MIN, field_type_t::N0, true, impl->level()
).si_then([this, c_other, &tracker_other](auto fresh_other) {
impl->test_copy_to(fresh_other.mut);
auto cloned_root = fresh_other.node;
Expand Down Expand Up @@ -1489,10 +1489,11 @@ eagain_ifuture<Ref<InternalNode>> InternalNode::insert_or_split(

// proceed to split with insert
// assume I'm already ref-counted by caller
return (is_root() ? upgrade_root(c) : eagain_iertr::now()
).si_then([this, c] {
auto hint = insert_key.get_hint();
return (is_root() ? upgrade_root(c, hint) : eagain_iertr::now()
).si_then([this, c, hint] {
return InternalNode::allocate(
c, impl->field_type(), impl->is_level_tail(), impl->level());
c, hint, impl->field_type(), impl->is_level_tail(), impl->level());
}).si_then([this, insert_key, insert_child, insert_pos,
insert_stage=insert_stage, insert_size=insert_size,
outdated_child, c, FNAME](auto fresh_right) mutable {
Expand Down Expand Up @@ -1735,9 +1736,9 @@ void InternalNode::validate_child_inconsistent(const Node& child) const
}

eagain_ifuture<InternalNode::fresh_node_t> InternalNode::allocate(
context_t c, field_type_t field_type, bool is_level_tail, level_t level)
context_t c, laddr_t hint, field_type_t field_type, bool is_level_tail, level_t level)
{
return InternalNodeImpl::allocate(c, field_type, is_level_tail, level
return InternalNodeImpl::allocate(c, hint, field_type, is_level_tail, level
).si_then([](auto&& fresh_impl) {
auto node = Ref<InternalNode>(new InternalNode(
fresh_impl.impl.get(), std::move(fresh_impl.impl)));
Expand Down Expand Up @@ -2013,7 +2014,7 @@ eagain_ifuture<> LeafNode::test_clone_root(
assert(impl->is_level_tail());
assert(impl->field_type() == field_type_t::N0);
Ref<const Node> this_ref = this;
return LeafNode::allocate(c_other, field_type_t::N0, true
return LeafNode::allocate(c_other, L_ADDR_MIN, field_type_t::N0, true
).si_then([this, c_other, &tracker_other](auto fresh_other) {
impl->test_copy_to(fresh_other.mut);
auto cloned_root = fresh_other.node;
Expand Down Expand Up @@ -2060,9 +2061,10 @@ eagain_ifuture<Ref<tree_cursor_t>> LeafNode::insert_value(
}
// split and insert
Ref<Node> this_ref = this;
return (is_root() ? upgrade_root(c) : eagain_iertr::now()
).si_then([this, c] {
return LeafNode::allocate(c, impl->field_type(), impl->is_level_tail());
auto hint = key.get_hint();
return (is_root() ? upgrade_root(c, hint) : eagain_iertr::now()
).si_then([this, c, hint] {
return LeafNode::allocate(c, hint, impl->field_type(), impl->is_level_tail());
}).si_then([this_ref = std::move(this_ref), this, c, &key, vconf, FNAME,
insert_pos, insert_stage=insert_stage, insert_size=insert_size](auto fresh_right) mutable {
auto right_node = fresh_right.node;
Expand Down Expand Up @@ -2096,10 +2098,10 @@ eagain_ifuture<Ref<tree_cursor_t>> LeafNode::insert_value(
}

eagain_ifuture<Ref<LeafNode>> LeafNode::allocate_root(
context_t c, RootNodeTracker& root_tracker)
context_t c, laddr_t hint, RootNodeTracker& root_tracker)
{
LOG_PREFIX(OTree::LeafNode::allocate_root);
return LeafNode::allocate(c, field_type_t::N0, true
return LeafNode::allocate(c, hint, field_type_t::N0, true
).si_then([c, &root_tracker, FNAME](auto fresh_node) {
auto root = fresh_node.node;
return c.nm.get_super(c.t, root_tracker
Expand Down Expand Up @@ -2221,9 +2223,9 @@ void LeafNode::track_erase(
}

eagain_ifuture<LeafNode::fresh_node_t> LeafNode::allocate(
context_t c, field_type_t field_type, bool is_level_tail)
context_t c, laddr_t hint, field_type_t field_type, bool is_level_tail)
{
return LeafNodeImpl::allocate(c, field_type, is_level_tail
return LeafNodeImpl::allocate(c, hint, field_type, is_level_tail
).si_then([](auto&& fresh_impl) {
auto node = Ref<LeafNode>(new LeafNode(
fresh_impl.impl.get(), std::move(fresh_impl.impl)));
Expand Down
Loading

0 comments on commit a4558a2

Please sign in to comment.