From f0674afd62434ba531039c47ed4111c685639d23 Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Tue, 22 Feb 2022 18:10:34 +0800
Subject: [PATCH 1/3] lun: try to recover the config from the LIO

If the LIO device exists but the config doesn't, it will only add a
disk item with "created":

"rbd/blockX1": {
    "created": "2022/02/22 11:23:12",
}

We can recover the config from LIO instead of just appending a
corrupted disk item. In all the other failure paths we should delete
the corrupted disk item.

Signed-off-by: Xiubo Li
---
 ceph_iscsi_config/lun.py | 68 +++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/ceph_iscsi_config/lun.py b/ceph_iscsi_config/lun.py
index c169c3d7..b2611258 100644
--- a/ceph_iscsi_config/lun.py
+++ b/ceph_iscsi_config/lun.py
@@ -574,6 +574,38 @@ def activate(self):
         if client_err:
             raise CephiSCSIError(client_err)

+    def add_disk_item(self, wwn, pool_id, recovery=False):
+        # rbd image is OK to use, so ensure it's in the config
+        # object
+        if self.config_key not in self.config.config['disks']:
+            self.config.add_item('disks', self.config_key)
+
+        if recovery:
+            gateways = self.config.config['gateways'].keys()
+            recovery_count = [gw for gw in gateways if gw != this_host()]
+        else:
+            recovery_count = []
+
+        # record the disk attributes; 'recovery' lists the other gateways
+        disk_attr = {"wwn": wwn,
+                     "image": self.image,
+                     "pool": self.pool,
+                     "allocating_host": self.allocating_host,
+                     "pool_id": pool_id,
+                     "controls": self.controls,
+                     "backstore": self.backstore,
+                     "backstore_object_name": self.backstore_object_name,
+                     "recovery": recovery_count}
+
+        self.config.update_item('disks',
+                                self.config_key,
+                                disk_attr)
+
+        self.logger.debug("(LUN.allocate) registered '{}' with "
+                          "wwn '{}' with the config "
+                          "object".format(self.image,
+                                          wwn))
+
     def allocate(self, keep_dev_in_lio=True, in_wwn=None):
         """
         Create image and add to LIO and config.
@@ -604,7 +636,6 @@ def allocate(self, keep_dev_in_lio=True, in_wwn=None):
                 rbd_image.create()

                 if not rbd_image.error:
-                    self.config.add_item('disks', self.config_key)
                     self.logger.info("(LUN.allocate) created {}/{} "
                                      "successfully".format(self.pool,
                                                            self.image))
@@ -630,13 +661,7 @@ def allocate(self, keep_dev_in_lio=True, in_wwn=None):
         else:
             # requested image is already defined to ceph

-            if rbd_image.valid:
-                # rbd image is OK to use, so ensure it's in the config
-                # object
-                if self.config_key not in self.config.config['disks']:
-                    self.config.add_item('disks', self.config_key)
-
-            else:
+            if not rbd_image.valid:
                 # rbd image is not valid for export, so abort
                 self.error = True
                 features = ','.join(RBDDev.unsupported_features_list[self.backstore])
@@ -700,23 +725,7 @@ def allocate(self, keep_dev_in_lio=True, in_wwn=None):
                     if self.error:
                         return None

-                    disk_attr = {"wwn": wwn,
-                                 "image": self.image,
-                                 "pool": self.pool,
-                                 "allocating_host": self.allocating_host,
-                                 "pool_id": rbd_image.pool_id,
-                                 "controls": self.controls,
-                                 "backstore": self.backstore,
-                                 "backstore_object_name": self.backstore_object_name}
-
-                    self.config.update_item('disks',
-                                            self.config_key,
-                                            disk_attr)
-
-                    self.logger.debug("(LUN.allocate) registered '{}' with "
-                                      "wwn '{}' with the config "
-                                      "object".format(self.image,
-                                                      wwn))
+                    self.add_disk_item(wwn, rbd_image.pool_id)
                     self.logger.info("(LUN.allocate) added '{}/{}' to LIO and"
                                      " config object".format(self.pool,
                                                              self.image))
@@ -780,8 +789,15 @@ def allocate(self, keep_dev_in_lio=True, in_wwn=None):
                 self.logger.critical(self.error_msg)
                 return None

+            # try to recover the config from LIO
+            if local_gw == self.allocating_host:
+                # lun is now in LIO, time for some housekeeping :P
+                wwn = so._get_wwn()
+                self.add_disk_item(wwn, rbd_image.pool_id, True)
+                self.num_changes += 1
+
         self.logger.debug("config meta data for this disk is "
-                          "{}".format(self.config.config['disks'][self.config_key]))
+                          "{}".format(self.config.config['disks'].get(self.config_key)))

         # the owning host for an image is the only host that commits to the
         # config

From fc5a0c18b9350bbd2af8c3376955655595ef29ae Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Wed, 23 Feb 2022 15:24:14 +0800
Subject: [PATCH 2/3] common: add update_sub_item support

We can update the sub item only for 'disks'

Signed-off-by: Xiubo Li
---
 ceph_iscsi_config/common.py | 68 ++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/ceph_iscsi_config/common.py b/ceph_iscsi_config/common.py
index 59f04820..8aed7596 100644
--- a/ceph_iscsi_config/common.py
+++ b/ceph_iscsi_config/common.py
@@ -11,14 +11,15 @@

 class ConfigTransaction(object):

-    def __init__(self, cfg_type, element_name, txn_action='add', initial_value=None):
+    def __init__(self, cfg_type, element_name, item_name, txn_action='add', initial_value=None):

         self.type = cfg_type
         self.action = txn_action
-        self.item_name = element_name
+        self.element_name = element_name
+        self.item_name = item_name

         init_state = {} if initial_value is None else initial_value
-        self.item_content = init_state
+        self.content = init_state

     def __repr__(self):
         return str(self.__dict__)
@@ -527,7 +528,7 @@ def add_item(self, cfg_type, element_name=None, initial_value=None):
             if isinstance(init_state, str) and 'created' not in self.config[cfg_type]:
                 self.config[cfg_type]['created'] = now
                 # add a separate transaction to capture the creation date to the section
-                txn = ConfigTransaction(cfg_type, 'created', initial_value=now)
+                txn = ConfigTransaction(cfg_type, 'created', None, initial_value=now)
                 self.txn_list.append(txn)

         else:
@@ -540,7 +541,7 @@ def add_item(self, cfg_type, element_name=None, initial_value=None):
         self.logger.debug("(Config.add_item) config updated to {}".format(self.config))
         self.changed = True

-        txn = ConfigTransaction(cfg_type, element_name, initial_value=init_state)
+        txn = ConfigTransaction(cfg_type, element_name, None, initial_value=init_state)
         self.txn_list.append(txn)

     def del_item(self, cfg_type, element_name):
@@ -551,44 +552,48 @@ def del_item(self, cfg_type, element_name):
             del self.config[cfg_type]
         self.logger.debug("(Config.del_item) config updated to {}".format(self.config))

-        txn = ConfigTransaction(cfg_type, element_name, 'delete')
+        txn = ConfigTransaction(cfg_type, element_name, None, 'delete')
         self.txn_list.append(txn)

-    def update_item(self, cfg_type, element_name, element_value):
+    def update_sub_item(self, cfg_type, element_name, item_name, value):
         now = get_time()

         if element_name:
-            current_values = self.config[cfg_type][element_name]
+            if item_name:
+                current_values = self.config[cfg_type][element_name][item_name]
+                self.config[cfg_type][element_name][item_name] = value
+            else:
+                current_values = self.config[cfg_type][element_name]
+                if isinstance(value, dict):
+                    merged = current_values.copy()
+                    new_dict = value
+                    new_dict['updated'] = now
+                    merged.update(new_dict)
+                    value = merged.copy()
+                self.config[cfg_type][element_name] = value
             self.logger.debug("prior to update, item contains {}".format(current_values))
-            if isinstance(element_value, dict):
-                merged = current_values.copy()
-                new_dict = element_value
-                new_dict['updated'] = now
-                merged.update(new_dict)
-                element_value = merged.copy()
-
-            self.config[cfg_type][element_name] = element_value
         else:
             # update to a root level config element, like version
-            self.config[cfg_type] = element_value
+            self.config[cfg_type] = value

         self.logger.debug("(Config.update_item) config is {}".format(self.config))
         self.changed = True
         self.logger.debug("update_item: type={}, item={}, update={}".format(
-            cfg_type, element_name, element_value))
+            cfg_type, element_name, value))

-        txn = ConfigTransaction(cfg_type, element_name, 'add')
-        txn.item_content = element_value
+        txn = ConfigTransaction(cfg_type, element_name, item_name, 'add', value)
         self.txn_list.append(txn)

+    def update_item(self, cfg_type, element_name, value):
+        self.update_sub_item(cfg_type, element_name, None, value)
+
     def set_item(self, cfg_type, element_name, element_value):
         self.logger.debug("(Config.update_item) config is {}".format(self.config))
         self.changed = True
         self.logger.debug("update_item: type={}, item={}, update={}".format(
             cfg_type, element_name, element_value))

-        txn = ConfigTransaction(cfg_type, element_name, 'add')
-        txn.item_content = element_value
+        txn = ConfigTransaction(cfg_type, element_name, None, 'add', element_value)
         self.txn_list.append(txn)

     def _commit_rbd(self, post_action):
@@ -607,14 +612,23 @@ def _commit_rbd(self, post_action):

             self.logger.debug("_commit_rbd transaction shows {}".format(txn))
             if txn.action == 'add':  # add's and updates
-                if txn.item_name:
-                    current_config[txn.type][txn.item_name] = txn.item_content
+                if txn.element_name:
+                    if txn.item_name:
+                        # the 'updated' timestamp is only ever moved forward
+                        if txn.item_name == "updated":
+                            cur_time = current_config[txn.type][txn.element_name][txn.item_name]
+                            if cur_time < txn.content:
+                                current_config[txn.type][txn.element_name][txn.item_name] = txn.content
+                        else:
+                            current_config[txn.type][txn.element_name][txn.item_name] = txn.content
+                    else:
+                        current_config[txn.type][txn.element_name] = txn.content
                 else:
-                    current_config[txn.type] = txn.item_content
+                    current_config[txn.type] = txn.content

             elif txn.action == 'delete':
-                if txn.item_name:
-                    del current_config[txn.type][txn.item_name]
+                if txn.element_name:
+                    del current_config[txn.type][txn.element_name]
                 else:
                     del current_config[txn.type]
             else:

From 237cc1c53d7cb87269c4e3a2e775e2d8ba2b08a5 Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Wed, 23 Feb 2022 12:47:54 +0800
Subject: [PATCH 3/3] common: try to update LIO device's size

When one gateway has recovered the config from the rbd image and
LIO device, the image size may have changed, so on the other gateways
we need to update the size of the LIO device.

Signed-off-by: Xiubo Li
---
 ceph_iscsi_config/common.py | 49 +++++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/ceph_iscsi_config/common.py b/ceph_iscsi_config/common.py
index 8aed7596..38d7f4e6 100644
--- a/ceph_iscsi_config/common.py
+++ b/ceph_iscsi_config/common.py
@@ -4,9 +4,9 @@
 import json
 import traceback

-from ceph_iscsi_config.backstore import USER_RBD
+from ceph_iscsi_config.backstore import USER_RBD, lookup_storage_object_by_disk
 import ceph_iscsi_config.settings as settings
-from ceph_iscsi_config.utils import encryption_available, get_time
+from ceph_iscsi_config.utils import encryption_available, get_time, get_rbd_size, this_host


 class ConfigTransaction(object):
@@ -189,8 +189,53 @@ def needs_hostname_update(self):

         return True

+    # For every disk whose 'recovery' list names this gateway, compare the
+    # rbd image size with the local LIO device size, grow the LIO device if
+    # the image is larger, then remove this gateway from the list and commit.
+    def try_to_update_lio_dev_size(self):
+        if self.config['version'] < 11:
+            return
+
+        now = get_time()
+        local_gw = this_host()
+        for disk_key, disk in self.config['disks'].items():
+            if disk.get('recovery', []) == []:
+                continue
+
+            if local_gw not in disk['recovery']:
+                continue
+
+            so = lookup_storage_object_by_disk(self, disk_key)
+            if not so:
+                continue
+
+            try:
+                size = get_rbd_size(disk['pool'], disk['image'])
+            except Exception as err:
+                self.logger.warning("Failed to get image size for {}: "
+                                    "{}".format(disk_key, err))
+                continue
+
+            # most likely case; NOTE(review): gw stays in 'recovery' here
+            if so.size == size:
+                continue
+            elif so.size < size:
+                so.set_attribute("dev_size", size)
+            else:
+                self.logger.warning("Image size({}) is smaller than LIO device "
+                                    "size({})".format(size, so.size))
+
+            # this gateway has been handled: drop it from the pending list
+            disk['recovery'].remove(local_gw)
+            self.update_sub_item('disks', disk_key,
+                                 'recovery', disk['recovery'])
+            self.update_sub_item('disks', disk_key,
+                                 'updated', now)
+            self.commit("retain")
+
     def _upgrade_config(self):
         update_hostname = self.needs_hostname_update()
+        self.try_to_update_lio_dev_size()

         if self.config['version'] >= Config.seed_config['version'] and not update_hostname:
             return