Skip to content

Commit

Permalink
[Stretch cluster] test device replacement in stretch cluster (red-hat…
Browse files Browse the repository at this point in the history
…-storage#9548)

Signed-off-by: Mahesh Shetty <[email protected]>
  • Loading branch information
mashetty330 authored Feb 10, 2025
1 parent 17eb221 commit afa1627
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 1 deletion.
17 changes: 17 additions & 0 deletions ocs_ci/ocs/resources/pv.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,23 @@ def get_pv_status(pv_obj):
return pv_obj.get("status").get("phase")


def get_pv_in_status(storage_class, status="Bound"):
    """
    Collect the PVs of a given storageclass that are currently in a given status.

    Args:
        storage_class (str): name of the storage class to filter PVs by
        status (str): desired PV phase to match (default: "Bound")

    Returns:
        list: PV objects whose status phase equals ``status``

    """
    matching_pvs = []
    for pv_obj in get_pv_objs_in_sc(storage_class):
        if get_pv_status(pv_obj) == status:
            matching_pvs.append(pv_obj)
    return matching_pvs


def get_pv_name(pv_obj):
"""
Get the name of the pv object
Expand Down
35 changes: 35 additions & 0 deletions tests/functional/disaster-recovery/sc_arbiter/test_add_capacity.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from ocs_ci.framework.pytest_customization.marks import (
turquoise_squad,
stretchcluster_required,
tier1,
)
from ocs_ci.helpers.cnv_helpers import cal_md5sum_vm
from ocs_ci.ocs import constants
from ocs_ci.ocs.resources import storage_cluster
from ocs_ci.ocs.resources.pod import (
Expand All @@ -18,6 +20,7 @@
logger = logging.getLogger(__name__)


@tier1
@turquoise_squad
@stretchcluster_required
class TestAddCapacityStretchCluster:
Expand Down Expand Up @@ -85,6 +88,8 @@ def test_cluster_expansion(
setup_logwriter_rbd_workload_factory,
logreader_workload_factory,
iterations,
setup_cnv,
cnv_workload,
):
"""
Test cluster exapnsion and health when add capacity is performed
Expand All @@ -107,6 +112,13 @@ def test_cluster_expansion(
)
logger.info("All the workloads pods are successfully up and running")

# setup vm and write some data to the VM instance
vm_obj = cnv_workload(volume_interface=constants.VM_VOLUME_PVC)
vm_obj.run_ssh_cmd(
command="dd if=/dev/zero of=/file_1.txt bs=1024 count=102400"
)
md5sum_before = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")

start_time = datetime.now(timezone.utc)

sc_obj.get_logfile_map(label=constants.LOGWRITER_CEPHFS_LABEL)
Expand All @@ -123,6 +135,29 @@ def test_cluster_expansion(
sc_obj.post_failure_checks(start_time, end_time, wait_for_read_completion=False)
logger.info("Successfully verified with post failure checks for the workloads")

# check vm data written after the failure for integrity
md5sum_after = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")
assert (
md5sum_before == md5sum_after
), "Data integrity of the file inside VM is not maintained during the add capacity"
logger.info(
"Data integrity of the file inside VM is maintained during the add capacity"
)

# check if new data can be created
vm_obj.run_ssh_cmd(
command="dd if=/dev/zero of=/file_2.txt bs=1024 count=103600"
)
logger.info("Successfully created new data inside VM")

# check if the data can be copied back to local machine
vm_obj.scp_from_vm(local_path="/tmp", vm_src_path="/file_1.txt")
logger.info("VM data is successfully copied back to local machine")

# stop the VM
vm_obj.stop()
logger.info("Stoped the VM successfully")

sc_obj.cephfs_logreader_job.delete()
logger.info(sc_obj.cephfs_logreader_pods)
for pod in sc_obj.cephfs_logreader_pods:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import logging
from datetime import datetime, timezone

from ocs_ci.framework.pytest_customization.marks import (
stretchcluster_required,
turquoise_squad,
polarion_id,
tier1,
)
from ocs_ci.helpers.cnv_helpers import cal_md5sum_vm
from ocs_ci.ocs import constants
from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_in_statuses

from ocs_ci.ocs.osd_operations import osd_device_replacement
from ocs_ci.ocs.resources.stretchcluster import StretchCluster

logger = logging.getLogger(__name__)


@tier1
@stretchcluster_required
@turquoise_squad
class TestDeviceReplacementInStretchCluster:
    """
    Verify that an OSD device replacement on a stretch cluster does not cause
    data loss or data corruption for CephFS/RBD logwriter workloads and a CNV VM.
    """

    @polarion_id("OCS-5047")
    def test_device_replacement(
        self,
        nodes,
        setup_logwriter_cephfs_workload_factory,
        setup_logwriter_rbd_workload_factory,
        logreader_workload_factory,
        cnv_workload,
        setup_cnv,
    ):
        """
        Test device replacement in stretch cluster while logwriter workload
        for both CephFS and RBD is running

        Steps:
            1) Run logwriter/reader workload for both CephFS and RBD volumes
            2) Perform device replacement procedure
            3) Verify no data loss
            4) Verify no data corruption

        """

        sc_obj = StretchCluster()

        # setup logwriter workloads in the background
        (
            sc_obj.cephfs_logwriter_dep,
            sc_obj.cephfs_logreader_job,
        ) = setup_logwriter_cephfs_workload_factory(read_duration=0)

        sc_obj.get_logwriter_reader_pods(label=constants.LOGWRITER_CEPHFS_LABEL)
        sc_obj.get_logwriter_reader_pods(label=constants.LOGREADER_CEPHFS_LABEL)
        # RBD logwriter runs with 2 replicas (RWO volumes), unlike the CephFS one
        sc_obj.get_logwriter_reader_pods(
            label=constants.LOGWRITER_RBD_LABEL, exp_num_replicas=2
        )
        logger.info("All the workloads pods are successfully up and running")

        # setup vm and write some data to the VM instance
        vm_obj = cnv_workload(volume_interface=constants.VM_VOLUME_PVC)
        vm_obj.run_ssh_cmd(
            command="dd if=/dev/zero of=/file_1.txt bs=1024 count=102400"
        )
        # checksum taken before the disruption; compared again afterwards
        md5sum_before = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")

        # mark the window during which the disruption happens, so the
        # post-failure checks can scope their log verification to it
        start_time = datetime.now(timezone.utc)

        sc_obj.get_logfile_map(label=constants.LOGWRITER_CEPHFS_LABEL)
        sc_obj.get_logfile_map(label=constants.LOGWRITER_RBD_LABEL)

        # run device replacement procedure
        logger.info("Running device replacement procedure now")
        osd_device_replacement(nodes)

        # check IO for any failures
        end_time = datetime.now(timezone.utc)
        sc_obj.post_failure_checks(start_time, end_time, wait_for_read_completion=False)
        logger.info("Successfully verified with post failure checks for the workloads")

        # check vm data written after the failure for integrity
        md5sum_after = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")
        assert (
            md5sum_before == md5sum_after
        ), "Data integrity of the file inside VM is not maintained during the device replacement"
        logger.info(
            "Data integrity of the file inside VM is maintained during the device replacement"
        )

        # check if new data can be created
        vm_obj.run_ssh_cmd(
            command="dd if=/dev/zero of=/file_2.txt bs=1024 count=103600"
        )
        logger.info("Successfully created new data inside VM")

        # check if the data can be copied back to local machine
        vm_obj.scp_from_vm(local_path="/tmp", vm_src_path="/file_1.txt")
        logger.info("VM data is successfully copied back to local machine")

        # stop the VM
        vm_obj.stop()
        logger.info("Stopped the VM successfully")

        # tear down the old CephFS logreader pods before data-loss verification
        sc_obj.cephfs_logreader_job.delete()
        logger.info(sc_obj.cephfs_logreader_pods)
        for pod in sc_obj.cephfs_logreader_pods:
            pod.wait_for_pod_delete(timeout=120)
        logger.info("All old CephFS logreader pods are deleted")

        # check for any data loss
        assert sc_obj.check_for_data_loss(
            constants.LOGWRITER_CEPHFS_LABEL
        ), "[CephFS] Data is lost"
        logger.info("[CephFS] No data loss is seen")
        assert sc_obj.check_for_data_loss(
            constants.LOGWRITER_RBD_LABEL
        ), "[RBD] Data is lost"
        logger.info("[RBD] No data loss is seen")

        # check for data corruption: re-read the written logs with a fresh
        # logreader job and let it validate the contents
        logreader_workload_factory(
            pvc=sc_obj.get_workload_pvc_obj(constants.LOGWRITER_CEPHFS_LABEL)[0],
            logreader_path=constants.LOGWRITER_CEPHFS_READER,
            duration=5,
        )
        sc_obj.get_logwriter_reader_pods(constants.LOGREADER_CEPHFS_LABEL)

        wait_for_pods_to_be_in_statuses(
            expected_statuses=constants.STATUS_COMPLETED,
            pod_names=[pod.name for pod in sc_obj.cephfs_logreader_pods],
            timeout=900,
            namespace=constants.STRETCH_CLUSTER_NAMESPACE,
        )
        logger.info("[CephFS] Logreader job pods have reached 'Completed' state!")

        assert sc_obj.check_for_data_corruption(
            label=constants.LOGREADER_CEPHFS_LABEL
        ), "Data is corrupted for cephFS workloads"
        logger.info("No data corruption is seen in CephFS workloads")

        assert sc_obj.check_for_data_corruption(
            label=constants.LOGWRITER_RBD_LABEL
        ), "Data is corrupted for RBD workloads"
        logger.info("No data corruption is seen in RBD workloads")
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
polarion_id,
stretchcluster_required,
turquoise_squad,
tier2,
)
from ocs_ci.helpers.cnv_helpers import cal_md5sum_vm
from ocs_ci.helpers.helpers import modify_deployment_replica_count
Expand Down Expand Up @@ -147,7 +148,7 @@ def setup_cnv_workload(request, cnv_workload_class, setup_cnv):
logger.info("Setting up CNV workload and creating some data")
vm_obj = cnv_workload_class(
volume_interface=constants.VM_VOLUME_PVC, namespace=CNV_WORKLOAD_NAMESPACE
)[0]
)
vm_obj.run_ssh_cmd(command="dd if=/dev/zero of=/file_1.txt bs=1024 count=102400")
md5sum_before = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")

Expand Down Expand Up @@ -181,6 +182,7 @@ def finalizer():
request.addfinalizer(finalizer)


@tier2
@turquoise_squad
@stretchcluster_required
@pytest.mark.usefixtures("setup_cnv_workload")
Expand Down

0 comments on commit afa1627

Please sign in to comment.