Skip to content

Commit

Permalink
fix(manager): support different regions in disrupt_mgmt_restore nemesis
Browse files Browse the repository at this point in the history
All backup snapshots have been duplicated into the eu-west-1 region to make
the disrupt_mgmt_restore test functional in this region as well.

This fix adjusts the YAML file containing the snapshot configuration and the
flow the nemesis uses to choose the snapshot for the test.

(cherry picked from commit 2c6683d)
  • Loading branch information
mikliapko committed Jan 7, 2025
1 parent 17371d9 commit 2c9cccf
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 51 deletions.
142 changes: 99 additions & 43 deletions defaults/manager_persistent_snapshots.yaml
Original file line number Diff line number Diff line change
@@ -1,69 +1,125 @@
aws:
bucket: "manager-backup-tests-permanent-snapshots-us-east-1"
bucket: "manager-backup-tests-permanent-snapshots-{region}"
confirmation_stress_template: "cassandra-stress read cl=QUORUM n={num_of_rows} -schema 'keyspace={keyspace_name} replication(strategy=NetworkTopologyStrategy,replication_factor=3) compaction(strategy=SizeTieredCompactionStrategy)' -mode cql3 native -rate threads=50 -col 'size=FIXED(64) n=FIXED(16)' -pop seq={sequence_start}..{sequence_end}"
snapshots_sizes:
5:
number_of_rows: 5242880
expected_timeout: 1800 # 30 minutes
snapshots:
'sm_20240812150136UTC':
keyspace_name: "5gb_sizetiered_2024_2_0_rc1"
scylla_version: "2024.2.0~rc1"
scylla_product: "enterprise"
number_of_nodes: 3
# Recording cluster_id which is required for snapshots cleanup from the bucket in the future
cluster_id: "36d35f0b-2f9c-4df4-8d24-3a5093cf07d3"
'sm_20240812150350UTC':
keyspace_name: "5gb_sizetiered_6_0"
scylla_version: "6.0.2"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "9b8c43c0-ebbc-4c59-a23f-dbdbeda9d9e0"
us-east-1:
sm_20240812150136UTC:
keyspace_name: "5gb_sizetiered_2024_2_0_rc1"
scylla_version: "2024.2.0~rc1"
scylla_product: "enterprise"
number_of_nodes: 3
# Recording cluster_id which is required for snapshots cleanup from the bucket in the future
cluster_id: "36d35f0b-2f9c-4df4-8d24-3a5093cf07d3"
sm_20240812150350UTC:
keyspace_name: "5gb_sizetiered_6_0"
scylla_version: "6.0.2"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "9b8c43c0-ebbc-4c59-a23f-dbdbeda9d9e0"
eu-west-1:
sm_20241202161158UTC:
keyspace_name: "5gb_sizetiered_2024_2"
scylla_version: "2024.2.0"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "e61259b5-77eb-4cad-8db1-1fc4a6f3991d"
sm_20241203091117UTC:
keyspace_name: "5gb_sizetiered_6_1"
scylla_version: "6.1.4"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "f3e64aed-62db-4bd3-82cc-55c83fed82c8"
10:
number_of_rows: 10485760
expected_timeout: 3600 # 60 minutes
snapshots:
'sm_20240812150753UTC':
keyspace_name: "10gb_sizetiered_2024_2_0_rc1"
scylla_version: "2024.2.0~rc1"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "c1ac4a5f-cb1a-4312-aa00-e9fdddac7afb"
'sm_20240812150801UTC':
keyspace_name: "10gb_sizetiered_6_0"
scylla_version: "6.0.2"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "947e78ed-e988-41d6-92b5-faaf8ad7bbc0"
us-east-1:
sm_20241010102035UTC:
keyspace_name: "10gb_sizetiered_2024_2_0_rc3"
scylla_version: "2024.2.0~rc3"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "c5ae2ea7-72f3-4350-85d4-7956c1837b8a"
sm_20240812150801UTC:
keyspace_name: "10gb_sizetiered_6_0"
scylla_version: "6.0.2"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "947e78ed-e988-41d6-92b5-faaf8ad7bbc0"
eu-west-1:
sm_20241203091417UTC:
keyspace_name: "10gb_sizetiered_2024_2"
scylla_version: "2024.2.0"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "5c17bec0-ee5c-4fc3-93cd-1e5916a8da57"
sm_20241203091651UTC:
keyspace_name: "10gb_sizetiered_6_1"
scylla_version: "6.1.4"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "71878cc5-1268-4e87-b849-c433e4d8a4ed"
100:
number_of_rows: 104857600
expected_timeout: 18000 # 300 minutes
snapshots:
'sm_20240812162646UTC':
keyspace_name: "100gb_sizetiered_2024_2_0_rc1"
scylla_version: "2024.2.0~rc1"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "ebeab8af-cde8-492c-a7ee-d71b88872e4c"
'sm_20240812164539UTC':
keyspace_name: "100gb_sizetiered_6_0"
scylla_version: "6.0.2"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "931ff656-51c0-432c-9495-9b4850061b65"
us-east-1:
sm_20240812162646UTC:
keyspace_name: "100gb_sizetiered_2024_2_0_rc1"
scylla_version: "2024.2.0~rc1"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "ebeab8af-cde8-492c-a7ee-d71b88872e4c"
sm_20240812164539UTC:
keyspace_name: "100gb_sizetiered_6_0"
scylla_version: "6.0.2"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "931ff656-51c0-432c-9495-9b4850061b65"
eu-west-1:
sm_20241203103619UTC:
keyspace_name: "100gb_sizetiered_2024_2"
scylla_version: "2024.2.0"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "a152b771-fe7a-4826-8958-999bd4cb2ea2"
sm_20241203104551UTC:
keyspace_name: "100gb_sizetiered_6_1"
scylla_version: "6.1.4"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "5becc815-1961-4c81-9557-131d308cd67c"
2048:
number_of_rows: 2147483648
expected_timeout: 132000 # 2200 minutes
snapshots:
'sm_20240904154553UTC':
number_of_rows: 2147483648
expected_timeout: 132000 # 2200 minutes
snapshots:
us-east-1:
sm_20240904154553UTC:
keyspace_name: "2tb_sizetiered_2024_2_0_rc1"
scylla_version: "2024.2.0~rc1"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "adb4afb6-27fe-4b26-914e-4f5cc3551955"
'sm_20240905214537UTC':
sm_20240905214537UTC:
keyspace_name: "2tb_sizetiered_6_0"
scylla_version: "6.0.2"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "7fc7ce78-2e7e-4348-a8f7-29ae6494f6c9"
eu-west-1:
sm_20241205173847UTC:
keyspace_name: "2048gb_sizetiered_2024_2"
scylla_version: "2024.2.0"
scylla_product: "enterprise"
number_of_nodes: 3
cluster_id: "cd688eb4-409c-49b7-8165-0f586febde40"
sm_20241205214509UTC:
keyspace_name: "2048gb_sizetiered_6_1"
scylla_version: "6.1.4"
scylla_product: "oss"
number_of_nodes: 3
cluster_id: "f65efbab-415e-40bc-aa3c-1720256a1025"
20 changes: 12 additions & 8 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2987,7 +2987,7 @@ def get_total_scylla_partition_size():
free_space_size = int(result.stdout.split()[1]) / 1024 ** 2 # Converting to GB
return free_space_size

def choose_snapshot(snapshots_dict):
def choose_snapshot(snapshots_dict, region: str):
snapshot_groups_by_size = snapshots_dict["snapshots_sizes"]
total_partition_size = get_total_scylla_partition_size()
all_snapshot_sizes = sorted(list(snapshot_groups_by_size.keys()), reverse=True)
Expand All @@ -3001,15 +3001,16 @@ def choose_snapshot(snapshots_dict):

self.use_nemesis_seed()
chosen_snapshot_size = random.choice(fitting_snapshot_sizes)
all_snapshots_per_region = snapshot_groups_by_size[chosen_snapshot_size]["snapshots"][region]

if self.cluster.nodes[0].is_enterprise:
snapshot_tag = random.choice(list(snapshot_groups_by_size[chosen_snapshot_size]["snapshots"].keys()))
snapshot_tag = random.choice(list(all_snapshots_per_region.keys()))
else:
all_snapshots = snapshot_groups_by_size[chosen_snapshot_size]["snapshots"]
oss_snapshots = [snapshot_key for snapshot_key, snapshot_value in all_snapshots.items() if
oss_snapshots = [snapshot_key for snapshot_key, snapshot_value in all_snapshots_per_region.items() if
snapshot_value['scylla_product'] == "oss"]

snapshot_tag = random.choice(oss_snapshots)
snapshot_info = snapshot_groups_by_size[chosen_snapshot_size]["snapshots"][snapshot_tag]

snapshot_info = all_snapshots_per_region[snapshot_tag]
snapshot_info.update({"expected_timeout": snapshot_groups_by_size[chosen_snapshot_size]["expected_timeout"],
"number_of_rows": snapshot_groups_by_size[chosen_snapshot_size]["number_of_rows"]})
return snapshot_tag, snapshot_info
Expand Down Expand Up @@ -3037,10 +3038,13 @@ def execute_data_validation_thread(command_template, keyspace_name, number_of_ro
cluster_backend = self.cluster.params.get('cluster_backend')
if cluster_backend == 'k8s-eks':
cluster_backend = 'aws'

persistent_manager_snapshots_dict = get_persistent_snapshots()
target_bucket = persistent_manager_snapshots_dict[cluster_backend]["bucket"]
region = self.cluster.params.get('region_name').split()[0]
target_bucket = persistent_manager_snapshots_dict[cluster_backend]["bucket"].format(region=region)
chosen_snapshot_tag, chosen_snapshot_info = (
choose_snapshot(persistent_manager_snapshots_dict[cluster_backend]))
choose_snapshot(snapshots_dict=persistent_manager_snapshots_dict[cluster_backend], region=region)
)

self.log.info("Restoring the keyspace %s", chosen_snapshot_info["keyspace_name"])
location_list = [f"{self.cluster.params.get('backup_bucket_backend')}:{target_bucket}"]
Expand Down

0 comments on commit 2c9cccf

Please sign in to comment.