Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gemini-cmd): broken gemini commands on upgrades #9959

Merged
1 commit merged on Feb 13, 2025
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion defaults/docker_images/gemini/values_gemini.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
gemini:
image: scylladb/gemini:1.8.9
image: scylladb/gemini:1.8.10
81 changes: 39 additions & 42 deletions sdcm/gemini_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from sdcm.stress_thread import DockerBasedStressThread
from sdcm.utils.docker_remote import RemoteDocker


LOGGER = logging.getLogger(__name__)


Expand Down Expand Up @@ -59,7 +58,6 @@ def run(self):


class GeminiStressThread(DockerBasedStressThread): # pylint: disable=too-many-instance-attributes

DOCKER_IMAGE_PARAM_NAME = "stress_image.gemini"

def __init__(self, test_cluster, oracle_cluster, loaders, stress_cmd, timeout=None, params=None): # pylint: disable=too-many-arguments
Expand Down Expand Up @@ -105,9 +103,9 @@ def __init__(self, test_cluster, oracle_cluster, loaders, stress_cmd, timeout=No
self.gemini_result_file = f"gemini_result_{self.unique_id}.log"

def _generate_gemini_command(self):
seed = self.params.get('gemini_seed') or random.randint(1, 100)
table_options = self.params.get('gemini_table_options')
log_statements = self.params.get('gemini_log_cql_statements') or False
seed = self.params.get("gemini_seed") or random.randint(1, 100)
table_options = self.params.get("gemini_table_options")
log_statements = self.params.get("gemini_log_cql_statements") or False

test_nodes = ",".join(self.test_cluster.get_node_cql_ips())
oracle_nodes = ",".join(self.oracle_cluster.get_node_cql_ips())
Expand All @@ -119,7 +117,7 @@ def _generate_gemini_command(self):
--seed={seed} \
--schema-seed={seed} \
--profiling-port=6060 \
--bind=0.0.0.0:2121 \
--bind=0.0.0.0:2112 \
--outfile=/{self.gemini_result_file} \
--replication-strategy=\"{{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}}\" \
--oracle-replication-strategy=\"{{'class': 'NetworkTopologyStrategy', 'replication_factor': '1'}}\" "
Expand All @@ -130,16 +128,16 @@ def _generate_gemini_command(self):

credentials = self.loader_set.get_db_auth()

if credentials and '--test-username' not in cmd:
if credentials and "--test-username" not in cmd:
cmd += f"--test-username={credentials[0]} \
--test-password={credentials[1]} \
--oracle-username={credentials[0]} \
--oracle-password={credentials[1]} "

if table_options:
cmd += " ".join([f"--table-options=\"{table_opt}\"" for table_opt in table_options])
cmd += " ".join([f'--table-options="{table_opt}"' for table_opt in table_options])

stress_cmd = self.stress_cmd.replace('\n', ' ').strip()
stress_cmd = self.stress_cmd.replace("\n", " ").strip()

for key, value in self.gemini_default_flags.items():
if not key in stress_cmd:
Expand All @@ -156,35 +154,35 @@ def _run_stress(self, loader, loader_idx, cpu_idx):
docker = cleanup_context = RemoteDocker(
loader,
self.docker_image_name,
extra_docker_opts=f'--cpuset-cpus="{cpu_idx}"' if self.stress_num > 1 else ""
'--label shell_marker={self.shell_marker}'
'--network=host '
'--security-opt seccomp=unconfined '
extra_docker_opts=f'--cpuset-cpus="{cpu_idx}" '
if self.stress_num > 1
else ""
"--network=host "
"--security-opt seccomp=unconfined "
'--entrypoint="" '
f'-v $HOME/{self.gemini_result_file}:/{self.gemini_result_file} '
f'-v $HOME/{self.gemini_test_statements_file}:/{self.gemini_test_statements_file} '
f'-v $HOME/{self.gemini_oracle_statements_file}:/{self.gemini_oracle_statements_file} '
f"--label shell_marker={self.shell_marker} "
f"-v $HOME/{self.gemini_result_file}:/{self.gemini_result_file} "
f"-v $HOME/{self.gemini_test_statements_file}:/{self.gemini_test_statements_file} "
f"-v $HOME/{self.gemini_oracle_statements_file}:/{self.gemini_oracle_statements_file} ",
)

if not os.path.exists(loader.logdir):
os.makedirs(loader.logdir, exist_ok=True)
log_file_name = os.path.join(loader.logdir, 'gemini-l%s-c%s-%s.log' %
(loader_idx, cpu_idx, uuid.uuid4()))
LOGGER.debug('gemini local log: %s', log_file_name)
log_file_name = os.path.join(loader.logdir, "gemini-l%s-c%s-%s.log" % (loader_idx, cpu_idx, uuid.uuid4()))
LOGGER.debug("gemini local log: %s", log_file_name)

gemini_cmd = self._generate_gemini_command()
with cleanup_context, \
GeminiEventsPublisher(node=loader, gemini_log_filename=log_file_name) as publisher, \
GeminiStressEvent(node=loader, cmd=gemini_cmd, log_file_name=log_file_name) as gemini_stress_event:
with cleanup_context, GeminiEventsPublisher(node=loader, gemini_log_filename=log_file_name) as publisher, GeminiStressEvent(node=loader, cmd=gemini_cmd, log_file_name=log_file_name) as gemini_stress_event:
try:
publisher.event_id = gemini_stress_event.event_id
gemini_stress_event.log_file_name = log_file_name
result = docker.run(cmd=gemini_cmd,
timeout=self.timeout,
ignore_status=False,
log_file=log_file_name,
retry=0,
)
result = docker.run(
cmd=gemini_cmd,
timeout=self.timeout,
ignore_status=False,
log_file=log_file_name,
retry=0,
)
# sleep to gather all latest log messages
time.sleep(5)
except Exception as details: # pylint: disable=broad-except # noqa: BLE001
Expand Down Expand Up @@ -227,34 +225,33 @@ def get_gemini_results(self):

@staticmethod
def verify_gemini_results(results):

stats = {'results': [], 'errors': {}}
stats = {"results": [], "errors": {}}
if not results:
LOGGER.error('Gemini results are not found')
stats['status'] = 'FAILED'
LOGGER.error("Gemini results are not found")
stats["status"] = "FAILED"
else:
for res in results:
stats['results'].append(res)
for err_type in ['write_errors', 'read_errors', 'errors']:
stats["results"].append(res)
for err_type in ["write_errors", "read_errors", "errors"]:
if res.get(err_type, None):
LOGGER.error("Gemini {} errors: {}".format(err_type, res[err_type]))
stats['status'] = 'FAILED'
stats['errors'][err_type] = res[err_type]
if not stats.get('status'):
stats['status'] = "PASSED"
stats["status"] = "FAILED"
stats["errors"][err_type] = res[err_type]
if not stats.get("status"):
stats["status"] = "PASSED"

return stats

@staticmethod
def _parse_gemini_summary_json(json_str):
results = {'result': {}}
results = {"result": {}}
try:
results = json.loads(json_str)

except Exception as details: # pylint: disable=broad-except # noqa: BLE001
LOGGER.error("Invalid json document {}".format(details))

return results.get('result')
return results.get("result")

@staticmethod
def _parse_gemini_summary(lines):
Expand All @@ -263,7 +260,7 @@ def _parse_gemini_summary(lines):

for line in lines:
line.strip()
if 'Results:' in line:
if "Results:" in line:
enable_parse = True
continue
if "run completed" in line:
Expand All @@ -272,7 +269,7 @@ def _parse_gemini_summary(lines):
if not enable_parse:
continue

split_idx = line.index(':')
split_idx = line.index(":")
key = line[:split_idx].strip()
value = line[split_idx + 1:].split()[0]
results[key] = int(value)
Expand Down
12 changes: 11 additions & 1 deletion test-cases/cdc/cdc-15m-replication-gemini.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,17 @@ nemesis_interval: 1
# Required by the nemesis:
extra_network_interface: true

gemini_cmd: "gemini -d --duration 15m --warmup 0s -c 5 -m write --non-interactive --cql-features basic --max-mutation-retries 100 --max-mutation-retries-backoff 100ms --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" --table-options \"cdc = {'enabled': true}\""
gemini_cmd: |
--duration 15m
--warmup 0s
--concurrency 5
--mode write
--cql-features basic
--max-mutation-retries 100
--max-mutation-retries-backoff 100ms
gemini_table_options:
- "cdc={'enabled': true}"


# Required by SCT, although not used:
Expand Down
13 changes: 11 additions & 2 deletions test-cases/cdc/cdc-15m-replication-postimage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,17 @@ extra_network_interface: true

# Note: for preimage and postimage we use 1 thread because there is no sensible way
# to test pre/post-images with concurrent writes happening to a single row.
gemini_cmd: "gemini -d --duration 15m --warmup 0s -c 1 -m write --non-interactive --cql-features basic --max-mutation-retries 100 --max-mutation-retries-backoff 100ms --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" --table-options \"cdc = {'enabled': true, 'postimage': true}\""

gemini_cmd: |
--duration 15m
--warmup 0s
--concurrency 1
--mode write
--cql-features basic
--max-mutation-retries 100
--max-mutation-retries-backoff 100ms
gemini_table_options:
- "cdc = {'enabled': true, 'postimage': true}"

# Required by SCT, although not used:
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json'
13 changes: 11 additions & 2 deletions test-cases/cdc/cdc-15m-replication-preimage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,17 @@ extra_network_interface: true

# Note: for preimage and postimage we use 1 thread because there is no sensible way
# to test pre/post-images with concurrent writes happening to a single row.
gemini_cmd: "gemini -d --duration 15m --warmup 0s -c 1 -m write --non-interactive --cql-features basic --max-mutation-retries 100 --max-mutation-retries-backoff 100ms --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" --table-options \"cdc = {'enabled': true, 'preimage': 'full'}\""

gemini_cmd: |
--duration 15m
--warmup 0s
--concurrency 1
--mode write
--cql-features basic
--max-mutation-retries 100
--max-mutation-retries-backoff 100ms
gemini_table_options:
- "cdc = {'enabled': true, 'preimage': 'full'}"

# Required by SCT, although not used:
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json'
14 changes: 12 additions & 2 deletions test-cases/cdc/cdc-replication-longevity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,18 @@ nemesis_interval: 5

extra_network_interface: True

gemini_cmd: "gemini --duration 30m --warmup 0s -c 4 -m write --non-interactive --cql-features basic --max-mutation-retries 100 --max-mutation-retries-backoff 100ms --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" --table-options \"cdc = {'enabled': true, 'ttl': 0}\" --use-server-timestamps --test-host-selection-policy token-aware"

gemini_cmd: |
--duration 30m
--warmup 0s
--concurrency 4
--mode write
--cql-features basic
--max-mutation-retries 100
--max-mutation-retries-backoff 100ms
--use-server-timestamps
gemini_table_options:
- "cdc = {'enabled': true, 'ttl': 0}"

# Required by SCT, although not used:
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json'
14 changes: 8 additions & 6 deletions test-cases/upgrades/rolling-upgrade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ authenticator_password: 'cassandra'

use_mgmt: false

gemini_cmd: "gemini -d --duration 2h \
-c 10 -m write -f --non-interactive --cql-features normal \
--max-mutation-retries 5 --max-mutation-retries-backoff 500ms \
--async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \
--replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" \
--test-username cassandra --test-password cassandra"
gemini_cmd: |
--duration 2h
--concurrency 10
--mode write
--max-mutation-retries 5
--max-mutation-retries-backoff 500ms
--async-objects-stabilization-attempts 5
--async-objects-stabilization-backoff 500ms
gemini_seed: 66

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,19 @@ n_monitor_nodes: 1
nemesis_class_name: 'CategoricalMonkey'
nemesis_interval: 5

gemini_cmd: "gemini --duration 30m --warmup 0s -c 4 -m write --non-interactive --cql-features basic --max-mutation-retries 100 --max-mutation-retries-backoff 100ms --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" --table-options \"cdc = {'enabled': true, 'ttl': 0}\" --use-server-timestamps --test-host-selection-policy token-aware"
gemini_cmd: |
--duration 30m
--warmup 0s
--concurrency 4
--mode write
--cql-features basic
--max-mutation-retries 100
--max-mutation-retries-backoff 100ms
--use-server-timestamps
--test-host-selection-policy token-aware

gemini_table_options:
- "cdc={'enabled':true,'ttl':0}"

# Required by SCT, although not used:
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,18 @@ authenticator_password: 'cassandra'

use_mgmt: false

gemini_cmd: "gemini -d --duration 2h \
-c 10 -m write -f --non-interactive --cql-features normal \
--max-mutation-retries 5 --max-mutation-retries-backoff 500ms \
--async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \
--replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" \
--table-options \"cdc={'enabled': true}\" --test-username cassandra --test-password cassandra"
gemini_cmd: |
--duration 2h
--concurrency 10
--mode write
--cql-features normal
--max-mutation-retries 5
--max-mutation-retries-backoff 500ms
--async-objects-stabilization-attempts 5
--async-objects-stabilization-backoff 500ms
gemini_table_options:
- "cdc={'enabled': true}"

gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json' # currently is not used

Expand Down