Skip to content

Commit

Permalink
Merge pull request #203 from tisnik/waiting-for-pod-in-e2e-tests
Browse files Browse the repository at this point in the history
Waiting for pod in e2e tests
  • Loading branch information
tisnik authored Dec 11, 2024
2 parents aa1cab7 + 816d09d commit fdddb7b
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 116 deletions.
10 changes: 1 addition & 9 deletions tests/e2e/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
read_conversation_history_count,
retrieve_connection,
)
from tests.e2e.utils.retry import retry_until_timeout_or_success
from tests.e2e.utils.wait_for_ols import wait_for_ols
from tests.scripts.must_gather import must_gather

Expand Down Expand Up @@ -202,14 +201,7 @@ def test_forbidden_user():
def test_transcripts_storing_cluster():
"""Test if the transcripts are stored properly."""
transcripts_path = OLS_USER_DATA_PATH + "/transcripts"
r = retry_until_timeout_or_success(
120,
5,
lambda: len(cluster_utils.get_pod_by_prefix(fail_not_found=False)) == 1,
)
if not r:
print("Timed out waiting for new OLS pod to be ready")
return
cluster_utils.wait_for_running_pod()
pod_name = cluster_utils.get_pod_by_prefix()[0]
# disable collector script to avoid interference with the test
cluster_utils.create_file(pod_name, OLS_COLLECTOR_DISABLING_FILE, "")
Expand Down
70 changes: 67 additions & 3 deletions tests/e2e/utils/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import json
import subprocess

from tests.e2e.utils.retry import retry_until_timeout_or_success

OC_COMMAND_RETRY_COUNT = 120


def run_oc(
args: list[str], input=None, ignore_existing_resource=False # noqa: A002
Expand All @@ -25,6 +29,7 @@ def run_oc(
f"Error running oc command {args}: {e}, stdout: {e.output}, stderr: {e.stderr}"
)
raise
return subprocess.CompletedProcess("", 0)


def run_oc_and_store_stdout(
Expand Down Expand Up @@ -171,9 +176,7 @@ def get_pod_by_prefix(
pods = []
try:
result = get_pods(namespace)
for pod in result:
if prefix in pod:
pods.append(pod) # noqa: PERF401
pods = [pod for pod in result if prefix in pod]
if fail_not_found and not pods:
assert False, f"No OLS api server pod found in list pods: {result}"
return pods
Expand Down Expand Up @@ -295,3 +298,64 @@ def remove_file(pod_name: str, path: str) -> None:
run_oc(["exec", pod_name, "--", "rm", path])
except subprocess.CalledProcessError as e:
raise Exception("Error removing file") from e


def wait_for_running_pod(
    name: str = "lightspeed-app-server-", namespace: str = "openshift-lightspeed"
) -> None:
    """Wait for the selected pod to be in running state.

    Three polling stages are run in sequence through
    ``retry_until_timeout_or_success``:

    1. a short poll on the listing of pods in the ``Pending`` phase,
    2. a poll until ``name`` appears in the listing of ``Running`` pods,
    3. a poll until ``get_pod_by_prefix`` returns exactly one pod for ``name``.

    The first two stages are best-effort: their outcome is intentionally not
    checked, and only a failure of the last stage is reported to the caller.

    Args:
        name: Name (prefix) of the pod to wait for.
        namespace: Namespace in which the pods are listed.

    Raises:
        Exception: If the last stage does not succeed, i.e. there is still
            not exactly one matching pod when polling gives up.
    """

    def pods_in_phase(phase: str):
        """Return stdout of ``oc get pods`` restricted to the given phase."""
        return run_oc(
            [
                "get",
                "pods",
                f"--field-selector=status.phase={phase}",
                "-n",
                namespace,
            ]
        ).stdout

    # Stage 1 (best-effort, result ignored).
    # NOTE(review): the condition compares the length of the raw stdout with
    # 1, not a number of pods, so it presumably succeeds very rarely and this
    # stage mostly acts as a short delay -- confirm the intent before changing.
    retry_until_timeout_or_success(
        5,
        3,
        lambda: len(pods_in_phase("Pending")) == 1,
    )

    # Stage 2 (best-effort, result ignored): wait until the pod name shows up
    # among the running pods. A membership test is used instead of
    # ``find(name) > 0`` so that a match at offset 0 is recognised as well.
    retry_until_timeout_or_success(
        OC_COMMAND_RETRY_COUNT,
        6,
        lambda: name in pods_in_phase("Running"),
    )

    # wait for new ols app pod to be created+running
    # there should be exactly one, if we see more than one it may be an old pod
    # and we need to wait for it to go away before progressing so we don't try to
    # interact with it.
    single_pod_found = retry_until_timeout_or_success(
        OC_COMMAND_RETRY_COUNT,
        5,
        lambda: len(
            get_pod_by_prefix(prefix=name, namespace=namespace, fail_not_found=False)
        )
        == 1,
    )
    if not single_pod_found:
        raise Exception("Timed out waiting for new OLS pod to be ready")
7 changes: 7 additions & 0 deletions tests/e2e/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ def __init__(self, client, endpoint, status_code=requests.codes.ok):
self.client = client
self.endpoint = endpoint
self.status_code = status_code
# to be updated when the code enters the "with" block
self.old_counter = None
self.old_duration = None

def __enter__(self):
"""Retrieve old counter value before calling REST API."""
Expand Down Expand Up @@ -224,6 +227,10 @@ def __init__(
# expect change in number of received tokens
self.expect_received_change = expect_received_change

# to be updated when code enters the "with" block
self.old_counter_token_sent_total = None
self.old_counter_token_received_total = None

def __enter__(self):
"""Retrieve old counter values before calling LLM."""
if self.skip_check:
Expand Down
Loading

0 comments on commit fdddb7b

Please sign in to comment.