diff --git a/sky/serve/autoscalers.py b/sky/serve/autoscalers.py
index 68d0c4d5020..ee231b338fe 100644
--- a/sky/serve/autoscalers.py
+++ b/sky/serve/autoscalers.py
@@ -80,9 +80,9 @@ def update_version(self, version: int,
                          f'latest version: {self.latest_version}')
             return
         self.latest_version = version
-        self.min_nodes = spec.min_replicas
-        self.max_nodes = (spec.max_replicas if spec.max_replicas is not None
-                          else spec.min_replicas)
+        self.min_replicas = spec.min_replicas
+        self.max_replicas = (spec.max_replicas if spec.max_replicas is not None
+                             else spec.min_replicas)
         # Reclip self.target_num_replicas with new min and max replicas.
         self.target_num_replicas = max(
             self.min_replicas, min(self.max_replicas, self.target_num_replicas))
diff --git a/tests/skyserve/update/num_min_one.yaml b/tests/skyserve/update/num_min_one.yaml
new file mode 100644
index 00000000000..12d47c5bff1
--- /dev/null
+++ b/tests/skyserve/update/num_min_one.yaml
@@ -0,0 +1,15 @@
+service:
+  readiness_probe:
+    path: /health
+    initial_delay_seconds: 20
+  replica_policy:
+    min_replicas: 1
+
+resources:
+  ports: 8080
+  cloud: gcp
+  cpus: 2+
+
+workdir: examples/serve/http_server
+
+run: python3 server.py --port 8080
diff --git a/tests/skyserve/update/num_min_two.yaml b/tests/skyserve/update/num_min_two.yaml
new file mode 100644
index 00000000000..ebf52bd768e
--- /dev/null
+++ b/tests/skyserve/update/num_min_two.yaml
@@ -0,0 +1,16 @@
+service:
+  readiness_probe:
+    path: /health
+    initial_delay_seconds: 20
+  replica_policy:
+    min_replicas: 2
+
+resources:
+  ports: 8080
+  cloud: gcp
+  cpus: 2+
+
+workdir: examples/serve/http_server
+
+run: python3 server.py --port 8080
+
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 19509bee6c4..f3af7c97abd 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -3088,6 +3088,30 @@ def test_skyserve_update():
     run_one_test(test)
 
 
+@pytest.mark.gcp
+@pytest.mark.sky_serve
+def test_skyserve_update_autoscale():
+    """Test skyserve update from min_replicas 2 to 1 with autoscale."""
+    name = _get_service_name()
+    test = Test(
+        'test-skyserve-update-autoscale',
+        [
+            f'sky serve up -n {name} -y tests/skyserve/update/num_min_two.yaml',
+            _SERVE_WAIT_UNTIL_READY.format(name=name, replica_num=2),
+            f'{_get_serve_endpoint(name)}; curl -L http://$endpoint | grep "Hi, SkyPilot here"',
+            f'sky serve update {name} -y tests/skyserve/update/num_min_one.yaml',
+            # Wait for the update to be registered before checking readiness.
+            'sleep 20',
+            # Timeout will be triggered when update fails.
+            _SERVE_WAIT_UNTIL_READY.format(name=name, replica_num=1),
+            f'{_get_serve_endpoint(name)}; curl -L http://$endpoint | grep "Hi, SkyPilot here!"',
+        ],
+        _TEARDOWN_SERVICE.format(name=name),
+        timeout=20 * 60,
+    )
+    run_one_test(test)
+
+
 # ------- Testing user ray cluster --------
 def test_user_ray_cluster(generic_cloud: str):
     name = _get_cluster_name()