Rename autoscaler parameters and add .update_autoscaler method #2885

Status: Open
Wants to merge 8 commits into base: main
114 changes: 81 additions & 33 deletions modal/_functions.py
@@ -422,18 +422,18 @@ def from_local(
allow_cross_region_volumes: bool = False,
volumes: dict[Union[str, PurePosixPath], Union[_Volume, _CloudBucketMount]] = {},
webhook_config: Optional[api_pb2.WebhookConfig] = None,
cpu: Optional[Union[float, tuple[float, float]]] = None,
memory: Optional[Union[int, tuple[int, int]]] = None,
proxy: Optional[_Proxy] = None,
retries: Optional[Union[int, Retries]] = None,
timeout: Optional[int] = None,
concurrency_limit: Optional[int] = None,
min_containers: Optional[int] = None,
max_containers: Optional[int] = None,
buffer_containers: Optional[int] = None,
scaledown_window: Optional[int] = None,
allow_concurrent_inputs: Optional[int] = None,
batch_max_size: Optional[int] = None,
batch_wait_ms: Optional[int] = None,
container_idle_timeout: Optional[int] = None,
cpu: Optional[Union[float, tuple[float, float]]] = None,
# keep_warm=True is equivalent to keep_warm=1
keep_warm: Optional[int] = None,
cloud: Optional[str] = None,
scheduler_placement: Optional[SchedulerPlacement] = None,
is_builder_function: bool = False,
@@ -447,7 +447,6 @@ def from_local(
ephemeral_disk: Optional[int] = None,
# current default: first-party, future default: main-package
include_source: Optional[bool] = None,
_experimental_buffer_containers: Optional[int] = None,
_experimental_proxy_ip: Optional[str] = None,
_experimental_custom_scaling_factor: Optional[float] = None,
_experimental_enable_gpu_snapshot: bool = False,
@@ -584,20 +583,21 @@ def from_local(
force_build=image.force_build or pf.force_build,
)

if keep_warm is not None and not isinstance(keep_warm, int):
raise TypeError(f"`keep_warm` must be an int or bool, not {type(keep_warm).__name__}")

if (keep_warm is not None) and (concurrency_limit is not None) and concurrency_limit < keep_warm:
# Note that we also do these checks in FunctionCreate; could drop them here
if min_containers is not None and not isinstance(min_containers, int):
raise InvalidError(f"`min_containers` must be an int, not {type(min_containers).__name__}")
if min_containers is not None and max_containers is not None and max_containers < min_containers:
raise InvalidError(
f"Function `{info.function_name}` has `{concurrency_limit=}`, "
f"strictly less than its `{keep_warm=}` parameter."
f"`min_containers` ({min_containers}) cannot be greater than `max_containers` ({max_containers})"
)
if scaledown_window is not None and scaledown_window <= 0:
raise InvalidError("`scaledown_window` must be > 0")

autoscaler_settings = api_pb2.AutoscalerSettings(
max_containers=concurrency_limit,
min_containers=keep_warm,
buffer_containers=_experimental_buffer_containers,
scaledown_window=container_idle_timeout,
min_containers=min_containers,
max_containers=max_containers,
buffer_containers=buffer_containers,
scaledown_window=scaledown_window,
)

if _experimental_custom_scaling_factor is not None and (
@@ -625,9 +625,6 @@ def from_local(
if arg.default is not inspect.Parameter.empty:
raise InvalidError(f"Modal batched function {func_name} does not accept default arguments.")

if container_idle_timeout is not None and container_idle_timeout <= 0:
raise InvalidError("`container_idle_timeout` must be > 0")

if max_inputs is not None:
if not isinstance(max_inputs, int):
raise InvalidError(f"`max_inputs` must be an int, not {type(max_inputs).__name__}")
@@ -793,11 +790,8 @@ async def _load(self: _Function, resolver: Resolver, existing_object_id: Optiona
proxy_id=(proxy.object_id if proxy else None),
retry_policy=retry_policy,
timeout_secs=timeout_secs or 0,
task_idle_timeout_secs=container_idle_timeout or 0,
concurrency_limit=concurrency_limit or 0,
pty_info=pty_info,
cloud_provider_str=cloud if cloud else "",
warm_pool_size=keep_warm or 0,
runtime=config.get("function_runtime"),
runtime_debug=config.get("function_runtime_debug"),
runtime_perf_record=config.get("runtime_perf_record"),
@@ -822,10 +816,15 @@ async def _load(self: _Function, resolver: Resolver, existing_object_id: Optiona
snapshot_debug=config.get("snapshot_debug"),
_experimental_group_size=cluster_size or 0, # Experimental: Clustered functions
_experimental_concurrent_cancellations=True,
_experimental_buffer_containers=_experimental_buffer_containers or 0,
_experimental_proxy_ip=_experimental_proxy_ip,
_experimental_custom_scaling=_experimental_custom_scaling_factor is not None,
_experimental_enable_gpu_snapshot=_experimental_enable_gpu_snapshot,
# --- These are deprecated in favor of autoscaler_settings
warm_pool_size=min_containers or 0,
concurrency_limit=max_containers or 0,
_experimental_buffer_containers=buffer_containers or 0,
task_idle_timeout_secs=scaledown_window or 0,
# ---
)

if isinstance(gpu, list):
@@ -1017,21 +1016,64 @@ async def _load(param_bound_func: _Function, resolver: Resolver, existing_object
fun._spec = self._spec # TODO (elias): fix - this is incorrect when using with_options
return fun

@live_method
async def update_autoscaler(
self,
*,
min_containers: Optional[int] = None,
max_containers: Optional[int] = None,
buffer_containers: Optional[int] = None,
scaledown_window: Optional[int] = None,
) -> None:
"""Override the current autoscaler behavior for this Function.

Unspecified parameters will retain their current value, i.e. either the static value
from the function decorator, or an override value from a previous call to this method.

Subsequent deployments of the App containing this Function will reset the autoscaler back to
its static configuration.

Examples:

```python notest
f = modal.Function.from_name("my-app", "function")

# Always have at least 2 containers running, with an extra buffer when the Function is active
f.update_autoscaler(min_containers=2, buffer_containers=1)

# Limit this Function to avoid consuming all of your workspace's resources
f.update_autoscaler(max_containers=5)
```
"""
# TODO(elias) won't need this check once we refactor methods to not be Function objects
if self._is_method:
raise InvalidError("Cannot call .update_autoscaler() on a method. Call it on the class instance instead.")

settings = api_pb2.AutoscalerSettings(
min_containers=min_containers,
max_containers=max_containers,
buffer_containers=buffer_containers,
scaledown_window=scaledown_window,
)
request = api_pb2.FunctionUpdateSchedulingParamsRequest(function_id=self.object_id, settings=settings)
await retry_transient_errors(self.client.stub.FunctionUpdateSchedulingParams, request)
# One idea would be for FunctionUpdateSchedulingParams to return the current (coalesced) settings
# and then we could return them here (would need some ad hoc dataclass, which I don't love)

@live_method
async def keep_warm(self, warm_pool_size: int) -> None:
"""Set the warm pool size for the function.
"""Set the warm pool size for the Function.

Please exercise care when using this advanced feature!
Setting and forgetting a warm pool on functions can lead to increased costs.
DEPRECATED: Please adapt your code to use the more general `update_autoscaler` method instead:

```python notest
# Usage on a regular function.
f = modal.Function.from_name("my-app", "function")

# Old pattern (deprecated)
f.keep_warm(2)

# Usage on a parametrized function.
Model = modal.Cls.from_name("my-app", "Model")
Model("fine-tuned-model").keep_warm(2) # note that this applies to the class instance, not a method
# New pattern
f.update_autoscaler(min_containers=2)
```
"""
if self._is_method:
@@ -1045,10 +1087,16 @@ async def keep_warm(self, warm_pool_size: int) -> None:
"""
)
)
request = api_pb2.FunctionUpdateSchedulingParamsRequest(
function_id=self.object_id, warm_pool_size_override=warm_pool_size

deprecation_warning(
(2025, 2, 24),
"The .keep_warm() method has been deprecated in favor of the more general "
".update_autoscaler(min_containers=...) method.",
pending=True,
show_source=True,
)
await retry_transient_errors(self.client.stub.FunctionUpdateSchedulingParams, request)

await self.update_autoscaler(min_containers=warm_pool_size)

@classmethod
def _from_name(cls, app_name: str, name: str, namespace, environment_name: Optional[str]):
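For context, a minimal sketch of what the renamed parameters look like from user code, assuming they are also exposed through the standard `@app.function()` decorator (that wiring is not shown in this excerpt); the app and function names here are hypothetical:

```python
import modal

app = modal.App("autoscaler-demo")  # hypothetical app name

@app.function(
    min_containers=2,      # previously keep_warm
    max_containers=10,     # previously concurrency_limit
    buffer_containers=1,   # previously _experimental_buffer_containers
    scaledown_window=300,  # previously container_idle_timeout (seconds)
)
def handler():
    ...

# The static settings can later be overridden at runtime via the new method:
# modal.Function.from_name("autoscaler-demo", "handler").update_autoscaler(max_containers=5)
```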
18 changes: 1 addition & 17 deletions modal/_partial_function.py
@@ -54,7 +54,6 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
flags: _PartialFunctionFlags
webhook_config: Optional[api_pb2.WebhookConfig]
is_generator: bool
keep_warm: Optional[int]
batch_max_size: Optional[int]
batch_wait_ms: Optional[int]
force_build: bool
@@ -67,7 +66,6 @@ def __init__(
flags: _PartialFunctionFlags,
webhook_config: Optional[api_pb2.WebhookConfig] = None,
is_generator: Optional[bool] = None,
keep_warm: Optional[int] = None,
batch_max_size: Optional[int] = None,
batch_wait_ms: Optional[int] = None,
cluster_size: Optional[int] = None, # Experimental: Clustered functions
@@ -84,7 +82,6 @@ def __init__(
final_is_generator = is_generator

self.is_generator = final_is_generator
self.keep_warm = keep_warm
self.wrapped = False # Make sure that this was converted into a FunctionHandle
self.batch_max_size = batch_max_size
self.batch_wait_ms = batch_wait_ms
@@ -141,7 +138,6 @@ def add_flags(self, flags) -> "_PartialFunction":
raw_f=self.raw_f,
flags=(self.flags | flags),
webhook_config=self.webhook_config,
keep_warm=self.keep_warm,
batch_max_size=self.batch_max_size,
batch_wait_ms=self.batch_wait_ms,
force_build=self.force_build,
@@ -198,7 +194,6 @@ def _method(
# Set this to True if it's a non-generator function returning
# a [sync/async] generator object
is_generator: Optional[bool] = None,
keep_warm: Optional[int] = None, # Deprecated: Use keep_warm on @app.cls() instead
) -> _MethodDecoratorType:
"""Decorator for methods that should be transformed into a Modal Function registered against this class's App.

@@ -216,17 +211,6 @@ def f(self):
if _warn_parentheses_missing is not None:
raise InvalidError("Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@method()`.")

if keep_warm is not None:
deprecation_warning(
(2024, 6, 10),
(
"`keep_warm=` is no longer supported per-method on Modal classes. "
"All methods and web endpoints of a class use the same set of containers now. "
"Use keep_warm via the @app.cls() decorator instead. "
),
pending=True,
)

def wrapper(raw_f: Callable[..., Any]) -> _PartialFunction:
nonlocal is_generator
if isinstance(raw_f, _PartialFunction) and raw_f.webhook_config:
@@ -241,7 +225,7 @@ def wrapper(raw_f: Callable[..., Any]) -> _PartialFunction:
"Batched function on classes should not be wrapped by `@method`. "
"Suggestion: remove the `@method` decorator."
)
return _PartialFunction(raw_f, _PartialFunctionFlags.FUNCTION, is_generator=is_generator, keep_warm=keep_warm)
return _PartialFunction(raw_f, _PartialFunctionFlags.FUNCTION, is_generator=is_generator)

return wrapper # type: ignore # synchronicity issue with wrapped vs unwrapped types and protocols

35 changes: 35 additions & 0 deletions modal/_utils/deprecation.py
@@ -87,3 +87,38 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
return wrapper

return decorator


def warn_on_renamed_autoscaler_settings(func: Callable[P, R]) -> Callable[P, R]:
name_map = {
"keep_warm": "min_containers",
"concurrency_limit": "max_containers",
"_experimental_buffer_containers": "buffer_containers",
"container_idle_timeout": "scaledown_window",
}

@functools.wraps(func)
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
mut_kwargs: dict[str, Any] = locals()["kwargs"] # Avoid referencing kwargs directly due to bug in sigtools

substitutions = []
old_params_used = name_map.keys() & mut_kwargs.keys()
for old_param, new_param in name_map.items():
if old_param in old_params_used:
new_param = name_map[old_param]
mut_kwargs[new_param] = mut_kwargs.pop(old_param)
substitutions.append(f"- {old_param} -> {new_param}")

if substitutions:
substitution_string = "\n".join(substitutions)
message = (
"We have renamed several parameters related to autoscaling."
" Please update your code to use the following new names:"
f"\n\n{substitution_string}"
"\n\nSee https://modal.com/docs/guide/modal-1-0-migration for more details."
)
deprecation_warning((2025, 2, 24), message, pending=True, show_source=True)

return func(*args, **kwargs)

return wrapper
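
As a sanity check on the shim's behavior, here is a small usage sketch (not part of this diff) applying the decorator to a hypothetical function that accepts only the new parameter names; because `mut_kwargs` aliases the `kwargs` dict, the renamed keys are forwarded to the wrapped call:

```python
@warn_on_renamed_autoscaler_settings
def configure(*, min_containers=None, max_containers=None, buffer_containers=None, scaledown_window=None):
    # Hypothetical target function, used only to illustrate the key renaming.
    return {"min_containers": min_containers, "max_containers": max_containers}

# Passing an old name emits the pending deprecation warning and forwards the value
# under its new name; this call returns {"min_containers": 2, "max_containers": None}.
configure(keep_warm=2)
```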