Rename autoscaler parameters and add .update_autoscaler method #2885

Status: Open
Wants to merge 8 commits into base: main
114 changes: 81 additions & 33 deletions modal/_functions.py
@@ -422,18 +422,18 @@ def from_local(
allow_cross_region_volumes: bool = False,
volumes: dict[Union[str, PurePosixPath], Union[_Volume, _CloudBucketMount]] = {},
webhook_config: Optional[api_pb2.WebhookConfig] = None,
cpu: Optional[Union[float, tuple[float, float]]] = None,
memory: Optional[Union[int, tuple[int, int]]] = None,
proxy: Optional[_Proxy] = None,
retries: Optional[Union[int, Retries]] = None,
timeout: Optional[int] = None,
concurrency_limit: Optional[int] = None,
min_containers: Optional[int] = None,
max_containers: Optional[int] = None,
buffer_containers: Optional[int] = None,
scaledown_window: Optional[int] = None,
allow_concurrent_inputs: Optional[int] = None,
batch_max_size: Optional[int] = None,
batch_wait_ms: Optional[int] = None,
container_idle_timeout: Optional[int] = None,
cpu: Optional[Union[float, tuple[float, float]]] = None,
# keep_warm=True is equivalent to keep_warm=1
keep_warm: Optional[int] = None,
cloud: Optional[str] = None,
scheduler_placement: Optional[SchedulerPlacement] = None,
is_builder_function: bool = False,
@@ -447,7 +447,6 @@ def from_local(
ephemeral_disk: Optional[int] = None,
# current default: first-party, future default: main-package
include_source: Optional[bool] = None,
_experimental_buffer_containers: Optional[int] = None,
_experimental_proxy_ip: Optional[str] = None,
_experimental_custom_scaling_factor: Optional[float] = None,
_experimental_enable_gpu_snapshot: bool = False,
@@ -584,20 +583,21 @@ def from_local(
force_build=image.force_build or pf.force_build,
)

if keep_warm is not None and not isinstance(keep_warm, int):
raise TypeError(f"`keep_warm` must be an int or bool, not {type(keep_warm).__name__}")

if (keep_warm is not None) and (concurrency_limit is not None) and concurrency_limit < keep_warm:
# Note that we also do these checks in FunctionCreate; could drop them here
if min_containers is not None and not isinstance(min_containers, int):
raise InvalidError(f"`min_containers` must be an int, not {type(min_containers).__name__}")
if min_containers is not None and max_containers is not None and max_containers < min_containers:
raise InvalidError(
f"Function `{info.function_name}` has `{concurrency_limit=}`, "
f"strictly less than its `{keep_warm=}` parameter."
f"`min_containers` ({min_containers}) cannot be greater than `max_containers` ({max_containers})"
)
if scaledown_window is not None and scaledown_window <= 0:
raise InvalidError("`scaledown_window` must be > 0")

autoscaler_settings = api_pb2.AutoscalerSettings(
max_containers=concurrency_limit,
min_containers=keep_warm,
buffer_containers=_experimental_buffer_containers,
scaledown_window=container_idle_timeout,
min_containers=min_containers,
max_containers=max_containers,
buffer_containers=buffer_containers,
scaledown_window=scaledown_window,
)

if _experimental_custom_scaling_factor is not None and (
@@ -625,9 +625,6 @@ def from_local(
if arg.default is not inspect.Parameter.empty:
raise InvalidError(f"Modal batched function {func_name} does not accept default arguments.")

if container_idle_timeout is not None and container_idle_timeout <= 0:
raise InvalidError("`container_idle_timeout` must be > 0")

if max_inputs is not None:
if not isinstance(max_inputs, int):
raise InvalidError(f"`max_inputs` must be an int, not {type(max_inputs).__name__}")
@@ -793,11 +790,8 @@ async def _load(self: _Function, resolver: Resolver, existing_object_id: Optiona
proxy_id=(proxy.object_id if proxy else None),
retry_policy=retry_policy,
timeout_secs=timeout_secs or 0,
task_idle_timeout_secs=container_idle_timeout or 0,
concurrency_limit=concurrency_limit or 0,
pty_info=pty_info,
cloud_provider_str=cloud if cloud else "",
warm_pool_size=keep_warm or 0,
runtime=config.get("function_runtime"),
runtime_debug=config.get("function_runtime_debug"),
runtime_perf_record=config.get("runtime_perf_record"),
@@ -822,10 +816,15 @@ async def _load(self: _Function, resolver: Resolver, existing_object_id: Optiona
snapshot_debug=config.get("snapshot_debug"),
_experimental_group_size=cluster_size or 0, # Experimental: Clustered functions
_experimental_concurrent_cancellations=True,
_experimental_buffer_containers=_experimental_buffer_containers or 0,
_experimental_proxy_ip=_experimental_proxy_ip,
_experimental_custom_scaling=_experimental_custom_scaling_factor is not None,
_experimental_enable_gpu_snapshot=_experimental_enable_gpu_snapshot,
# --- These are deprecated in favor of autoscaler_settings
warm_pool_size=min_containers or 0,
concurrency_limit=max_containers or 0,
_experimental_buffer_containers=buffer_containers or 0,
task_idle_timeout_secs=scaledown_window or 0,
# ---
)

if isinstance(gpu, list):
@@ -1017,21 +1016,64 @@ async def _load(param_bound_func: _Function, resolver: Resolver, existing_object
fun._spec = self._spec # TODO (elias): fix - this is incorrect when using with_options
return fun

@live_method
async def update_autoscaler(
self,
*,
min_containers: Optional[int] = None,
max_containers: Optional[int] = None,
buffer_containers: Optional[int] = None,
scaledown_window: Optional[int] = None,
) -> None:
"""Override the current autoscaler behavior for this Function.

Unspecified parameters will retain their current value, i.e. either the static value
from the function decorator, or an override value from a previous call to this method.

Subsequent deployments of the App containing this Function will reset the autoscaler back to
its static configuration.

Examples:

```python notest
f = modal.Function.from_name("my-app", "function")

# Always have at least 2 containers running, with an extra buffer when the Function is active
f.update_autoscaler(min_containers=2, buffer_containers=1)

# Limit this Function to avoid consuming all of your workspace's resources
f.update_autoscaler(max_containers=5)
```
"""
# TODO(elias) won't need this check once we refactor methods to not be Function objects
if self._is_method:
raise InvalidError("Cannot call .update_autoscaler() on a method. Call it on the class instance instead.")

settings = api_pb2.AutoscalerSettings(
min_containers=min_containers,
max_containers=max_containers,
buffer_containers=buffer_containers,
scaledown_window=scaledown_window,
)
request = api_pb2.FunctionUpdateSchedulingParamsRequest(function_id=self.object_id, settings=settings)
await retry_transient_errors(self.client.stub.FunctionUpdateSchedulingParams, request)
# One idea would be for FunctionUpdateSchedulingParams to return the current (coalesced) settings
# and then we could return them here (would need some ad hoc dataclass, which I don't love)

@live_method
async def keep_warm(self, warm_pool_size: int) -> None:
"""Set the warm pool size for the function.
"""Set the warm pool size for the Function.

Please exercise care when using this advanced feature!
Setting and forgetting a warm pool on functions can lead to increased costs.
DEPRECATED: Please adapt your code to use the more general `update_autoscaler` method instead:

```python notest
# Usage on a regular function.
f = modal.Function.from_name("my-app", "function")

# Old pattern (deprecated)
f.keep_warm(2)

# Usage on a parametrized function.
Model = modal.Cls.from_name("my-app", "Model")
Model("fine-tuned-model").keep_warm(2) # note that this applies to the class instance, not a method
# New pattern
f.update_autoscaler(min_containers=2)
```
"""
if self._is_method:
@@ -1045,10 +1087,16 @@ async def keep_warm(self, warm_pool_size: int) -> None:
"""
)
)
request = api_pb2.FunctionUpdateSchedulingParamsRequest(
function_id=self.object_id, warm_pool_size_override=warm_pool_size

deprecation_warning(
(2025, 2, 24),
"The .keep_warm() method has been deprecated in favor of the more general "
".update_autoscaler(min_containers=...) method.",
pending=True,
show_source=True,
)
await retry_transient_errors(self.client.stub.FunctionUpdateSchedulingParams, request)

await self.update_autoscaler(min_containers=warm_pool_size)

@classmethod
def _from_name(cls, app_name: str, name: str, namespace, environment_name: Optional[str]):
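For context, a minimal sketch of what the renamed parameters look like from user code, assuming they are also exposed through the standard `@app.function()` decorator (that wiring is not shown in this excerpt); the app and function names here are hypothetical:

```python
import modal

app = modal.App("autoscaler-demo")  # hypothetical app name

@app.function(
    min_containers=2,      # previously keep_warm
    max_containers=10,     # previously concurrency_limit
    buffer_containers=1,   # previously _experimental_buffer_containers
    scaledown_window=300,  # previously container_idle_timeout (seconds)
)
def handler():
    ...

# The static settings can later be overridden at runtime via the new method:
# modal.Function.from_name("autoscaler-demo", "handler").update_autoscaler(max_containers=5)
```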
18 changes: 1 addition & 17 deletions modal/_partial_function.py
@@ -54,7 +54,6 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
flags: _PartialFunctionFlags
webhook_config: Optional[api_pb2.WebhookConfig]
is_generator: bool
keep_warm: Optional[int]
batch_max_size: Optional[int]
batch_wait_ms: Optional[int]
force_build: bool
@@ -67,7 +66,6 @@ def __init__(
flags: _PartialFunctionFlags,
webhook_config: Optional[api_pb2.WebhookConfig] = None,
is_generator: Optional[bool] = None,
keep_warm: Optional[int] = None,
batch_max_size: Optional[int] = None,
batch_wait_ms: Optional[int] = None,
cluster_size: Optional[int] = None, # Experimental: Clustered functions
@@ -84,7 +82,6 @@ def __init__(
final_is_generator = is_generator

self.is_generator = final_is_generator
self.keep_warm = keep_warm
self.wrapped = False # Make sure that this was converted into a FunctionHandle
self.batch_max_size = batch_max_size
self.batch_wait_ms = batch_wait_ms
@@ -141,7 +138,6 @@ def add_flags(self, flags) -> "_PartialFunction":
raw_f=self.raw_f,
flags=(self.flags | flags),
webhook_config=self.webhook_config,
keep_warm=self.keep_warm,
batch_max_size=self.batch_max_size,
batch_wait_ms=self.batch_wait_ms,
force_build=self.force_build,
@@ -198,7 +194,6 @@ def _method(
# Set this to True if it's a non-generator function returning
# a [sync/async] generator object
is_generator: Optional[bool] = None,
keep_warm: Optional[int] = None, # Deprecated: Use keep_warm on @app.cls() instead
) -> _MethodDecoratorType:
"""Decorator for methods that should be transformed into a Modal Function registered against this class's App.

@@ -216,17 +211,6 @@ def f(self):
if _warn_parentheses_missing is not None:
raise InvalidError("Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@method()`.")

if keep_warm is not None:
deprecation_warning(
(2024, 6, 10),
(
"`keep_warm=` is no longer supported per-method on Modal classes. "
"All methods and web endpoints of a class use the same set of containers now. "
"Use keep_warm via the @app.cls() decorator instead. "
),
pending=True,
)

def wrapper(raw_f: Callable[..., Any]) -> _PartialFunction:
nonlocal is_generator
if isinstance(raw_f, _PartialFunction) and raw_f.webhook_config:
@@ -241,7 +225,7 @@ def wrapper(raw_f: Callable[..., Any]) -> _PartialFunction:
"Batched function on classes should not be wrapped by `@method`. "
"Suggestion: remove the `@method` decorator."
)
return _PartialFunction(raw_f, _PartialFunctionFlags.FUNCTION, is_generator=is_generator, keep_warm=keep_warm)
return _PartialFunction(raw_f, _PartialFunctionFlags.FUNCTION, is_generator=is_generator)

return wrapper # type: ignore # synchronicity issue with wrapped vs unwrapped types and protocols

35 changes: 35 additions & 0 deletions modal/_utils/deprecation.py
@@ -87,3 +87,38 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
return wrapper

return decorator


def warn_on_renamed_autoscaler_settings(func: Callable[P, R]) -> Callable[P, R]:
name_map = {
"keep_warm": "min_containers",
"concurrency_limit": "max_containers",
"_experimental_buffer_containers": "buffer_containers",
"container_idle_timeout": "scaledown_window",
}

@functools.wraps(func)
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
mut_kwargs: dict[str, Any] = locals()["kwargs"] # Avoid referencing kwargs directly due to bug in sigtools

substitutions = []
old_params_used = name_map.keys() & mut_kwargs.keys()
for old_param, new_param in name_map.items():
if old_param in old_params_used:
new_param = name_map[old_param]
mut_kwargs[new_param] = mut_kwargs.pop(old_param)
substitutions.append(f"- {old_param} -> {new_param}")

if substitutions:
substitution_string = "\n".join(substitutions)
message = (
"We have renamed several parameters related to autoscaling."
" Please update your code to use the following new names:"
f"\n\n{substitution_string}"
"\n\nSee https://modal.com/docs/guide/modal-1-0-migration for more details."
)
deprecation_warning((2025, 2, 24), message, pending=True, show_source=True)

return func(*args, **kwargs)

return wrapper
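
As a sanity check on the shim's behavior, here is a small usage sketch (not part of this diff) applying the decorator to a hypothetical function that accepts only the new parameter names; because `mut_kwargs` aliases the `kwargs` dict, the renamed keys are forwarded to the wrapped call:

```python
@warn_on_renamed_autoscaler_settings
def configure(*, min_containers=None, max_containers=None, buffer_containers=None, scaledown_window=None):
    # Hypothetical target function, used only to illustrate the key renaming.
    return {"min_containers": min_containers, "max_containers": max_containers}

# Passing an old name emits the pending deprecation warning and forwards the value
# under its new name; this call returns {"min_containers": 2, "max_containers": None}.
configure(keep_warm=2)
```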