diff --git a/config.template.toml b/config.template.toml
index 8f26eaf92b88..aefb52376803 100644
--- a/config.template.toml
+++ b/config.template.toml
@@ -23,6 +23,9 @@ workspace_base = "./workspace"
 # Cache directory path
 #cache_dir = "/tmp/cache"
 
+# Reasoning effort for o1 models (low, medium, high, or not set)
+#reasoning_effort = "medium"
+
 # Debugging enabled
 #debug = false
 
diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py
index 16c08a7693f0..bae58373811d 100644
--- a/openhands/core/config/llm_config.py
+++ b/openhands/core/config/llm_config.py
@@ -40,6 +40,7 @@ class LLMConfig:
         drop_params: Drop any unmapped (unsupported) params without causing an exception.
         modify_params: Modify params allows litellm to do transformations like adding a default message, when a message is empty.
         disable_vision: If model is vision capable, this option allows to disable image processing (useful for cost reduction).
+        reasoning_effort: The effort to put into reasoning. This is a string that can be one of 'low', 'medium', 'high', or 'none'. Exclusive for o1 models.
         caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
         log_completions: Whether to log LLM completions to the state.
         log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
@@ -79,6 +80,7 @@ class LLMConfig:
     # Note: this setting is actually global, unlike drop_params
     modify_params: bool = True
     disable_vision: bool | None = None
+    reasoning_effort: str | None = None
     caching_prompt: bool = True
     log_completions: bool = False
     log_completions_folder: str = os.path.join(LOG_DIR, 'completions')
diff --git a/openhands/llm/async_llm.py b/openhands/llm/async_llm.py
index ed84273c737b..f553ae173fd6 100644
--- a/openhands/llm/async_llm.py
+++ b/openhands/llm/async_llm.py
@@ -6,7 +6,11 @@
 
 from openhands.core.exceptions import UserCancelledError
 from openhands.core.logger import openhands_logger as logger
-from openhands.llm.llm import LLM, LLM_RETRY_EXCEPTIONS
+from openhands.llm.llm import (
+    LLM,
+    LLM_RETRY_EXCEPTIONS,
+    REASONING_EFFORT_SUPPORTED_MODELS,
+)
 from openhands.utils.shutdown_listener import should_continue
 
 
@@ -55,6 +59,10 @@ async def async_completion_wrapper(*args, **kwargs):
             elif 'messages' in kwargs:
                 messages = kwargs['messages']
 
+            # Set reasoning effort for models that support it
+            if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
+                kwargs['reasoning_effort'] = self.config.reasoning_effort
+
             # ensure we work with a list of messages
             messages = messages if isinstance(messages, list) else [messages]
 
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 743d6535ba3b..82fc6822543f 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -70,7 +70,15 @@
     'claude-3.5-haiku',
     'claude-3-5-haiku-20241022',
     'gpt-4o-mini',
-    'gpt-4o',
+    'o1-2024-12-17',
+]
+
+REASONING_EFFORT_SUPPORTED_MODELS = [
+    'o1-2024-12-17',
+]
+
+MODELS_WITHOUT_STOP_WORDS = [
+    'o1-mini',
 ]
 
 
@@ -186,7 +194,8 @@ def wrapper(*args, **kwargs):
                     messages, kwargs['tools']
                 )
                 kwargs['messages'] = messages
-                kwargs['stop'] = STOP_WORDS
+                if self.config.model not in MODELS_WITHOUT_STOP_WORDS:
+                    kwargs['stop'] = STOP_WORDS
                 mock_fncall_tools = kwargs.pop('tools')
 
             # if we have no messages, something went very wrong
@@ -205,6 +214,10 @@ def wrapper(*args, **kwargs):
                         'anthropic-beta': 'prompt-caching-2024-07-31',
                     }
 
+            # Set reasoning effort for models that support it
+            if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
+                kwargs['reasoning_effort'] = self.config.reasoning_effort
+
             # set litellm modify_params to the configured value
             # True by default to allow litellm to do transformations like adding a default message, when a message is empty
             # NOTE: this setting is global; unlike drop_params, it cannot be overridden in the litellm completion partial
@@ -213,7 +226,6 @@ def wrapper(*args, **kwargs):
             try:
                 # Record start time for latency measurement
                 start_time = time.time()
-
                 # we don't support streaming here, thus we get a ModelResponse
                 resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
 
diff --git a/openhands/llm/streaming_llm.py b/openhands/llm/streaming_llm.py
index 77d999fadcd3..10925b9564cf 100644
--- a/openhands/llm/streaming_llm.py
+++ b/openhands/llm/streaming_llm.py
@@ -5,6 +5,7 @@
 from openhands.core.exceptions import UserCancelledError
 from openhands.core.logger import openhands_logger as logger
 from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
+from openhands.llm.llm import REASONING_EFFORT_SUPPORTED_MODELS
 
 
 class StreamingLLM(AsyncLLM):
@@ -61,6 +62,10 @@ async def async_streaming_completion_wrapper(*args, **kwargs):
                     'The messages list is empty. At least one message is required.'
                 )
 
+            # Set reasoning effort for models that support it
+            if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
+                kwargs['reasoning_effort'] = self.config.reasoning_effort
+
             self.log_prompt(messages)
 
             try:
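
For reference, a minimal standalone sketch of the guard this diff adds in each completion wrapper. `apply_reasoning_effort` is a hypothetical helper introduced only for illustration; the actual change inlines this check in `llm.py`, `async_llm.py`, and `streaming_llm.py` before the kwargs reach litellm.

```python
# Hypothetical standalone illustration of the reasoning_effort guard added above.
# The real change performs this check inline in the completion wrappers.
REASONING_EFFORT_SUPPORTED_MODELS = ['o1-2024-12-17']


def apply_reasoning_effort(
    model: str, reasoning_effort: str | None, kwargs: dict
) -> dict:
    # Only models in the allow-list receive the kwarg; for everything else it is
    # left unset so litellm is not handed a parameter the model does not accept.
    if model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
        kwargs['reasoning_effort'] = reasoning_effort
    return kwargs


print(apply_reasoning_effort('o1-2024-12-17', 'high', {}))  # {'reasoning_effort': 'high'}
print(apply_reasoning_effort('gpt-4o-mini', 'high', {}))    # {}
```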