Revert "Config objects as Pydantic BaseModels (#6176)" (#6214)

All-Hands-AI · Jan 13, 2025 · 2347307 · 2347307
1 parent 63133c0
commit 2347307
Show file tree

Hide file tree

Showing 22 changed files with 406 additions and 257 deletions.
diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -80,7 +80,7 @@ def load_dependencies(runtime: Runtime) -> List[str]:
 def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig):
     command = (
         f'SERVER_HOSTNAME={hostname} '
-        f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} '
+        f'LITELLM_API_KEY={env_llm_config.api_key} '
         f'LITELLM_BASE_URL={env_llm_config.base_url} '
         f'LITELLM_MODEL={env_llm_config.model} '
         'bash /utils/init.sh'
@@ -165,7 +165,7 @@ def run_evaluator(
     runtime: Runtime, env_llm_config: LLMConfig, trajectory_path: str, result_path: str
 ):
     command = (
-        f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} '
+        f'LITELLM_API_KEY={env_llm_config.api_key} '
         f'LITELLM_BASE_URL={env_llm_config.base_url} '
         f'LITELLM_MODEL={env_llm_config.model} '
         f"DECRYPTION_KEY='theagentcompany is all you need' "  # Hardcoded Key

diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
@@ -52,6 +52,30 @@ class EvalMetadata(BaseModel):
     details: dict[str, Any] | None = None
     condenser_config: CondenserConfig | None = None
 
+    def model_dump(self, *args, **kwargs):
+        dumped_dict = super().model_dump(*args, **kwargs)
+        # avoid leaking sensitive information
+        dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
+        if hasattr(self.condenser_config, 'llm_config'):
+            dumped_dict['condenser_config']['llm_config'] = (
+                self.condenser_config.llm_config.to_safe_dict()
+            )
+
+        return dumped_dict
+
+    def model_dump_json(self, *args, **kwargs):
+        dumped = super().model_dump_json(*args, **kwargs)
+        dumped_dict = json.loads(dumped)
+        # avoid leaking sensitive information
+        dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
+        if hasattr(self.condenser_config, 'llm_config'):
+            dumped_dict['condenser_config']['llm_config'] = (
+                self.condenser_config.llm_config.to_safe_dict()
+            )
+
+        logger.debug(f'Dumped metadata: {dumped_dict}')
+        return json.dumps(dumped_dict)
+
 
 class EvalOutput(BaseModel):
     # NOTE: User-specified
@@ -74,6 +98,23 @@ class EvalOutput(BaseModel):
     # Optionally save the input test instance
     instance: dict[str, Any] | None = None
 
+    def model_dump(self, *args, **kwargs):
+        dumped_dict = super().model_dump(*args, **kwargs)
+        # Remove None values
+        dumped_dict = {k: v for k, v in dumped_dict.items() if v is not None}
+        # Apply custom serialization for metadata (to avoid leaking sensitive information)
+        if self.metadata is not None:
+            dumped_dict['metadata'] = self.metadata.model_dump()
+        return dumped_dict
+
+    def model_dump_json(self, *args, **kwargs):
+        dumped = super().model_dump_json(*args, **kwargs)
+        dumped_dict = json.loads(dumped)
+        # Apply custom serialization for metadata (to avoid leaking sensitive information)
+        if 'metadata' in dumped_dict:
+            dumped_dict['metadata'] = json.loads(self.metadata.model_dump_json())
+        return json.dumps(dumped_dict)
+
 
 class EvalException(Exception):
     pass
@@ -273,7 +314,7 @@ def update_progress(
     logger.info(
         f'Finished evaluation for instance {result.instance_id}: {str(result.test_result)[:300]}...\n'
     )
-    output_fp.write(result.model_dump_json() + '\n')
+    output_fp.write(json.dumps(result.model_dump()) + '\n')
     output_fp.flush()
 
 

diff --git a/openhands/core/config/README.md b/openhands/core/config/README.md
@@ -37,17 +37,21 @@ export SANDBOX_TIMEOUT='300'
 
 ## Type Handling
 
-The `load_from_env` function attempts to cast environment variable values to the types specified in the models. It handles:
+The `load_from_env` function attempts to cast environment variable values to the types specified in the dataclasses. It handles:
 
 - Basic types (str, int, bool)
 - Optional types (e.g., `str | None`)
-- Nested models
+- Nested dataclasses
 
 If type casting fails, an error is logged, and the default value is retained.
 
 ## Default Values
 
-If an environment variable is not set, the default value specified in the model is used.
+If an environment variable is not set, the default value specified in the dataclass is used.
+
+## Nested Configurations
+
+The `AppConfig` class contains nested configurations like `LLMConfig` and `AgentConfig`. The `load_from_env` function handles these by recursively processing nested dataclasses with updated prefixes.
 
 ## Security Considerations
 

diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py
@@ -1,9 +1,11 @@
-from pydantic import BaseModel, Field
+from dataclasses import dataclass, field, fields
 
 from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig
+from openhands.core.config.config_utils import get_field_info
 
 
-class AgentConfig(BaseModel):
+@dataclass
+class AgentConfig:
     """Configuration for the agent.
 
     Attributes:
@@ -20,13 +22,20 @@ class AgentConfig(BaseModel):
         condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
     """
 
-    codeact_enable_browsing: bool = Field(default=True)
-    codeact_enable_llm_editor: bool = Field(default=False)
-    codeact_enable_jupyter: bool = Field(default=True)
-    micro_agent_name: str | None = Field(default=None)
-    memory_enabled: bool = Field(default=False)
-    memory_max_threads: int = Field(default=3)
-    llm_config: str | None = Field(default=None)
-    use_microagents: bool = Field(default=True)
-    disabled_microagents: list[str] | None = Field(default=None)
-    condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig)
+    codeact_enable_browsing: bool = True
+    codeact_enable_llm_editor: bool = False
+    codeact_enable_jupyter: bool = True
+    micro_agent_name: str | None = None
+    memory_enabled: bool = False
+    memory_max_threads: int = 3
+    llm_config: str | None = None
+    use_microagents: bool = True
+    disabled_microagents: list[str] | None = None
+    condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig)  # type: ignore
+
+    def defaults_to_dict(self) -> dict:
+        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
+        result = {}
+        for f in fields(self):
+            result[f.name] = get_field_info(f)
+        return result
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
@@ -1,20 +1,20 @@
+from dataclasses import dataclass, field, fields, is_dataclass
 from typing import ClassVar
 
-from pydantic import BaseModel, Field, SecretStr
-
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.config.config_utils import (
     OH_DEFAULT_AGENT,
     OH_MAX_ITERATIONS,
-    model_defaults_to_dict,
+    get_field_info,
 )
 from openhands.core.config.llm_config import LLMConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 
 
-class AppConfig(BaseModel):
+@dataclass
+class AppConfig:
     """Configuration for the app.
 
     Attributes:
@@ -46,39 +46,37 @@ class AppConfig(BaseModel):
             input is read line by line. When enabled, input continues until /exit command.
     """
 
-    llms: dict[str, LLMConfig] = Field(default_factory=dict)
-    agents: dict = Field(default_factory=dict)
-    default_agent: str = Field(default=OH_DEFAULT_AGENT)
-    sandbox: SandboxConfig = Field(default_factory=SandboxConfig)
-    security: SecurityConfig = Field(default_factory=SecurityConfig)
-    runtime: str = Field(default='docker')
-    file_store: str = Field(default='local')
-    file_store_path: str = Field(default='/tmp/openhands_file_store')
-    trajectories_path: str | None = Field(default=None)
-    workspace_base: str | None = Field(default=None)
-    workspace_mount_path: str | None = Field(default=None)
-    workspace_mount_path_in_sandbox: str = Field(default='/workspace')
-    workspace_mount_rewrite: str | None = Field(default=None)
-    cache_dir: str = Field(default='/tmp/cache')
-    run_as_openhands: bool = Field(default=True)
-    max_iterations: int = Field(default=OH_MAX_ITERATIONS)
-    max_budget_per_task: float | None = Field(default=None)
-    e2b_api_key: SecretStr | None = Field(default=None)
-    modal_api_token_id: SecretStr | None = Field(default=None)
-    modal_api_token_secret: SecretStr | None = Field(default=None)
-    disable_color: bool = Field(default=False)
-    jwt_secret: SecretStr | None = Field(default=None)
-    debug: bool = Field(default=False)
-    file_uploads_max_file_size_mb: int = Field(default=0)
-    file_uploads_restrict_file_types: bool = Field(default=False)
-    file_uploads_allowed_extensions: list[str] = Field(default_factory=lambda: ['.*'])
-    runloop_api_key: SecretStr | None = Field(default=None)
-    cli_multiline_input: bool = Field(default=False)
+    llms: dict[str, LLMConfig] = field(default_factory=dict)
+    agents: dict = field(default_factory=dict)
+    default_agent: str = OH_DEFAULT_AGENT
+    sandbox: SandboxConfig = field(default_factory=SandboxConfig)
+    security: SecurityConfig = field(default_factory=SecurityConfig)
+    runtime: str = 'docker'
+    file_store: str = 'local'
+    file_store_path: str = '/tmp/openhands_file_store'
+    trajectories_path: str | None = None
+    workspace_base: str | None = None
+    workspace_mount_path: str | None = None
+    workspace_mount_path_in_sandbox: str = '/workspace'
+    workspace_mount_rewrite: str | None = None
+    cache_dir: str = '/tmp/cache'
+    run_as_openhands: bool = True
+    max_iterations: int = OH_MAX_ITERATIONS
+    max_budget_per_task: float | None = None
+    e2b_api_key: str = ''
+    modal_api_token_id: str = ''
+    modal_api_token_secret: str = ''
+    disable_color: bool = False
+    jwt_secret: str = ''
+    debug: bool = False
+    file_uploads_max_file_size_mb: int = 0
+    file_uploads_restrict_file_types: bool = False
+    file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
+    runloop_api_key: str | None = None
+    cli_multiline_input: bool = False
 
     defaults_dict: ClassVar[dict] = {}
 
-    model_config = {'extra': 'forbid'}
-
     def get_llm_config(self, name='llm') -> LLMConfig:
         """'llm' is the name for default config (for backward compatibility prior to 0.8)."""
         if name in self.llms:
@@ -117,7 +115,42 @@ def get_llm_config_from_agent(self, name='agent') -> LLMConfig:
     def get_agent_configs(self) -> dict[str, AgentConfig]:
         return self.agents
 
-    def model_post_init(self, __context):
+    def __post_init__(self):
         """Post-initialization hook, called when the instance is created with only default values."""
-        super().model_post_init(__context)
-        AppConfig.defaults_dict = model_defaults_to_dict(self)
+        AppConfig.defaults_dict = self.defaults_to_dict()
+
+    def defaults_to_dict(self) -> dict:
+        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
+        result = {}
+        for f in fields(self):
+            field_value = getattr(self, f.name)
+
+            # dataclasses compute their defaults themselves
+            if is_dataclass(type(field_value)):
+                result[f.name] = field_value.defaults_to_dict()
+            else:
+                result[f.name] = get_field_info(f)
+        return result
+
+    def __str__(self):
+        attr_str = []
+        for f in fields(self):
+            attr_name = f.name
+            attr_value = getattr(self, f.name)
+
+            if attr_name in [
+                'e2b_api_key',
+                'github_token',
+                'jwt_secret',
+                'modal_api_token_id',
+                'modal_api_token_secret',
+                'runloop_api_key',
+            ]:
+                attr_value = '******' if attr_value else None
+
+            attr_str.append(f'{attr_name}={repr(attr_value)}')
+
+        return f"AppConfig({', '.join(attr_str)}"
+
+    def __repr__(self):
+        return self.__str__()
diff --git a/openhands/core/config/config_utils.py b/openhands/core/config/config_utils.py
@@ -1,22 +1,19 @@
 from types import UnionType
-from typing import Any, get_args, get_origin
-
-from pydantic import BaseModel
-from pydantic.fields import FieldInfo
+from typing import get_args, get_origin
 
 OH_DEFAULT_AGENT = 'CodeActAgent'
 OH_MAX_ITERATIONS = 500
 
 
-def get_field_info(field: FieldInfo) -> dict[str, Any]:
+def get_field_info(f):
     """Extract information about a dataclass field: type, optional, and default.
 
     Args:
-        field: The field to extract information from.
+        f: The field to extract information from.
 
     Returns: A dict with the field's type, whether it's optional, and its default value.
     """
-    field_type = field.annotation
+    field_type = f.type
     optional = False
 
     # for types like str | None, find the non-None type and set optional to True
@@ -36,21 +33,7 @@ def get_field_info(field: FieldInfo) -> dict[str, Any]:
     )
 
     # default is always present
-    default = field.default
+    default = f.default
 
     # return a schema with the useful info for frontend
     return {'type': type_name.lower(), 'optional': optional, 'default': default}
-
-
-def model_defaults_to_dict(model: BaseModel) -> dict[str, Any]:
-    """Serialize field information in a dict for the frontend, including type hints, defaults, and whether it's optional."""
-    result = {}
-    for name, field in model.model_fields.items():
-        field_value = getattr(model, name)
-
-        if isinstance(field_value, BaseModel):
-            result[name] = model_defaults_to_dict(field_value)
-        else:
-            result[name] = get_field_info(field)
-
-    return result