Make Sonnet 3.7 and R1 play nice (openrouter params fun)
Having this in code is a bit hacky, and I want to move it entirely into config. However, this is a step in that direction. Entirely declarative in ml_model_list (the file we want to become config), and removes logic like "if openrouter". See comments.
scosman committed Mar 3, 2025
1 parent 9169cf4 commit 8a95f4d
Showing 4 changed files with 127 additions and 80 deletions.
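As a rough illustration of the net effect, derived from the diff below rather than additional committed code: a provider entry declared with the new flags, and the OpenRouter request fields the new build_extra_body produces for it. The variable name is made up for the example.

from kiln_ai.adapters.ml_model_list import (
    KilnModelProvider,
    ModelProviderName,
    StructuredOutputMode,
)

# Mirrors the DeepSeek R1 / OpenRouter entry added in ml_model_list.py below.
r1_via_openrouter = KilnModelProvider(
    name=ModelProviderName.openrouter,
    provider_options={"model": "deepseek/deepseek-r1"},
    structured_output_mode=StructuredOutputMode.json_instructions,
    reasoning_capable=True,
    r1_openrouter_options=True,
    require_openrouter_reasoning=True,
)

# The adapter's build_extra_body(r1_via_openrouter), added in
# openai_model_adapter.py below, then yields:
# {
#     "reasoning": {"exclude": False},
#     "provider": {
#         "require_parameters": True,
#         "order": ["Fireworks", "Together"],
#         "ignore": ["DeepInfra"],
#     },
# }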
1 change: 0 additions & 1 deletion libs/core/kiln_ai/adapters/adapter_registry.py
@@ -34,7 +34,6 @@ def adapter_for_task(
api_key=Config.shared().open_router_api_key,
model_name=model_name,
provider_name=provider,
openrouter_style_reasoning=True,
default_headers={
"HTTP-Referer": "https://getkiln.ai/openrouter",
"X-Title": "KilnAI",
127 changes: 77 additions & 50 deletions libs/core/kiln_ai/adapters/ml_model_list.py
@@ -131,6 +131,14 @@ class KilnModelProvider(BaseModel):
reasoning_capable: bool = False
supports_logprobs: bool = False

# TODO P1: Need a more generalized way to handle custom provider parameters.
# Making them quite declarative here for now, isolating provider specific logic
# to this file. Later I should be able to override anything in this file via config.
r1_openrouter_options: bool = False
require_openrouter_reasoning: bool = False
logprobs_openrouter_options: bool = False
openrouter_skip_required_parameters: bool = False


class KilnModel(BaseModel):
"""
Expand Down Expand Up @@ -169,6 +177,7 @@ class KilnModel(BaseModel):
provider_options={"model": "openai/gpt-4o-mini"},
structured_output_mode=StructuredOutputMode.json_schema,
supports_logprobs=True,
logprobs_openrouter_options=True,
),
],
),
@@ -190,6 +199,7 @@ class KilnModel(BaseModel):
provider_options={"model": "openai/gpt-4o"},
structured_output_mode=StructuredOutputMode.json_schema,
supports_logprobs=True,
logprobs_openrouter_options=True,
),
],
),
@@ -244,56 +254,7 @@ class KilnModel(BaseModel):
reasoning_capable=True,
# For reasoning models, we need to use json_instructions with OpenRouter
structured_output_mode=StructuredOutputMode.json_instructions,
),
],
),
# DeepSeek 3
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_3,
friendly_name="DeepSeek v3",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-chat"},
structured_output_mode=StructuredOutputMode.function_calling,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-v3"},
structured_output_mode=StructuredOutputMode.json_mode,
supports_structured_output=True,
supports_data_gen=False,
),
],
),
# DeepSeek R1
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_r1,
friendly_name="DeepSeek R1",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-r1"},
# No custom parser -- openrouter implemented it themselves
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-r1"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
KilnModelProvider(
# I want your RAM
name=ModelProviderName.ollama,
provider_options={"model": "deepseek-r1:671b"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
require_openrouter_reasoning=True,
),
],
),
@@ -429,6 +390,7 @@ class KilnModel(BaseModel):
structured_output_mode=StructuredOutputMode.function_calling_weak,
provider_options={"model": "meta-llama/llama-3.1-70b-instruct"},
supports_logprobs=True,
logprobs_openrouter_options=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -867,6 +829,58 @@ class KilnModel(BaseModel):
),
],
),
# DeepSeek 3
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_3,
friendly_name="DeepSeek V3",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-chat"},
structured_output_mode=StructuredOutputMode.function_calling,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-v3"},
structured_output_mode=StructuredOutputMode.json_mode,
supports_structured_output=True,
supports_data_gen=False,
),
],
),
# DeepSeek R1
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_r1,
friendly_name="DeepSeek R1",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-r1"},
# No custom parser -- openrouter implemented it themselves
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
r1_openrouter_options=True,
require_openrouter_reasoning=True,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-r1"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
KilnModelProvider(
# I want your RAM
name=ModelProviderName.ollama,
provider_options={"model": "deepseek-r1:671b"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
],
),
# DeepSeek R1 Distill Qwen 32B
KilnModel(
family=ModelFamily.deepseek,
@@ -878,6 +892,8 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-qwen-32b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -899,6 +915,8 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-llama-70b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -922,6 +940,9 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-qwen-14b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
openrouter_skip_required_parameters=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -945,6 +966,9 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-llama-8b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
openrouter_skip_required_parameters=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -985,6 +1009,9 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-qwen-1.5b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
openrouter_skip_required_parameters=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -8,4 +8,3 @@ class OpenAICompatibleConfig:
provider_name: str
base_url: str | None = None # Defaults to OpenAI
default_headers: dict[str, str] | None = None
openrouter_style_reasoning: bool = False
78 changes: 50 additions & 28 deletions libs/core/kiln_ai/adapters/model_adapters/openai_model_adapter.py
@@ -9,7 +9,11 @@
)

import kiln_ai.datamodel as datamodel
from kiln_ai.adapters.ml_model_list import ModelProviderName, StructuredOutputMode
from kiln_ai.adapters.ml_model_list import (
KilnModelProvider,
ModelProviderName,
StructuredOutputMode,
)
from kiln_ai.adapters.model_adapters.base_adapter import (
COT_FINAL_ANSWER_PROMPT,
AdapterConfig,
@@ -98,31 +102,8 @@ async def _run(self, input: Dict | str) -> RunOutput:
]
)

# OpenRouter specific options for reasoning models and logprobs.
# TODO: this isn't a good place for this and I should refactor. But big usability improvement so keeping it here for now.
extra_body = {}
require_or_reasoning = (
self.config.openrouter_style_reasoning and provider.reasoning_capable
)
if require_or_reasoning:
extra_body["include_reasoning"] = True
# Filter to providers that support the reasoning parameter
extra_body["provider"] = {
"require_parameters": True,
# Ugly to have these here, but big range of quality of R1 providers
"order": ["Fireworks", "Together"],
# fp8 quants are awful
"ignore": ["DeepInfra"],
}
elif (
self.run_config.model_provider_name == ModelProviderName.openrouter
and self.base_adapter_config.top_logprobs is not None
):
# OpenRouter specific options related to logprobs. Bit of a hack but really does improve usability.
extra_body["provider"] = {
"require_parameters": True,
"ignore": ["DeepInfra"],
}
# Build custom request params based on model provider
extra_body = self.build_extra_body(provider)

# Main completion call
response_format_options = await self.response_format_options()
@@ -156,8 +137,8 @@ async def _run(self, input: Dict | str) -> RunOutput:
if self.base_adapter_config.top_logprobs is not None and logprobs is None:
raise RuntimeError("Logprobs were required, but no logprobs were returned.")

# Save reasoning if it exists (OpenRouter specific format)
if require_or_reasoning:
# Save reasoning if it exists (OpenRouter specific api response field)
if provider.require_openrouter_reasoning:
if (
hasattr(message, "reasoning") and message.reasoning # pyright: ignore
):
@@ -265,3 +246,44 @@ def tool_call_params(self, strict: bool) -> dict[str, Any]:
"function": {"name": "task_response"},
},
}

def build_extra_body(self, provider: KilnModelProvider) -> dict[str, Any]:
# TODO P1: Don't love having this logic here. But it's a usability improvement
# so better to keep it than exclude it. Should figure out how I want to isolate
# this sort of logic so it's config driven and can be overridden

extra_body = {}
provider_options = {}

if provider.require_openrouter_reasoning:
# https://openrouter.ai/docs/use-cases/reasoning-tokens
extra_body["reasoning"] = {
"exclude": False,
}

if provider.r1_openrouter_options:
# Require providers that support the reasoning parameter
provider_options["require_parameters"] = True
# Prefer R1 providers with reasonable perf/quants
provider_options["order"] = ["Fireworks", "Together"]
# R1 providers with unreasonable quants
provider_options["ignore"] = ["DeepInfra"]

# Only apply these options when this request needs logprobs.
if (
provider.logprobs_openrouter_options
and self.base_adapter_config.top_logprobs is not None
):
# Don't let OpenRouter choose a provider that doesn't support logprobs.
provider_options["require_parameters"] = True
# DeepInfra silently fails to return logprobs consistently.
provider_options["ignore"] = ["DeepInfra"]

if provider.openrouter_skip_required_parameters:
# Oddball case: the R1 14B/8B/1.5B distills fail with require_parameters, even though they support the reasoning params.
provider_options["require_parameters"] = False

if len(provider_options) > 0:
extra_body["provider"] = provider_options

return extra_body
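For context on how the returned dict is consumed: the main completion call (elided in the hunk above) presumably forwards it through the OpenAI client's extra_body pass-through, which merges the entries into the request JSON that OpenRouter reads. A minimal sketch, assuming the standard openai Python client, OpenRouter's public base URL, and placeholder values:

from openai import AsyncOpenAI

# Placeholder key; the real adapter reads it from Config.shared().open_router_api_key.
client = AsyncOpenAI(
    api_key="sk-or-...",
    base_url="https://openrouter.ai/api/v1",
)

async def completion_sketch(extra_body: dict) -> str:
    response = await client.chat.completions.create(
        model="deepseek/deepseek-r1",  # slug taken from provider_options in the diff
        messages=[{"role": "user", "content": "Say hi"}],
        # Entries in extra_body are serialized into the request body, so OpenRouter
        # sees "reasoning" and "provider" as top-level fields.
        extra_body=extra_body,
    )
    return response.choices[0].message.content or ""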
