Make Sonnet 3.7 and R1 play nice (openrouter params fun)
Having this in code is a bit hacky, and I want to move it entirely into config. However, this is a step in that direction. Entirely declarative in ml_model_list (the file we want to become config), and removes logic like "if openrouter". See comments.
scosman committed Mar 3, 2025
1 parent 9169cf4 commit 8a95f4d
Showing 4 changed files with 127 additions and 80 deletions.
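As a rough illustration of the net effect, derived from the diff below rather than additional committed code: a provider entry declared with the new flags, and the OpenRouter request fields the new build_extra_body produces for it. The variable name is made up for the example.

from kiln_ai.adapters.ml_model_list import (
    KilnModelProvider,
    ModelProviderName,
    StructuredOutputMode,
)

# Mirrors the DeepSeek R1 / OpenRouter entry added in ml_model_list.py below.
r1_via_openrouter = KilnModelProvider(
    name=ModelProviderName.openrouter,
    provider_options={"model": "deepseek/deepseek-r1"},
    structured_output_mode=StructuredOutputMode.json_instructions,
    reasoning_capable=True,
    r1_openrouter_options=True,
    require_openrouter_reasoning=True,
)

# The adapter's build_extra_body(r1_via_openrouter), added in
# openai_model_adapter.py below, then yields:
# {
#     "reasoning": {"exclude": False},
#     "provider": {
#         "require_parameters": True,
#         "order": ["Fireworks", "Together"],
#         "ignore": ["DeepInfra"],
#     },
# }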
1 change: 0 additions & 1 deletion libs/core/kiln_ai/adapters/adapter_registry.py
@@ -34,7 +34,6 @@ def adapter_for_task(
api_key=Config.shared().open_router_api_key,
model_name=model_name,
provider_name=provider,
openrouter_style_reasoning=True,
default_headers={
"HTTP-Referer": "https://getkiln.ai/openrouter",
"X-Title": "KilnAI",
127 changes: 77 additions & 50 deletions libs/core/kiln_ai/adapters/ml_model_list.py
@@ -131,6 +131,14 @@ class KilnModelProvider(BaseModel):
reasoning_capable: bool = False
supports_logprobs: bool = False

# TODO P1: Need a more generalized way to handle custom provider parameters.
# Making them quite declarative here for now, isolating provider specific logic
# to this file. Later I should be able to override anything in this file via config.
r1_openrouter_options: bool = False
require_openrouter_reasoning: bool = False
logprobs_openrouter_options: bool = False
openrouter_skip_required_parameters: bool = False


class KilnModel(BaseModel):
"""
Expand Down Expand Up @@ -169,6 +177,7 @@ class KilnModel(BaseModel):
provider_options={"model": "openai/gpt-4o-mini"},
structured_output_mode=StructuredOutputMode.json_schema,
supports_logprobs=True,
logprobs_openrouter_options=True,
),
],
),
@@ -190,6 +199,7 @@ class KilnModel(BaseModel):
provider_options={"model": "openai/gpt-4o"},
structured_output_mode=StructuredOutputMode.json_schema,
supports_logprobs=True,
logprobs_openrouter_options=True,
),
],
),
@@ -244,56 +254,7 @@ class KilnModel(BaseModel):
reasoning_capable=True,
# For reasoning models, we need to use json_instructions with OpenRouter
structured_output_mode=StructuredOutputMode.json_instructions,
),
],
),
# DeepSeek 3
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_3,
friendly_name="DeepSeek v3",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-chat"},
structured_output_mode=StructuredOutputMode.function_calling,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-v3"},
structured_output_mode=StructuredOutputMode.json_mode,
supports_structured_output=True,
supports_data_gen=False,
),
],
),
# DeepSeek R1
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_r1,
friendly_name="DeepSeek R1",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-r1"},
# No custom parser -- openrouter implemented it themselves
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-r1"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
KilnModelProvider(
# I want your RAM
name=ModelProviderName.ollama,
provider_options={"model": "deepseek-r1:671b"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
require_openrouter_reasoning=True,
),
],
),
@@ -429,6 +390,7 @@ class KilnModel(BaseModel):
structured_output_mode=StructuredOutputMode.function_calling_weak,
provider_options={"model": "meta-llama/llama-3.1-70b-instruct"},
supports_logprobs=True,
logprobs_openrouter_options=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -867,6 +829,58 @@ class KilnModel(BaseModel):
),
],
),
# DeepSeek 3
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_3,
friendly_name="DeepSeek V3",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-chat"},
structured_output_mode=StructuredOutputMode.function_calling,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-v3"},
structured_output_mode=StructuredOutputMode.json_mode,
supports_structured_output=True,
supports_data_gen=False,
),
],
),
# DeepSeek R1
KilnModel(
family=ModelFamily.deepseek,
name=ModelName.deepseek_r1,
friendly_name="DeepSeek R1",
providers=[
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={"model": "deepseek/deepseek-r1"},
# No custom parser -- openrouter implemented it themselves
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
r1_openrouter_options=True,
require_openrouter_reasoning=True,
),
KilnModelProvider(
name=ModelProviderName.fireworks_ai,
provider_options={"model": "accounts/fireworks/models/deepseek-r1"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
KilnModelProvider(
# I want your RAM
name=ModelProviderName.ollama,
provider_options={"model": "deepseek-r1:671b"},
parser=ModelParserID.r1_thinking,
structured_output_mode=StructuredOutputMode.json_instructions,
reasoning_capable=True,
),
],
),
# DeepSeek R1 Distill Qwen 32B
KilnModel(
family=ModelFamily.deepseek,
@@ -878,6 +892,8 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-qwen-32b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -899,6 +915,8 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-llama-70b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -922,6 +940,9 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-qwen-14b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
openrouter_skip_required_parameters=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -945,6 +966,9 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-llama-8b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
openrouter_skip_required_parameters=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -985,6 +1009,9 @@
reasoning_capable=True,
structured_output_mode=StructuredOutputMode.json_instructions,
provider_options={"model": "deepseek/deepseek-r1-distill-qwen-1.5b"},
r1_openrouter_options=True,
require_openrouter_reasoning=True,
openrouter_skip_required_parameters=True,
),
KilnModelProvider(
name=ModelProviderName.ollama,
@@ -8,4 +8,3 @@ class OpenAICompatibleConfig:
provider_name: str
base_url: str | None = None # Defaults to OpenAI
default_headers: dict[str, str] | None = None
openrouter_style_reasoning: bool = False
78 changes: 50 additions & 28 deletions libs/core/kiln_ai/adapters/model_adapters/openai_model_adapter.py
@@ -9,7 +9,11 @@
)

import kiln_ai.datamodel as datamodel
from kiln_ai.adapters.ml_model_list import ModelProviderName, StructuredOutputMode
from kiln_ai.adapters.ml_model_list import (
KilnModelProvider,
ModelProviderName,
StructuredOutputMode,
)
from kiln_ai.adapters.model_adapters.base_adapter import (
COT_FINAL_ANSWER_PROMPT,
AdapterConfig,
@@ -98,31 +102,8 @@ async def _run(self, input: Dict | str) -> RunOutput:
]
)

# OpenRouter specific options for reasoning models and logprobs.
# TODO: this isn't a good place for this and I should refactor. But big usability improvement so keeping it here for now.
extra_body = {}
require_or_reasoning = (
self.config.openrouter_style_reasoning and provider.reasoning_capable
)
if require_or_reasoning:
extra_body["include_reasoning"] = True
# Filter to providers that support the reasoning parameter
extra_body["provider"] = {
"require_parameters": True,
# Ugly to have these here, but big range of quality of R1 providers
"order": ["Fireworks", "Together"],
# fp8 quants are awful
"ignore": ["DeepInfra"],
}
elif (
self.run_config.model_provider_name == ModelProviderName.openrouter
and self.base_adapter_config.top_logprobs is not None
):
# OpenRouter specific options related to logprobs. Bit of a hack but really does improve usability.
extra_body["provider"] = {
"require_parameters": True,
"ignore": ["DeepInfra"],
}
# Build custom request params based on model provider
extra_body = self.build_extra_body(provider)

# Main completion call
response_format_options = await self.response_format_options()
@@ -156,8 +137,8 @@ async def _run(self, input: Dict | str) -> RunOutput:
if self.base_adapter_config.top_logprobs is not None and logprobs is None:
raise RuntimeError("Logprobs were required, but no logprobs were returned.")

# Save reasoning if it exists (OpenRouter specific format)
if require_or_reasoning:
# Save reasoning if it exists (OpenRouter specific api response field)
if provider.require_openrouter_reasoning:
if (
hasattr(message, "reasoning") and message.reasoning # pyright: ignore
):
@@ -265,3 +246,44 @@ def tool_call_params(self, strict: bool) -> dict[str, Any]:
"function": {"name": "task_response"},
},
}

def build_extra_body(self, provider: KilnModelProvider) -> dict[str, Any]:
# TODO P1: Don't love having this logic here. But it's a usability improvement
# so better to keep it than exclude it. Should figure out how I want to isolate
# this sort of logic so it's config driven and can be overridden

extra_body = {}
provider_options = {}

if provider.require_openrouter_reasoning:
# https://openrouter.ai/docs/use-cases/reasoning-tokens
extra_body["reasoning"] = {
"exclude": False,
}

if provider.r1_openrouter_options:
# Require providers that support the reasoning parameter
provider_options["require_parameters"] = True
# Prefer R1 providers with reasonable perf/quants
provider_options["order"] = ["Fireworks", "Together"]
# R1 providers with unreasonable quants
provider_options["ignore"] = ["DeepInfra"]

# Only apply these options when this request needs logprobs.
if (
provider.logprobs_openrouter_options
and self.base_adapter_config.top_logprobs is not None
):
# Don't let OpenRouter choose a provider that doesn't support logprobs.
provider_options["require_parameters"] = True
# DeepInfra silently fails to return logprobs consistently.
provider_options["ignore"] = ["DeepInfra"]

if provider.openrouter_skip_required_parameters:
# Oddball case: the R1 14B/8B/1.5B distills fail with require_parameters, even though they support the reasoning params.
provider_options["require_parameters"] = False

if len(provider_options) > 0:
extra_body["provider"] = provider_options

return extra_body
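For context on how the returned dict is consumed: the main completion call (elided in the hunk above) presumably forwards it through the OpenAI client's extra_body pass-through, which merges the entries into the request JSON that OpenRouter reads. A minimal sketch, assuming the standard openai Python client, OpenRouter's public base URL, and placeholder values:

from openai import AsyncOpenAI

# Placeholder key; the real adapter reads it from Config.shared().open_router_api_key.
client = AsyncOpenAI(
    api_key="sk-or-...",
    base_url="https://openrouter.ai/api/v1",
)

async def completion_sketch(extra_body: dict) -> str:
    response = await client.chat.completions.create(
        model="deepseek/deepseek-r1",  # slug taken from provider_options in the diff
        messages=[{"role": "user", "content": "Say hi"}],
        # Entries in extra_body are serialized into the request body, so OpenRouter
        # sees "reasoning" and "provider" as top-level fields.
        extra_body=extra_body,
    )
    return response.choices[0].message.content or ""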
