From 0c6cab3909855d0d6f7a9d8c1d2389d360f46916 Mon Sep 17 00:00:00 2001 From: Yingjie He Date: Wed, 15 Jan 2025 20:27:05 -0800 Subject: [PATCH 1/3] Enable prompt caching for Anthropic --- crates/goose/src/providers/anthropic.rs | 51 +++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/crates/goose/src/providers/anthropic.rs b/crates/goose/src/providers/anthropic.rs index c44398efc..ff292a4d7 100644 --- a/crates/goose/src/providers/anthropic.rs +++ b/crates/goose/src/providers/anthropic.rs @@ -62,9 +62,25 @@ impl AnthropicProvider { } } + // Add "cache_control" to the last tool spec, if any. This means that all tool definitions, + // will be cached as a single prefix. + if let Some(last_tool) = tool_specs.last_mut() { + last_tool.as_object_mut() + .unwrap() + .insert("cache_control".to_string(), json!({ "type": "ephemeral" })); + } + tool_specs } + fn system_to_anthropic_spec(system: &str) -> Value { + json!([{ + "type": "text", + "text": system, + "cache_control": { "type": "ephemeral" } + }]) + } + fn messages_to_anthropic_spec(messages: &[Message]) -> Vec<Value> { let mut anthropic_messages = Vec::new(); @@ -134,6 +150,30 @@ impl AnthropicProvider { "text": "Ignore" }] })); + } + + // Add "cache_control" to the last and second-to-last "user" messages. + // During each turn, we mark the final message with cache_control so the conversation can be + // incrementally cached. The second-to-last user message is also marked for caching with the + // cache_control parameter, so that this checkpoint can read from the previous cache. 
+ let mut user_count = 0; + for message in anthropic_messages.iter_mut().rev() { + if message.get("role") == Some(&json!("user")) { + if let Some(content) = message.get_mut("content") { + if let Some(content_array) = content.as_array_mut() { + for content_item in content_array { + content_item + .as_object_mut() + .unwrap() + .insert("cache_control".to_string(), json!({ "type": "ephemeral" })); + } + } + } + user_count += 1; + if user_count >= 2 { + break; + } + } } anthropic_messages @@ -225,6 +265,7 @@ impl Provider for AnthropicProvider { ) -> Result<(Message, ProviderUsage)> { let anthropic_messages = Self::messages_to_anthropic_spec(messages); let tool_specs = Self::tools_to_anthropic_spec(tools); + let system_spec = Self::system_to_anthropic_spec(system); // Check if we have any messages to send if anthropic_messages.is_empty() { @@ -242,7 +283,7 @@ impl Provider for AnthropicProvider { payload .as_object_mut() .unwrap() - .insert("system".to_string(), json!(system)); + .insert("system".to_string(), json!(system_spec)); } // Add tools if present @@ -348,7 +389,9 @@ mod tests { "stop_sequence": null, "usage": { "input_tokens": 12, - "output_tokens": 15 + "output_tokens": 15, + "cache_creation_input_tokens": 12, + "cache_read_input_tokens": 0 } }); @@ -394,7 +437,9 @@ mod tests { "stop_sequence": null, "usage": { "input_tokens": 15, - "output_tokens": 20 + "output_tokens": 20, + "cache_creation_input_tokens": 15, + "cache_read_input_tokens": 0, } }); From e415dd03c9147ef4bc92e939722a50899ce2f957 Mon Sep 17 00:00:00 2001 From: Yingjie He Date: Thu, 16 Jan 2025 10:54:55 -0800 Subject: [PATCH 2/3] add cache control to the last content --- crates/goose/src/providers/anthropic.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/goose/src/providers/anthropic.rs b/crates/goose/src/providers/anthropic.rs index ff292a4d7..855c07b78 100644 --- a/crates/goose/src/providers/anthropic.rs +++ b/crates/goose/src/providers/anthropic.rs @@ -161,8 
+161,8 @@ impl AnthropicProvider { if message.get("role") == Some(&json!("user")) { if let Some(content) = message.get_mut("content") { if let Some(content_array) = content.as_array_mut() { - for content_item in content_array { - content_item + if let Some(last_content) = content_array.last_mut() { + last_content .as_object_mut() .unwrap() .insert("cache_control".to_string(), json!({ "type": "ephemeral" })); From e5323b020ba9aa4ae63a944e0597fe79cc5ac1f5 Mon Sep 17 00:00:00 2001 From: Yingjie He Date: Thu, 16 Jan 2025 11:00:15 -0800 Subject: [PATCH 3/3] format code --- crates/goose/src/providers/anthropic.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/goose/src/providers/anthropic.rs b/crates/goose/src/providers/anthropic.rs index 855c07b78..67b4ae5e9 100644 --- a/crates/goose/src/providers/anthropic.rs +++ b/crates/goose/src/providers/anthropic.rs @@ -62,10 +62,11 @@ impl AnthropicProvider { } } - // Add "cache_control" to the last tool spec, if any. This means that all tool definitions, + // Add "cache_control" to the last tool spec, if any. This means that all tool definitions, // will be cached as a single prefix. if let Some(last_tool) = tool_specs.last_mut() { - last_tool.as_object_mut() + last_tool + .as_object_mut() .unwrap() .insert("cache_control".to_string(), json!({ "type": "ephemeral" })); } @@ -150,11 +151,11 @@ impl AnthropicProvider { "text": "Ignore" }] })); - } + } // Add "cache_control" to the last and second-to-last "user" messages. - // During each turn, we mark the final message with cache_control so the conversation can be - // incrementally cached. The second-to-last user message is also marked for caching with the + // During each turn, we mark the final message with cache_control so the conversation can be + // incrementally cached. The second-to-last user message is also marked for caching with the // cache_control parameter, so that this checkpoint can read from the previous cache. 
let mut user_count = 0; for message in anthropic_messages.iter_mut().rev() { @@ -162,10 +163,10 @@ impl AnthropicProvider { if let Some(content) = message.get_mut("content") { if let Some(content_array) = content.as_array_mut() { if let Some(last_content) = content_array.last_mut() { - last_content - .as_object_mut() - .unwrap() - .insert("cache_control".to_string(), json!({ "type": "ephemeral" })); + last_content.as_object_mut().unwrap().insert( + "cache_control".to_string(), + json!({ "type": "ephemeral" }), + ); } } }