Skip to content

Commit

Permalink
anthropic: support caching
Browse files Browse the repository at this point in the history
  • Loading branch information
lofcz committed Jan 24, 2025
1 parent d3bc768 commit d97f2a9
Show file tree
Hide file tree
Showing 12 changed files with 525 additions and 181 deletions.
173 changes: 165 additions & 8 deletions LlmTornado.Demo/ChatDemo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using LlmTornado.Chat;
using LlmTornado.Chat.Models;
using LlmTornado.Chat.Plugins;
using LlmTornado.Chat.Vendors.Anthropic;
using LlmTornado.Chat.Vendors.Cohere;
using LlmTornado.ChatFunctions;
using LlmTornado.Code;
Expand Down Expand Up @@ -113,10 +114,7 @@ public static async Task<bool> ChatFunctionRequired()
Conversation chat = Program.Connect().Chat.CreateConversation(new ChatRequest
{
Model = ChatModel.OpenAi.Gpt4.O241120,
Tools = new List<Tool>
{
new Tool(new ToolFunction("get_weather", "gets the current weather"), true)
},
Tools = [new Tool(new ToolFunction("get_weather", "gets the current weather"), true)],
ToolChoice = new OutboundToolChoice(OutboundToolChoiceModes.Required)
});
chat.AppendUserInput("Who are you?"); // user asks something unrelated, but we force the model to use the tool
Expand Down Expand Up @@ -210,16 +208,175 @@ public static async Task<bool> ChatFunctionGeminiStrict()
return false;
}

/// <summary>
/// Interactive demo: Anthropic prompt caching in a multi-turn, tool-calling chat.
/// Marks the last two user messages with ephemeral cache control so each turn
/// both reads the cache written on the previous turn and primes it for the next one.
/// </summary>
public static async Task AnthropicCachingChat()
{
    // Fixed: this demo targets an Anthropic model with Anthropic-only extensions,
    // so it must connect to the Anthropic provider (was LLmProviders.Cohere).
    Conversation chat = Program.Connect(LLmProviders.Anthropic).Chat.CreateConversation(new ChatRequest
    {
        Model = ChatModel.Anthropic.Claude35.SonnetLatest,
        Tools =
        [
            new Tool(new ToolFunction("get_weather", "gets the current weather", new
            {
                type = "object",
                properties = new
                {
                    location = new
                    {
                        type = "string",
                        description = "The location for which the weather information is required."
                    }
                },
                required = new List<string> { "location" }
            }))
            {
                // Ask Anthropic to cache the tool definition as well.
                VendorExtensions = new ToolVendorExtensions(new AnthropicToolVendorExtensions
                {
                    Cache = AnthropicCacheSettings.Ephemeral
                })
            }
        ],
        VendorExtensions = new ChatRequestVendorExtensions(new ChatRequestVendorAnthropicExtensions
        {
            OutboundRequest = (sys, msgs, tools) =>
            {
                // We need to mark the last user message and the second-last user message
                // as cached (for hitting the cache & setting it for the next turn).
                int marked = 0;

                for (int i = msgs.Count - 1; i >= 0; i--)
                {
                    VendorAnthropicChatRequestMessageContent msg = msgs[i];

                    if (msg.Role is ChatMessageRoles.User)
                    {
                        if (msg.Parts.Count > 0)
                        {
                            // Cache control is attached to the first part of the message.
                            msg.Parts[0].VendorExtensions = new ChatMessagePartAnthropicExtensions
                            {
                                Cache = AnthropicCacheSettings.Ephemeral
                            };

                            marked++;

                            if (marked is 2)
                            {
                                break;
                            }
                        }
                    }
                }
            }
        })
    });

    // Once tool calls are resolved, stream the follow-up assistant answer.
    chat.OnAfterToolsCall = async (result) =>
    {
        Console.WriteLine();
        await chat.StreamResponse(Console.Write);
    };

    chat.AppendUserInput("fetch me the weather in Paris");

    await chat.StreamResponseRich(new ChatStreamEventHandler
    {
        MessageTokenHandler = (token) =>
        {
            Console.Write(token);
            return Task.CompletedTask;
        },
        FunctionCallHandler = (functions) =>
        {
            // Resolve every requested tool call with a canned weather payload.
            foreach (FunctionCall fn in functions)
            {
                fn.Result = new FunctionResult(fn.Name, new
                {
                    result = "ok",
                    weather = "A mild rain is expected around noon in Paris."
                });
            }

            return Task.CompletedTask;
        },
        OnUsageReceived = (usage) =>
        {
            return Task.CompletedTask;
        }
    });
}

/// <summary>
/// Demo: Anthropic prompt caching of a long system message. The bulky text part is
/// marked with ephemeral cache control; the usage printed after each turn shows the
/// cache-creation / cache-read token counts.
/// </summary>
public static async Task AnthropicCaching()
{
    string longPrompt = await File.ReadAllTextAsync("Static/Files/pride_and_prejudice.txt");

    // Fixed: connect to Anthropic (was LLmProviders.Cohere — wrong provider for an
    // Anthropic model) and assign Model only once (the initializer previously set
    // it twice, which is a compile error). Also removed a leftover
    // File.ReadAllTextAsync("") call that would throw on the empty path.
    Conversation chat = Program.Connect(LLmProviders.Anthropic).Chat.CreateConversation(new ChatRequest
    {
        Model = ChatModel.Anthropic.Claude35.SonnetLatest
    });

    // Only the long part carries cache control; the short instruction stays uncached.
    chat.AppendSystemMessage([
        new ChatMessagePart("You are an assistant answering queries about the following text"),
        new ChatMessagePart(longPrompt, new ChatMessagePartAnthropicExtensions
        {
            Cache = AnthropicCacheSettings.Ephemeral
        })
    ]);

    Console.WriteLine();
    Console.ForegroundColor = ConsoleColor.Cyan;
    Console.WriteLine("------- System:");
    Console.ResetColor();
    Console.WriteLine(longPrompt);

    string shortPrompt = "In the text above, who cries “I am sick of Mr. Bingley”?";

    Console.WriteLine();
    Console.ForegroundColor = ConsoleColor.Cyan;
    Console.WriteLine("------- User:");
    Console.ResetColor();
    Console.WriteLine(shortPrompt);
    chat.AppendUserInput(shortPrompt);

    // Pause so the console output can be inspected before streaming starts.
    // NOTE(review): this blocks the non-interactive demo run — confirm it is intended here.
    Console.ReadKey();

    Console.WriteLine();
    Console.ForegroundColor = ConsoleColor.Cyan;
    Console.WriteLine("------- Assistant:");
    Console.ResetColor();
    await StreamResponse();

    // Fixed: removed a leftover AppendMessage call that pushed a user message
    // containing a single empty ChatMessagePart into the history.
    string shortPrompt2 = "When Elizabeth replied “He is also handsome”, who does she mean?";
    Console.WriteLine();
    Console.WriteLine();
    Console.ForegroundColor = ConsoleColor.Cyan;
    Console.WriteLine("------- User:");
    Console.ResetColor();
    Console.WriteLine(shortPrompt2);
    chat.AppendUserInput(shortPrompt2);

    Console.WriteLine();
    Console.ForegroundColor = ConsoleColor.Cyan;
    Console.WriteLine("------- Assistant:");
    Console.ResetColor();
    await StreamResponse();

    // Streams one assistant turn and prints the usage (incl. cache counters) at the end.
    async Task StreamResponse()
    {
        await chat.StreamResponseRich(new ChatStreamEventHandler
        {
            MessageTokenHandler = (token) =>
            {
                Console.Write(token);
                return Task.CompletedTask;
            },
            OnUsageReceived = (usage) =>
            {
                Console.WriteLine();
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine($"Usage: {usage}");
                Console.ResetColor();
                return Task.CompletedTask;
            }
        });
    }
}

public static async Task CohereWebSearch()
Expand Down Expand Up @@ -1312,7 +1469,7 @@ await chat.StreamResponseRich(msgId, (x) =>
}

return Task.FromResult(results);
}, null, null);
}, null);


string response = sb.ToString();
Expand Down
3 changes: 3 additions & 0 deletions LlmTornado.Demo/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ public enum Demos
[Flaky("requires ollama")]
CustomProviderOllamaStreaming,
ChatAnthropicCaching,
[Flaky("interactive")]
ChatAnthropicCachingInteractive,
Last
}

Expand Down Expand Up @@ -283,6 +285,7 @@ public static async Task<bool> SetupApi()
Demos.CustomProviderOllama => CustomProviderDemo.Ollama,
Demos.CustomProviderOllamaStreaming => CustomProviderDemo.OllamaStreaming,
Demos.ChatAnthropicCaching => ChatDemo.AnthropicCaching,
Demos.ChatAnthropicCachingInteractive => ChatDemo.AnthropicCachingChat,
_ => null
};

Expand Down
16 changes: 16 additions & 0 deletions LlmTornado/Chat/ChatMessagePart.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using LlmTornado.Chat.Vendors.Anthropic;
using LlmTornado.Code;
using LlmTornado.Images;
using Newtonsoft.Json;
Expand Down Expand Up @@ -42,6 +43,13 @@ public ChatMessagePart(string text)
Type = ChatMessageTypes.Text;
}

/// <summary>
/// The part is a text fragment with provider-specific extensions attached (e.g. Anthropic cache control).
/// </summary>
/// <param name="text">The text content of the part</param>
/// <param name="vendorExtensions">Vendor-specific settings applied to this part</param>
public ChatMessagePart(string text, IChatMessagePartVendorExtensions vendorExtensions)
{
    Text = text;
    Type = ChatMessageTypes.Text;
    VendorExtensions = vendorExtensions;
}

/// <summary>
/// The part is an audio fragment.
/// </summary>
Expand Down Expand Up @@ -134,6 +142,12 @@ public ChatMessagePart(string content, ImageDetail imageDetail, string? mimeType
/// </summary>
[JsonProperty("input_audio")]
public ChatAudio? Audio { get; set; }

/// <summary>
/// Specific features supported only by certain providers
/// </summary>
[JsonIgnore]
public IChatMessagePartVendorExtensions? VendorExtensions { get; set; }

/// <summary>
/// Creates an audio part from a given stream.
Expand Down Expand Up @@ -182,4 +196,6 @@ public static ChatMessagePart Create(string base64EncodedAudio, ChatAudioFormats
{
return new ChatMessagePart(base64EncodedAudio, format);
}


}
15 changes: 15 additions & 0 deletions LlmTornado/Chat/ChatRequestVendorExtensions.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using LlmTornado.Chat.Vendors.Anthropic;
using LlmTornado.Chat.Vendors.Cohere;

namespace LlmTornado.Chat;
Expand All @@ -11,6 +12,11 @@ public class ChatRequestVendorExtensions
/// Cohere extensions.
/// </summary>
public ChatRequestVendorCohereExtensions? Cohere { get; set; }

/// <summary>
/// Anthropic extensions.
/// </summary>
public ChatRequestVendorAnthropicExtensions? Anthropic { get; set; }

/// <summary>
/// Empty extensions.
Expand All @@ -28,4 +34,13 @@ public ChatRequestVendorExtensions(ChatRequestVendorCohereExtensions cohereExten
{
Cohere = cohereExtensions;
}

/// <summary>
/// Anthropic extensions.
/// </summary>
/// <param name="anthropicExtensions">Anthropic-specific request extensions</param>
public ChatRequestVendorExtensions(ChatRequestVendorAnthropicExtensions anthropicExtensions)
{
    Anthropic = anthropicExtensions;
}
}
21 changes: 21 additions & 0 deletions LlmTornado/Chat/ChatResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,18 @@ public class ChatUsage : Usage
[JsonProperty("completion_tokens_details")]
public ChatUsageTokenDetails? CompletionTokensDetails { get; set; }

/// <summary>
/// Number of tokens spent creating a new cache entry — presumably mapped from the
/// vendor's cache-creation usage field (e.g. Anthropic); confirm against the vendor usage mapping.
/// </summary>
[JsonIgnore]
public int? CacheCreationTokens { get; set; }

/// <summary>
/// Number of tokens read from cache.
/// </summary>
[JsonIgnore]
public int? CacheReadTokens { get; set; }

public ChatUsage()
{

Expand All @@ -236,4 +248,13 @@ internal ChatUsage(VendorGoogleUsage usage)
PromptTokens = usage.PromptTokenCount;
TotalTokens = CompletionTokens + PromptTokens;
}

/// <summary>
/// Human-readable view of the usage counters, including the cache token counts.
/// </summary>
/// <returns>A one-line summary of total, prompt, completion and cache tokens.</returns>
public override string ToString() =>
    $"Total: {TotalTokens}, Prompt: {PromptTokens}, Completion: {CompletionTokens}, Cache created: {CacheCreationTokens}, Cache read: {CacheReadTokens}";
}
13 changes: 12 additions & 1 deletion LlmTornado/Chat/Conversation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -380,13 +380,24 @@ public Conversation AppendUserInputWithName(string userName, IEnumerable<ChatMes

/// <summary>
/// Creates and appends a <see cref="ChatMessage" /> to the chat history with the Role of
/// <see cref="ChatMessageRole.System" />. The system message helps set the behavior of the assistant.
/// </summary>
/// <param name="content">text content that helps set the behavior of the assistant</param>
/// <returns>The conversation instance, as returned by <see cref="AppendMessage"/></returns>
public Conversation AppendSystemMessage(string content)
{
    return AppendMessage(new ChatMessage(ChatMessageRoles.System, content));
}

/// <summary>
/// Creates and appends a <see cref="ChatMessage" /> to the chat history with the Role of
/// <see cref="ChatMessageRole.System" />. The system message helps set the behavior of the assistant.
/// </summary>
/// <param name="parts">Parts of the system message; individual parts may carry vendor extensions (e.g. Anthropic cache control)</param>
/// <returns>The conversation instance, as returned by <see cref="AppendMessage"/></returns>
public Conversation AppendSystemMessage(IEnumerable<ChatMessagePart> parts)
{
    return AppendMessage(new ChatMessage(ChatMessageRoles.System, parts));
}

/// <summary>
/// Creates and appends a <see cref="ChatMessage" /> to the chat history with the Role of
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System;
using System.Collections.Generic;
using LlmTornado.Vendor.Anthropic;
using Newtonsoft.Json;

namespace LlmTornado.Chat.Vendors.Anthropic;

/// <summary>
/// Cache control settings for Anthropic prompt caching, serialized into the outbound request.
/// </summary>
public class AnthropicCacheSettings
{
    /// <summary>
    /// Shared settings instance for the "ephemeral" cache type.
    /// NOTE(review): <see cref="Type"/> has a public setter, so mutating it on this shared
    /// singleton would affect every request referencing Ephemeral — consider restricting the setter.
    /// </summary>
    public static readonly AnthropicCacheSettings Ephemeral = new AnthropicCacheSettings();

    /// <summary>
    /// Cache type sent to the API; only "ephemeral" is used here.
    /// </summary>
    [JsonProperty("type")]
    public string Type { get; set; } = "ephemeral";

    // Private: instances are only exposed via the static members above.
    private AnthropicCacheSettings()
    {

    }
}

/// <summary>
/// Marker interface for items of an Anthropic chat request.
/// NOTE(review): has no members and no visible implementers in this change — confirm intended use.
/// </summary>
public interface IAnthropicChatRequestItem
{

}

/// <summary>
/// Chat features supported only by Anthropic.
/// </summary>
public class ChatRequestVendorAnthropicExtensions
{
    /// <summary>
    /// Enables modification of the outbound chat request just before sending it. Use this to control cache in chat-like scenarios.<br/>
    /// Arguments: <b>System message</b>; <b>User, Assistant messages</b>; <b>Tools</b>
    /// </summary>
    // Changed from a public field to an auto-property (CA1051), matching how every
    // other extension type in this change exposes its members; source-compatible for callers.
    public Action<VendorAnthropicChatRequestMessageContent?, List<VendorAnthropicChatRequestMessageContent>, List<VendorAnthropicToolFunction>?>? OutboundRequest { get; set; }
}
Loading

0 comments on commit d97f2a9

Please sign in to comment.