From 9b1fcde428847c7028e91a1ee1db26156a930d70 Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Thu, 27 Feb 2025 11:50:08 +1100
Subject: [PATCH 01/18] SambaNova: recommend Llama 3.3 over 3.1

---
 config/locales/client.en.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index e204360ba..55d239509 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -403,7 +403,7 @@ en:
           open_ai-o1: "Open AI's most capable reasoning model"
           open_ai-o3-mini: "Advanced Cost-efficient reasoning model"
           samba_nova-Meta-Llama-3-1-8B-Instruct: "Efficient lightweight multilingual model"
-          samba_nova-Meta-Llama-3-1-70B-Instruct": "Powerful multipurpose model"
+          samba_nova-Meta-Llama-3-3-70B-Instruct": "Powerful multipurpose model"
           mistral-mistral-large-latest: "Mistral's most powerful model"
           mistral-pixtral-large-latest: "Mistral's most powerful vision capable model"

From 2b25e49462d2f67a97e87cf25d25b9468156829e Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Thu, 27 Feb 2025 11:50:21 +1100
Subject: [PATCH 02/18] 1024 tokens is the minimum

---
 lib/completions/endpoints/anthropic.rb   | 2 +-
 lib/completions/endpoints/aws_bedrock.rb | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index ed950d31f..673149eb9 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -40,7 +40,7 @@ def default_options(dialect)
 
       if llm_model.lookup_custom_param("enable_reasoning")
         reasoning_tokens =
-          llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+          llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 65_536)
 
         # this allows for lots of tokens beyond reasoning
         options[:max_tokens] = reasoning_tokens + 30_000
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
index 75ed12cfe..b30338d66 100644
--- a/lib/completions/endpoints/aws_bedrock.rb
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -29,7 +29,7 @@ def default_options(dialect)
       result = { anthropic_version: "bedrock-2023-05-31" }
       if llm_model.lookup_custom_param("enable_reasoning")
         reasoning_tokens =
-          llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+          llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 65_536)
 
         # this allows for ample tokens beyond reasoning
         max_tokens = reasoning_tokens + 30_000

From b67568be18bdeebd067d48f5fb8663d0976679b6 Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Thu, 27 Feb 2025 15:17:38 +1100
Subject: [PATCH 03/18] Implement streaming and generation of thinking tokens

This is required for Claude and introduces some new concepts into the
prompt.
---
 .../anthropic_message_processor.rb     | 74 ++++++++++++++++---
 lib/completions/dialects/claude.rb     | 25 ++++++-
 lib/completions/endpoints/anthropic.rb |  1 +
 lib/completions/endpoints/base.rb      |  6 +-
 lib/completions/llm.rb                 |  3 +
 lib/completions/prompt.rb              | 27 ++++++-
 lib/completions/thinking.rb            | 34 +++++++++
 7 files changed, 156 insertions(+), 14 deletions(-)
 create mode 100644 lib/completions/thinking.rb

diff --git a/lib/completions/anthropic_message_processor.rb b/lib/completions/anthropic_message_processor.rb
index 44242b2d1..e67be109b 100644
--- a/lib/completions/anthropic_message_processor.rb
+++ b/lib/completions/anthropic_message_processor.rb
@@ -44,13 +44,15 @@ def to_tool_call
         end
       end
 
-      attr_reader :tool_calls, :input_tokens, :output_tokens
+      attr_reader 
:tool_calls, :input_tokens, :output_tokens, :output_thinking - def initialize(streaming_mode:, partial_tool_calls: false) + def initialize(streaming_mode:, partial_tool_calls: false, output_thinking: false) @streaming_mode = streaming_mode @tool_calls = [] @current_tool_call = nil @partial_tool_calls = partial_tool_calls + @output_thinking = output_thinking + @thinking = nil end def to_tool_calls @@ -69,13 +71,48 @@ def process_streamed_message(parsed) tool_id, partial_tool_calls: @partial_tool_calls, ) if tool_name + elsif parsed[:type] == "content_block_start" && parsed.dig(:content_block, :type) == "thinking" + if @output_thinking + @thinking = + DiscourseAi::Completions::Thinking.new( + message: +parsed.dig(:content_block, :thinking).to_s, + signature: +"", + partial: true, + ) + result = @thinking.dup + end + elsif parsed[:type] == "content_block_delta" && parsed.dig(:delta, :type) == "thinking_delta" + if @output_thinking + delta = parsed.dig(:delta, :thinking) + @thinking.message << delta if @thinking + result = DiscourseAi::Completions::Thinking.new(message: delta, partial: true) + end + elsif parsed[:type] == "content_block_delta" && parsed.dig(:delta, :type) == "signature_delta" + if @output_thinking + @thinking.signature << parsed.dig(:delta, :signature) if @thinking + end + elsif parsed[:type] == "content_block_stop" && @thinking + @thinking.partial = false + result = @thinking + @thinking = nil elsif parsed[:type] == "content_block_start" || parsed[:type] == "content_block_delta" if @current_tool_call tool_delta = parsed.dig(:delta, :partial_json).to_s @current_tool_call.append(tool_delta) result = @current_tool_call.partial_tool_call if @current_tool_call.has_partial? + elsif parsed.dig(:content_block, :type) == "redacted_thinking" + if @output_thinking + result = + DiscourseAi::Completions::Thinking.new( + message: nil, + signature: parsed.dig(:content_block, :data), + redacted: true, + ) + end else result = parsed.dig(:delta, :text).to_s + # no need to return empty strings for streaming, no value + result = nil if result == "" end elsif parsed[:type] == "content_block_stop" if @current_tool_call @@ -105,15 +142,32 @@ def process_message(payload) content = parsed.dig(:content) if content.is_a?(Array) result = - content.map do |data| - if data[:type] == "tool_use" - call = AnthropicToolCall.new(data[:name], data[:id]) - call.append(data[:input].to_json) - call.to_tool_call - else - data[:text] + content + .map do |data| + if data[:type] == "tool_use" + call = AnthropicToolCall.new(data[:name], data[:id]) + call.append(data[:input].to_json) + call.to_tool_call + elsif data[:type] == "thinking" + if @output_thinking + DiscourseAi::Completions::Thinking.new( + message: data[:thinking], + signature: data[:signature], + ) + end + elsif data[:type] == "redacted_thinking" + if @output_thinking + DiscourseAi::Completions::Thinking.new( + message: nil, + signature: data[:data], + redacted: true, + ) + end + else + data[:text] + end end - end + .compact end @input_tokens = parsed.dig(:usage, :input_tokens) diff --git a/lib/completions/dialects/claude.rb b/lib/completions/dialects/claude.rb index a9c0aba75..06fbe1023 100644 --- a/lib/completions/dialects/claude.rb +++ b/lib/completions/dialects/claude.rb @@ -87,7 +87,30 @@ def tool_msg(msg) end def model_msg(msg) - { role: "assistant", content: msg[:content] } + if msg[:thinking] || msg[:redacted_thinking_signature] + content_array = [] + + if msg[:thinking] + content_array << { + type: "thinking", + thinking: msg[:thinking], + 
signature: msg[:thinking_signature], + } + end + + if msg[:redacted_thinking_signature] + content_array << { + type: "redacted_thinking", + data: msg[:redacted_thinking_signature], + } + end + + content_array << { type: "text", text: msg[:content] } + + { role: "assistant", content: content_array } + else + { role: "assistant", content: msg[:content] } + end end def system_msg(msg) diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb index 673149eb9..ec7656d9a 100644 --- a/lib/completions/endpoints/anthropic.rb +++ b/lib/completions/endpoints/anthropic.rb @@ -123,6 +123,7 @@ def processor DiscourseAi::Completions::AnthropicMessageProcessor.new( streaming_mode: @streaming_mode, partial_tool_calls: partial_tool_calls, + output_thinking: output_thinking, ) end diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb index 74933f10f..800381e46 100644 --- a/lib/completions/endpoints/base.rb +++ b/lib/completions/endpoints/base.rb @@ -4,7 +4,7 @@ module DiscourseAi module Completions module Endpoints class Base - attr_reader :partial_tool_calls + attr_reader :partial_tool_calls, :output_thinking CompletionFailed = Class.new(StandardError) # 6 minutes @@ -67,12 +67,15 @@ def perform_completion!( feature_name: nil, feature_context: nil, partial_tool_calls: false, + output_thinking: false, &blk ) LlmQuota.check_quotas!(@llm_model, user) start_time = Time.now @partial_tool_calls = partial_tool_calls + @output_thinking = output_thinking + model_params = normalize_model_params(model_params) orig_blk = blk @@ -85,6 +88,7 @@ def perform_completion!( feature_name: feature_name, feature_context: feature_context, partial_tool_calls: partial_tool_calls, + output_thinking: output_thinking, ) wrapped = result diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb index 513859206..cb75f6e7c 100644 --- a/lib/completions/llm.rb +++ b/lib/completions/llm.rb @@ -234,6 +234,7 @@ def initialize(dialect_klass, gateway_klass, llm_model, gateway: nil) # @param feature_name { String - Optional } - The feature name to use for the completion. # @param feature_context { Hash - Optional } - The feature context to use for the completion. # @param partial_tool_calls { Boolean - Optional } - If true, the completion will return partial tool calls. + # @param output_thinking { Boolean - Optional } - If true, the completion will return the thinking output for thinking models. # # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function. 
# @@ -250,6 +251,7 @@ def generate( feature_name: nil, feature_context: nil, partial_tool_calls: false, + output_thinking: false, &partial_read_blk ) self.class.record_prompt(prompt) @@ -285,6 +287,7 @@ def generate( feature_name: feature_name, feature_context: feature_context, partial_tool_calls: partial_tool_calls, + output_thinking: output_thinking, &partial_read_blk ) end diff --git a/lib/completions/prompt.rb b/lib/completions/prompt.rb index 9a6d4d617..6afbc52be 100644 --- a/lib/completions/prompt.rb +++ b/lib/completions/prompt.rb @@ -41,12 +41,26 @@ def initialize( @tool_choice = tool_choice end - def push(type:, content:, id: nil, name: nil, upload_ids: nil) + def push( + type:, + content:, + id: nil, + name: nil, + upload_ids: nil, + thinking: nil, + thinking_signature: nil, + redacted_thinking_signature: nil + ) return if type == :system new_message = { type: type, content: content } new_message[:name] = name.to_s if name new_message[:id] = id.to_s if id new_message[:upload_ids] = upload_ids if upload_ids + new_message[:thinking] = thinking if thinking + new_message[:thinking_signature] = thinking_signature if thinking_signature + new_message[ + :redacted_thinking_signature + ] = redacted_thinking_signature if redacted_thinking_signature validate_message(new_message) validate_turn(messages.last, new_message) @@ -73,7 +87,16 @@ def validate_message(message) raise ArgumentError, "message type must be one of #{valid_types}" end - valid_keys = %i[type content id name upload_ids] + valid_keys = %i[ + type + content + id + name + upload_ids + thinking + thinking_signature + redacted_thinking_signature + ] if (invalid_keys = message.keys - valid_keys).any? raise ArgumentError, "message contains invalid keys: #{invalid_keys}" end diff --git a/lib/completions/thinking.rb b/lib/completions/thinking.rb new file mode 100644 index 000000000..e075835f9 --- /dev/null +++ b/lib/completions/thinking.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module DiscourseAi + module Completions + class Thinking + attr_accessor :message, :signature, :redacted, :partial + + def initialize(message:, signature: nil, redacted: false, partial: false) + @message = message + @signature = signature + @redacted = redacted + @partial = partial + end + + def ==(other) + message == other.message && signature == other.signature && redacted == other.redacted && + partial == other.partial + end + + def dup + Thinking.new( + message: message.dup, + signature: signature.dup, + redacted: redacted, + partial: partial, + ) + end + + def to_s + "#{message} - #{signature} - #{redacted} - #{partial}" + end + end + end +end From ae3117d1b45736d2c488e1cb8d908562d23207e5 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Thu, 27 Feb 2025 15:17:49 +1100 Subject: [PATCH 04/18] tests --- .../completions/endpoints/anthropic_spec.rb | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb index b43c76254..579af6953 100644 --- a/spec/lib/completions/endpoints/anthropic_spec.rb +++ b/spec/lib/completions/endpoints/anthropic_spec.rb @@ -449,4 +449,219 @@ expect(log.request_tokens).to eq(10) expect(log.response_tokens).to eq(25) end + + it "can send through thinking tokens via a completion prompt" do + body = { + id: "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", + type: "message", + role: "assistant", + content: [{ type: "text", text: "world" }], + model: "claude-3-7-sonnet-20250219", + stop_reason: "end_turn", + usage: { + 
input_tokens: 25,
+        output_tokens: 40,
+      },
+    }.to_json
+
+    parsed_body = nil
+    stub_request(:post, url).with(
+      body: ->(req_body) { parsed_body = JSON.parse(req_body) },
+      headers: {
+        "Content-Type" => "application/json",
+        "X-Api-Key" => "123",
+        "Anthropic-Version" => "2023-06-01",
+      },
+    ).to_return(status: 200, body: body)
+
+    prompt = DiscourseAi::Completions::Prompt.new("system prompt")
+    prompt.push(type: :user, content: "hello")
+    prompt.push(
+      type: :model,
+      id: "user1",
+      content: "hello",
+      thinking: "I am thinking",
+      thinking_signature: "signature",
+      redacted_thinking_signature: "redacted_signature",
+    )
+
+    result = llm.generate(prompt, user: Discourse.system_user)
+    expect(result).to eq("world")
+
+    expected_body = {
+      "model" => "claude-3-opus-20240229",
+      "max_tokens" => 4096,
+      "messages" => [
+        { "role" => "user", "content" => "hello" },
+        {
+          "role" => "assistant",
+          "content" => [
+            { "type" => "thinking", "thinking" => "I am thinking", "signature" => "signature" },
+            { "type" => "redacted_thinking", "data" => "redacted_signature" },
+            { "type" => "text", "text" => "hello" },
+          ],
+        },
+      ],
+      "system" => "system prompt",
+    }
+
+    expect(parsed_body).to eq(expected_body)
+  end
+
+  it "can handle a response with thinking blocks in non-streaming mode" do
+    body = {
+      id: "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
+      type: "message",
+      role: "assistant",
+      content: [
+        {
+          type: "thinking",
+          thinking: "This is my thinking process about prime numbers...",
+          signature: "abc123signature",
+        },
+        { type: "redacted_thinking", data: "abd456signature" },
+        { type: "text", text: "Yes, there are infinitely many prime numbers where n mod 4 = 3." },
+      ],
+      model: "claude-3-7-sonnet-20250219",
+      stop_reason: "end_turn",
+      usage: {
+        input_tokens: 25,
+        output_tokens: 40,
+      },
+    }.to_json
+
+    stub_request(:post, url).with(
+      headers: {
+        "Content-Type" => "application/json",
+        "X-Api-Key" => "123",
+        "Anthropic-Version" => "2023-06-01",
+      },
+    ).to_return(status: 200, body: body)
+
+    result =
+      llm.generate(
+        "hello",
+        user: Discourse.system_user,
+        feature_name: "testing",
+        output_thinking: true,
+      )
+
+    # Result should be an array with both thinking and text content
+    expect(result).to be_an(Array)
+    expect(result.length).to eq(3)
+
+    # First item should be a Thinking object
+    expect(result[0]).to be_a(DiscourseAi::Completions::Thinking)
+    expect(result[0].message).to eq("This is my thinking process about prime numbers...")
+    expect(result[0].signature).to eq("abc123signature")
+
+    expect(result[1]).to be_a(DiscourseAi::Completions::Thinking)
+    expect(result[1].signature).to eq("abd456signature")
+    expect(result[1].redacted).to eq(true)
+
+    # Third item should be the text response
+    expect(result[2]).to eq("Yes, there are infinitely many prime numbers where n mod 4 = 3.")
+
+    # Verify audit log
+    log = AiApiAuditLog.order(:id).last
+    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
+    expect(log.feature_name).to eq("testing")
+    expect(log.response_tokens).to eq(40)
+  end
+
+  it "can stream a response with thinking blocks" do
+    body = (<<~STRING).strip
+      event: message_start
+      data: {"type": "message_start", "message": {"id": "msg_01...", "type": "message", "role": "assistant", "content": [], "model": "claude-3-opus-20240229", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25}}}
+
+      event: content_block_start
+      data: {"type": "content_block_start", "index": 0, "content_block": {"type": "thinking", "thinking": ""}}
+
+      event: 
content_block_delta + data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\\n\\n1. First break down 27 * 453"}} + + event: content_block_delta + data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\\n2. 453 = 400 + 50 + 3"}} + + event: content_block_delta + data: {"type": "content_block_delta", "index": 0, "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}} + + event: content_block_stop + data: {"type": "content_block_stop", "index": 0} + + event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"AAA=="} } + + event: ping + data: {"type": "ping"} + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: content_block_start + data: {"type": "content_block_start", "index": 1, "content_block": {"type": "text", "text": ""}} + + event: content_block_delta + data: {"type": "content_block_delta", "index": 1, "delta": {"type": "text_delta", "text": "27 * 453 = 12,231"}} + + event: content_block_stop + data: {"type": "content_block_stop", "index": 1} + + event: message_delta + data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence": null, "usage": {"output_tokens": 30}}} + + event: message_stop + data: {"type": "message_stop"} + STRING + + parsed_body = nil + + stub_request(:post, url).with( + headers: { + "Content-Type" => "application/json", + "X-Api-Key" => "123", + "Anthropic-Version" => "2023-06-01", + }, + ).to_return(status: 200, body: body) + + thinking_chunks = [] + text_chunks = [] + + llm.generate( + "hello there", + user: Discourse.system_user, + feature_name: "testing", + output_thinking: true, + ) do |partial, cancel| + if partial.is_a?(DiscourseAi::Completions::Thinking) + thinking_chunks << partial + else + text_chunks << partial + end + end + + expected_thinking = [ + DiscourseAi::Completions::Thinking.new(message: "", signature: "", partial: true), + DiscourseAi::Completions::Thinking.new( + message: "Let me solve this step by step:\n\n1. First break down 27 * 453", + partial: true, + ), + DiscourseAi::Completions::Thinking.new(message: "\n2. 453 = 400 + 50 + 3", partial: true), + DiscourseAi::Completions::Thinking.new( + message: + "Let me solve this step by step:\n\n1. First break down 27 * 453\n2. 
453 = 400 + 50 + 3", + signature: "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds...", + partial: false, + ), + DiscourseAi::Completions::Thinking.new(message: nil, signature: "AAA==", redacted: true), + ] + + expect(thinking_chunks).to eq(expected_thinking) + expect(text_chunks).to eq(["27 * 453 = 12,231"]) + + log = AiApiAuditLog.order(:id).last + expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic) + expect(log.feature_name).to eq("testing") + expect(log.response_tokens).to eq(30) + end end From a3160509a0b2ed8008ec4c42d788715ad080461a Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Fri, 28 Feb 2025 16:06:47 +1100 Subject: [PATCH 05/18] preserve thinking context across turns --- .../composer-fields/persona-llm-selector.gjs | 14 ++--- config/locales/server.en.yml | 1 + lib/ai_bot/bot.rb | 52 ++++++++++++++++-- lib/ai_bot/playground.rb | 21 +++++++- lib/completions/endpoints/canned_response.rb | 9 +++- lib/completions/prompt_messages_builder.rb | 11 +++- lib/completions/thinking.rb | 4 ++ spec/lib/modules/ai_bot/playground_spec.rb | 53 +++++++++++++++++++ 8 files changed, 151 insertions(+), 14 deletions(-) diff --git a/assets/javascripts/discourse/connectors/composer-fields/persona-llm-selector.gjs b/assets/javascripts/discourse/connectors/composer-fields/persona-llm-selector.gjs index 951d754ae..e3e82b4d6 100644 --- a/assets/javascripts/discourse/connectors/composer-fields/persona-llm-selector.gjs +++ b/assets/javascripts/discourse/connectors/composer-fields/persona-llm-selector.gjs @@ -168,12 +168,14 @@ export default class BotSelector extends Component { .filter((bot) => !bot.is_persona) .filter(Boolean); - return availableBots.map((bot) => { - return { - id: bot.id, - name: bot.display_name, - }; - }); + return availableBots + .map((bot) => { + return { + id: bot.id, + name: bot.display_name, + }; + }) + .sort((a, b) => a.name.localeCompare(b.name)); }
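
A note on how the pieces above fit together, as a minimal sketch (the prompt
text and variable names are illustrative, and `llm` is assumed to be a
DiscourseAi::Completions::Llm instance obtained the same way as in the specs;
only APIs introduced in these patches are used):

  prompt = DiscourseAi::Completions::Prompt.new("You are a math tutor")
  prompt.push(type: :user, content: "What is 27 * 453?")

  text = +""
  thinking_blocks = []

  llm.generate(prompt, user: Discourse.system_user, output_thinking: true) do |partial, cancel|
    if partial.is_a?(DiscourseAi::Completions::Thinking)
      # deltas stream in with partial == true; the completed block arrives
      # with partial == false, carrying the full message plus its signature
      # (redacted blocks carry only a signature)
      thinking_blocks << partial unless partial.partial
    else
      text << partial
    end
  end

  # preserve reasoning context across turns (patch 05) by pushing the signed
  # thinking back into the prompt alongside the assistant reply
  thinking = thinking_blocks.find { |t| !t.redacted }
  redacted = thinking_blocks.find(&:redacted)

  prompt.push(
    type: :model,
    content: text,
    thinking: thinking&.message,
    thinking_signature: thinking&.signature,
    redacted_thinking_signature: redacted&.signature,
  )

The signatures are opaque values the API expects back verbatim: on the next
request the Claude dialect maps thinking/thinking_signature to a "thinking"
content block and redacted_thinking_signature to a "redacted_thinking" block.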