Skip to content

Commit

Permalink
Add Anthropic/Claude!
Browse files Browse the repository at this point in the history
  • Loading branch information
awwaiid committed Nov 8, 2024
1 parent bcf1acd commit 70e8fe4
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 0 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ Draw some stuff on your screen, and then trigger the assistant by *touching/tapp
* Switch to providing some tools -- draw_text and draw_svg
* This should make it more compatible with Anthropic?
* More immediately, this means now there is the one overall assistant and it decides to draw back keyboard text or SVG drawing
* **2024-11-07** - Claude! (Anthropic)
* More shuffling to start to isolate the API
* ... and now I added Claude/Anthropic!
* It is able to use an almost identical tool-use setup, so I should be able to merge the two
* So far it seems to like drawing a bit more, but it is not great at drawing and not much better at spatial awareness
* Maybe next on the queue will be augmenting spatial awareness through some image pre-processing and result positioning. Like detect bounding boxes, segments, etc, feed that into the model, and have the model return an array of SVGs and where they should be positioned. Maybe.

## Ideas
* [DONE] Matt showed me his iOS super calc that just came out, take inspiration from that!
Expand Down
163 changes: 163 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use anyhow::Result;
use ureq::Error;

use serde_json::json;

Expand Down Expand Up @@ -49,13 +50,15 @@ struct Args {
// CLI subcommands. NOTE: plain `//` comments are used deliberately —
// `///` doc comments on clap derive variants become user-visible help text.
enum Command {
// Exercise the simulated keyboard without contacting any model.
KeyboardTest,
// Text assistant (ghostwriter); also the default when no subcommand is given.
TextAssist,
// Claude/Anthropic-backed assistant (see claude_assist).
ClaudeAssist,
}

/// Entry point: parse the command line and dispatch to the requested mode.
///
/// Running with no subcommand behaves the same as `text-assist`.
fn main() -> Result<()> {
    let args = Args::parse();

    // Dispatch on the optional subcommand; arms are mutually exclusive,
    // so ordering is irrelevant to behavior.
    match &args.command {
        Some(Command::ClaudeAssist) => claude_assist(&args),
        Some(Command::KeyboardTest) => keyboard_test(),
        Some(Command::TextAssist) | None => ghostwriter(&args),
    }
}
Expand Down Expand Up @@ -239,3 +242,163 @@ fn draw_svg(svg_data: &str, keyboard: &mut Keyboard, pen: &mut Pen) -> Result<()
keyboard.progress_end()?;
Ok(())
}


fn claude_assist(args: &Args) -> Result<()> {
let mut keyboard = Keyboard::new();
let mut pen = Pen::new();
let mut touch = Touch::new();

// Default to regular text size
keyboard.key_cmd_body()?;

loop {
println!("Waiting for trigger (hand-touch in the upper-right corner)...");
touch.wait_for_trigger()?;

keyboard.progress()?;

// TODO: Show progress indicator using the keyboard in all cases? Some other cool doodle?

let screenshot = Screenshot::new()?;
screenshot.save_image("tmp/screenshot.png")?;
let base64_image = screenshot.base64()?;
keyboard.progress()?;

if args.no_submit {
println!("Image not submitted to OpenAI due to --no-submit flag");
keyboard.progress_end()?;
return Ok(());
}

let api_key = std::env::var("ANTHROPIC_API_KEY")?;
let tools = json!([
{
"name": "draw_text",
"description": "Draw text to the screen using simulated keyboard input. The input_description and output_description are used to build a plan for the actual output.",
"input_schema": {
"type": "object",
"properties": {
"input_description": {
"type": "string",
"description": "Description of what was detected in the input image"
},
"output_description": {
"type": "string",
"description": "Description of what will be output"
},
"text": {
"type": "string",
"description": "Text to be written"
}
},
"required": ["input_description", "output_description", "text"]
}
},
{
"name": "draw_svg",
"description": "Draw an SVG to the screen using simulated pen input. The input_description and output_description are used to build a plan for the actual output.",
"input_schema": {
"type": "object",
"properties": {
"input_description": {
"type": "string",
"description": "Description of what was detected in the input image"
},
"output_description": {
"type": "string",
"description": "Description of what will be drawn"
},
"svg": {
"type": "string",
"description": "SVG data to be rendered. This is drawn on top of the input image, and should be the same size as the input image (1404x1872 px). The display can only show black and white. Try to place the output in an integrated position. Use the `Noto Sans` font-family when you are showing text. Do not use a style tag tag. Do not use any fill colors or gradients or transparency or shadows. Do include the xmlns in the main svg tag."
}
},
"required": ["input_description", "output_description", "svg"]
}
}
]);

let body = json!({
// "model": "args.model,
"model": "claude-3-5-sonnet-20241022",
"max_tokens": 5000,
"messages": [{
"role": "user",
"content": [
{
"type": "text",
"text": "You are a helpful assistant. You live inside of a remarkable2 notepad, which has a 1404x1872 sized screen which can only display grayscale. Your input is the current content of the screen, which may contain content written by the user or previously written by you (the assistant). Look at this content, interpret it, and respond to the content. The content will contain handwritten notes, diagrams, and maybe typewritten text. Respond by calling a tool. Call draw_text to output text which will be sent using simulated keyboard input. Call draw_svg to respond with an SVG drawing which will be drawn on top of the existing content. Try to place the output on the screen at coordinates that make sense. If you need to place text at a very specific location, you should output an SVG instead of keyboard text."
},
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": base64_image
}
}
]
}],
"tools": tools,
"tool_choice": {
"type": "any",
"disable_parallel_tool_use": true
}
});

keyboard.progress()?;


let raw_response = ureq::post("https://api.anthropic.com/v1/messages")
.set("x-api-key", api_key.as_str())
.set("anthropic-version", "2023-06-01")
.set("Content-Type", "application/json")
.send_json(&body);

let response = match raw_response {
Ok(response) => { response }
Err(Error::Status(code, response)) => {
println!("Error: {}", code);
let json: serde_json::Value = response.into_json()?;
println!("Response: {}", json);
// return Err(Error::from(code));
return Err(anyhow::anyhow!("API ERROR"))
}
Err(_) => {
return Err(anyhow::anyhow!("OTHER API ERROR"))
}
};

keyboard.progress()?;

let json: serde_json::Value = response.into_json()?;
println!("Response: {}", json);
let tool_calls = &json["content"];

if let Some(tool_call) = tool_calls.get(0) {
keyboard.progress()?;
let function_name = tool_call["name"].as_str().unwrap();
let arguments = &tool_call["input"];
keyboard.progress()?;

match function_name {
"draw_text" => {
let text = arguments["text"].as_str().unwrap();
draw_text(text, &mut keyboard)?;
}
"draw_svg" => {
let svg_data = arguments["svg"].as_str().unwrap();
draw_svg(svg_data, &mut keyboard, &mut pen)?;
}
_ => {
keyboard.progress_end()?;
return Err(anyhow::anyhow!("Unknown function called"))
}
}
} else {
keyboard.progress_end()?;
return Err(anyhow::anyhow!("No tool call found in response"))
}
}
}

0 comments on commit 70e8fe4

Please sign in to comment.