diff --git a/.cargo/config.toml b/.cargo/config.toml index d84abd5..329fff5 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,11 +1,11 @@ -[target.armv7-unknown-linux-gnueabihf] -linker = "/sysroots/x86_64-codexsdk-linux/usr/bin/arm-remarkable-linux-gnueabi/arm-remarkable-linux-gnueabi-gcc" -rustflags = [ - "-C", "link-arg=-march=armv7-a", - "-C", "link-arg=-marm", - "-C", "link-arg=-mfpu=neon", - "-C", "link-arg=-mfloat-abi=hard", - "-C", "link-arg=-mcpu=cortex-a9", - "-C", "link-arg=--sysroot=/sysroots/cortexa7hf-neon-remarkable-linux-gnueabi", -] +# [target.armv7-unknown-linux-gnueabihf] +# linker = "/sysroots/x86_64-codexsdk-linux/usr/bin/arm-remarkable-linux-gnueabi/arm-remarkable-linux-gnueabi-gcc" +# rustflags = [ +# "-C", "link-arg=-march=armv7-a", +# "-C", "link-arg=-marm", +# "-C", "link-arg=-mfpu=neon", +# "-C", "link-arg=-mfloat-abi=hard", +# "-C", "link-arg=-mcpu=cortex-a9", +# "-C", "link-arg=--sysroot=/sysroots/cortexa7hf-neon-remarkable-linux-gnueabi", +# ] diff --git a/Cargo.lock b/Cargo.lock index 23e6877..9c806a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.15" @@ -183,6 +192,8 @@ version = "1.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -192,6 +203,26 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clang" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c044c781163c001b913cd018fc95a628c50d0d2dfea8bca77dad71edb16e37" +dependencies = [ + "clang-sys", + "libc", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", +] + [[package]] name = "clap" version = "4.5.19" @@ -305,6 +336,12 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "either" version = "1.13.0" @@ -434,6 +471,7 @@ dependencies = [ "dotenv", "evdev", "image", + "opencv", "resvg", "serde", "serde_json", @@ -457,6 +495,12 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "half" version = "2.4.1" @@ -535,6 +579,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "jpeg-decoder" version = "0.3.1" @@ -683,6 +736,41 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "opencv" +version = "0.93.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff94f73f156cdcad77336a9dff19cd1737e5463faaa9e69424c6ee1248eb58df" +dependencies = [ + "cc", + "dunce", + "jobserver", + "libc", + "num-traits", + "once_cell", + "opencv-binding-generator", + "pkg-config", + "semver", + "shlex", + "vcpkg", + "windows", +] + +[[package]] +name = "opencv-binding-generator" +version = "0.93.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cef95a75b87e0f8052af822f04d6fbc48c414907d01d33fd5f3f4184f10f896a" +dependencies = [ + "clang", + "clang-sys", + "dunce", + "once_cell", + "percent-encoding", + "regex", + "shlex", +] + [[package]] name = "parking_lot" version = "0.12.3" @@ -724,6 +812,12 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + [[package]] name = "png" version = "0.17.14" @@ -811,6 +905,35 @@ dependencies = [ "bitflags 2.6.0", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "resvg" version = "0.44.0" @@ -926,6 +1049,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + [[package]] name = "serde" version = "1.0.210" @@ -1305,6 +1434,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -1332,6 +1467,70 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets", +] + +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 234c163..6fbc4dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ name = "ghostwriter" version = "0.1.0" edition = "2021" + [dependencies] tokio = { version = "1.28", features = ["full"] } ureq = { version = "2.6", features = ["json"] } @@ -15,3 +16,5 @@ clap = { version = "4.0", features = ["derive"] } evdev = "0.12.0" resvg = "0.44.0" dotenv = "0.15" +opencv = "0.93.4" + diff --git a/Cross.toml b/Cross.toml new file mode 100644 index 0000000..e44e02f --- /dev/null +++ b/Cross.toml @@ -0,0 +1,11 @@ +# [build] + +# additional commands to run prior to building the package +# pre-build = [ +# "dpkg --add-architecture $CROSS_DEB_ARCH", +# # "apt-get update && apt-get --assume-yes install libopencv-dev:$CROSS_DEB_ARCH libclang-dev:$CROSS_DEB_ARCH clang:$CROSS_DEB_ARCH" +# "apt-get update && apt-get --assume-yes install libopencv-dev:$CROSS_DEB_ARCH" +# ] + +[target.armv7-unknown-linux-gnueabihf] +pre-build = "./cross-deps.sh" diff --git a/README.md b/README.md index 0ed1db1..df1ad77 100644 --- a/README.md +++ b/README.md @@ -151,20 +151,28 @@ scp target/armv7-unknown-linux-gnueabihf/release/ghostwriter remarkable: ## Scratch ``` + # Record an evaluation on the device +./ghostwriter --output-file tmp/result.out --model-output-file tmp/result.json --save-screenshot tmp/input.png --no-draw-progress --save-bitmap tmp/result.png claude-assist # On local, copy the evaluation to local and then put it into a folder +export evaluation_name=tic_tac_toe_1 +rm tmp/* scp -r remarkable:tmp/ ./ -mkdir evaluations/tic_tac_toe_1 -mv tmp/* evaluations/tic_tac_toe_1 +mkdir -p evaluations/$evaluation_name +mv tmp/* evaluations/$evaluation_name # Run an evaluation -./target/release/ghostwriter --input-png evaluations/x_in_box/input.png --output-file tmp/result.out --model-output-file tmp/result.json --save-bitmap tmp/result.png --no-draw --no-draw-progress --no-loop claude-assist +./target/release/ghostwriter --input-png evaluations/$evaluation_name/input.png --output-file tmp/result.out --model-output-file tmp/result.json --save-bitmap tmp/result.png --no-draw --no-draw-progress --no-loop claude-assist # Layer the input and output -magick \( evaluations/x_in_box/input.png -colorspace RGB \) \( tmp/result.png -type truecolormatte -transparent white -fill red -colorize 100 \) -compose Over -composite tmp/merged-output.png +magick \( evaluations/$evaluation_name/input.png -colorspace RGB \) \( tmp/result.png -type truecolormatte -transparent white -fill red -colorize 100 \) -compose Over -composite tmp/merged-output.png ``` Resize from 1872x1404 to 1268x951 px (I think claude does it for us already) OR maybe 768x1024 is better. Same ratio, but "standard" XGA +Now I've added opencv-rust as a dependency to pre-segment images. To get this in ubuntu I had to do `sudo apt install libopencv-dev libclang-dev clang`. + +comment on https://www.lukasmoro.com/paper + diff --git a/build.sh b/build.sh index 8d83af1..300a490 100755 --- a/build.sh +++ b/build.sh @@ -6,6 +6,9 @@ remarkable="${1:-remarkable}" if [ "$1" == "local" ]; then cargo build --release else + # export PKG_CONFIG_SYSROOT_DIR="/sysroots/cortexa7hf-neon-remarkable-linux-gnueabi" + # export PKG_CONFIG_PATH="/sysroots/cortexa7hf-neon-remarkable-linux-gnueabi/usr/lib/pkgconfig" + # export PKG_CONFIG_ALLOW_CROSS=1 cross build --release --target=armv7-unknown-linux-gnueabihf && scp target/armv7-unknown-linux-gnueabihf/release/ghostwriter root@$remarkable: fi diff --git a/cross-deps.sh b/cross-deps.sh new file mode 100755 index 0000000..3812962 --- /dev/null +++ b/cross-deps.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +apt-get install -y \ + libssl-dev \ + libopencv-dev \ + clang \ + libclang-dev + diff --git a/src/main.rs b/src/main.rs index 8ddf4c2..fba6329 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,6 +26,9 @@ use crate::touch::Touch; mod util; use crate::util::{svg_to_bitmap, write_bitmap_to_file}; +mod segmenter; +use crate::segmenter::analyze_image; + const REMARKABLE_WIDTH: u32 = 768; const REMARKABLE_HEIGHT: u32 = 1024; @@ -119,7 +122,7 @@ fn keyboard_test() -> Result<()> { fn ghostwriter(args: &Args) -> Result<()> { let mut keyboard = Keyboard::new(args.no_draw, args.no_draw_progress); let mut pen = Pen::new(args.no_draw); - let mut touch = Touch::new(); + let mut touch = Touch::new(args.no_draw); // Default to regular text size keyboard.key_cmd_body()?; @@ -151,12 +154,6 @@ fn ghostwriter(args: &Args) -> Result<()> { return Ok(()); } - if args.no_draw { - println!("Skipping draw_text and draw_svg due to --no-draw flag"); - keyboard.progress_end()?; - return Ok(()); - } - let api_key = std::env::var("OPENAI_API_KEY")?; let tools = json!([ { @@ -202,7 +199,7 @@ fn ghostwriter(args: &Args) -> Result<()> { }, "svg": { "type": "string", - "description": "SVG data to be rendered. This is drawn on top of the input image, and should be the same size as the input image (1404x1872 px). The display can only show black and white. Try to place the output in an integrated position. Use the `Noto Sans` font-family when you are showing text. Do not use a style tag tag. Do not use any fill colors or gradients or transparency or shadows. Do include the xmlns in the main svg tag." + "description": "SVG data to be rendered. This is drawn on top of the input image, and should be the same size as the input image (768x1024 px). The display can only show black and white. Try to place the output in an integrated position. Use the `Noto Sans` font-family when you are showing text. Do not use a style tag tag. Do not use any fill colors or gradients or transparency or shadows. Do include the xmlns in the main svg tag." } }, "required": ["input_description", "output_description", "svg"] @@ -325,7 +322,8 @@ fn draw_svg(svg_data: &str, keyboard: &mut Keyboard, pen: &mut Pen, save_bitmap: fn claude_assist(args: &Args) -> Result<()> { let mut keyboard = Keyboard::new(args.no_draw, args.no_draw_progress); let mut pen = Pen::new(args.no_draw); - let mut touch = Touch::new(); + let mut touch = Touch::new(args.no_draw); + // Default to regular text size keyboard.key_cmd_body()?; @@ -357,6 +355,12 @@ fn claude_assist(args: &Args) -> Result<()> { return Ok(()); } + // Analyze the image to get bounding box descriptions + let segmentation_description = match analyze_image(&args.input_png.clone().unwrap_or("screenshot.png".to_string())) { + Ok(description) => description, + Err(e) => format!("Error analyzing image: {}", e), + }; + let api_key = std::env::var("ANTHROPIC_API_KEY")?; let tools = json!([ { @@ -382,32 +386,67 @@ fn claude_assist(args: &Args) -> Result<()> { } }, { - "name": "draw_svg", - "description": "Draw an SVG to the screen using simulated pen input. The input_description and output_description are used to build a plan for the actual output.", - "input_schema": { - "type": "object", - "properties": { - "input_description": { - "type": "string", - "description": "Description of what was detected in the input image. Include the exact pixel x, y, width, height bounding box coordinates of everything." + "name": "draw_svg", + "description": "Draw an SVG to the screen using simulated pen input. The input_description and output_description are used to build a plan for the actual output.", + "input_schema": { + "type": "object", + "properties": { + "input_description": { + "type": "string", + "description": "Description of what was detected in the input image. Include the exact pixel x, y, width, height bounding box coordinates of everything." + }, + "input_features": { + "type": "array", + "description": "A list of exact bounding boxes for important features of the input", + "items": { + "type": "object", + "description": "A specific feature and bounding box", + "properties": { + "feature_description": { + "type": "string", + "description": "Description of the feature" }, - "output_description": { - "type": "string", - "description": "Description of what will be drawn. Include the exact pixel x, y, width, height bounding box coordinates of what you want to draw." + "top_left_x_px": { + "type": "integer", + "description": "The top-left x coordinate in px" }, - "svg": { - "type": "string", - "description": "SVG data to be rendered. This is drawn on top of the input image, and should be the same size as the input image (1404x1872 px). The display can only show black and white. Try to place the output in an integrated position. Use the `Noto Sans` font-family when you are showing text. Do not use a style tag tag. Do not use any fill colors or gradients or transparency or shadows. Do include the xmlns in the main svg tag." + "top_left_y_px": { + "type": "integer", + "description": "The top-left y coordinate in px" + }, + "bottom_right_x_px": { + "type": "integer", + "description": "The bottom-right x coordinate in px" + }, + "bottom_right_y_px": { + "type": "integer", + "description": "The bottom-right y coordinate in px" } - }, - "required": ["input_description", "output_description", "svg"] - } + }, + "required": ["feature_description", "top_left_x_px", "top_left_y_px", "bottom_right_x_px", "bottom_right_y_px"] + } + }, + "output_description": { + "type": "string", + "description": "Description of what will be drawn. Include the exact pixel x, y, width, height bounding box coordinates of what you want to draw." + }, + "svg": { + "type": "string", + "description": "SVG data to be rendered. This is drawn on top of the input image, and should be the same size as the input image (768x1024 px). The display can only show black and white. Try to place the output in an integrated position. Use the `Noto Sans` font-family when you are showing text. Do not use a style tag tag. Do not use any fill colors or gradients or transparency or shadows. Do include the xmlns in the main svg tag." + } + }, + "required": [ + "input_description", + "input_features", + "output_description", + "svg" + ] + } } ]); let body = json!({ - // "model": "args.model, - "model": "claude-3-5-sonnet-20241022", + "model": "claude-3-5-sonnet-latest", "max_tokens": 5000, "messages": [{ "role": "user", @@ -416,6 +455,10 @@ fn claude_assist(args: &Args) -> Result<()> { "type": "text", "text": "You are a helpful assistant. You live inside of a remarkable2 notepad, which has a 768x1024 px sized screen which can only display grayscale. Your input is the current content of the screen, which may contain content written by the user or previously written by you (the assistant). Look at this content, interpret it, and respond to the content. The content will contain handwritten notes, diagrams, and maybe typewritten text. Respond by calling a tool. Call draw_text to output text which will be sent using simulated keyboard input. Call draw_svg to respond with an SVG drawing which will be drawn on top of the existing content. Try to place the output on the screen at coordinates that make sense. If you need to place text at a very specific location, you should output an SVG instead of keyboard text." }, + { + "type": "text", + "text": format!("Here are interesting regions based on an automatic segmentation algorithm. Use them to help identify the exact location of interesting features.\n\n{}", segmentation_description) + }, { "type": "image", "source": { @@ -448,7 +491,6 @@ fn claude_assist(args: &Args) -> Result<()> { println!("Error: {}", code); let json: serde_json::Value = response.into_json()?; println!("Response: {}", json); - // return Err(Error::from(code)); return Err(anyhow::anyhow!("API ERROR")) } Err(_) => { @@ -460,6 +502,11 @@ fn claude_assist(args: &Args) -> Result<()> { let json: serde_json::Value = response.into_json()?; println!("Response: {}", json); + + if let Some(model_output_file) = &args.model_output_file { + std::fs::write(model_output_file, json.to_string())?; + } + let tool_calls = &json["content"]; if let Some(tool_call) = tool_calls.get(0) { @@ -477,13 +524,6 @@ fn claude_assist(args: &Args) -> Result<()> { if !args.no_draw { draw_text(text, &mut keyboard)?; } - if let Some(model_output_file) = &args.model_output_file { - let params = json!({ - "function": function_name, - "arguments": arguments - }); - std::fs::write(model_output_file, params.to_string())?; - } } "draw_svg" => { let svg_data = arguments["svg"].as_str().unwrap(); @@ -491,13 +531,6 @@ fn claude_assist(args: &Args) -> Result<()> { std::fs::write(output_file, svg_data)?; } draw_svg(svg_data, &mut keyboard, &mut pen, args.save_bitmap.as_ref(), args.no_draw)?; - if let Some(model_output_file) = &args.model_output_file { - let params = json!({ - "function": function_name, - "arguments": arguments - }); - std::fs::write(model_output_file, params.to_string())?; - } } _ => { keyboard.progress_end()?; diff --git a/src/segmenter.rs b/src/segmenter.rs new file mode 100644 index 0000000..8a7bb6c --- /dev/null +++ b/src/segmenter.rs @@ -0,0 +1,225 @@ +use opencv::{ + prelude::*, + core::{Point, Rect, Scalar, Vec4i, Mat, MatTraitConst}, + imgproc::{ + self, CHAIN_APPROX_SIMPLE, RETR_EXTERNAL, + connected_components, ConnectedComponentsTypes, + }, + imgcodecs, + Error as OpenCvError, +}; +use serde::Serialize; + +#[derive(Debug, Serialize)] +pub struct Region { + pub bounds: (i32, i32, i32, i32), // x, y, width, height + pub center: (i32, i32), + pub area: f64, + pub contour_points: Vec<(i32, i32)>, +} + +#[derive(Debug, Serialize)] +pub struct SegmentationResult { + pub regions: Vec, + pub image_size: (i32, i32), +} + +pub struct ImageAnalyzer { + min_region_size: f64, + max_regions: usize, +} + +impl ImageAnalyzer { + pub fn new(min_region_size: f64, max_regions: usize) -> Self { + Self { + min_region_size, + max_regions, + } + } + + pub fn analyze_image_file(&self, image_path: &str) -> Result { + println!("Reading image from: {}", image_path); + + // Read image + let image = imgcodecs::imread(image_path, imgcodecs::IMREAD_COLOR)?; + let (height, width) = (image.rows(), image.cols()); + println!("Image loaded: {}x{}", width, height); + + // Convert to grayscale + let mut gray = Mat::default(); + imgproc::cvt_color(&image, &mut gray, imgproc::COLOR_BGR2GRAY, 0)?; + + // Apply adaptive threshold + let mut binary = Mat::default(); + imgproc::adaptive_threshold( + &gray, + &mut binary, + 255.0, + imgproc::ADAPTIVE_THRESH_GAUSSIAN_C, + imgproc::THRESH_BINARY_INV, + 11, + 2.0, + )?; + + // Find contours + let mut contours = opencv::types::VectorOfVectorOfPoint::new(); + + imgproc::find_contours( + &binary, + &mut contours, + RETR_EXTERNAL as i32, + CHAIN_APPROX_SIMPLE as i32, + Point::new(0, 0), + )?; + + println!("Found {} contours", contours.len()); + + // Process regions + let mut regions = Vec::new(); + let min_area = (width * height) as f64 * self.min_region_size; + + for i in 0..contours.len() { + let contour = contours.get(i)?; + let area = imgproc::contour_area(&contour, false)?; + + if area >= min_area { + let bounds = imgproc::bounding_rect(&contour)?; + let moments = imgproc::moments(&contour, false)?; + + // Calculate centroid + let center_x = (moments.m10 / moments.m00) as i32; + let center_y = (moments.m01 / moments.m00) as i32; + + // Convert contour points to Vec + let contour_points: Vec<(i32, i32)> = contour.iter() + .map(|p| (p.x, p.y)) + .collect(); + + regions.push(Region { + bounds: (bounds.x, bounds.y, bounds.width, bounds.height), + center: (center_x, center_y), + area, + contour_points, + }); + } + } + + // Sort by area and limit number of regions + regions.sort_by(|a, b| b.area.partial_cmp(&a.area).unwrap()); + regions.truncate(self.max_regions); + + println!("Processed {} significant regions", regions.len()); + + Ok(SegmentationResult { + regions, + image_size: (width, height), + }) + } + + pub fn analyze_with_connected_components(&self, image_path: &str) + -> Result { + let image = imgcodecs::imread(image_path, imgcodecs::IMREAD_COLOR)?; + let mut gray = Mat::default(); + imgproc::cvt_color(&image, &mut gray, imgproc::COLOR_BGR2GRAY, 0)?; + + let mut binary = Mat::default(); + imgproc::threshold(&gray, &mut binary, 127.0, 255.0, imgproc::THRESH_BINARY)?; + + // Connected components with stats + let mut labels = Mat::default(); + let mut stats = Mat::default(); + let mut centroids = Mat::default(); + + connected_components( + &binary, + &mut labels, + 8, + opencv::core::CV_32S, + )?; + + Ok(labels) + } + + pub fn generate_description(&self, result: &SegmentationResult) -> String { + let mut description = format!( + "Image size: {}x{}\nDetected {} regions:\n\n", + result.image_size.0, + result.image_size.1, + result.regions.len() + ); + + for (i, region) in result.regions.iter().enumerate() { + description.push_str(&format!( + "Region {}:\n\ + - Position: ({}, {})\n\ + - Size: {}x{}\n\ + - Center: ({}, {})\n\ + - Area: {:.2} pixels\n\ + - Relative position: {:.2}%, {:.2}%\n\n", + i + 1, + region.bounds.0, + region.bounds.1, + region.bounds.2, + region.bounds.3, + region.center.0, + region.center.1, + region.area, + (region.center.0 as f64 / result.image_size.0 as f64) * 100.0, + (region.center.1 as f64 / result.image_size.1 as f64) * 100.0, + )); + } + + description + } +} + +pub fn analyze_image(image_path: &str) -> Result { + let analyzer = ImageAnalyzer::new(0.01, 10); + + println!("\n=== Contour-based Analysis ==="); + let description = match analyzer.analyze_image_file(image_path) { + Ok(result) => analyzer.generate_description(&result), + Err(e) => format!("Error analyzing image with contours: {}", e), + }; + + println!("\n=== Connected Components Analysis ==="); + match analyzer.analyze_with_connected_components(image_path) { + Ok(labels) => { + println!("Label matrix size: {}x{}", labels.rows(), labels.cols()); + + // Get the label data + match unsafe { labels.data_typed::() } { + Ok(label_data) => { + // Print a small section of the label matrix as a sample + println!("\nSample of label matrix (top-left 10x10 if available):"); + let rows = std::cmp::min(10, labels.rows()); + let cols = std::cmp::min(10, labels.cols()); + + for i in 0..rows { + for j in 0..cols { + let idx = (i * labels.cols() + j) as usize; + print!("{:3} ", label_data[idx]); + } + println!(); + } + + // Count occurrences of each label + let mut label_counts = std::collections::HashMap::new(); + for &label in label_data.iter() { + *label_counts.entry(label).or_insert(0) += 1; + } + + println!("\nLabel counts:"); + for (&label, count) in label_counts.iter() { + println!("Label {}: {} pixels", label, count); + } + }, + Err(e) => println!("Error accessing label data: {}", e), + } + }, + Err(e) => println!("Error analyzing image with connected components: {}", e), + } + + println!("\nAnalysis complete"); + Ok(description) +} diff --git a/src/touch.rs b/src/touch.rs index 9a5c885..7340ffa 100644 --- a/src/touch.rs +++ b/src/touch.rs @@ -2,21 +2,26 @@ use anyhow::Result; use evdev::Device; pub struct Touch { - device: Device, + device: Option, } impl Touch { - pub fn new() -> Self { - let device = Device::open("/dev/input/event2").unwrap(); + pub fn new(no_touch: bool) -> Self { - Self { device: device } + let device = if no_touch { + None + } else { + Some(Device::open("/dev/input/event2").unwrap()) + }; + + Self { device } } pub fn wait_for_trigger(&mut self) -> Result<()> { let mut position_x = 0; let mut position_y = 0; loop { - for event in self.device.fetch_events().unwrap() { + for event in self.device.as_mut().unwrap().fetch_events().unwrap() { if event.code() == 53 { position_x = event.value(); } diff --git a/src/util.rs b/src/util.rs index 0837b69..f8aeec1 100644 --- a/src/util.rs +++ b/src/util.rs @@ -16,7 +16,7 @@ pub fn svg_to_bitmap(svg_data: &str, width: u32, height: u32) -> Result tree, Err(e) => { println!("Error parsing SVG: {}. Using fallback SVG.", e); - let fallback_svg = r#"ERROR!"#; + let fallback_svg = r#"ERROR!"#; Tree::from_str(fallback_svg, &opt)? } };