diff --git a/.bazelignore b/.bazelignore index c52920b24..8051e1bd7 100644 --- a/.bazelignore +++ b/.bazelignore @@ -11,5 +11,6 @@ bazel-remote-nativelink bazel-root bazel-testlogs bazel-nativelink +toolchain-examples local-remote-execution/generated-cc local-remote-execution/generated-java diff --git a/.github/styles/config/vocabularies/TraceMachina/accept.txt b/.github/styles/config/vocabularies/TraceMachina/accept.txt index 85e5b719b..a44016f24 100644 --- a/.github/styles/config/vocabularies/TraceMachina/accept.txt +++ b/.github/styles/config/vocabularies/TraceMachina/accept.txt @@ -4,9 +4,11 @@ Astro Bazel Cloudflare ELB +FFI GPUs Goma [Hh]ermeticity +JDK Kustomization LLD LLM @@ -30,7 +32,9 @@ TraceMachina Qwik Verilator Verilog +Zstandard alex +[Ee]xecution autoscaling blazingly bundler diff --git a/toolchain-examples/.bazelrc b/toolchain-examples/.bazelrc new file mode 100644 index 000000000..76ded3ff2 --- /dev/null +++ b/toolchain-examples/.bazelrc @@ -0,0 +1,17 @@ +# Don't use the host's default PATH and LD_LIBRARY_PATH. +build --incompatible_strict_action_env + +# Use rules_python's builtin script to emulate a bootstrap python. +build --@rules_python//python/config_settings:bootstrap_impl=script + +# Toolchain to verify remote execution with zig-cc. +build:zig-cc --platforms @zig_sdk//platform:linux_amd64 +build:zig-cc --extra_toolchains @zig_sdk//toolchain:linux_amd64_gnu.2.38 + +# Toolchain to verify remote execution with contrib/toolchains_llvm. +build:llvm --platforms=@toolchains_llvm//platforms:linux-x86_64 +build:llvm --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux + +# Java runtime to ensure hermeticity on the remote. 
+build:java --java_runtime_version=remotejdk_21 +build:java --tool_java_runtime_version=remotejdk_21 diff --git a/toolchain-examples/.bazelversion b/toolchain-examples/.bazelversion new file mode 100644 index 000000000..815da58b7 --- /dev/null +++ b/toolchain-examples/.bazelversion @@ -0,0 +1 @@ +7.4.1 diff --git a/toolchain-examples/MODULE.bazel b/toolchain-examples/MODULE.bazel new file mode 100644 index 000000000..60b86ff02 --- /dev/null +++ b/toolchain-examples/MODULE.bazel @@ -0,0 +1,88 @@ +module( + name = "toolchain-examples", + version = "0.0.0", + compatibility_level = 0, +) + +bazel_dep(name = "platforms", version = "0.0.10") + +# C++ +bazel_dep(name = "rules_cc", version = "0.0.17") + +# Java +bazel_dep(name = "rules_java", version = "8.5.1") + +java = use_extension("//java:extensions.bzl", "toolchains") +use_repo(java, "local_jdk") + +# Python +bazel_dep(name = "rules_python", version = "0.40.0") + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") +pip.parse( + hub_name = "pip", + python_version = "3.12", + requirements_lock = "//:requirements_lock.txt", +) + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + is_default = True, + python_version = "3.12", +) + +use_repo(pip, "pip") + +# Go +bazel_dep(name = "rules_go", version = "0.50.1") + +# Rust +bazel_dep(name = "rules_rust", version = "0.54.1") + +# C++ toolchain via zig-cc. +# +# - Hermetic: YES +# - Speed: SLOW Optimized for size rather than speed of execution. +# - Download size: SMALL +# +# To test this toolchain, use the config flag `--config=zig-cc`. +# +bazel_dep(name = "hermetic_cc_toolchain", version = "3.1.1") + +zig = use_extension("@hermetic_cc_toolchain//toolchain:ext.bzl", "toolchains") +use_repo(zig, "zig_sdk") + +# C++ toolchain via contrib/toolchains_llvm. +# +# - Hermetic: NO LLVM artifacts are dynamically linked and will +# depend on your host's glibc. 
+# - Speed: FAST Release build optimized for speed of execution. +# - Download size: LARGE Around 1.5 Gb for the linux toolchain. +# +# To test this toolchain, use the config flag `--config=llvm`. +# +bazel_dep(name = "toolchains_llvm", version = "1.2.0") + +llvm = use_extension("@toolchains_llvm//toolchain/extensions:llvm.bzl", "llvm") +llvm.toolchain(llvm_version = "19.1.0") +use_repo(llvm, "llvm_toolchain") + +# Example projects to test compilation. + +# libcurl (C) +bazel_dep(name = "curl", version = "8.8.0.bcr.1") + +# Zstandard (C) +bazel_dep(name = "zstd", version = "1.5.6") + +# Abseil for C++ +bazel_dep(name = "abseil-cpp", version = "20240722.0.bcr.1") + +# Abseil for python +bazel_dep(name = "abseil-py", version = "2.1.0") + +# GRPC +bazel_dep(name = "grpc", version = "1.68.0") + +# Circl (Go, C++) +bazel_dep(name = "circl", version = "1.3.8") diff --git a/toolchain-examples/README.md b/toolchain-examples/README.md new file mode 100644 index 000000000..a4a7b5586 --- /dev/null +++ b/toolchain-examples/README.md @@ -0,0 +1,3 @@ +# Example builds to test remote execution + +See: [RBE examples docs](http://nativelink.com/docs/rbe/remote-execution-examples) diff --git a/toolchain-examples/cpp/BUILD.bazel b/toolchain-examples/cpp/BUILD.bazel new file mode 100644 index 000000000..c01c43afd --- /dev/null +++ b/toolchain-examples/cpp/BUILD.bazel @@ -0,0 +1,6 @@ +load("@rules_cc//cc:defs.bzl", "cc_test") + +cc_test( + name = "cpp", + srcs = ["main.cpp"], +) diff --git a/toolchain-examples/cpp/main.cpp b/toolchain-examples/cpp/main.cpp new file mode 100644 index 000000000..eacc287e6 --- /dev/null +++ b/toolchain-examples/cpp/main.cpp @@ -0,0 +1,5 @@ +#include <iostream> + +auto main() -> int { + std::cout << "Hello, world!\n"; +} diff --git a/toolchain-examples/go/BUILD.bazel b/toolchain-examples/go/BUILD.bazel new file mode 100644 index 000000000..d6967a8f4 --- /dev/null +++ b/toolchain-examples/go/BUILD.bazel @@ -0,0 +1,6 @@ +load("@rules_go//go:def.bzl", 
"go_test") + +go_test( + name = "go", + srcs = ["main.go"], +) diff --git a/toolchain-examples/go/main.go b/toolchain-examples/go/main.go new file mode 100644 index 000000000..c04811917 --- /dev/null +++ b/toolchain-examples/go/main.go @@ -0,0 +1,7 @@ +package main + +import "fmt" + +func main() { + fmt.Println("hello world") +} diff --git a/toolchain-examples/java/BUILD.bazel b/toolchain-examples/java/BUILD.bazel new file mode 100644 index 000000000..07503f236 --- /dev/null +++ b/toolchain-examples/java/BUILD.bazel @@ -0,0 +1,6 @@ +load("@rules_java//java:defs.bzl", "java_test") + +java_test( + name = "HelloWorld", + srcs = ["HelloWorld.java"], +) diff --git a/toolchain-examples/java/HelloWorld.java b/toolchain-examples/java/HelloWorld.java new file mode 100644 index 000000000..40c596b82 --- /dev/null +++ b/toolchain-examples/java/HelloWorld.java @@ -0,0 +1,8 @@ +import org.junit.Test; + +public class HelloWorld { + @Test + public void testHelloWorld() { + System.out.println("Hello, World!"); + } +} diff --git a/toolchain-examples/nativelink-config.json b/toolchain-examples/nativelink-config.json new file mode 100644 index 000000000..b23f113b7 --- /dev/null +++ b/toolchain-examples/nativelink-config.json @@ -0,0 +1,146 @@ +{ + "stores": { + "AC_MAIN_STORE": { + "filesystem": { + "content_path": "/tmp/nativelink/data-worker-test/content_path-ac", + "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-ac", + "eviction_policy": { + // 1gb. + "max_bytes": 1000000000, + } + } + }, + "WORKER_FAST_SLOW_STORE": { + "fast_slow": { + // "fast" must be a "filesystem" store because the worker uses it to make + // hardlinks on disk to a directory where the jobs are running. + "fast": { + "filesystem": { + "content_path": "/tmp/nativelink/data-worker-test/content_path-cas", + "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas", + "eviction_policy": { + // 10gb. + "max_bytes": 10000000000, + } + } + }, + "slow": { + /// Discard data. 
+ /// This example usage has the CAS and the Worker live in the same place, + /// so they share the same underlying CAS. Since workers require a fast_slow + /// store, we use the fast store as our primary data store, and the slow store + /// is just a noop, since there's no shared storage in this config. + "noop": {} + } + } + } + }, + "schedulers": { + "MAIN_SCHEDULER": { + "simple": { + "supported_platform_properties": { + "cpu_count": "minimum", + "memory_kb": "minimum", + "network_kbps": "minimum", + "cpu_arch": "exact", + "OSFamily": "priority", + "container-image": "priority", + } + } + } + }, + "workers": [{ + "local": { + "worker_api_endpoint": { + "uri": "grpc://127.0.0.1:50061", + }, + "cas_fast_slow_store": "WORKER_FAST_SLOW_STORE", + "upload_action_result": { + "ac_store": "AC_MAIN_STORE", + }, + "work_directory": "/tmp/nativelink/work", + "platform_properties": { + "cpu_count": { + "values": ["16"], + }, + "memory_kb": { + "values": ["500000"], + }, + "network_kbps": { + "values": ["100000"], + }, + "cpu_arch": { + "values": ["x86_64"], + }, + "OSFamily": { + "values": [""] + }, + "container-image": { + "values": [""] + }, + } + } + }], + "servers": [{ + "name": "public", + "listener": { + "http": { + "socket_address": "0.0.0.0:50051" + } + }, + "services": { + "cas": { + "": { + "cas_store": "WORKER_FAST_SLOW_STORE" + } + }, + "ac": { + "": { + "ac_store": "AC_MAIN_STORE" + } + }, + "execution": { + "": { + "cas_store": "WORKER_FAST_SLOW_STORE", + "scheduler": "MAIN_SCHEDULER", + } + }, + "capabilities": { + "": { + "remote_execution": { + "scheduler": "MAIN_SCHEDULER", + } + } + }, + "bytestream": { + "cas_stores": { + "": "WORKER_FAST_SLOW_STORE", + } + } + } + }, { + "name": "private_workers_servers", + "listener": { + "http": { + "socket_address": "0.0.0.0:50061" + } + }, + "services": { + "experimental_prometheus": { + "path": "/metrics" + }, + // Note: This should be served on a different port, because it has + // a different permission set than 
the other services. + // In other words, this service is a backend api. The ones above + // are a frontend api. + "worker_api": { + "scheduler": "MAIN_SCHEDULER", + }, + "admin": {}, + "health": {}, + } + }], + "global": { + "max_open_files": 512 + } +} diff --git a/toolchain-examples/python/BUILD.bazel b/toolchain-examples/python/BUILD.bazel new file mode 100644 index 000000000..9e18616bd --- /dev/null +++ b/toolchain-examples/python/BUILD.bazel @@ -0,0 +1,6 @@ +load("@rules_python//python:defs.bzl", "py_test") + +py_test( + name = "python", + srcs = ["python.py"], +) diff --git a/toolchain-examples/python/python.py b/toolchain-examples/python/python.py new file mode 100644 index 000000000..1007a14d7 --- /dev/null +++ b/toolchain-examples/python/python.py @@ -0,0 +1,3 @@ +import sys + +print(f"{sys.version_info.major}.{sys.version_info.minor}") diff --git a/toolchain-examples/rust/BUILD.bazel b/toolchain-examples/rust/BUILD.bazel new file mode 100644 index 000000000..5e1623d02 --- /dev/null +++ b/toolchain-examples/rust/BUILD.bazel @@ -0,0 +1,6 @@ +load("@rules_rust//rust:defs.bzl", "rust_test") + +rust_test( + name = "rust", + srcs = ["main.rs"], +) diff --git a/toolchain-examples/rust/main.rs b/toolchain-examples/rust/main.rs new file mode 100644 index 000000000..da3137b45 --- /dev/null +++ b/toolchain-examples/rust/main.rs @@ -0,0 +1,17 @@ +fn main() { + println!("Hello, World!"); +} + +fn get_greeting() -> String { + String::from("Hello, World!") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_greeting() { + assert_eq!(get_greeting(), "Hello, World!"); + } +} diff --git a/web/platform/src/content/docs/docs/rbe/remote-execution-examples.mdx b/web/platform/src/content/docs/docs/rbe/remote-execution-examples.mdx new file mode 100644 index 000000000..b50d3cf91 --- /dev/null +++ b/web/platform/src/content/docs/docs/rbe/remote-execution-examples.mdx @@ -0,0 +1,344 @@ +--- +title: Classic remote execution examples +description: Guidance on 
testing remote execution capabilities. +--- + +Debugging remote builds can be tricky. These examples provide builds that you +can use to test the remote execution capabilities of your worker image. + +:::caution +Most builds don't work out of the box when running under remote execution, +including some builds in these examples. + +None of these builds are "recommended" targets. Some invocations might have bugs +and raise build errors. Use these tests purely to get a better picture of what +does and doesn't work with your remote execution setup. +::: + +## Getting the test sources + +All examples are in a single Bazel module at [`nativelink/toolchain-examples`](https://github.com/TraceMachina/nativelink/tree/main/toolchain-examples). + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + + + + If you haven't set up Nix yet, consider consulting the [local development setup](https://www.nativelink.com/docs/contribute/guidelines#local-development-setup) + guide. Then move to the `toolchain-examples` directory: + + ```bash + git clone https://github.com/TraceMachina/nativelink + cd nativelink/toolchain-examples + + # If you haven't set up `direnv`, remember to activate the Nix flake + # manually via `nix develop`. + ``` + + + If you're running outside of Nix, install [bazelisk](https://github.com/bazelbuild/bazelisk/tree/master). Then move to the `toolchain-examples` directory: + + ```bash + git clone https://github.com/TraceMachina/nativelink + cd nativelink/toolchain-examples + ``` + + + + +:::caution +At the moment all examples assume that your host is an `x86_64-linux` system. +::: + +## Preparing the remote execution infrastructure + + + + Port-forward your NativeLink cas/scheduler service to `localhost:50051`: + + ```bash + kubectl port-forward svc/YOURSERVICE 50051 + ``` + + + Likely the most straightforward way to test a remote execution image is by + creating a custom "test image" that you debug locally. 
If you have an + existing Dockerfile, here is what you need to adjust to test remote + execution against a locally running worker: + + ```dockerfile + # INSERT YOUR EXISTING DOCKERFILE CONTENTS HERE. + + # ... + + # Append something similar to the section below. We assume that you've built + # nativelink at a recent commit via + # `nix build github:Tracemachina/nativelink`. + # + # Then copy the executable and the `nativelink-config.json` from the + # `toolchain-examples` directory into the image and set the entrypoint to + # nativelink with that config. + + COPY nativelink /usr/bin/nativelink + COPY nativelink-config.json /etc/nativelink-config.json + + RUN chmod +x /usr/bin/nativelink + + ENTRYPOINT ["/usr/bin/nativelink"] + CMD ["/etc/nativelink-config.json"] + ``` + + Then build your image and push it to your localhost: + + ```bash + docker build . \ + -t rbetests:local + ``` + + You can now run the remote execution image locally and run builds against + it: + + ```bash + docker run \ + -e RUST_LOG=info \ + -p 50051:50051 \ + rbetests:local + ``` + + :::caution + Don't use the image you created here as worker for your cloud deployments. + Your worker should **not** bundle the nativelink executable. Instead, the + cloud deployments will automatically inject nativelink with an appropriate + config into your worker pods. + ::: + + + +All future invocations may now use the `--remote_cache=grpc://localhost:50051` +and `--remote_executor=grpc://localhost:50051` flags to send builds to the +running container. + +## Available toolchain configurations + +This Bazel module comes with some commonly used toolchains that you can enable +via `--config` flags. See the `.bazelrc` file in the `toolchain-examples` +directory for details. Here are your options: + +| Config | Hermetic | Size | Description | +| - | - | - | - | +| `zig-cc` | yes | ~100Mb | Hermetic, but slow. 
This toolchain is intended for projects that need a baseline C++ toolchain, but aren't "real" C++ projects, such as Go projects with a limited number of C FFIs. | +| `llvm` | no | ~1.5Gb | Not hermetic, but fast and standardized. This toolchain tends to be safe to use for C++ projects as long as you don't require full hermeticity. Your remote execution image needs to bundle `glibc <= 2.34` for this toolchain to work. | +| `java` | yes | ? | This sets the JDK to use a remote JDK. Use this one for Java. | + +### Notes on how to register your toolchains + +:::danger +Some toolchains come with a "register_toolchains" function. **DON'T USE IT IN +YOUR `MODULE.bazel` FILE**. +::: + +Toolchains tend to be complex dependencies and you'll almost always have bugs in +your toolchain that are build-breaking for some users. If you register your +toolchain in your `MODULE.bazel` it'll turn such bugs into hard errors that +might require deep incisions into your toolchain configuration to fix them. + +Instead, register platforms and toolchains in your `.bazelrc` file. This way you +give your users the option to opt out of your default toolchain and provide +their own. For instance: + +```bash +build:sometoolchain --platforms @sometoolchain//TODO +build:sometoolchain --extra_toolchains @sometoolchain//TODO +``` + +Now `--config=sometoolchain` is your happy path, but you keep the ability to +omit the flag so that if your happy path doesn't work you still have the ability +to build with "unsupported" toolchains. + +All examples below require some sort of `--config` flag to work with remote +execution. + +## Minimal example targets + +Examples to test whether your worker can function at all. + +:::important +Since the toolchains used here tend to focus on ease of use rather than +performance, expect build times of several minutes even for a small "Hello +World" program. + +Keep in mind that some remote toolchains first fetch tools to the executor. 
This +can take several minutes and might look like a slow compile action. +::: + +:::caution +Depending on your store setup, you might need to add one of +`--digest_function=blake3` or `--digest_function=sha256` to your Bazel +invocation. Failing to do so might cause errors about mismatching or +nonexistent artifact hashes. +::: + +### C and C++ + + + + ```bash + bazel build //cpp \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 + ``` + + + ```bash + bazel build //cpp \ + --config=llvm \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 + ``` + + + +### Python + +:::caution +The default `rules_python` bootstrap process requires a preinstalled Python on +the worker. Minimal worker images or images with Python in unexpected locations +won't work if the bootstrap process can't find this preinstalled Python. + +To work around this, it's paramount that you use the +`--@rules_python//python/config_settings:bootstrap_impl=script` flag either in +your `.bazelrc` or on the command line. This overrides the bootstrap process +with a script that simulates a Python installation. + +Failing to do so will cause your build to raise a bunch of errors and tests to +fail with confusing (but technically correct) "file not found" errors. +::: + +```bash +bazel test //python \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 +``` + +### Go + +```bash +bazel test //go \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 +``` + +### Rust + +:::caution +This one *shouldn't* work as `rules_rust` doesn't support remote execution. +If this build passes there is a high chance that you have an hermeticity issue +in your worker image. 
+::: + +```bash +bazel test //rust \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 + +# Should raise an error like this if your toolchain is correctly hermetic: +# +# error: the self-contained linker was requested, but it wasn't found in the +# target's sysroot, or in rustc's sysroot +``` + +### Java + +:::tip +Note the use of `--config=java` to ensure use of a RemoteJDK. +::: + +```bash +bazel test //java:HelloWorld \ + --config=java \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 +``` + +### All at once + +```bash +bazel test //... \ + --config=java \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 \ + --keep_going +``` + +## Larger builds + +These builds can help fine-tune larger deployments. + +:::tip +Before building one of these larger projects, consider verifying the toolchain +for the respective language via the minimal examples above. +::: + +### Curl (C) + +```bash +bazel build @curl//... \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 +``` + +### Zstandard (C) + +```bash +bazel build @zstd//... \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 +``` + +### Abseil-cpp (C++) + +:::note +Expect a bunch of errors like `No repository visible as +'@com_github_google_benchmark'` as abseil doesn't fully declare its +dependencies. Use `--keep_going` here. +::: + +```bash +bazel test @abseil-cpp//... \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 \ + --keep_going +``` + +### Abseil-py (Python) + +:::note +The `@abseil-py//absl/flags:tests/flags_test` fails on remote executors due to +potential permission restrictions. +::: + +```bash +bazel test @abseil-py//... 
\ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 +``` + +### CIRCL (Go) + +:::note +Not all tests will build, so you need `--keep_going`. +::: + +```bash +bazel test @circl//... \ + --config=zig-cc \ + --remote_cache=grpc://localhost:50051 \ + --remote_executor=grpc://localhost:50051 \ + --keep_going +``` diff --git a/web/platform/starlight.conf.ts b/web/platform/starlight.conf.ts index c42c116c8..1a29e5177 100644 --- a/web/platform/starlight.conf.ts +++ b/web/platform/starlight.conf.ts @@ -84,6 +84,16 @@ export const starlightConfig = { }, ], }, + { + label: "Testing Remote Execution", + collapsed: true, + items: [ + { + label: "Classic RBE Examples", + link: `${docsRoot}/rbe/remote-execution-examples`, + }, + ], + }, { // Corresponds to https://diataxis.fr/how-to-guides/. Guides don't // need to be "complete". They should provide practical guidance for