diff --git a/.cargo/config.toml b/.cargo/config.toml index 344995e9..a684c3d4 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,7 +1,7 @@ [env] COMPILE_ENV = { value = "compile-env", relative = true, force = false } PATH = { value = "compile-env/bin", relative = true, force = true } -LD_LIBRARY_PATH = { value = "compile-env/lib", relative = true, force = true } +#LD_LIBRARY_PATH = { value = "compile-env/lib", relative = true, force = true } LIBCLANG_PATH = { value = "compile-env/lib", relative = true, force = true } [build] diff --git a/.github/workflows/bump.yml b/.github/workflows/bump.yml index 066fd5d9..f5c54f69 100644 --- a/.github/workflows/bump.yml +++ b/.github/workflows/bump.yml @@ -51,7 +51,7 @@ jobs: - name: "install envsubst" run: | sudo apt-get update - sudo apt-get --yes --no-install-recommends gettext + sudo apt-get install --yes --no-install-recommends gettext - run: | ./scripts/update-versions.sh - name: "Create Pull Request" diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 93c329d3..725636f0 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -58,17 +58,45 @@ jobs: - '!(README.md|LICENSE|design-docs/**|.gitignore|.github/**)' - '.github/workflows/dev.yml' - build: - needs: [ check_changes ] + commits: + name: "Deduce commit history" + permissions: + contents: "read" + pull-requests: "read" + runs-on: "ubuntu-latest" + outputs: + commits: "${{ steps.commits.outputs.commits }}" + steps: + - name: "install jq" + run: | + set -euxo pipefail + sudo apt-get update + sudo apt-get install --yes --no-install-recommends jq + - name: "Checkout" + uses: "actions/checkout@v4" + with: + persist-credentials: "false" + fetch-depth: "0" + - name: "commits" + id: "commits" + run: | + set -euxo pipefail + git rev-list --pretty=oneline ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }} \ + | sed -e 's/ /", "msg": "/' -e 's/^/{"ref": "/' -e 's/$/"}/' \ + | jq --null-input 
--raw-output --compact-output '[inputs] | "commits=" + (. | tostring)' \ + | tee -a "${GITHUB_OUTPUT}" + + build-and-test: + name: "<${{ matrix.rust }}> ${{matrix.commit.msg}} [${{matrix.commit.ref}}]" + needs: [ check_changes, commits ] if: "${{ needs.check_changes.outputs.devfiles == 'true' }}" strategy: fail-fast: false matrix: rust: - "stable" - - "beta" - "nightly" - name: "Developer build (Rust ${{ matrix.rust }})" + commit: ${{ fromJSON(needs.commits.outputs.commits) }} runs-on: "lab" timeout-minutes: 45 steps: @@ -83,11 +111,12 @@ jobs: with: toolchain: "${{ matrix.rust }}" targets: "x86_64-unknown-linux-gnu,x86_64-unknown-linux-musl" - - name: "Checkout" + - name: "Checkout ${{matrix.commit.msg}}" uses: "actions/checkout@v4" with: + ref: "${{ matrix.commit.ref }}" persist-credentials: "false" - fetch-depth: "0" + fetch-depth: "1" - uses: "cargo-bins/cargo-binstall@main" - name: "install just" run: | @@ -106,33 +135,13 @@ jobs: sudo apt-get --yes --no-install-recommends install pkg-config openssl libssl-dev colorized-logs - name: "install markdown-test-report" run: | - cargo binstall --no-confirm markdown-test-report + cargo install markdown-test-report - name: refresh-compile-env run: | just --yes debug=true refresh-compile-env - run: | just --yes debug=true fake-nix - - run: | - # Run a simple build for each separate commit (for "pull_request") - # or for the HEAD of the branch (other events). 
- set -eu -o pipefail - COMMITS=${{ github.sha }} - if [[ "${{ github.event_name == 'pull_request' }}" == "true" ]]; then - # Get all commits from Pull Request, in chronological order - COMMITS=$(git rev-list --reverse ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}) - fi - for commit in $COMMITS ; do - git checkout $commit || exit 1 - printf "::group::Build commit %s\n" "$(git log --oneline --no-decorate -n 1)" - (just debug=true cargo +${{matrix.rust}} build --locked --profile=dev --target=x86_64-unknown-linux-gnu) || exit 1 - printf "::endgroup::\n" - done - printf "::notice::HEAD remains at %s\n" "$(git log --oneline --no-decorate -n 1)" - continue-on-error: ${{ matrix.rust.optional }} - - # At this stage, for Pull Requests, we're back to the HEAD of the branch, - # start running tests for different configurations. - name: "tests: rust ${{ matrix.rust }} profile=dev target=x86_64-unknown-linux-gnu" run: | export GITHUB_STEP_SUMMARY @@ -166,7 +175,7 @@ jobs: - uses: "actions/upload-artifact@v4" if: ${{ always() }} with: - name: "test-results-${{ matrix.rust }}" + name: "results-${{ matrix.rust }}-${{ matrix.commit.ref }}" path: "target/nextest/" - name: "Setup tmate session for debug" @@ -175,3 +184,17 @@ jobs: timeout-minutes: 60 with: limit-access-to-actor: true + + + summary: + name: "Dev Test and Build" + if: ${{ always() }} + runs-on: "ubuntu-latest" + needs: + - build-and-test + steps: + - name: "Flag any build matrix failures" + if: ${{ needs.build-and-test.result != 'success' }} + run: | + >&2 echo "A critical step failed!" 
+ exit 1 diff --git a/.github/workflows/sterile.yml b/.github/workflows/sterile.yml index 44364ca5..69718074 100644 --- a/.github/workflows/sterile.yml +++ b/.github/workflows/sterile.yml @@ -5,12 +5,12 @@ name: "sterile.yml" on: - pull_request: {} + pull_request: { } push: branches: - "main" merge_group: - types: ["checks_requested"] + types: [ "checks_requested" ] workflow_dispatch: inputs: debug_enabled: @@ -24,30 +24,34 @@ concurrency: cancel-in-progress: true jobs: - check_changes: - name: "Deduce required tests from code changes" + commits: + name: "Track commits" runs-on: "ubuntu-latest" outputs: devfiles: "${{ steps.changes.outputs.devfiles }}" + commits: "${{ steps.commits.outputs.commits }}" steps: + - name: "install jq" + run: | + set -euxo pipefail + sudo apt-get update + sudo apt-get install --yes --no-install-recommends jq - name: "Checkout" - if: "${{ !github.event.pull_request }}" uses: "actions/checkout@v4" with: persist-credentials: "false" fetch-depth: "0" - - name: "Check code changes" - uses: "dorny/paths-filter@v3" - id: "changes" - with: - filters: | - devfiles: - - '!(README.md|LICENSE|design-docs/**|.gitignore|.github/**)' - - '.github/workflows/sterile.yml' + - name: "commits" + id: "commits" + run: | + set -euxo pipefail + git rev-list --pretty=oneline ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }} \ + | sed -e 's/ /", "msg": "/' -e 's/^/{"ref": "/' -e 's/$/"}/' \ + | jq --null-input --raw-output --compact-output '[inputs] | "commits=" + (. 
| tostring)' \ + | tee -a "${GITHUB_OUTPUT}" test: - needs: [ check_changes ] - if: "${{ needs.check_changes.outputs.devfiles == 'true' }}" + needs: [ commits ] permissions: contents: "read" packages: "read" @@ -58,8 +62,8 @@ jobs: matrix: rust: - "stable" - - "nightly" - name: "Sterile test run (Rust ${{ matrix.rust }})" + commit: ${{ fromJSON(needs.commits.outputs.commits) }} + name: "${{matrix.commit.msg}} [${{matrix.commit.ref}} rust: ${{ matrix.rust }}]" steps: - name: "login to ghcr.io" uses: "docker/login-action@v3" @@ -81,6 +85,9 @@ jobs: - name: "Checkout" uses: "actions/checkout@v4" + with: + ref: ${{ matrix.commit.ref }} + fetch-depth: '1' - name: "dev/gnu sterile test" if: ${{ always() }} @@ -123,7 +130,7 @@ jobs: libssl-dev \ openssl \ pkg-config - cargo binstall --no-confirm markdown-test-report + cargo install markdown-test-report - name: "dev/gnu report" if: ${{ always() }} @@ -156,7 +163,7 @@ jobs: - uses: "actions/upload-artifact@v4" if: ${{ always() }} with: - name: 'rust-${{matrix.rust}}-test-results' + name: 'rust-${{matrix.rust}}-test-results-${{matrix.commit.ref}}' path: "target/nextest" - name: "Setup tmate session for debug" @@ -167,8 +174,7 @@ jobs: limit-access-to-actor: true push: - needs: [ check_changes ] - if: "${{ needs.check_changes.outputs.devfiles == 'true' }}" + needs: [ commits ] permissions: contents: "read" packages: "write" @@ -179,8 +185,7 @@ jobs: matrix: rust: - "stable" - - "nightly" - name: "Push containers (Rust ${{ matrix.rust }})" + name: "push container image [rust ${{ matrix.rust }}]" steps: - name: "login to ghcr.io" uses: "docker/login-action@v3" @@ -229,3 +234,17 @@ jobs: timeout-minutes: 60 with: limit-access-to-actor: true + + summary: + name: "Sterile Test and Build" + if: ${{ always() }} + runs-on: "ubuntu-latest" + needs: + - push + - test + steps: + - name: "Flag any build matrix failures" + if: ${{ needs.push.result != 'success' || needs.test.result != 'success' }} + run: | + >&2 echo "A critical step 
failed!" + exit 1 diff --git a/design-docs/src/mdbook/book.toml b/design-docs/src/mdbook/book.toml index fef0f441..2f6acff5 100644 --- a/design-docs/src/mdbook/book.toml +++ b/design-docs/src/mdbook/book.toml @@ -6,10 +6,8 @@ src = "src" title = "Hedgehog Dataplane Design Notes" [preprocessor.plantuml] -#plantuml-cmd = "/lib/openjdk/bin/java -jar /lib/plantuml.jar" plantuml-cmd = "/bin/plantuml" after = ["links"] -use-data-uris = false [preprocessor.mermaid] after = ["links"] @@ -29,7 +27,6 @@ strict = false [output.html] additional-css = [] -additional-js = ["./theme/external/mermaid.min.js", "./theme/external/mermaid-init.js"] smart-punctuation = true preferrred-dark-theme = "ayu" default-theme = "light" diff --git a/design-docs/src/mdbook/src/SUMMARY.md b/design-docs/src/mdbook/src/SUMMARY.md index c3fc1b8b..753aaa62 100644 --- a/design-docs/src/mdbook/src/SUMMARY.md +++ b/design-docs/src/mdbook/src/SUMMARY.md @@ -1,5 +1,7 @@ # Hedgehog Docs +- [LINKS](links.md) + - [Dataplane project: executive summary](./dataplane/executive-summary.md) - [Reflections from last time](./dataplane/reflections-from-last-time.md) - [Hardware selection for dataplane project](./dataplane/hardware.md) @@ -11,3 +13,44 @@ - [fake-nix](./build/fake-nix.md) - [Build dataplane](./build/just-cargo-build.md) - [Sterile builds](./build/sterile-build.md) + +- [Design session](./dataplane/design-session.md) + +- [Development Plan](./dataplane/development-plan.md) + - [Control plane dev-env](./dataplane/tasks2/control-plane-dev-env.md) + - [Create control plane image](./dataplane/tasks2/create-control-plane-image.md) + - [FRR Plugin](./dataplane/tasks2/frr-plugin.md) + - [Dataplane / Control plane transport](./dataplane/tasks2/dataplane-control-plane-transport.md) + - [Dataplane / Control plane protocol](./dataplane/tasks2/dataplane-control-plane-protocol.md) + - [Dataplane / Control plane reconcile](./dataplane/tasks2/dataplane-control-plane-reconcile.md) + - [Gateway test 
env](./dataplane/tasks2/gateway-test-env.md) + - [Identify local traffic](./dataplane/tasks2/identify-local-traffic.md) + - [Configuration Persistence Investigation](./dataplane/tasks2/configuration-persistence-investigation.md) + - [Route manager](./dataplane/tasks2/route-manager.md) + - [Dataplane worker lifecycle](./dataplane/tasks2/dataplane-worker-lifecycle.md) + - [Telemetry (investigation)](./dataplane/tasks2/telemetry-investigation.md) + - [Telemetry (basic)](./dataplane/tasks2/telemetry-basic.md) + - [Telemetry (integration)](./dataplane/tasks2/telemetry-integration.md) + - [Configuration database schema](./dataplane/tasks2/config-db-schema.md) + - [Management plane - dataplane interaction](./dataplane/tasks2/management-plane-dataplane-interaction.md) + - [VXLAN tunnels](./dataplane/tasks2/vxlan-tunnels.md) + + - [Underlay routing](./dataplane/tasks2/underlay-routing.md) + - [Management plane - dataplane interaction](./dataplane/tasks2/management-plane-dataplane-interaction.md) + - [Management plane - control plane interaction](./dataplane/tasks2/management-plane-control-plane-interaction.md) + - [VPC routing](./dataplane/tasks2/vpc-routing.md) + - [Rate limiting investigation](./dataplane/tasks2/rate-limiting-investigation.md) + - [VPC rate-limiting](./dataplane/tasks2/vpc-rate-limiting.md) + - [NAT44](./dataplane/tasks2/NAT44.md) + - [NAT66](./dataplane/tasks2/NAT66.md) + - [NAT64 (investigation)](./dataplane/tasks2/NAT64-investigation.md) + - [NAT64](./dataplane/tasks2/NAT64.md) + - [State sync (design)](./dataplane/tasks2/state-sync-design.md) + - [State sync (implementation)](./dataplane/tasks2/state-sync.md) + - [Public internet access](./dataplane/tasks2/public-internet-access.md) + - [Fault tolerance (implementation)](./dataplane/tasks2/fault-tolerance-implementation.md) + - [Fault tolerance (validation)](./dataplane/tasks2/fault-tolerance-validation.md) + - [Performance measurement](./dataplane/tasks2/performance-measurement.md) + - [Core 
pinning](./dataplane/tasks2/core-pinning.md) + - [One control plane daemon per container](./dataplane/tasks2/one-control-plane-daemon-per-container.md) + - [Programmatic Control of FRR](./dataplane/tasks2/programmatic-control-of-frr.md) diff --git a/design-docs/src/mdbook/src/context.toml b/design-docs/src/mdbook/src/context.toml new file mode 100644 index 00000000..b944921f --- /dev/null +++ b/design-docs/src/mdbook/src/context.toml @@ -0,0 +1 @@ +siteRoot="/" diff --git a/design-docs/src/mdbook/src/css/main.css b/design-docs/src/mdbook/src/css/main.css index ce2a1a32..b24a6d7e 100644 --- a/design-docs/src/mdbook/src/css/main.css +++ b/design-docs/src/mdbook/src/css/main.css @@ -156,21 +156,12 @@ html > body.js.sidebar-hidden { color: var(--links); } -/*figure {*/ -/* padding-left: 0.5em;*/ -/* margin-inline: 0;*/ -/* width: max(1000px, 50vw);*/ - -/* !*max-width: max(1000px, 100%);*!*/ -/*}*/ - -nav:has(figure:first-child) { - width: 1000px; - height: 1000px; -} +figure { + padding-left: 0.5em; + margin-inline: 0; + width: max(200px, 50vw); -nav > figure { - position: fixed; + max-width: max(200px, 100%); } figcaption { @@ -614,12 +605,12 @@ p:hover > a.ref-paragraph:last-child::before { } p:target::before { - /*position: relative;*/ - /*display: inline-block;*/ - /*content: var(--target-icon);*/ - /*margin-inline-start: calc(0px - var(--target-icon-distance));*/ - /*width: var(--target-icon-distance);*/ - /*border-left: 1px solid var(--links);*/ + position: relative; + display: inline-block; + content: var(--target-icon); + margin-inline-start: calc(0px - var(--target-icon-distance)); + width: var(--target-icon-distance); + border-left: 1px solid var(--links); } @@ -667,10 +658,9 @@ p:target { } figure > figcaption { - margin-top: 1.5em; + margin-top: 0.0em; background: var(--table-alternate-bg); min-width: 30%; - /*width: calc(0.8 * var(--fig-width));*/ width: fit-content(100%); } @@ -679,10 +669,6 @@ figure > figcaption { font-style: unset; } -figure { -
display: inline-block; -} - figure:target::before { content: var(--target-icon); display: inline-block; @@ -690,6 +676,11 @@ figure:target::before { width: var(--target-icon-distance); } +figure { + border-left: 1px solid var(--links); + width: fit-content; +} + a.figure-label, a.figure-label:visited, a.figure-label:target, a.figure-label:hover { text-decoration: unset; color: unset; @@ -702,3 +693,16 @@ a.figure-label, a.figure-label:visited, a.figure-label:target, a.figure-label:ho margin-inline-start: calc(0px - var(--target-icon-distance)); width: var(--target-icon-distance); } + +[href*="://"]::after { + content: "\f08e"; + font-family: FontAwesome, serif; + /*vertical-align: super;*/ + font-weight: normal; + font-style: normal; + display: inline-block; + text-decoration: none; + font-size: smaller; + padding-left: 3px; +} + diff --git a/design-docs/src/mdbook/src/dataplane/design-session.md b/design-docs/src/mdbook/src/dataplane/design-session.md new file mode 100644 index 00000000..e055010f --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/design-session.md @@ -0,0 +1,393 @@ +# Required features for MVP + +At a very high level, these are the _user facing_ features that we require to reach MVP with the gateway: + +1. BGP underlay +2. EVPN overlay +3. VPC routing (aka RIOT) +4. VPC nat 44/66 +5. VPC nat 64 +6. Telemetry +7. Rate limiting +8. AB fault tolerance +9. Management API + +## User-facing features + +
+ +```plantuml +@startdot +digraph features { +labelloc=t +graph [ranksep=0.6] + +node[shape="rect"] +BGP_underlay [ label="BGP underlay", style=filled, color="lightblue"] +EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue"] +VPC_routing [ label="VPC routing", style=filled, color="lightblue"] +VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue"] +VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue"] +Telemetry [ label="Telemetry/observability", style=filled, color="lightblue"] +rate_limiting [ label="Rate limiting", style=filled, color="lightblue"] +Fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue"] +Management_API [label="Management API", style=filled, color="lightblue"] +all [label="*"] +all -> Management_API +Management_API -> all + +BGP_underlay -> EVPN_overlay; +EVPN_overlay -> VPC_routing; +VPC_routing -> VPC_nat44_66; +VPC_routing -> VPC_nat64; +VPC_routing -> rate_limiting; +EVPN_overlay -> Fault_tolerance; +Fault_tolerance -> VPC_nat64; +Fault_tolerance -> VPC_nat44_66; +VPC_routing -> Telemetry; +VPC_nat44_66 -> Telemetry [xlabel="weak"]; +VPC_nat64 -> Telemetry [xlabel="weak"]; +rate_limiting -> Telemetry [xlabel="weak"]; +} +@enddot +``` + +> A graph of the functional dependencies between the required _user facing_ features. +> Each node on the graph represents a feature. +> No feature can be _completed_ without all of the other features which point to it. + +
+ +
+ +```plantuml +@startdot +digraph features { + labelloc=t + node [shape="box"] + graph [ranksep=0.8] + label=< Feature map
(major features)
> + + BGP_underlay [ label="BGP underlay", style=filled, color="lightblue" ] + EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue" ] + VPC_routing [ label="VPC routing\n(aka RIOT)", style=filled, color="lightblue" ] + VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue" ] + VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue" ] + telemetry [ label="Telemetry/observability", style=filled, color="lightblue" ] + rate_limiting [ label="Rate limiting", style=filled, color="lightblue" ] + fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue" ] + Management_API [ label="Management API", style=filled, color="lightblue" ] + + control_plane_integration [ label="control plane integration"] + state_sync [ label="state sync" ] + hardware_offloaded_nat [ label="offload nat" ] + hardware_offloaded_routing [ label="Underlay route offload" ] + hardware_offloaded_vpc [ label="VPC route offload" ] + hardware_offloading_basic [ label="basic offloading" ] + datastore_integration [ label="datastore integration" ] + + all [label="*"] + Management_API -> all + all -> Management_API + + datastore_integration -> control_plane_integration + datastore_integration -> hardware_offloaded_routing + hardware_offloading_basic -> hardware_offloaded_routing + hardware_offloaded_routing -> BGP_underlay + fault_tolerance -> VPC_nat44_66 + fault_tolerance -> VPC_nat64 + BGP_underlay -> EVPN_overlay + EVPN_overlay -> VPC_routing + EVPN_overlay -> state_sync + EVPN_overlay -> hardware_offloaded_vpc + hardware_offloaded_nat -> VPC_nat44_66 + hardware_offloaded_nat -> VPC_nat64 + VPC_nat44_66 -> telemetry [xlabel="weak"] + VPC_nat64 -> telemetry [xlabel="weak"] + VPC_routing -> telemetry + VPC_routing -> VPC_nat44_66 + VPC_routing -> VPC_nat64 + VPC_routing -> rate_limiting + control_plane_integration -> BGP_underlay + state_sync -> fault_tolerance + hardware_offloaded_vpc -> hardware_offloaded_nat + hardware_offloaded_vpc -> rate_limiting + 
hardware_offloading_basic -> hardware_offloaded_vpc + rate_limiting -> telemetry [xlabel="weak"] +} +@enddot +``` + +> Here is a _very_ high-level graph of the functional dependencies between the required features. +> Each node on the graph represents a feature. +> No feature can be _completed_ without all the other features which point to it. +> Features shown in blue are user facing. +> All other features represent internal implementation concerns. + +
+ +## Component Map + +
+ +```puml +@startuml +skinparam hyperlinkUnderline false +skinparam linetype ortho +!unquoted function $link($name, $url) +!return "[[" + $url + " " + $name + "]]" +!endfunction + + + +!$q = { "uote": "\"" } + +!$doc_links = { + "config_store": { "text": "Configuration Store", "url": "#configuration-store" }, + "gateway_agent": { "text": "Gateway Agent", "url": "#gateway-agent" }, + "frr_agent": { "text": "FRR agent", "url": "#frr-agent" }, + "zebra": { "text": "zebra", "url": "https://docs.frrouting.org/en/latest/zebra.html" }, + "routing_daemons": { "text": "routing daemons", "url": "#routing-daemons" }, + "hh_plugin": { "text": "Hedgehog\\nplugin", "url": "#hedgehog-plugin" }, + "kernel": { "text": "kernel", "url": "#kernel" }, + "interface_manager": { "text": "interface manager", "url": "#interface-manager" }, + "routing_manager": { "text": "routing manager", "url": "#routing-manager" }, + "dataplane_workers": { "text": "dataplane workers", "url": "#dataplane-workers" }, + "nat_manager": { "text": "nat manager", "url": "#nat-manager" }, + "control_plane_interface": { "text": "control plane interface", "url": "#control-plane-interface" }, + "management_plane_interface": { "text": "management plane interface", "url": "#management-plane-interface" }, + "state_sync": { "text": "state sync", "url": "#state-sync" }, + "dataplane_model": { "text": "dataplane model", "url": "#dataplane-model" }, + "management_plane": { "text": "management plane", "url": "#management-plane" }, + "control_plane": { "text": "control plane", "url": "#control-plane" }, + "dataplane": { "text": "dataplane", "url": "#dataplane" } +} + +!unquoted function $linked($key) + !return $link($doc_links[$key].text, $doc_links[$key].url) +!endfunction + +!unquoted function $r($key) + !return "rectangle " + $key + " as " + $q.uote + $linked($key) + $q.uote +!endfunction + +!unquoted function $db($key) + !return "database " + $key + " as " + $q.uote + $linked($key) + $q.uote +!endfunction + 
+$r(management_plane) { + $r(gateway_agent) + $db(config_store) +} + +$r(kernel) + +$r(control_plane) { + $r(routing_daemons) + $r(zebra) { + $r(hh_plugin) + } + $r(frr_agent) +} + +$r(dataplane) { + $r(control_plane_interface) + $r(management_plane_interface) + $db(dataplane_model) + $r(routing_manager) + $r(nat_manager) + $r(state_sync) + $r(interface_manager) + $r(dataplane_workers) +} + +rectangle sister_dataplane as "sister dataplane" { + rectangle rest as "..." + rectangle sister_state_sync as "state sync" +} + +rectangle nics + +control_plane_interface -- dataplane_model +dataplane_workers <--> nics : dpdk +frr_agent <--> routing_daemons +frr_agent <--> zebra +gateway_agent -- frr_agent +gateway_agent -- management_plane_interface +config_store -- gateway_agent +interface_manager -- dataplane_model +interface_manager <--> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] +dataplane_model - state_sync +dataplane_model <--> nat_manager +dataplane_model <--> routing_manager +management_plane_interface -- dataplane_model +nat_manager <--> dataplane_workers +hh_plugin --- control_plane_interface : [[ https://en.wikipedia.org/wiki/Unix_domain_socket unix socket ]] +routing_daemons <-> zebra +routing_manager <--> dataplane_workers +state_sync <-> sister_state_sync : [[ https://en.wikipedia.org/wiki/Remote_direct_memory_access rdma]] +zebra <-> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] + +@enduml +``` + +> Map of the relationships between planned dataplane components + +
+ +
+ +### Configuration Store + +I could (and maybe should) write a book about the design considerations of [Configuration Store]. +For the moment I will limit myself to a list of hard and fast requirements: + +1. CP in the [CAP theorem](https://en.wikipedia.org/wiki/CAP_theorem) sense. + - immediate consistency in the sense that + + > Every read receives either the most recent data or an error. + + - partition tolerance + + > The system continues to operate despite an arbitrary number of messages being dropped (or delayed) by the network between nodes. + + The guiding theory is that + + 1. It is better to **not** function than to **mal**function. + 2. _**It doesn't matter how quickly you can do the wrong thing**_. + +
+
+ +### Gateway Agent + +This is another subject deserving of a small book. + +For now, I will point out some notable design decisions we need to make: + +1. Do we expect a subscription model? +2. If not, do we expect the [gateway agent] to explicitly push state to dependent components? + +Beyond that, we need to make some high-level design choices: + +1. programming language? Likely Go or Rust. +2. REST? GraphQL? I tend to think REST is more appropriate at this time. + +
+
+ +### FRR agent + +Be afraid. Make Fredi fill in this section. But also be afraid. + +
+
+ +### Hedgehog Plugin + +This is a planned [zebra] plugin in the same spirit as [`fpm`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#id1) or [`dataplane_fpm_nl`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#dplane-fpm-nl). + +The core idea is to have a plugin that can be dynamically loaded into `zebra` and will listen to the `zebra` event stream for updates. +The plugin will then take those updates and push them into the dataplane agent, allowing the dataplane to react to route updates. + +
+
+ +### Routing daemons + +For the moment these are [`bgpd`](https://docs.frrouting.org/en/latest/bgp.html) and [`bfdd`](https://docs.frrouting.org/en/latest/bfd.html). + +
+
+ +### Interface Manager + +This is a component that exchanges [netlink] messages with the [kernel] in response to changes in the [dataplane model]. +Its responsibilities include: + +1. construction of virtual network interfaces needed by [zebra] +2. translation of ephemeral Linux kernel parameters into ephemeral [dpdk] parameters (e.g. netlink interface index to dpdk interface id). +3. retrieval of information not available to [zebra]/[frr] such as neighbor tables / [ARP] / [IPv6 ND] resolution or [bridge] fdb. + +### Control Plane Interface + +This component is responsible for adjudicating communication between the [control plane] and the [dataplane]. +This component is expected to: + +1. Deserialize [bincode] (or perhaps [bitcode]) messages from the [hedgehog plugin] articulating the control plane's rules for the dataplane. +2. Express error messages back to the [control plane] articulating any error conditions. + For example, if the [dataplane] is unable to offload a route for whatever reason (e.g. route type not supported), it reports this so that said route is not advertised by the [control plane]. +3. Express the offloading status (including counters) back to the [control plane] (if possible). + +### Management plane interface + +The [management plane interface] is the interface between the [management plane] and the [dataplane]. + +1. Receive [bincode] (or perhaps [bitcode]) messages from the [gateway agent] over a [unix domain socket] (or perhaps a TCP socket?), parse them, and then update the [dataplane model] to reflect the desired configuration. + +
+
+ +### Dataplane model + +This is an internal component of the [dataplane] which is responsible for managing the _desired_ state of the dataplane. It is updated by the [management plane interface] and is responsible for expressing the _desired state_ (not the observed state) of the [dataplane] to downstream components such as the [routing manager] or the [nat manager]. + +
+
+ +### State sync + +This component is responsible for synchronizing the state of sister dataplanes in the name of fault tolerance. + +
+
+ +### Routing manager + +This component is responsible for managing the routing tables for the dataplane. It is responsible for translating the _desired routing rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. + +
+
+ +### NAT manager + +This component is responsible for managing the [network address translation] tables for the dataplane. It is responsible for translating the _desired NAT rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. + + +
+
+ +### Dataplane workers + +This is a collection of [rte lcores] which are responsible for actually performing the packet processing. +The workers are responsible for performing the following tasks: + +- Receive packets from the NIC +- Identify local traffic +- Perform underlay routing +- Perform overlay routing +- Perform [NAT] +- Transmit packets to the NIC + +
+
+ +### Management Plane + +The management plane is a high-level abstraction that is responsible for + +1. Accepting API calls from the end user. +2. Translating those API calls into dataplane and control plane configuration. +3. Storing that configuration in the [Configuration Store]. + +
+
+ +### Control Plane + +The control plane is, for the moment, just [bgpd] and [bfdd]. + +
+ +{{#include ../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/development-plan.md b/design-docs/src/mdbook/src/dataplane/development-plan.md new file mode 100644 index 00000000..e6c36d01 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/development-plan.md @@ -0,0 +1,148 @@ +## Development plan + +
+ +```plantuml +@startdot +!$ptr=./tasks2 +!$optional="color=lightyellow, style=filled" +!$started="color=lightblue, style=filled" +!$completed="color=lightgreen, style=filled" +!$urgent="color=orange, style=filled" +!$difficult="color=pink, style=filled" +digraph g { + node [shape="box"] + splines=ortho; + + graph [ranksep=0.9] + labelloc=t + overlap=false; + concentrate="true"; + remincross=true; + mclimit=800; + compound=true; + + underlay_routing [label="underlay routing", href="$ptr/underlay-routing.html", fontcolor=blue] + config_db_schema [label="config db schema", $difficult, href="$ptr/config-db-schema.html", fontcolor=blue] + core_pinning [label="core pinning", $optional, href="$ptr/core-pinning.html", fontcolor=blue] + cp_api_control_investigation [label=<programmatic control of frr
(investigation)
>, $urgent, href="$ptr/programmatic-control-of-frr.html", fontcolor=blue] + cp_dev_env [label="control plane\ndev env", href="$ptr/control-plane-dev-env.html", fontcolor=blue] + cp_image_creation [ label="Create control plane container image", href="$ptr/create-control-plane-image.html", fontcolor=blue] + dp_cp_reconciliation [ label="dp/cp reconcile", href="$ptr/dataplane-control-plane-reconcile.html", fontcolor=blue ] + dp_dev_env [label="dataplane dev env", $completed, href="../../build/index.html", fontcolor=blue] + dp_dp_state_sync [label="state sync\n(implementation)", $difficult, href="$ptr/state-sync.html", fontcolor=blue] + dp_dp_state_sync_design [label="state sync\n(design)", $urgent, href="$ptr/state-sync-design.html", fontcolor=blue] + dp_image_creation [label="dataplane image build", $completed] + fault_tolerance [label="fault tolerance (implementation)", href="$ptr/fault-tolerance-implementation.html", fontcolor=blue] + fault_tolerance_proof [label="fault tolerance (validation)", $difficult, href="$ptr/fault-tolerance-validation.html", fontcolor=blue] + frr_plugin_basic [ label="frr plugin\n(basic)", href="$ptr/frr-plugin.html", fontcolor=blue ] + frr_programmatic_control [label=<programmatic
control of frr
>, $difficult, href="$ptr/programmatic-control-of-frr.html", fontcolor=blue] + gw_test_env [label="gateway test env", href="$ptr/gateway-test-env.html", fontcolor=blue] + investigate_config_persist [ label=<configuration
persistence
(investigation)
>, $urgent, href="$ptr/configuration-persistence-investigation.html", fontcolor=blue ] + local_traffic_ident [ label="identify local traffic", href="$ptr/identify-local-traffic.html", fontcolor=blue] + mp_cp_interaction [ label="management plane \ncontrol plane interaction", href="$ptr/management-plane-control-plane-interaction.html", fontcolor=blue] + mp_dp_interaction [ label="management plane \ndataplane interaction", href="$ptr/management-plane-dataplane-interaction.html", fontcolor=blue] + nat64_investigation [label=<NAT64 investigation>, $urgent, href="$ptr/NAT64-investigation.html", fontcolor=blue] + performance_measurement [ label="measure performance", href="$ptr/performance-measurement.html", fontcolor=blue] + plugin_dp_proto [ label="plugin/dp protocol", $started, href="$ptr/dataplane-control-plane-protocol.html", fontcolor=blue] + plugin_dp_transport [ label="plugin/dp transport", $completed, href="$ptr/dataplane-control-plane-transport.html", fontcolor=blue] + public_internet_access [label="public internet access", href="$ptr/public-internet-access.html", fontcolor=blue] + rate_limiting_investigation [label="rate limiting investigation", $completed] + routing_manager [label="routing manager", href="$ptr/route-manager.html", fontcolor=blue] + separate_cp_containers [ label="one cp daemon per container", $optional, href="$ptr/one-control-plane-daemon-per-container.html", fontcolor=blue] + telemetry_basic [label="telemetry (basic)", href="$ptr/telemetry-basic.html", fontcolor=blue] + telemetry_investigation [label="telemetry\n(investigation)", $completed, href="$ptr/telemetry-investigation.html", fontcolor=blue] + telemetry_integrated [label="telemetry (integration)", href="$ptr/telemetry-integration.html", fontcolor=blue] + vpc_nat44 [label="nat44", href="$ptr/NAT44.html", fontcolor=blue] + vpc_nat64 [label="nat64", $difficult, href="$ptr/NAT64.html", fontcolor=blue] + vpc_nat66 [label="nat66", href="$ptr/NAT66.html", fontcolor=blue] + vpc_rate_limiting 
[label="vpc rate limiting", href="$ptr/vpc-rate-limiting.html", fontcolor=blue] + vpc_routing [label="vpc routing", href="$ptr/vpc-routing.html", fontcolor=blue] + vxlan_tunnels [label="vxlan tunnels", href="$ptr/vxlan-tunnels.html", fontcolor=blue] + vxlan_tunnel_investigation [label="vxlan tunnels\n(investigation)", $completed] + worker_lifecycle [label="dp worker lifecycle", href="$ptr/dataplane-worker-lifecycle.html", fontcolor=blue] + + nat64_investigation -> dp_dp_state_sync_design + investigate_config_persist -> config_db_schema + dp_dp_state_sync_design -> dp_dp_state_sync + cp_api_control_investigation -> frr_programmatic_control + frr_programmatic_control -> mp_cp_interaction + vxlan_tunnel_investigation -> vxlan_tunnels + vxlan_tunnels -> vpc_routing + + nat64_investigation -> vpc_nat64 + vpc_nat64 -> public_internet_access + vpc_nat44 -> public_internet_access + vpc_nat66 -> public_internet_access + dp_dp_state_sync -> fault_tolerance + + rate_limiting_investigation -> vpc_rate_limiting + telemetry_investigation -> telemetry_basic + telemetry_basic -> telemetry_integrated + + mp_dp_interaction -> telemetry_integrated + + telemetry_integrated -> performance_measurement + core_pinning -> performance_measurement + dp_dp_state_sync -> performance_measurement + + vpc_routing -> vpc_rate_limiting + mp_cp_interaction -> vpc_routing + underlay_routing -> vpc_routing + cp_dev_env -> gw_test_env + cp_image_creation -> cp_dev_env + cp_image_creation -> separate_cp_containers + dp_cp_reconciliation -> frr_plugin_basic + dp_dev_env -> gw_test_env + dp_image_creation -> dp_dev_env + gw_test_env -> frr_plugin_basic + frr_plugin_basic -> routing_manager + config_db_schema -> mp_cp_interaction + config_db_schema -> mp_dp_interaction + local_traffic_ident -> frr_plugin_basic + mp_dp_interaction -> vpc_routing + plugin_dp_proto -> dp_cp_reconciliation + plugin_dp_transport -> dp_cp_reconciliation + routing_manager -> underlay_routing + config_db_schema -> underlay_routing 
+ vpc_routing -> vpc_nat44 + vpc_routing -> vpc_nat64 + vpc_routing -> vpc_nat66 + worker_lifecycle -> core_pinning + worker_lifecycle -> vpc_routing + + vpc_nat44 -> dp_dp_state_sync + vpc_nat66 -> dp_dp_state_sync + vpc_nat64 -> dp_dp_state_sync + fault_tolerance -> fault_tolerance_proof + + subgraph cluster_legend { + label="legend"; + started [label="started", $started] + optional [label="optional", $optional] + completed [label="\"completed\"", $completed] + urgent [label="urgent", $urgent] + difficult [label="difficult", $difficult] + } + +} +@enddot +``` +
+ +> Graph of the engineering development plan. +> Each node on the graph represents a task or required function. +> No task can be _completed_ until all the tasks which point to it have been completed. +> +> * Tasks shown in orange are points of higher uncertainty and risk. +> * Tasks shown in pink are points of expected higher difficulty. +> * Tasks shown in gray are already completed. +
+
+ +> [!NOTE] +> I am recommending that tasks with higher uncertainty (shown in orange) be addressed with all possible speed. +> Especially if they directly connect to tasks of high expected difficulty. + +> [!WARNING] +> Tasks of high expected difficulty are different from tasks which we expect will be very time-consuming. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/control-plane-interface.md b/design-docs/src/mdbook/src/dataplane/tasks/control-plane-interface.md new file mode 100644 index 00000000..0e9d7c5c --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/control-plane-interface.md @@ -0,0 +1,18 @@ +# Dataplane <-> Control Plane interface + +We need to decide how the dataplane and the control plane will exchange information. + +The obvious options for transport are + +1. tcp socket +2. unix socket + +Orthogonal to that decision is the protocol. + +1. netlink messages (yuck) +2. directly serialize frr messages (very yuck) +3. manual json schema (less yuck but slow) +4. serde driven (lovely but requires rust to get involved) + + +My vote is for unix socket and serde! diff --git a/design-docs/src/mdbook/src/dataplane/tasks/eswitch-config.md b/design-docs/src/mdbook/src/dataplane/tasks/eswitch-config.md new file mode 100644 index 00000000..74ac3dea --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/eswitch-config.md @@ -0,0 +1,5 @@ +# Eswitch configuration script + +The eswitch configuration requires the use of devlink and ethtool. + +Unfortunately, the only netlink tools currently advanced enough to actually manage the job is `iproute2`, which means this part (tragically) needs to be in bash. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/flow-manager.md b/design-docs/src/mdbook/src/dataplane/tasks/flow-manager.md new file mode 100644 index 00000000..7e835de4 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/flow-manager.md @@ -0,0 +1,29 @@ +# Flow manager + +We will need + +1. 
A method of creating, tracking, and editing flow tables + 1. This goal falls neatly under the life-cycle methods of rust. + 2. I don't think transactional edits to flow tables will be practical +2. Telemetry / error reporting on the flow tables + 1. Telemetry will need to be implemented by polling. I don't think triggers are workable. + 2. Error reporting relates this issue closely to both the [control plane interface](./control-plane-interface.md) and the [routing manager](./route-manager.md). + The essential point is that we _do not advertise routes which we cannot support_. + For example, if we fail to offload a route, then it should not be advertised by BGP. +3. Mirroring + 1. this is not especially difficult in terms of rte flow but needs to be accounted for in the timeline +4. QoS (TODO: make this another issue entirely) + 1. Traffic prioritization: + 1. Management-plane + 2. observability + 3. Control-plane + 4. GW - Synchronization + 5. User/tenant traffic + + The reasoning for the traffic prioritization is that you can block tenant traffic rather than the operational traffic and the system will keep working. If the tenant traffic wins then DoS attacks or config mistakes can more easily drop the system. + 2. Rate limiting + + This is likely to be a major client requirement, so I think we should shoot for this as part of our 1.0 api. + 3. Full HH QoS model + + This just needs research. I don't know the details of this. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/interface-manager.md b/design-docs/src/mdbook/src/dataplane/tasks/interface-manager.md new file mode 100644 index 00000000..07f2ff4a --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/interface-manager.md @@ -0,0 +1,5 @@ +# Interface manager + +We need an agent to create linux network interfaces in response to configuration requests. + +My vote would be to use [`rust-netlink`](https://github.com/rust-netlink/rtnetlink) as I am familiar with it and I (mostly) like it. 
diff --git a/design-docs/src/mdbook/src/dataplane/tasks/lifecycle-management.md b/design-docs/src/mdbook/src/dataplane/tasks/lifecycle-management.md new file mode 100644 index 00000000..2a6d62e7 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/lifecycle-management.md @@ -0,0 +1,10 @@ +# Dataplane lifecycle management + +1. How do we start up the dataplane? +2. How do we supply initial config to the dataplane? + + + +- [pick a datastore](./pick-a-datastore.md) + + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/management-interface.md b/design-docs/src/mdbook/src/dataplane/tasks/management-interface.md new file mode 100644 index 00000000..e0691af9 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/management-interface.md @@ -0,0 +1,10 @@ +# Management Interface + +We need to pick a library to monitor the data-store. + + + +- [pick a datastore](./pick-a-datastore.md) + + + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/nat-manager.md b/design-docs/src/mdbook/src/dataplane/tasks/nat-manager.md new file mode 100644 index 00000000..18e6d67f --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/nat-manager.md @@ -0,0 +1,10 @@ +# NAT manager + +We will need + +1. A hash table (I love [hashbrown](https://github.com/rust-lang/hashbrown)) +2. a memory allocator (I vote we just use the DPDK allocator) +3. a method of data exchange with the flow-manager + + + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/pick-a-datastore.md b/design-docs/src/mdbook/src/dataplane/tasks/pick-a-datastore.md new file mode 100644 index 00000000..5eee82c6 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/pick-a-datastore.md @@ -0,0 +1,43 @@ +# Pick a data store + +We need to officially pick a data store for configuration information. + +This data store _is not_ intended for storing "fast" state. +Rather, this store needs to hold configuration data which is + +1. durable +2. atomic +3. strongly typed +4. 
immediately consistent + +[`etcd`] is a reasonable choice because + +1. It is already in use in kubernetes and is therefore likely to be well-maintained and tested. +2. we are already using / integrating with kubernetes so any flaws in `etcd` are likely to impact us anyway. + +I have used [`zookeeper`](https://zookeeper.apache.org/) in the past and *strongly recommend against it*. + +I would also consider [`consul`](https://github.com/hashicorp/consul) but [the license](https://github.com/hashicorp/consul/blob/main/LICENSE) is *_not_* acceptable. + +A newer entry in the space is [`nacos`](https://github.com/alibaba/nacos) but I think it is less well suited since it only seems to support eventual consistency. + +The remaining option I know of is [`rqlite`]. _I have not used it,_ but it seems to be a reasonable option. + +- has a supported [rust client](https://github.com/tomvoet/rqlite-rs) (and even a [sqlx](https://github.com/launchbadge/sqlx) client in the form of [sqlx-rqlite](https://crates.io/crates/sqlx-rqlite)) +- [weak](https://rqlite.io/docs/api/read-consistency/#weak), [linearizable](https://rqlite.io/docs/api/read-consistency/#linearizable), and [strong](https://rqlite.io/docs/api/read-consistency/#strong) consistency models supported +- [transactions](https://rqlite.io/docs/api/api/#transactions) (this seems less than ideal tho) + +Thus, I think the real choice is between [`etcd`] and [`rqlite`]. + +That choice comes down to how much we value the functionality of sqlite (multiple indexes, referential integrity, strong schema) vs. the upsides of `etcd` (watches, battle tested and more widely used). 
+ + + + +- management +- data-store + + + +[`rqlite`]: https://rqlite.io/ +[`etcd`]: https://github.com/coreos/etcd diff --git a/design-docs/src/mdbook/src/dataplane/tasks/route-manager.md b/design-docs/src/mdbook/src/dataplane/tasks/route-manager.md new file mode 100644 index 00000000..373ee804 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/route-manager.md @@ -0,0 +1,9 @@ +# Route manager + +We will need + +1. An LPM implementation (DPDK provides one but I understand Sergi is currently on it) +2. a method of data exchange with the flow-manager + + + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/sync-with-other-gateway.md b/design-docs/src/mdbook/src/dataplane/tasks/sync-with-other-gateway.md new file mode 100644 index 00000000..e9186f1e --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/sync-with-other-gateway.md @@ -0,0 +1,22 @@ +# Sync with other gateway(s) + +We need to + +1. pick a transport +2. pick a protocol + +As for transport, we can likely start with TCP (although RoCE is almost objectively more appropriate for this job). + +As for protocol, we can likely use either [`bitcode`](https://crates.io/crates/bitcode/) or [`bincode`](https://docs.rs/bincode/latest/bincode/). + +I expect this will be a task we do later on in the project, but before we reach 1.0. + +This is a particularly tricky subject as it needs to be accounted for at all planning stages but should not be implemented until later in the process. +The sooner we implement this function the more complex all the inevitable surrounding refactoring will be. 
+ + +```yaml +label: + - milestone + - sync +``` diff --git a/design-docs/src/mdbook/src/dataplane/tasks/tracing.md b/design-docs/src/mdbook/src/dataplane/tasks/tracing.md new file mode 100644 index 00000000..2aac20d7 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/tracing.md @@ -0,0 +1,8 @@ +# Trace all the things + + +I could write a whole thing about this but honestly, just go read these (excellent) docs: + +1. [tracing crate](https://docs.rs/tracing/latest/tracing/) +2. [subscribers](https://docs.rs/tracing/latest/tracing/#related-crates) + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/traffic-workers.md b/design-docs/src/mdbook/src/dataplane/tasks/traffic-workers.md new file mode 100644 index 00000000..a4de179f --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/traffic-workers.md @@ -0,0 +1,10 @@ +# Traffic workers + +This basically comes down to launching (and correctly pinning) rte worker threads. + +The major design issue is that we need to be mindful of data exchange patterns between traffic workers (if any) and especially of data exchange between other threads which may need to access kernel functionality. + +Generally we would chase all other processes, kthreads, and rcu operations off of rte worker cores in a DPDK application. +The side effect of this is that if you need kernel operations on the same threads they will be either very inefficient or blocking. +Thus we need to move data between the threads, which can have deleterious performance effects. +Mitigating this will require planning. diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/NAT44.md b/design-docs/src/mdbook/src/dataplane/tasks2/NAT44.md new file mode 100644 index 00000000..85c8dbec --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/NAT44.md @@ -0,0 +1,3 @@ +# NAT44 + +Basic IPv4 NAT. 
diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/NAT64-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks2/NAT64-investigation.md new file mode 100644 index 00000000..88defad6 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/NAT64-investigation.md @@ -0,0 +1,8 @@ +# NAT64 (investigation) + +Linux provides no implementation of [NAT64] so we don't have much in the way of reference implementation to fall back on without going full layer 7. + +Getting the hardware offloads to work on this may be really challenging. +My understanding is that the ConnectX-7 cards are the only ones that support [NAT64] offload, and even then under limited conditions. + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/NAT64.md b/design-docs/src/mdbook/src/dataplane/tasks2/NAT64.md new file mode 100644 index 00000000..45df287f --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/NAT64.md @@ -0,0 +1,10 @@ +# NAT64 + +This one is going to be tricky. +I hesitate to posit any specific design for this at this time. +See the [investigation](./NAT64-investigation.md) for more details. + +> [!WARNING] +> Here be dragons! + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/NAT66.md b/design-docs/src/mdbook/src/dataplane/tasks2/NAT66.md new file mode 100644 index 00000000..fdb200f6 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/NAT66.md @@ -0,0 +1,6 @@ +# NAT66 + +Basic(?) IPv6 NAT. + +> [!NOTE] +> Are we really-really sure we need this? diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/config-db-schema.md b/design-docs/src/mdbook/src/dataplane/tasks2/config-db-schema.md new file mode 100644 index 00000000..ce2cb912 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/config-db-schema.md @@ -0,0 +1,105 @@ +# Configuration database schema + +One of our biggest TODO items is to create an ER diagram for our configuration database. 
+ +To be clear, I am not asserting that we need to use an RDBMS in practice. +We just need an exacting spec for the relationships between our configuration data. + +
+ +```plantuml +@startuml +skinparam linetype ortho +skinparam hyperlinkUnderline false + +hide empty description +hide empty members +hide circle + +entity Group { + **id: PK**, + name: String, +} + +entity User { + **id: PK**, + name: String, +} + +entity GroupMembership { + **id: PK**, + user: FK, + group: FK, +} + +entity Vpc { + **id: PK**, + name: String, + vrf: u32, + group: FK, +} + +entity Discriminant { + **id: PK**, + vni: Option, + vid: Option, + aci: Option<(Vid, Vni)>, + --- + Note: + \t Exclusive: vni, vid, aci + \t (only one non-null) + +} + +entity Interface { + **id: PK**, + meta: Unique, + vpc: FK, + name: String, +} + +entity IpAddressAssignment { + **id: PK**, + vpc: FK, + interface: FK, + cidr: (Ip, Subnet), + --- + -- prevent overlapping Ip assignments + exclude using gist ( + \t vpc with =, cidr inet_ops with && + ) +} + +entity Peering { + **id: PK**, + group: FK, +} + +entity PeeringRelation { + **id: PK**, + type: enum (provider, consumer, peer, direct) + peering: FK, + interface: FK, + --- + Note: + \t restrict to one provider + \t type per peering (needs gin index?) +} + +Group ||--o{ Peering +Group ||--o{ Vpc +Group ||--o| GroupMembership +Interface ||--o{ IpAddressAssignment +Interface ||--|| Discriminant +Peering ||--o{ PeeringRelation +PeeringRelation }o--|| Interface +User ||--o| GroupMembership +Vpc ||--o{ Interface +Vpc ||-o{ IpAddressAssignment + +@enduml +``` + +> We need to think about access controls and cardinality more. + +
diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/configuration-persistence-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks2/configuration-persistence-investigation.md new file mode 100644 index 00000000..1ca5d887 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/configuration-persistence-investigation.md @@ -0,0 +1,61 @@ +# Configuration persistence + +We need to officially pick a data store for configuration information. + +This data store _is not_ intended for storing "fast" state. +Rather, this store needs to hold configuration data which is + +1. durable +2. atomic +3. strongly typed +4. immediately consistent + +## etcd + +[`etcd`] is a reasonable choice because + +1. It is already in use in kubernetes and is therefore likely to be well-maintained and tested. +2. we are already using / integrating with kubernetes so any flaws in `etcd` are likely to impact us anyway. + +I have used [`zookeeper`](https://zookeeper.apache.org/) in the past and *strongly recommend against it*. + +I would also consider [`consul`](https://github.com/hashicorp/consul) but [the license](https://github.com/hashicorp/consul/blob/main/LICENSE) is *_not_* acceptable. + +A newer entry in the space is [`nacos`](https://github.com/alibaba/nacos) but I think it is less well suited since it only seems to support eventual consistency. + +## rqlite + +_I have not used [`rqlite`],_ but it seems to be a reasonable (if young) option. +My biggest concern is that [transactions](https://rqlite.io/docs/api/api/#transactions) support seems _very_ weak. 
+ +- has a supported [rust client](https://github.com/tomvoet/rqlite-rs) (and even a [sqlx](https://github.com/launchbadge/sqlx) client in the form of [sqlx-rqlite](https://crates.io/crates/sqlx-rqlite)) +- [weak](https://rqlite.io/docs/api/read-consistency/#weak), [linearizable](https://rqlite.io/docs/api/read-consistency/#linearizable), and [strong](https://rqlite.io/docs/api/read-consistency/#strong) consistency models supported +- [transactions](https://rqlite.io/docs/api/api/#transactions) (this seems less than ideal tho) + +## TiKV + +[TiKV] seems like the **strongest near-term option** on the list. + +I think that the biggest advantage is in the case that we want to _eventually_ switch to [TiDB]. +That strategy allows us the most flexibility to use a "real" database in the future while using a "simple" KV database in the near term. + +## TiDB + +[TiDB] is a [MySQL] compatible [distributed SQL] database built on top of [TiKV]. + +The thing which I find most striking about this database is the excellent documentation and robust feature set (robust all things considered). + +- [Generated columns](https://docs.pingcap.com/tidb/dev/generated-columns) +- [JSON](https://docs.pingcap.com/tidb/dev/data-type-json) +- [Referential integrity](https://docs.pingcap.com/tidb/dev/foreign-key) +- [Transactions](https://docs.pingcap.com/tidb/dev/transaction-overview) +- [Views](https://docs.pingcap.com/tidb/dev/views) +- [Change data capture](https://docs.pingcap.com/tidb/stable/ticdc-overview) + +## Summary + +Thus, I think the real choice is between [`etcd`], [TiDB], and [TiKV]. + +That choice comes down to how much we value the functionality of sql (multiple indexes, referential integrity, strong schema) vs. the upsides of kv databases (watches, more easily evolved schema). 
+ +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/control-plane-dev-env.md b/design-docs/src/mdbook/src/dataplane/tasks2/control-plane-dev-env.md new file mode 100644 index 00000000..ebc87385 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/control-plane-dev-env.md @@ -0,0 +1,15 @@ +# Control plane dev-env + +Create and document a development environment for the [`zebra`] [hedgehog plugin]. + +Requirements: + +- **REQUIRE**: can build plugin within a container +- **REQUIRE**: CI builds dev-env container +- **REQUIRE**: CI runs tests in dev-env container or, +- **IDEALLY**: tests run in a more minimal test-env container. + +```yaml issue-meta +assign: + - @Fredi Raspall +``` diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/core-pinning.md b/design-docs/src/mdbook/src/dataplane/tasks2/core-pinning.md new file mode 100644 index 00000000..ff981e1d --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/core-pinning.md @@ -0,0 +1,4 @@ +# Core pinning + +> [!NOTE] +> I think we can punt on this till the last minute! diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/create-control-plane-image.md b/design-docs/src/mdbook/src/dataplane/tasks2/create-control-plane-image.md new file mode 100644 index 00000000..3a92d3bd --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/create-control-plane-image.md @@ -0,0 +1,27 @@ +# Create a control-plane container image + +We need to generate a docker image to run our control plane. + +## Goals: + +1. **REQUIRE**: [`zebra`] plugin support +2. **REQUIRE**: [`bgpd`] support +3. **REQUIRE**: [`bfdd`] support +4. **REQUIRE**: CI builds and container +5. **REQUIRE**: [Lua scripting] should be disabled in build +6. **IDEALLY**: disable as much functionality as we can get away with +7. 
**IDEALLY**: supply a debug build and release build + +## Note: + +Both [@Fredi-raspall] and [@daniel-noland] have made some progress on this task and should sync up to get it over the line. + +```yaml issue-meta +assign: + - @Fredi Raspall +``` + +[Lua scripting]: https://docs.frrouting.org/en/latest/scripting.html + +{{#include ../../links.md}} + diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-protocol.md b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-protocol.md new file mode 100644 index 00000000..1a3420d2 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-protocol.md @@ -0,0 +1,15 @@ +# Dataplane / Control Plane communication protocol + +We need some method of sending and receiving data between the [dataplane] and [control plane]. + +This may take the form of [serde] driven message serialization and deserialization. +Use of [serde] almost certainly requires the use of [bindgen] or [cbindgen]. + +Alternatives include schema-first method such as [protobuf] or [capnproto], or a bespoke binary protocol. + +## Likely assignment + +* [@Fredi-raspall] +* coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-reconcile.md b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-reconcile.md new file mode 100644 index 00000000..eaa36057 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-reconcile.md @@ -0,0 +1,24 @@ +# Dataplane / Control Plane reconcile + +The dataplane and control plane need to communicate with each other regarding + +1. Full routing tables (for [state sync]) +2. route updates (i.e. differential updates) +3. route offloading status (including failures) +4. 
Address assignments, to ensure the dataplane can configure [local delivery](./identify-local-traffic.md) + +Keep in mind that route tables are, in general, notably more complex than a naive LPM trie, and may include features like: + +1. [ECMP]/WCMP +2. [encapsulation rules](https://www.man7.org/linux/man-pages/man8/ip-route.8.html), +3. [nexthop groups](https://man7.org/linux/man-pages/man8/ip-nexthop.8.html), +4. multicast routes (this is unlikely to be important in the near term). + +We only expect to support basic IPv4 and IPv6 LPM routes in the near term, but feature evolution should be accounted for in the design. + +## Likely dispatch + +* [@Fredi-raspall] +* coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-transport.md b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-transport.md new file mode 100644 index 00000000..9fcd97d1 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-control-plane-transport.md @@ -0,0 +1,10 @@ +# Dataplane / Control Plane communication transport + +It seems like we have all agreed on [unix domain sockets]. + +## Likely assignment + +* [@Fredi-raspall] +* coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-worker-lifecycle.md b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-worker-lifecycle.md new file mode 100644 index 00000000..75474d35 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/dataplane-worker-lifecycle.md @@ -0,0 +1,25 @@ +# Dataplane worker lifecycle + +This is mostly a design task at this point. + +Things which need to be worked out and documented: + +1. communication pattern between workers +2. communication pattern between workers and the control plane +3. communication pattern between workers and the management plane +4. 
communication pattern between workers and the telemetry / monitoring subsystems + +In each case, we need to consider + +1. performance impact, +2. thread safety, +3. design simplicity, +4. transactionality, +5. extensibility. + +## Likely dispatch + +- primary: [@daniel-noland] +- sync with: [@sergeymatov] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/fault-tolerance-implementation.md b/design-docs/src/mdbook/src/dataplane/tasks2/fault-tolerance-implementation.md new file mode 100644 index 00000000..f99fcb3a --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/fault-tolerance-implementation.md @@ -0,0 +1,4 @@ +# Fault tolerance (implementation) + +This is principally challenging from a testing perspective. +Beyond that, it mostly comes down to endless retry. diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/fault-tolerance-validation.md b/design-docs/src/mdbook/src/dataplane/tasks2/fault-tolerance-validation.md new file mode 100644 index 00000000..a560210c --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/fault-tolerance-validation.md @@ -0,0 +1,10 @@ +# Fault tolerance (validation) + +> [!WARNING] +> Fault tolerance is an extremely challenging thing to test and to prove! + +Some tools can help us here: + +1. [bolero](https://github.com/camshaft/bolero) +2. [smoltcp](https://github.com/smoltcp-rs/smoltcp) +3. [Stateright](https://github.com/stateright/stateright) diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/frr-plugin.md b/design-docs/src/mdbook/src/dataplane/tasks2/frr-plugin.md new file mode 100644 index 00000000..3fa9e3ed --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/frr-plugin.md @@ -0,0 +1,5 @@ +# FRR Plugin (basic) + +Just a feature marker for MVP FRR [plugin][hedgehog plugin]. 
+ +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/gateway-test-env.md b/design-docs/src/mdbook/src/dataplane/tasks2/gateway-test-env.md new file mode 100644 index 00000000..52d0782c --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/gateway-test-env.md @@ -0,0 +1,14 @@ +# Gateway test-env + +We need an environment in which we can run integration tests between the [control plane], [dataplane], and [management plane]. + +This task will require coordination between the [dataplane]'s extant test environment and the [control plane]'s test environment. +Note that there is _**no requirement**_ that the integration tests exist in a single environment. +In fact, it is likely best that they do not. + +## Likely assignment + +* Primary: [@Fredi-raspall] +* Coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/identify-local-traffic.md b/design-docs/src/mdbook/src/dataplane/tasks2/identify-local-traffic.md new file mode 100644 index 00000000..b35f8dee --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/identify-local-traffic.md @@ -0,0 +1,33 @@ +# Identify local traffic + +At its most basic level, the Hedgehog dataplane is a router. +While most traffic processed by the router will be directed _through_ the router, some traffic will be directed _to_ the router itself. + +The primary classes of this traffic are: + +1. [Control plane] traffic + - e.g. BGP session traffic + - (future) [IPsec] [IKE] traffic +2. [Management plane] traffic + - traffic directed to the data plane from a management plane running on another machine. + - traffic directed to the management plane from the end user (e.g., API calls). +3. Low-level network management protocol traffic + - [ARP] requests and responses + - [IPv6 ND] requests and responses + - (possibly) [LACP] pdu frames (depending on client configuration) + - [BFD] pdu frames +4. 
[state sync] traffic + - traffic to maintain state synchronization between dataplane nodes + +These types of traffic will need to be accounted for in the offload rules of the data plane to avoid: + +1. forwarding such traffic +2. dropping such traffic + +## Likely dispatch + +- develop: [@daniel-noland] +- coordinate with [@Fredi-raspall] to ensure that needed control plane traffic makes it through. +- coordinate with [@sergeymatov] to ensure that needed dataplane control traffic makes it through. + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/management-plane-control-plane-interaction.md b/design-docs/src/mdbook/src/dataplane/tasks2/management-plane-control-plane-interaction.md new file mode 100644 index 00000000..237f7948 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/management-plane-control-plane-interaction.md @@ -0,0 +1,7 @@ +# Management plane - control plane interaction + +At first I thought this was mostly dependent on the [config db schema](./config-db-schema.md) but now I think that maybe this should all be routed through the dataplane. + +> [!CAUTION] +> This is a potential source of misalignment in the project overall. +> We need to sync on this one. diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/management-plane-dataplane-interaction.md b/design-docs/src/mdbook/src/dataplane/tasks2/management-plane-dataplane-interaction.md new file mode 100644 index 00000000..a8e5c199 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/management-plane-dataplane-interaction.md @@ -0,0 +1,9 @@ +# Management Plane - Dataplane Interaction + +We need to settle on how the management plane and dataplane interact. + +1. We need a transport protocol (eg., tcp session, http session with [SSE](https://en.wikipedia.org/wiki/Server-sent_events), [WebSocket](https://en.wikipedia.org/wiki/WebSocket)). +2. We need a protocol (schema for the data we transport) +3. 
We need an overall strategy (kill-and-fill or differential updates) + + diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/one-control-plane-daemon-per-container.md b/design-docs/src/mdbook/src/dataplane/tasks2/one-control-plane-daemon-per-container.md new file mode 100644 index 00000000..aed69cc3 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/one-control-plane-daemon-per-container.md @@ -0,0 +1,4 @@ +# One control plane daemon per container + +> [!NOTE] +> I think we can punt on this one! diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/performance-measurement.md b/design-docs/src/mdbook/src/dataplane/tasks2/performance-measurement.md new file mode 100644 index 00000000..4417a429 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/performance-measurement.md @@ -0,0 +1,5 @@ +# Performance measurement + +I hate to say it, but this is going to be one of the last things we manage to get to. + +It will be challenging from a marketing perspective, but there is very little we can do about that. diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/programmatic-control-of-frr.md b/design-docs/src/mdbook/src/dataplane/tasks2/programmatic-control-of-frr.md new file mode 100644 index 00000000..5d3e3317 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/programmatic-control-of-frr.md @@ -0,0 +1,8 @@ +# Programmatic Control of FRR + +> [!NOTE] +> I am going to recommend we cheat in the short term and just use the reload method.
+ +> [!WARNING] +> **I DON'T WANT TO DO THAT LONG TERM!!!** + diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/public-internet-access.md b/design-docs/src/mdbook/src/dataplane/tasks2/public-internet-access.md new file mode 100644 index 00000000..2f396e08 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/public-internet-access.md @@ -0,0 +1,4 @@ +# Public internet access + +This is mostly distinct from the NAT tickets and [vpc routing](./vpc-routing.md) in the sense that we need to make extra sure our policy engine prohibits incorrect communication patterns. +Otherwise, the internet is just another VPC to us. diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/rate-limiting-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks2/rate-limiting-investigation.md new file mode 100644 index 00000000..adb4d38e --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/rate-limiting-investigation.md @@ -0,0 +1 @@ +# Rate limiting investigation diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/route-manager.md b/design-docs/src/mdbook/src/dataplane/tasks2/route-manager.md new file mode 100644 index 00000000..8071c935 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/route-manager.md @@ -0,0 +1,8 @@ +# Route manager + +This is basically a big TODO. + +For the moment, I would like to get some more precise feature definition from [@sergeymatov]. + +It is also important to align this task with the [dataplane worker lifecycle]. + diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/state-sync-design.md b/design-docs/src/mdbook/src/dataplane/tasks2/state-sync-design.md new file mode 100644 index 00000000..9b6591f0 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/state-sync-design.md @@ -0,0 +1,7 @@ +# State sync (design) + +My major objection to this, as an issue, is that we are inherently eventually consistent (if consistent at all) in the two actor model. 
+It seems like we are setting ourselves up for the famous [Byzantine Generals Problem](https://en.wikipedia.org/wiki/Byzantine_fault). + +> [!WARNING] +> Here be dragons! diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/state-sync-implementation.md b/design-docs/src/mdbook/src/dataplane/tasks2/state-sync-implementation.md new file mode 100644 index 00000000..a8a4f142 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/state-sync-implementation.md @@ -0,0 +1 @@ +# State sync diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/state-sync.md b/design-docs/src/mdbook/src/dataplane/tasks2/state-sync.md new file mode 100644 index 00000000..fa943d02 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/state-sync.md @@ -0,0 +1,3 @@ +# State sync + +I hesitate to make any comments on design until [NAT64 investigation](./NAT64-investigation.md) and [the design ticket](./state-sync-design.md) are further along. diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-basic.md b/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-basic.md new file mode 100644 index 00000000..e83741ea --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-basic.md @@ -0,0 +1,17 @@ +# Telemetry (basic) + +We need this implemented and hooked up. + +Skills required: + +1. [tracing] +2. [Kubernetes] +3. [graphana] +4. [loki] +5. [prometheus]? + +## Likely dispatch + +Anybody can take this one. + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-integration.md b/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-integration.md new file mode 100644 index 00000000..0029f440 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-integration.md @@ -0,0 +1,12 @@ +# Telemetry (integration) + +We need this hooked up. + +The main tasks here will be: + +1. determine customer telemetry / observability requirements +2.
integrate [tracing] with the customer's desired observability stack. +3. integrate [tracing] with our [management plane]. + + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-investigation.md new file mode 100644 index 00000000..c54fcf62 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/telemetry-investigation.md @@ -0,0 +1,16 @@ +# Telemetry (investigation) + +Trace all the things! + +I could write a whole thing about the [tracing] crate, but I don't need to. +Go read these (excellent) docs: + +1. [tracing crate][tracing] +2. [subscribers](https://docs.rs/tracing/latest/tracing/#related-crates) + + +## Dispatch + +[@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/underlay-routing.md b/design-docs/src/mdbook/src/dataplane/tasks2/underlay-routing.md new file mode 100644 index 00000000..8f50c7ce --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/underlay-routing.md @@ -0,0 +1,12 @@ +# Underlay routing + +Basic IPv4 / IPv6 routing + +Requirements: + +1. **REQUIRE**: span both NIC ports +2. **REQUIRE**: full hardware offloading +3. **REQUIRE**: basic fault tolerance +4. **REQUIRE**: [ARP] / [IPv6 ND] managed by [kernel] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/vpc-rate-limiting.md b/design-docs/src/mdbook/src/dataplane/tasks2/vpc-rate-limiting.md new file mode 100644 index 00000000..e846fc55 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/vpc-rate-limiting.md @@ -0,0 +1,6 @@ +# VPC rate-limiting + +Just rate limiting! + +Explicitly not full QoS for the moment. +If we involve QoS in the MVP then we will have zero chance on this timeline. 
diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/vpc-routing.md b/design-docs/src/mdbook/src/dataplane/tasks2/vpc-routing.md new file mode 100644 index 00000000..c701a0cd --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/vpc-routing.md @@ -0,0 +1,6 @@ +# VPC routing + +VPC routing is the process of routing within and between VPCs. + +Accomplishing this is a major milestone for the project. + diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/vxlan-tunnels b/design-docs/src/mdbook/src/dataplane/tasks2/vxlan-tunnels new file mode 100644 index 00000000..f3eff1fc --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/vxlan-tunnels @@ -0,0 +1 @@ +# VXLAN encap/decap diff --git a/design-docs/src/mdbook/src/dataplane/tasks2/vxlan-tunnels.md b/design-docs/src/mdbook/src/dataplane/tasks2/vxlan-tunnels.md new file mode 100644 index 00000000..fab89538 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks2/vxlan-tunnels.md @@ -0,0 +1,8 @@ +# VXLAN Decap + +We need systems in place to + +1. determine the [VXLAN] tunnels we should terminate / originate. +2. install and remove the [rte flow] rules needed to terminate / originate those tunnels. 
+ +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/js/main.js b/design-docs/src/mdbook/src/js/main.js index 903d73ee..69351386 100644 --- a/design-docs/src/mdbook/src/js/main.js +++ b/design-docs/src/mdbook/src/js/main.js @@ -1,17 +1,3 @@ -const formatBibliography = () => { - const referenceNumbers = document.querySelectorAll('#refs > [id^="ref-"] > p > span.csl-left-margin'); - referenceNumbers.forEach((referenceNumberSpan) => { - const referenceDiv = referenceNumberSpan.parentElement.parentElement; - const referenceNumberText = referenceNumberSpan.innerHTML.trim() - const referenceAnchor = document.createElement('a'); - referenceAnchor.setAttribute('href', `#${referenceDiv.getAttribute('id')}`); - referenceAnchor.classList.add('reference-anchor'); - referenceAnchor.innerText = referenceNumberText; - referenceNumberSpan.childNodes.forEach(node => node.remove()); - referenceNumberSpan.append(referenceAnchor); - }); -}; - const formatBlockcite = () => { const spannedCitations = document.querySelectorAll('cite[data-scope]') console.log("spannedCitations", spannedCitations); @@ -212,44 +198,6 @@ const formatGloassary = () => { }; -const formatFigures = () => { - - const figures = document.querySelectorAll('figure'); - figures.forEach((figure, idx) => { - const figureTitleElement = figure.querySelector(':scope > h6:first-child, :scope > h5:first-child, :scope > h4:first-child, :scope > h3:first-child, :scope > h2:first-child, :scope > h1:first-child') - if (!figureTitleElement) { - figure.setAttribute('title', `Figure ${idx + 1}`); - figure.setAttribute('aria-description', `Figure ${idx + 1}`); - figureTitle = document.createElement('h6'); - figureTitle.innerText = `Figure ${idx + 1}`; - figure.prepend(figureTitle); - return; - } - const figureTitleAnchor = figureTitleElement.querySelector('a.header'); - if (!figureTitleAnchor) { - console.warn(`Figure ${idx + 1} has no title anchor`); - return; - } - const title = figureTitleAnchor.innerText; - 
figure.setAttribute('aria-description', `Figure ${idx + 1}: ${title}`); - figure.setAttribute('title', `Figure ${idx + 1}: ${title}`); - - const caption = figure.querySelector(':scope > figcaption'); - if (!caption) { - return; - } - caption.setAttribute("id", `figure/caption/${figureTitleElement.getAttribute("id")}`); - figure.setAttribute("id", `figure/${figureTitleElement.getAttribute("id")}`); - figure.setAttribute("aria-describedby", caption.getAttribute("id")); - figure.setAttribute("data-figure-number", idx + 1); - const figureLabel = document.createElement('span'); - figureLabel.classList.add('figure-label'); - figureLabel.innerText = `Figure ${idx + 1}: `; - figureTitleAnchor.prepend(figureLabel); - }); - -}; - figureBlockquoteToCaption = () => { const figures = document.querySelectorAll('figure'); figures.forEach((figure, i) => { @@ -301,38 +249,20 @@ const embedPlantuml = async () => { } // svg.setAttribute('style', 'width: 100%; height: fit-content; max-width: 30vw;') // svg.removeAttribute('style'); - // svg.removeAttribute('width'); - // svg.removeAttribute('height'); + const width = svg.getAttribute('width'); + const height = svg.getAttribute('height'); + if (width) { + svg.removeAttribute('width'); + svg.style.maxWidth = width; + } + if (height) { + svg.removeAttribute('height'); + svg.style.maxHeight = height; + } svgImg.replaceWith(svgWrapper); } } -const autoEquation = () => { - const equationsInParagraphs = document.querySelectorAll('p > .katex-display:only-child'); - equationsInParagraphs.forEach(equation => { - const equationDiv = document.createElement('div'); - equationDiv.classList.add('equation'); - equation.parentElement.prepend(equationDiv); - equationDiv.append(equation); - }); -} - -const fixIeeeBib = () => { - const bibEntries = document.querySelectorAll('div.csl-entry > p'); - bibEntries.forEach(bibEntry => { - const parent = bibEntry.parentElement; - parent.innerHTML = bibEntry.innerHTML; - }); -}; - -const barOverRefs = () => { - 
const referencesSection = document.querySelector('section.references'); - if (!referencesSection) { - return; - } - referencesSection.parentElement.insertBefore(document.createElement('hr'), referencesSection); -} - const fixFootnotes = () => { const footnotesSection = document.createElement('section'); footnotesSection.setAttribute('class', 'footnotes'); @@ -442,155 +372,6 @@ const drawMarkers = () => { }); } -const showBubble = (targetBubbleId) => { - const dataOver = document.querySelector(`[data-over-note="${targetBubbleId}"]`); - const targetBubble = document.getElementById(targetBubbleId); - targetBubble.classList.add("visible"); - const dataOverRect = dataOver.getBoundingClientRect(); - const targetBubbleRect = targetBubble.getBoundingClientRect(); - console.log("dataOverRect", dataOverRect); - console.log("target bubble rect", targetBubbleRect); - const position = { - x: -10, - y: -targetBubbleRect.height - 20, //dataOverRect.top - targetBubbleRect.height * 2 - 10, - } - targetBubble.setAttribute("style", `top: ${position.y}px; left: ${position.x}px;`) -}; - -const hideBubble = (targetBubbleId) => { - const targetBubble = document.getElementById(targetBubbleId); - if (!targetBubbleId) { - console.warn("no bubble with target id", targetBubbleId); - return; - } - targetBubble.classList.remove("visible"); -} - -const bubbles = () => { - const dataOvers = document.querySelectorAll(`[data-over]`); - dataOvers.forEach(dataOver => { - const targetBubbleId = dataOver.getAttribute("data-over"); - if (!targetBubbleId) { - console.warn("data-over attribute missing value for ", dataOver); - return; - } - const targetBubble = document.getElementById(targetBubbleId); - if (!targetBubble) { - console.warn("no target bubble for ", dataOver, targetBubbleId); - return; - } - const noteIcon = document.createElement('i'); - noteIcon.classList.add('fas', 'fa-comment-alt'); - noteIcon.setAttribute('data-over-note', targetBubbleId); - noteIcon.append(targetBubble); - 
dataOver.append(noteIcon); - console.log("target bubble: ", targetBubble); - // noteIcon.setAttribute("onmouseover", `showBubble("${targetBubbleId}")`); - // noteIcon.setAttribute("onmouseout", `hideBubble("${targetBubbleId}")`); - const targetBubbleRect = targetBubble.getBoundingClientRect(); - const position = { - x: -10, - y: -targetBubbleRect.height - 20, //dataOverRect.top - targetBubbleRect.height * 2 - 10, - } - targetBubble.setAttribute("style", `top: ${-targetBubble.clientHeight - 20}px; left: ${position.x}px;`) - }); - document.styleSheets[0].insertRule(`.bubble { display: none; }`) -}; - -// const showNote = (targetNoteId) => { -// const main = document.querySelector('#content.content main'); -// if (!main) { -// console.warn("no main"); -// return; -// } -// const dataNote = document.querySelector(`[data-note="${targetNoteId}"]`); -// const targetNote = document.getElementById(targetNoteId); -// targetNote.classList.add("visible"); -// const dataNoteRect = dataNote.getBoundingClientRect(); -// const targetNoteRect = targetNote.getBoundingClientRect(); -// console.log("dataNoteRect", dataNoteRect); -// console.log("target note rect", targetNoteRect); -// // const mainRect = main.getBoundingClientRect(); -// main.append(targetNote); -// const fixedSpan = document.querySelector(`[data-note-fixed-span="${targetNoteId}"]`); -// const fixedSpanBox = fixedSpan.getBoundingClientRect(); -// console.log("fixedSpanBox", fixedSpanBox);// const mainX = mainRect.width + 10; -// // const position = { -// // x: mainX, -// // // y: dataNoteRect.top - targetNoteRect.height, -// // y: 0, -// // } -// // -// // -// targetNote.setAttribute("style", `top: ${fixedSpanBox.y}px;`) -// }; -// -// const hideNote = (targetNoteId) => { -// const targetBubble = document.getElementById(targetNoteId); -// if (!targetNoteId) { -// console.warn("no note with target id", targetNoteId); -// return; -// } -// targetBubble.classList.remove("visible"); -// }; -// -// const drawNoteNear = 
(targetNoteId) => { -// const main = document.querySelector('#content.content main'); -// if (!main) { -// console.warn("no main"); -// return; -// } -// const dataNote = document.querySelector(`[data-note="${targetNoteId}"]`); -// const targetNote = document.getElementById(targetNoteId); -// dataNote.append(targetNote); -// const dataNoteRect = dataNote.getBoundingClientRect(); -// const targetNoteRect = targetNote.getBoundingClientRect(); -// dataNote.getClientRects() -// console.log("dataNoteRect", dataNoteRect);console.log("target note rect", targetNoteRect); -// const position = { -// // x: targetNoteRect.width + 10, -// x: 0, -// // width: 0, -// // y: dataNoteRect.top - targetNoteRect.height, -// y: 0, -// } -// targetNote.setAttribute("style", `position: absolute; top: ${position.y}px; left: ${position.x}px;`) -// let offsetParent = targetNote.offsetParent; -// while (offsetParent != null) { -// console.log("offsetParent", offsetParent); -// if (offsetParent === document.body) { -// console.log("offsetParent is body"); -// } -// console.log("offsetParentClientRect", offsetParent.getBoundingClientRect()); -// offsetParent = offsetParent.offsetParent; -// } -// console.log("computed style map: ", targetNote.computedStyleMap()); -// }; -// -// const marginNotes = () => { -// const fixedSpan = document.createElement('span'); -// const dataNotes = document.querySelectorAll(`[data-note]`); -// dataNotes.forEach(dataNote => { -// const targetNoteId = dataNote.getAttribute("data-note"); -// if (!targetNoteId) { -// console.warn("data-over attribute missing value for ", dataNote); -// return; -// } -// const targetNote = document.getElementById(targetNoteId); -// if (!targetNote) { -// console.warn("no target note for ", dataNote, targetNoteId); -// return; -// } -// fixedSpan.classList.add('fixed-tag'); -// fixedSpan.setAttribute('data-note-fixed-span', targetNoteId); -// dataNote.append(fixedSpan); -// console.log("target note: ", targetNote); -// 
dataNote.setAttribute("onmouseover", `drawNoteNear("${targetNoteId}")`); -// dataNote.setAttribute("onmouseout", `showNote("${targetNoteId}")`); -// }); -// }; - - const fillInEquationReferences = () => { const equationRefs = document.querySelectorAll('a[href^="#eq/"]'); equationRefs.forEach(equationRef => { @@ -681,18 +462,34 @@ const citeParagraphs = () => { }); }; +const absToRelativeLinks = () => { + const links = document.querySelectorAll('a[href^="/"]'); + links.forEach(link => { + console.log("path to root", pathToRoot); + console.log("link: ", link); + const oldHref = link.getAttribute("href"); + const oldUrl = new URL(oldHref, window.location.href); + if (oldUrl.search !== "") { + console.warn("found unexpected search query on absolute URL, not rewriting") + return; + } + const newUrl = new URL(pathToRoot + oldHref.slice(1), window.location.href); + if (window.location.pathname === newUrl.pathname) { + link.setAttribute("href", newUrl.hash); + } else { + link.setAttribute("href", pathToRoot + newUrl.pathname + newUrl.hash); + } + }); +}; + const format = async () => { await embedPlantuml(); - formatBibliography(); formatBlockcite(); referencesSection(); formatBlockQuotes(); formatEquations() formatGloassary(); - // formatFigures(); citationReferences(); - fixIeeeBib(); - barOverRefs(); fixFootnotes(); drawMarkers(); fillInEquationReferences(); @@ -701,9 +498,8 @@ const format = async () => { footnoteBacklinks(); embedYoutubeVideos(); fixPlantumlSvgBackground(); - bubbles(); citeParagraphs(); - // marginNotes(); + // absToRelativeLinks(); } const main = async () => { diff --git a/design-docs/src/mdbook/src/links.md b/design-docs/src/mdbook/src/links.md new file mode 100644 index 00000000..67e17a9b --- /dev/null +++ b/design-docs/src/mdbook/src/links.md @@ -0,0 +1,67 @@ + + +[configuration store]: /dataplane/design-session.md#configuration-store +[control plane]: /dataplane/design-session.md#control-plane +[dataplane model]: 
/dataplane/design-session.md#dataplane-model +[dataplane worker]: /dataplane/design-session.md#dataplane-workers +[dataplane]: /dataplane/design-session.md#dataplane +[gateway agent]: /dataplane/design-session.md#gateway-agent +[hedgehog plugin]: /dataplane/design-session.md#hedgehog-plugin +[management plane interface]: /dataplane/design-session.md#management-plane-interface +[management plane]: /dataplane/design-session.md#management-plane +[nat manager]: /dataplane/design-session.md#nat-manager +[routing manager]: /dataplane/design-session.md#routing-manager +[state sync]: /dataplane/design-session.md#state-sync + + + +[ARP]: https://en.wikipedia.org/wiki/Address_Resolution_Protocol +[BFD]: https://en.wikipedia.org/wiki/Bidirectional_Forwarding_Detection +[ECMP]: https://en.wikipedia.org/wiki/Equal-cost_multi-path_routing +[IKE]: https://en.wikipedia.org/wiki/Internet_Key_Exchange +[IPsec]: https://en.wikipedia.org/wiki/IPsec +[IPv6 ND]: https://en.wikipedia.org/wiki/Neighbor_Discovery_Protocol +[LACP]: https://en.wikipedia.org/wiki/Link_aggregation#Link_Aggregation_Control_Protocol +[MySQL]: https://www.mysql.com/ +[NAT]: https://en.wikipedia.org/wiki/Network_address_translation +[TiDB]: https://www.pingcap.com/ +[TiKV]: https://tikv.org/ +[VXLAN]: https://en.wikipedia.org/wiki/Virtual_Extensible_LAN +[`bfdd`]: https://docs.frrouting.org/en/latest/bfd.html +[`bgpd`]: https://docs.frrouting.org/en/latest/bgp.html +[`etcd`]: https://github.com/coreos/etcd +[`rqlite`]: https://rqlite.io/ +[`zebra`]: https://docs.frrouting.org/en/latest/zebra.html +[bfdd]: https://docs.frrouting.org/en/latest/bfd.html +[bgpd]: https://docs.frrouting.org/en/latest/bgp.html +[bincode]: https://github.com/bincode-org/bincode?tab=readme-ov-file +[bindgen]: https://github.com/rust-lang/rust-bindgen +[bitcode]: https://crates.io/crates/bitcode/ +[bridge]: https://man7.org/linux/man-pages/man8/bridge.8.html +[capnproto]: https://capnproto.org/ +[cbindgen]: 
https://github.com/mozilla/cbindgen +[distributed SQL]: https://en.wikipedia.org/wiki/Distributed_SQL +[dpdk]: https://www.dpdk.org/ +[frr]: https://frrouting.org/ +[graphana]: https://grafana.com/ +[kernel]: https://en.wikipedia.org/wiki/Linux_kernel +[kubernetes]: https://kubernetes.io/ +[loki]: https://grafana.com/docs/loki/latest/get-started/overview/ +[netlink]: https://en.wikipedia.org/wiki/Netlink +[network address translation]: https://en.wikipedia.org/wiki/Network_address_translation +[prometheus]: https://prometheus.io/ +[protobuf]: https://protobuf.dev/ +[rte lcores]: https://doc.dpdk.org/api/rte__lcore_8h.html +[serde]: https://serde.rs/ +[tracing]: https://docs.rs/tracing/latest/tracing/ +[unix domain socket]: https://en.wikipedia.org/wiki/Unix_domain_socket +[zebra]: https://docs.frrouting.org/en/latest/zebra.html + + + +[@Fredi-raspall]: https://github.com/Fredi-raspall +[@cesargithedgehog]: https://github.com/cesargithedgehog +[@daniel-noland]: https://github.com/daniel-noland +[@qmonnet]: https://github.com/qmonnet +[@sergeymatov]: https://github.com/sergeymatov +[@thedvorkin]: https://github.com/thedvorkin diff --git a/design-docs/src/mdbook/theme/head.hbs b/design-docs/src/mdbook/theme/head.hbs index 71232803..7cec400a 100644 --- a/design-docs/src/mdbook/theme/head.hbs +++ b/design-docs/src/mdbook/theme/head.hbs @@ -1,3 +1,14 @@ + + + diff --git a/dpdk/src/dev.rs b/dpdk/src/dev.rs index c7d16939..269af95e 100644 --- a/dpdk/src/dev.rs +++ b/dpdk/src/dev.rs @@ -387,31 +387,55 @@ impl From for TxOffload { use wrte_eth_tx_offload::*; TxOffload( if value.geneve_tnl_tso { - GENEVE_TNL_TSO + TX_OFFLOAD_GENEVE_TNL_TSO } else { 0 - } | if value.gre_tnl_tso { GRE_TNL_TSO } else { 0 } - | if value.ipip_tnl_tso { IPIP_TNL_TSO } else { 0 } - | if value.ipv4_cksum { IPV4_CKSUM } else { 0 } - | if value.macsec_insert { - MACSEC_INSERT + } | if value.gre_tnl_tso { + TX_OFFLOAD_GRE_TNL_TSO + } else { + 0 + } | if value.ipip_tnl_tso { + TX_OFFLOAD_IPIP_TNL_TSO + } 
else { + 0 + } | if value.ipv4_cksum { + TX_OFFLOAD_IPV4_CKSUM + } else { + 0 + } | if value.macsec_insert { + TX_OFFLOAD_MACSEC_INSERT + } else { + 0 + } | if value.outer_ipv4_cksum { + TX_OFFLOAD_OUTER_IPV4_CKSUM + } else { + 0 + } | if value.qinq_insert { + TX_OFFLOAD_QINQ_INSERT + } else { + 0 + } | if value.sctp_cksum { + TX_OFFLOAD_SCTP_CKSUM + } else { + 0 + } | if value.tcp_cksum { + TX_OFFLOAD_TCP_CKSUM + } else { + 0 + } | if value.tcp_tso { TX_OFFLOAD_TCP_TSO } else { 0 } + | if value.udp_cksum { + TX_OFFLOAD_UDP_CKSUM } else { 0 } - | if value.outer_ipv4_cksum { - OUTER_IPV4_CKSUM + | if value.udp_tso { TX_OFFLOAD_UDP_TSO } else { 0 } + | if value.vlan_insert { + TX_OFFLOAD_VLAN_INSERT } else { 0 } - | if value.qinq_insert { QINQ_INSERT } else { 0 } - | if value.sctp_cksum { SCTP_CKSUM } else { 0 } - | if value.tcp_cksum { TCP_CKSUM } else { 0 } - | if value.tcp_tso { TCP_TSO } else { 0 } - | if value.udp_cksum { UDP_CKSUM } else { 0 } - | if value.udp_tso { UDP_TSO } else { 0 } - | if value.vlan_insert { VLAN_INSERT } else { 0 } | if value.vxlan_tnl_tso { - VXLAN_TNL_TSO + TX_OFFLOAD_VXLAN_TNL_TSO } else { 0 } @@ -424,20 +448,20 @@ impl From for TxOffloadConfig { fn from(value: TxOffload) -> Self { use wrte_eth_tx_offload::*; TxOffloadConfig { - geneve_tnl_tso: value.0 & GENEVE_TNL_TSO != 0, - gre_tnl_tso: value.0 & GRE_TNL_TSO != 0, - ipip_tnl_tso: value.0 & IPIP_TNL_TSO != 0, - ipv4_cksum: value.0 & IPV4_CKSUM != 0, - macsec_insert: value.0 & MACSEC_INSERT != 0, - outer_ipv4_cksum: value.0 & OUTER_IPV4_CKSUM != 0, - qinq_insert: value.0 & QINQ_INSERT != 0, - sctp_cksum: value.0 & SCTP_CKSUM != 0, - tcp_cksum: value.0 & TCP_CKSUM != 0, - tcp_tso: value.0 & TCP_TSO != 0, - udp_cksum: value.0 & UDP_CKSUM != 0, - udp_tso: value.0 & UDP_TSO != 0, - vlan_insert: value.0 & VLAN_INSERT != 0, - vxlan_tnl_tso: value.0 & VXLAN_TNL_TSO != 0, + geneve_tnl_tso: value.0 & TX_OFFLOAD_GENEVE_TNL_TSO != 0, + gre_tnl_tso: value.0 & TX_OFFLOAD_GRE_TNL_TSO != 0, + 
ipip_tnl_tso: value.0 & TX_OFFLOAD_IPIP_TNL_TSO != 0, + ipv4_cksum: value.0 & TX_OFFLOAD_IPV4_CKSUM != 0, + macsec_insert: value.0 & TX_OFFLOAD_MACSEC_INSERT != 0, + outer_ipv4_cksum: value.0 & TX_OFFLOAD_OUTER_IPV4_CKSUM != 0, + qinq_insert: value.0 & TX_OFFLOAD_QINQ_INSERT != 0, + sctp_cksum: value.0 & TX_OFFLOAD_SCTP_CKSUM != 0, + tcp_cksum: value.0 & TX_OFFLOAD_TCP_CKSUM != 0, + tcp_tso: value.0 & TX_OFFLOAD_TCP_TSO != 0, + udp_cksum: value.0 & TX_OFFLOAD_UDP_CKSUM != 0, + udp_tso: value.0 & TX_OFFLOAD_UDP_TSO != 0, + vlan_insert: value.0 & TX_OFFLOAD_VLAN_INSERT != 0, + vxlan_tnl_tso: value.0 & TX_OFFLOAD_VXLAN_TNL_TSO != 0, unknown: value.0 & !TxOffload::ALL_KNOWN.0, } } @@ -445,52 +469,53 @@ impl From for TxOffloadConfig { impl TxOffload { /// GENEVE tunnel segmentation offload. - pub const GENEVE_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::GENEVE_TNL_TSO); + pub const GENEVE_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_GENEVE_TNL_TSO); /// GRE tunnel segmentation offload. - pub const GRE_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::GRE_TNL_TSO); + pub const GRE_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_GRE_TNL_TSO); /// IPIP tunnel segmentation offload. - pub const IPIP_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::IPIP_TNL_TSO); + pub const IPIP_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_IPIP_TNL_TSO); /// IPv4 checksum calculation. - pub const IPV4_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::IPV4_CKSUM); + pub const IPV4_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_IPV4_CKSUM); /// MACsec insertion. - pub const MACSEC_INSERT: TxOffload = TxOffload(wrte_eth_tx_offload::MACSEC_INSERT); + pub const MACSEC_INSERT: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_MACSEC_INSERT); /// Outer IPv4 checksum calculation. 
- pub const OUTER_IPV4_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::OUTER_IPV4_CKSUM); + pub const OUTER_IPV4_CKSUM: TxOffload = + TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_OUTER_IPV4_CKSUM); /// QinQ (double VLAN) insertion. - pub const QINQ_INSERT: TxOffload = TxOffload(wrte_eth_tx_offload::QINQ_INSERT); + pub const QINQ_INSERT: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_QINQ_INSERT); /// SCTP checksum calculation. - pub const SCTP_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::SCTP_CKSUM); + pub const SCTP_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_SCTP_CKSUM); /// TCP checksum calculation. - pub const TCP_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::TCP_CKSUM); + pub const TCP_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_TCP_CKSUM); /// TCP segmentation offload. - pub const TCP_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::TCP_TSO); + pub const TCP_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_TCP_TSO); /// UDP checksum calculation. - pub const UDP_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::UDP_CKSUM); + pub const UDP_CKSUM: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_UDP_CKSUM); /// UDP segmentation offload. - pub const UDP_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::UDP_TSO); + pub const UDP_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_UDP_TSO); /// VXLAN tunnel segmentation offload. - pub const VXLAN_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::VXLAN_TNL_TSO); + pub const VXLAN_TNL_TSO: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_VXLAN_TNL_TSO); /// VLAN tag insertion. - pub const VLAN_INSERT: TxOffload = TxOffload(wrte_eth_tx_offload::VLAN_INSERT); + pub const VLAN_INSERT: TxOffload = TxOffload(wrte_eth_tx_offload::TX_OFFLOAD_VLAN_INSERT); /// Union of all [`TxOffload`]s documented at the time of writing. 
pub const ALL_KNOWN: TxOffload = { use wrte_eth_tx_offload::*; TxOffload( - GENEVE_TNL_TSO - | GRE_TNL_TSO - | IPIP_TNL_TSO - | IPV4_CKSUM - | MACSEC_INSERT - | OUTER_IPV4_CKSUM - | QINQ_INSERT - | SCTP_CKSUM - | TCP_CKSUM - | TCP_TSO - | UDP_CKSUM - | UDP_TSO - | VLAN_INSERT - | VXLAN_TNL_TSO, + TX_OFFLOAD_GENEVE_TNL_TSO + | TX_OFFLOAD_GRE_TNL_TSO + | TX_OFFLOAD_IPIP_TNL_TSO + | TX_OFFLOAD_IPV4_CKSUM + | TX_OFFLOAD_MACSEC_INSERT + | TX_OFFLOAD_OUTER_IPV4_CKSUM + | TX_OFFLOAD_QINQ_INSERT + | TX_OFFLOAD_SCTP_CKSUM + | TX_OFFLOAD_TCP_CKSUM + | TX_OFFLOAD_TCP_TSO + | TX_OFFLOAD_UDP_CKSUM + | TX_OFFLOAD_UDP_TSO + | TX_OFFLOAD_VLAN_INSERT + | TX_OFFLOAD_VXLAN_TNL_TSO, ) }; } diff --git a/justfile b/justfile index 1383fd06..c2b809b6 100644 --- a/justfile +++ b/justfile @@ -41,7 +41,7 @@ profile := "dev" _container_repo := "ghcr.io/githedgehog/dataplane" rust := "stable" _dpdk_sys_container_repo := "ghcr.io/githedgehog/dpdk-sys" -_dpdk_sys_container_tag := dpdk_sys_commit + "-rust-" + rust +_dpdk_sys_container_tag := dpdk_sys_commit + ".rust-" + rust _dev_env_container := _dpdk_sys_container_repo + "/dev-env:" + _dpdk_sys_container_tag _doc_env_container := _dpdk_sys_container_repo + "/doc-env:" + _dpdk_sys_container_tag _compile_env_container := _dpdk_sys_container_repo + "/compile-env:" + _dpdk_sys_container_tag @@ -480,8 +480,10 @@ report: # run commands in a minimal mdbook container [script] mdbook *args="build": + {{ _just_debuggable_ }} mkdir -p /tmp/doc-env cd ./design-docs/src/mdbook + docker pull {{ _doc_env_container }} docker run \ --rm \ --init \ diff --git a/scratch/src/main.rs b/scratch/src/main.rs index 232aa84e..56101352 100644 --- a/scratch/src/main.rs +++ b/scratch/src/main.rs @@ -220,12 +220,12 @@ fn check_hairpin_cap(port_id: u16) { fn init_port2(port_id: u16, mbuf_pool: &mut rte_mempool) { let mut port_conf = rte_eth_conf { txmode: rte_eth_txmode { - offloads: wrte_eth_tx_offload::VLAN_INSERT - | wrte_eth_tx_offload::IPV4_CKSUM - | 
wrte_eth_tx_offload::UDP_CKSUM - | wrte_eth_tx_offload::TCP_CKSUM - | wrte_eth_tx_offload::SCTP_CKSUM - | wrte_eth_tx_offload::TCP_TSO, + offloads: wrte_eth_tx_offload::TX_OFFLOAD_VLAN_INSERT + | wrte_eth_tx_offload::TX_OFFLOAD_IPV4_CKSUM + | wrte_eth_tx_offload::TX_OFFLOAD_UDP_CKSUM + | wrte_eth_tx_offload::TX_OFFLOAD_TCP_CKSUM + | wrte_eth_tx_offload::TX_OFFLOAD_SCTP_CKSUM + | wrte_eth_tx_offload::TX_OFFLOAD_TCP_TSO, ..Default::default() }, ..Default::default() diff --git a/scripts/dpdk-sys.env b/scripts/dpdk-sys.env index 3b0195a9..2132ad47 100644 --- a/scripts/dpdk-sys.env +++ b/scripts/dpdk-sys.env @@ -1,2 +1,2 @@ DPDK_SYS_BRANCH="main" -DPDK_SYS_COMMIT="bd2967e3e36851b5591f1bb944cc7f28b3639e1a" +DPDK_SYS_COMMIT="362f54faf27e7b02148fe524492455e2f6762854"