diff --git a/.github/workflows/bump.yml b/.github/workflows/bump.yml index f5c54f69..81b73273 100644 --- a/.github/workflows/bump.yml +++ b/.github/workflows/bump.yml @@ -1,4 +1,4 @@ -# Check for Cargo dependencies updates, and automatically open a Pull Request +# Check for Cargo dependencies updates and automatically open a Pull Request # if updates are found. name: "bump.yml" diff --git a/design-docs/src/mdbook/src/SUMMARY.md b/design-docs/src/mdbook/src/SUMMARY.md index dbe233c5..11a77b83 100644 --- a/design-docs/src/mdbook/src/SUMMARY.md +++ b/design-docs/src/mdbook/src/SUMMARY.md @@ -1,17 +1,14 @@ -# Hedgehog Docs +# Hedgehog Dataplane Design Docs -- [Dataplane project: executive summary](./dataplane/executive-summary.md) - - [Reflections from last time](./dataplane/reflections-from-last-time.md) - - [Hardware selection for dataplane project](./dataplane/hardware.md) -- [Offloading the dataplane](./dataplane/offloading-plan.md) -- [DPDK abstraction map](./dataplane/map-of-dpdk.md) - [Build process](./build/index.md) - [Prerequisites](./build/prerequisites.md) - - [Compile env](./build/compile-env.md) - - [fake-nix](./build/fake-nix.md) - - [Build dataplane](./build/just-cargo-build.md) + - [Getting the compile environment](./build/compile-env.md) + - [`just fake-nix`](./build/fake-nix.md) + - [Building the dataplane](./build/just-cargo-build.md) - [Sterile builds](./build/sterile-build.md) -- [Design](./dataplane/design.md) +- [Design](./dataplane/design/index.md) + - [Offloading the dataplane](./dataplane/design/offloading-plan.md) + - [DPDK abstraction map](./dataplane/design/map-of-dpdk.md) - [Development Plan](./dataplane/development-plan.md) - [Configuration Persistence Investigation](./dataplane/tasks/configuration-persistence-investigation.md) - [Configuration database schema](./dataplane/tasks/config-db-schema.md) diff --git a/design-docs/src/mdbook/src/build/just-cargo-build.md b/design-docs/src/mdbook/src/build/just-cargo-build.md index 
2d7d6143..c4cabeda 100644 --- a/design-docs/src/mdbook/src/build/just-cargo-build.md +++ b/design-docs/src/mdbook/src/build/just-cargo-build.md @@ -47,4 +47,4 @@ should take care of it for normal development flows. But there is a second level to this story. -Sterile builds! +[Sterile builds!](./sterile-build.md) diff --git a/design-docs/src/mdbook/src/dataplane/design.md b/design-docs/src/mdbook/src/dataplane/design.md index 1cc5a352..3d14cb7c 100644 --- a/design-docs/src/mdbook/src/dataplane/design.md +++ b/design-docs/src/mdbook/src/dataplane/design.md @@ -1,397 +1 @@ -# Required features for MVP - -At a very high level, these are the _user facing_ features that we require to reach MVP with the gateway: - -1. BGP underlay -2. EVPN overlay -3. VPC routing (aka RIOT) -4. VPC nat 44/66 -5. VPC nat 64 -6. Telemetry -7. Rate limiting -8. AB fault tolerance -9. Management API - -## User-facing features - -
- -```plantuml -@startdot -digraph features { -labelloc=t -graph [ranksep=0.6] - -node[shape="rect"] -BGP_underlay [ label="BGP underlay", style=filled, color="lightblue"] -EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue"] -VPC_routing [ label="VPC routing", style=filled, color="lightblue"] -VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue"] -VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue"] -Telemetry [ label="Telemetry/observability", style=filled, color="lightblue"] -rate_limiting [ label="Rate limiting", style=filled, color="lightblue"] -Fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue"] -Management_API [label="Management API", style=filled, color="lightblue"] -all [label="*"] -all -> Management_API -Management_API -> all - -BGP_underlay -> EVPN_overlay; -EVPN_overlay -> VPC_routing; -VPC_routing -> VPC_nat44_66; -VPC_routing -> VPC_nat64; -VPC_routing -> rate_limiting; -EVPN_overlay -> Fault_tolerance; -Fault_tolerance -> VPC_nat64; -Fault_tolerance -> VPC_nat44_66; -VPC_routing -> Telemetry; -VPC_nat44_66 -> Telemetry [xlabel="weak"]; -VPC_nat64 -> Telemetry [xlabel="weak"]; -rate_limiting -> Telemetry [xlabel="weak"]; -} -@enddot -``` - -> A graph of the functional dependencies between the required _user facing_ features. -> Each node on the graph represents a feature. -> No feature can be _completed_ without all of the other features which point to it. - -
- -
- -```plantuml -@startdot -digraph features { - labelloc=t - node [shape="box"] - graph [ranksep=0.8] - label=< Feature map
(major features)
> - - BGP_underlay [ label="BGP underlay", style=filled, color="lightblue" ] - EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue" ] - VPC_routing [ label="VPC routing\n(aka RIOT)", style=filled, color="lightblue" ] - VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue" ] - VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue" ] - telemetry [ label="Telemetry/observability", style=filled, color="lightblue" ] - rate_limiting [ label="Rate limiting", style=filled, color="lightblue" ] - fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue" ] - Management_API [ label="Management API", style=filled, color="lightblue" ] - - control_plane_integration [ label="control plane integration"] - state_sync [ label="state sync" ] - hardware_offloaded_nat [ label="offload nat" ] - hardware_offloaded_routing [ label="Underlay route offload" ] - hardware_offloaded_vpc [ label="VPC route offload" ] - hardware_offloading_basic [ label="basic offloading" ] - datastore_integration [ label="datastore integration" ] - - all [label="*"] - Management_API -> all - all -> Management_API - - datastore_integration -> control_plane_integration - datastore_integration -> hardware_offloaded_routing - hardware_offloading_basic -> hardware_offloaded_routing - hardware_offloaded_routing -> BGP_underlay - fault_tolerance -> VPC_nat44_66 - fault_tolerance -> VPC_nat64 - BGP_underlay -> EVPN_overlay - EVPN_overlay -> VPC_routing - EVPN_overlay -> state_sync - EVPN_overlay -> hardware_offloaded_vpc - hardware_offloaded_nat -> VPC_nat44_66 - hardware_offloaded_nat -> VPC_nat64 - VPC_nat44_66 -> telemetry [xlabel="weak"] - VPC_nat64 -> telemetry [xlabel="weak"] - VPC_routing -> telemetry - VPC_routing -> VPC_nat44_66 - VPC_routing -> VPC_nat64 - VPC_routing -> rate_limiting - control_plane_integration -> BGP_underlay - state_sync -> fault_tolerance - hardware_offloaded_vpc -> hardware_offloaded_nat - hardware_offloaded_vpc -> rate_limiting - 
hardware_offloading_basic -> hardware_offloaded_vpc - rate_limiting -> telemetry [xlabel="weak"] -} -@enddot -``` - -> Here is a _very_ high-level graph of the functional dependencies between the required features. -> Each node on the graph represents a feature. -> No feature can be _completed_ without all the other features which point to it. -> Features shown in blue are user facing. -> All other features represent internal implementation concerns. - -
- -## Component Map - -
- -```puml -@startuml -skinparam hyperlinkUnderline false -skinparam linetype ortho -!unquoted function $link($name, $url) -!return "[[" + $url + " " + $name + "]]" -!endfunction - - - -!$q = { "uote": "\"" } - -!$doc_links = { - "config_store": { "text": "Configuration Store", "url": "#configuration-store" }, - "gateway_agent": { "text": "Gateway Agent", "url": "#gateway-agent" }, - "frr_agent": { "text": "FRR agent", "url": "#frr-agent" }, - "zebra": { "text": "zebra", "url": "https://docs.frrouting.org/en/latest/zebra.html" }, - "routing_daemons": { "text": "routing daemons", "url": "#routing-daemons" }, - "zebra_plugin": { "text": "Zebra\\nplugin", "url": "#zebra-plugin" }, - "kernel": { "text": "kernel", "url": "https://en.wikipedia.org/wiki/Linux_kernel" }, - "interface_manager": { "text": "interface manager", "url": "#interface-manager" }, - "routing_manager": { "text": "routing manager", "url": "#routing-manager" }, - "dataplane_workers": { "text": "dataplane workers", "url": "#dataplane-workers" }, - "nat_manager": { "text": "nat manager", "url": "#nat-manager" }, - "control_plane_interface": { "text": "control plane interface", "url": "#control-plane-interface" }, - "management_plane_interface": { "text": "management plane interface", "url": "#management-plane-interface" }, - "state_sync": { "text": "state sync", "url": "#state-sync" }, - "dataplane_model": { "text": "dataplane model", "url": "#dataplane-model" }, - "management_plane": { "text": "management plane", "url": "#management-plane" }, - "control_plane": { "text": "control plane", "url": "#control-plane" }, - "dataplane": { "text": "dataplane", "url": "#dataplane" } -} - -!unquoted function $linked($key) - !return $link($doc_links[$key].text, $doc_links[$key].url) -!endfunction - -!unquoted function $r($key) - !return "rectangle " + $key + " as " + $q.uote + $linked($key) + $q.uote -!endfunction - -!unquoted function $db($key) - !return "database " + $key + " as " + $q.uote + $linked($key) + 
$q.uote -!endfunction - -$r(management_plane) { - $r(gateway_agent) - $db(config_store) -} - -$r(kernel) - -$r(control_plane) { - $r(routing_daemons) - $r(zebra) { - $r(zebra_plugin) - } - $r(frr_agent) -} - -$r(dataplane) { - $r(control_plane_interface) - $r(management_plane_interface) - $db(dataplane_model) - $r(routing_manager) - $r(nat_manager) - $r(state_sync) - $r(interface_manager) - $r(dataplane_workers) -} - -rectangle sister_dataplane as "sister dataplane" { - rectangle rest as "..." - rectangle sister_state_sync as "state sync" -} - -rectangle nics - -control_plane_interface -- dataplane_model -dataplane_workers <--> nics : [[ https://www.dpdk.org/ dpdk ]] -frr_agent <--> routing_daemons -frr_agent <--> zebra -gateway_agent -- frr_agent -gateway_agent -- management_plane_interface -config_store -- gateway_agent -interface_manager -- dataplane_model -interface_manager <--> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] -dataplane_model - state_sync -dataplane_model <--> nat_manager -dataplane_model <--> routing_manager -management_plane_interface -- dataplane_model -nat_manager <--> dataplane_workers -zebra_plugin --- control_plane_interface : [[ https://en.wikipedia.org/wiki/Unix_domain_socket unix socket ]] -routing_daemons <-> zebra -routing_manager <--> dataplane_workers -state_sync <-> sister_state_sync : [[ https://en.wikipedia.org/wiki/Remote_direct_memory_access rdma]] -zebra <-> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] - -@enduml -``` - -> Map of the relationships between planned dataplane components - -
- -
- -### Configuration Store - -I could (and maybe should) write a book about the design considerations of [Configuration Store]. -For the moment I will limit myself to a list of hard and fast requirements: - -1. CP in the [CAP theorem](https://en.wikipedia.org/wiki/CAP_theorem) sense. - - immediate consistency in the sense that - - > Every read receives either the most recent data or an error. - - - partition tolerance - - > The system continues to operate despite an arbitrary number of messages being dropped (or delayed) by the network between nodes. - - The guiding theory is that - - 1. It is better to **not** function than to **mal**function. - 2. _**It doesn't matter how quickly you can do the wrong thing**_. - -
-
- -### Gateway Agent - -This is another subject deserving of a small book. - -For now, I will point out some notable design decisions we need to make: - -1. Do we expect a subscription model? -2. If not, do we expect the [gateway agent] to explicitly push state to dependent components? - -Beyond that, we need to make some high-level design choices: - -1. programming language? Likely Go or Rust. -2. REST? GraphQL? I tend to think REST is more appropriate at this time. - -
-
- -### FRR agent - -Be afraid. Make Fredi fill in this section. But also be afraid. - -
-
- -### Zebra Plugin - -This is a planned [zebra] plugin in the same spirit as [`fpm`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#id1) or [`dataplane_fpm_nl`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#dplane-fpm-nl). - -The core idea is to have a plugin that can be dynamically loaded into [zebra] and will listen to the [zebra event stream](https://github.com/FRRouting/frr/blob/ee5a3456d34a756c70ad8856ab7be7bed75ee31c/zebra/zebra_dplane.h#L114-L217) for updates. -The plugin will then take those updates and push them into the dataplane agent, allowing the dataplane to react to route updates. - -
-
- -### Routing daemons - -For the moment these are [`bgpd`](https://docs.frrouting.org/en/latest/bgp.html) and [`bfdd`](https://docs.frrouting.org/en/latest/bfd.html). - -
-
- -### Interface Manager - -This is a component that exchanges [netlink] messages with the [kernel] in response to changes in the [dataplane model]. -Its responsibilities include - -1. construction of virtual network interfaces needed by [zebra] -2. translation of ephemeral linux kernel parameters into ephemeral [dpdk] parameters (e.g. netlink interface index to dpdk interface id). -3. retrieval of information not available to [zebra]/[frr] such as neighbor tables / [ARP] / [IPv6 ND] resolution or [bridge] fdb. - -### Control Plane Interface - -This component is responsible for adjudicating communication between the [control plane] and the [dataplane]. -This component is expected to: - -1. Deserialize [bincode] (or perhaps [bitcode]) messages from the [hedgehog plugin] articulating the control plane's rules for the dataplane. -2. Express error messages back to the [control plane] articulating any error conditions. - For example, if the [dataplane] is unable to offload a route for whatever reason (e.g. rout type not supported) so that said routes are not advertised by the [control plane]. -3. Express the offloading status (including counters) back to the [control plane] (if possible). - -### Management plane interface - -The [management plane interface] is the interface between the [management plane] and the [dataplane]. - -1. Receive [bincode] (or perhaps [bitcode]) messages from the [gateway agent] over a [unix domain socket] (or perhaps a TCP socket?), parse them, and then update the [dataplane model] to reflect the desired configuration. - -
-
- -### Dataplane model - -This is an internal component of the [dataplane] which is responsible for managing the _desired_ state of the dataplane. It is updated by the [management plane interface] and is responsible for expressing the _desired state_ (not the observed state) of the [dataplane] to downstream components such as the [routing manager] or the [nat manager]. - -
-
- -### State sync - -This component is responsible for synchronizing the state of sister dataplanes in the name of fault tolerance. - -
-
- -### Routing manager - -This component is responsible for managing the routing tables for the dataplane. It is responsible for translating the _desired routing rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. - -
-
- -### NAT manager - -This component is responsible for managing the [network address translation] tables for the dataplane. It is responsible for translating the _desired NAT rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. - - -
-
- -### Dataplane workers - -This is a collection of [rte lcores] which are responsible for actually performing the packet processing. -The workers are responsible for performing the following tasks: - -- Receive packets from the NIC -- Identify local traffic -- Perform underlay routing -- Perform overlay routing -- Perform [NAT] -- Transmit packets to the NIC - -
-
- -### Dataplane - -The main packet processing engine. - -### Management Plane - -The management plane is a high-level abstraction that is responsible for - -1. Accepting API calls from the end user. -2. Translating those API calls into dataplane and control plane configuration. -3. Storing that configuration in the [Configuration Store] - -
-
- -### Control Plane - -The control plane is, for the moment, just [bgpd] and [bfdd]. - -
- -{{#include ../links.md}} +# Design diff --git a/design-docs/src/mdbook/src/dataplane/design/index.md b/design-docs/src/mdbook/src/dataplane/design/index.md new file mode 100644 index 00000000..5c368845 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/design/index.md @@ -0,0 +1,659 @@ +# Required features for MVP + +At a very high level, these are the _user facing_ features that we require to reach MVP with the gateway: + +1. BGP underlay +2. EVPN overlay +3. VPC routing (aka RIOT) +4. VPC nat 44/66 +5. VPC nat 64 +6. Telemetry +7. Rate limiting +8. AB fault tolerance +9. Management API + +## User-facing features + +
+ +```plantuml +@startdot +digraph features { +labelloc=t +graph [ranksep=0.6] + +node[shape="rect"] +BGP_underlay [ label="BGP underlay", style=filled, color="lightblue"] +EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue"] +VPC_routing [ label="VPC routing", style=filled, color="lightblue"] +VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue"] +VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue"] +Telemetry [ label="Telemetry/observability", style=filled, color="lightblue"] +rate_limiting [ label="Rate limiting", style=filled, color="lightblue"] +Fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue"] +Management_API [label="Management API", style=filled, color="lightblue"] +all [label="*"] +all -> Management_API +Management_API -> all + +BGP_underlay -> EVPN_overlay; +EVPN_overlay -> VPC_routing; +VPC_routing -> VPC_nat44_66; +VPC_routing -> VPC_nat64; +VPC_routing -> rate_limiting; +EVPN_overlay -> Fault_tolerance; +Fault_tolerance -> VPC_nat64; +Fault_tolerance -> VPC_nat44_66; +VPC_routing -> Telemetry; +VPC_nat44_66 -> Telemetry [xlabel="weak"]; +VPC_nat64 -> Telemetry [xlabel="weak"]; +rate_limiting -> Telemetry [xlabel="weak"]; +} +@enddot +``` + +> A graph of the functional dependencies between the required _user facing_ features. +> Each node on the graph represents a feature. +> No feature can be _completed_ without all of the other features which point to it. + +
+ +
+ +```plantuml +@startdot +digraph features { + labelloc=t + node [shape="box"] + graph [ranksep=0.8] + label=< Feature map
(major features)
> + + BGP_underlay [ label="BGP underlay", style=filled, color="lightblue" ] + EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue" ] + VPC_routing [ label="VPC routing\n(aka RIOT)", style=filled, color="lightblue" ] + VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue" ] + VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue" ] + telemetry [ label="Telemetry/observability", style=filled, color="lightblue" ] + rate_limiting [ label="Rate limiting", style=filled, color="lightblue" ] + fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue" ] + Management_API [ label="Management API", style=filled, color="lightblue" ] + + control_plane_integration [ label="control plane integration"] + state_sync [ label="state sync" ] + hardware_offloaded_nat [ label="offload nat" ] + hardware_offloaded_routing [ label="Underlay route offload" ] + hardware_offloaded_vpc [ label="VPC route offload" ] + hardware_offloading_basic [ label="basic offloading" ] + datastore_integration [ label="datastore integration" ] + + all [label="*"] + Management_API -> all + all -> Management_API + + datastore_integration -> control_plane_integration + datastore_integration -> hardware_offloaded_routing + hardware_offloading_basic -> hardware_offloaded_routing + hardware_offloaded_routing -> BGP_underlay + fault_tolerance -> VPC_nat44_66 + fault_tolerance -> VPC_nat64 + BGP_underlay -> EVPN_overlay + EVPN_overlay -> VPC_routing + EVPN_overlay -> state_sync + EVPN_overlay -> hardware_offloaded_vpc + hardware_offloaded_nat -> VPC_nat44_66 + hardware_offloaded_nat -> VPC_nat64 + VPC_nat44_66 -> telemetry [xlabel="weak"] + VPC_nat64 -> telemetry [xlabel="weak"] + VPC_routing -> telemetry + VPC_routing -> VPC_nat44_66 + VPC_routing -> VPC_nat64 + VPC_routing -> rate_limiting + control_plane_integration -> BGP_underlay + state_sync -> fault_tolerance + hardware_offloaded_vpc -> hardware_offloaded_nat + hardware_offloaded_vpc -> rate_limiting + 
hardware_offloading_basic -> hardware_offloaded_vpc + rate_limiting -> telemetry [xlabel="weak"] +} +@enddot +``` + +> Here is a _very_ high-level graph of the functional dependencies between the required features. +> Each node on the graph represents a feature. +> No feature can be _completed_ without all the other features which point to it. +> Features shown in blue are user facing. +> All other features represent internal implementation concerns. + +
+ +## Component Map + +
+ +```puml +@startuml +skinparam hyperlinkUnderline false +skinparam linetype ortho +!unquoted function $link($name, $url) +!return "[[" + $url + " " + $name + "]]" +!endfunction + + + +!$q = { "uote": "\"" } + +!$doc_links = { + "config_store": { "text": "Configuration Store", "url": "#configuration-store" }, + "gateway_agent": { "text": "Gateway Agent", "url": "#gateway-agent" }, + "frr_agent": { "text": "FRR agent", "url": "#frr-agent" }, + "zebra": { "text": "zebra", "url": "https://docs.frrouting.org/en/latest/zebra.html" }, + "routing_daemons": { "text": "routing daemons", "url": "#routing-daemons" }, + "zebra_plugin": { "text": "Zebra\\nplugin", "url": "#zebra-plugin" }, + "kernel": { "text": "kernel", "url": "https://en.wikipedia.org/wiki/Linux_kernel" }, + "interface_manager": { "text": "interface manager", "url": "#interface-manager" }, + "routing_manager": { "text": "routing manager", "url": "#routing-manager" }, + "dataplane_workers": { "text": "dataplane workers", "url": "#dataplane-workers" }, + "nat_manager": { "text": "nat manager", "url": "#nat-manager" }, + "control_plane_interface": { "text": "control plane interface", "url": "#control-plane-interface" }, + "management_plane_interface": { "text": "management plane interface", "url": "#management-plane-interface" }, + "state_sync": { "text": "state sync", "url": "#state-sync" }, + "dataplane_model": { "text": "dataplane model", "url": "#dataplane-model" }, + "management_plane": { "text": "management plane", "url": "#management-plane" }, + "control_plane": { "text": "control plane", "url": "#control-plane" }, + "dataplane": { "text": "dataplane", "url": "#dataplane" } +} + +!unquoted function $linked($key) + !return $link($doc_links[$key].text, $doc_links[$key].url) +!endfunction + +!unquoted function $r($key) + !return "rectangle " + $key + " as " + $q.uote + $linked($key) + $q.uote +!endfunction + +!unquoted function $db($key) + !return "database " + $key + " as " + $q.uote + $linked($key) + 
$q.uote +!endfunction + +$r(management_plane) { + $r(gateway_agent) + $db(config_store) +} + +$r(kernel) + +$r(control_plane) { + $r(routing_daemons) + $r(zebra) { + $r(zebra_plugin) + } + $r(frr_agent) +} + +$r(dataplane) { + $r(control_plane_interface) + $r(management_plane_interface) + $db(dataplane_model) + $r(routing_manager) + $r(nat_manager) + $r(state_sync) + $r(interface_manager) + $r(dataplane_workers) +} + +rectangle sister_dataplane as "sister dataplane" { + rectangle rest as "..." + rectangle sister_state_sync as "state sync" +} + +rectangle nics + +control_plane_interface -- dataplane_model +dataplane_workers <--> nics : [[ https://www.dpdk.org/ dpdk ]] +frr_agent <--> routing_daemons +frr_agent <--> zebra +gateway_agent -- frr_agent +gateway_agent -- management_plane_interface +config_store -- gateway_agent +interface_manager -- dataplane_model +interface_manager <--> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] +dataplane_model - state_sync +dataplane_model <--> nat_manager +dataplane_model <--> routing_manager +management_plane_interface -- dataplane_model +nat_manager <--> dataplane_workers +zebra_plugin --- control_plane_interface : [[ https://en.wikipedia.org/wiki/Unix_domain_socket unix socket ]] +routing_daemons <-> zebra +routing_manager <--> dataplane_workers +state_sync <-> sister_state_sync : [[ https://en.wikipedia.org/wiki/Remote_direct_memory_access rdma]] +zebra <-> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] + +@enduml +``` + +> Map of the relationships between planned dataplane components + +
+ +
+ +### Configuration Store + +I could (and maybe should) write a book about the design considerations of [Configuration Store]. +For the moment I will limit myself to a list of hard and fast requirements: + +1. CP in the [CAP theorem](https://en.wikipedia.org/wiki/CAP_theorem) sense. + - immediate consistency in the sense that + + > Every read receives either the most recent data or an error. + + - partition tolerance + + > The system continues to operate despite an arbitrary number of messages being dropped (or delayed) by the network between nodes. + + The guiding theory is that + + 1. It is better to **not** function than to **mal**function. + 2. _**It doesn't matter how quickly you can do the wrong thing**_. + +
+
+ +### Gateway Agent + +This is another subject deserving of a small book. + +For now, I will point out some notable design decisions we need to make: + +1. Do we expect a subscription model? +2. If not, do we expect the [gateway agent] to explicitly push state to dependent components? + +Beyond that, we need to make some high-level design choices: + +1. programming language? Likely Go or Rust. +2. REST? GraphQL? I tend to think REST is more appropriate at this time. + +
+
+ +### FRR agent + +Be afraid. Make Fredi fill in this section. But also be afraid. + +
+
+ +### Zebra Plugin + +This is a planned [zebra] plugin in the same spirit as [`fpm`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#id1) or [`dataplane_fpm_nl`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#dplane-fpm-nl). + +The core idea is to have a plugin that can be dynamically loaded into [zebra] and will listen to the [zebra event stream](https://github.com/FRRouting/frr/blob/ee5a3456d34a756c70ad8856ab7be7bed75ee31c/zebra/zebra_dplane.h#L114-L217) for updates. +The plugin will then take those updates and push them into the dataplane agent, allowing the dataplane to react to route updates. + +
+
+ +### Routing daemons + +For the moment these are [`bgpd`](https://docs.frrouting.org/en/latest/bgp.html) and [`bfdd`](https://docs.frrouting.org/en/latest/bfd.html). + +
+
+ +### Interface Manager + +This is a component that exchanges [netlink] messages with the [kernel] in response to changes in the [dataplane model]. +Its responsibilities include + +1. construction of virtual network interfaces needed by [zebra] +2. translation of ephemeral Linux kernel parameters into ephemeral [dpdk] parameters (e.g. netlink interface index to dpdk interface id). +3. retrieval of information not available to [zebra]/[frr] such as neighbor tables / [ARP] / [IPv6 ND] resolution or [bridge] fdb. + +### Control Plane Interface + +This component is responsible for adjudicating communication between the [control plane] and the [dataplane]. +This component is expected to: + +1. Deserialize [bincode] (or perhaps [bitcode]) messages from the [hedgehog plugin] articulating the control plane's rules for the dataplane. +2. Express error messages back to the [control plane] articulating any error conditions. + For example, if the [dataplane] is unable to offload a route for whatever reason (e.g. route type not supported), it should report the failure so that said routes are not advertised by the [control plane]. +3. Express the offloading status (including counters) back to the [control plane] (if possible). + +### Management plane interface + +The [management plane interface] is the interface between the [management plane] and the [dataplane]. + +1. Receive [bincode] (or perhaps [bitcode]) messages from the [gateway agent] over a [unix domain socket] (or perhaps a TCP socket?), parse them, and then update the [dataplane model] to reflect the desired configuration. + +
+
+ +### Dataplane model + +This is an internal component of the [dataplane] which is responsible for managing the _desired_ state of the dataplane. It is updated by the [management plane interface] and is responsible for expressing the _desired state_ (not the observed state) of the [dataplane] to downstream components such as the [routing manager] or the [nat manager]. + +
+
+ +### State sync + +This component is responsible for synchronizing the state of sister dataplanes in the name of fault tolerance. + +
+
+ +### Routing manager + +This component is responsible for managing the routing tables for the dataplane. It is responsible for translating the _desired routing rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. + +
+
+ +### NAT manager + +This component is responsible for managing the [network address translation] tables for the dataplane. It is responsible for translating the _desired NAT rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. + + +
+
+ +### Dataplane workers + +This is a collection of [rte lcores] which are responsible for actually performing the packet processing. +The workers are responsible for performing the following tasks: + +- Receive packets from the NIC +- Identify local traffic +- Perform underlay routing +- Perform overlay routing +- Perform [NAT] +- Transmit packets to the NIC + +
+
+ +### Dataplane + +The main packet processing engine. + +### Management Plane + +The management plane is a high-level abstraction that is responsible for: + +1. Accepting API calls from the end user. +2. Translating those API calls into dataplane and control plane configuration. +3. Storing that configuration in the [Configuration Store]. + +
+
+ +### Control Plane + +The control plane is, for the moment, just [bgpd] and [bfdd]. + +
+ +## Zoom in on dataplane + +
+ +
+ +```plantuml +@startuml +rectangle state_manager as "state manager" +cloud flow_lib as "flow lib" +collections workers +collections rx_queue +collections tx_queue +rectangle nic +rectangle flow_queue as "flow queue" + +workers <-- rx_queue : poll +workers --> tx_queue : push + +state_manager <--> workers : state\nupdate +state_manager -> flow_lib : calls +flow_lib <---> flow_queue : edit/query\nflows +flow_queue <--> nic : push/pull +tx_queue -- nic : tx +rx_queue -- nic : rx +@enduml +``` + +```plantuml +@startuml +!pragma teoz true +box "worker" #lightblue + participant rx_queue + participant worker + participant tx_queue +end box +participant state_manager +database flow_rules + +worker -> rx_queue ++ : rte_eth_rx_burst +return packets +worker -> state_manager ++ : updates +state_manager -> flow_rules : push updates +return verdicts +worker -> tx_queue : burst + +@enduml +``` + +
+ +> Biscuits + +
+ +
+ +
+ +
+ +#### Component diagram {#component-diagram-centralized-with-local} + +```plantuml +@startuml +rectangle state_manager as "state manager" +cloud flow_lib as "flow lib" +collections workers +collections rx_queue +collections tx_queue +rectangle nic +rectangle flow_queue as "flow queue" + +workers <-- rx_queue : poll +workers --> tx_queue : push + +state_manager <--> workers : state\nupdate +state_manager -> flow_lib : calls +flow_lib <---> flow_queue : edit/query\nflows +flow_queue <--> nic : push/pull +tx_queue -- nic : tx +rx_queue -- nic : rx +@enduml +``` +
+
+ +#### Sequence diagram {#sequence-diagram-centralized-with-local} + +```plantuml +@startuml +!pragma teoz true +box "worker" #lightblue + participant rx_queue + participant worker + database local_state as "local state" + participant tx_queue +end box +participant state_manager +database flow_rules + +worker -> rx_queue ++ : rte_eth_rx_burst +return packets +worker -> local_state ++ : table\nlookups +return verdicts +worker -> state_manager ++ : updates +state_manager -> flow_rules : push updates +return verdicts +worker -> tx_queue : burst + +@enduml +``` +
+
+ +> Biscuits + +
+ +
+ +
+ +```plantuml +@startuml +rectangle state_manager as "state manager" +cloud flow_lib as "flow lib" +collections workers +collections rx_queue +collections tx_queue +rectangle nic +collections flow_queue as "flow queue" + +workers <-- rx_queue : poll +workers --> tx_queue : push + +state_manager <--> workers : state\nupdate +workers <-> flow_lib : calls +flow_lib <--> flow_queue : edit/query\nflows +flow_queue <--> nic : push/pull +tx_queue --> nic : tx +rx_queue <-- nic : rx +@enduml +``` + + +```plantuml +@startuml +!pragma teoz true +box "worker" #lightblue + participant rx_queue + participant worker + database local_state as "local state" + participant tx_queue +end box +participant state_manager +database flow_rules + +worker -> rx_queue ++ : rte_eth_rx_burst +return packets +worker -> local_state ++ : table\nlookups +return verdicts +worker -> flow_rules : push updates +worker -> tx_queue : burst +worker -> state_manager : updates + +@enduml +``` + +
+ +> Double biscuit + +
+ +```plantuml +@startuml +!pragma teoz true +skinparam linetype ortho +skinparam hyperlinkUnderline false +hide empty description +hide empty members +hide circle + +entity vpc +entity worker +entity peering +entity interface +entity chunk +entity subnet + +interface ||-o{ subnet +subnet ||-|{ chunk +peering ||--o{ interface +vpc ||-o{ interface +vpc }o--|| worker +chunk }o--|| worker +@enduml +``` + +```plantuml +@startuml +!pragma teoz true +box "worker" #lightblue + participant rx_queue + participant worker + database local_state as "local state" + participant tx_queue +end box +database flow_rules +participant state_manager + +worker -> rx_queue : rte_eth_rx_burst +return packets +worker -> local_state : table\nlookups +return verdict +worker -> flow_rules : push updates +worker -> tx_queue : rte_eth_tx_burst +worker -> local_state : chunk capacity ok? +alt chunk ok +local_state -> worker : chunk capacity ok? +else chunk low +local_state -> worker : get more chunk +worker -> state_manager : more chunk plz +return ok +worker -> local_state : refill chunk +end alt + +@enduml +``` + +#### Interfaces as transforms + +Think of each network interface as a pair of transform functions. + +One transform function applies when a packet ingresses from the interface. +The other transform function applies when a packet egresses through the interface. + +```rust +#[repr(transparent)] +struct UUID(u128); +struct Vpc { + id: UUID, + interfaces: Vec +} +struct Interface<'vpc> { + pub vpc: Weak, + // ... 
+} + +fn process(packet: Packet, ingress: &mut Interface) { + ingress.vpc +} + +``` + +``` equation + +vpc(ingress, +``` + + +{{#include ../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/map-of-dpdk.md b/design-docs/src/mdbook/src/dataplane/design/map-of-dpdk.md similarity index 100% rename from design-docs/src/mdbook/src/dataplane/map-of-dpdk.md rename to design-docs/src/mdbook/src/dataplane/design/map-of-dpdk.md diff --git a/design-docs/src/mdbook/src/dataplane/design/offloading-plan.md b/design-docs/src/mdbook/src/dataplane/design/offloading-plan.md new file mode 100644 index 00000000..05bb51a7 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/design/offloading-plan.md @@ -0,0 +1,118 @@ +# Offloading the dataplane + + +
+ +```plantuml +@startuml +!pragma teoz true +!$sty = { + "question": "#gold", + "action": "#lightblue", + "future": "#lightgreen", + "attention": "#pink" +} +!$action = $sty.action +!$question = $sty.question +!$future = $sty.future + +group Shared Ingress +start +$action:goto group 1; +end group + +group lookup interface id +switch (parse) +case (VLAN) + $question:eth / vlan N != 2; + $action:let iface = f(vlan);; + $action:pop vlan; +case (EVPN) + $question: eth / ipv{4,6} / udp dst == 4789 / vxlan; + $action:let iface = f(vni);; + $action:vxlan decap; +case (Cisco ACI) + $question:eth / vlan 2 / ipv{4,6} / udp dst == 4789 / vxlan; + $action:let iface = f(vni);; + $action:pop vlan + vxlan decap; +endswitch +end group + +group Process packet + + switch (ingress interface type?) + case (inner) + group routing + :route lookup; + :set nexthop ip; + :lookup egress interface; + :rewrite src/dst mac; + :dec ttl; + end group + group nat + switch (egress interface type?) + case (inner) + switch (connection state) + case (new) + :pull chunk; + :construct mapping; + :install offload; + case (established|related) + :offloaded; + detach + case (invalid) + :drop; + detach + endswitch + case (outer) + endswitch + end group + case (outer) + group nat + switch (connection state) + case (new|invalid) + :count; + :drop; + detach; + case (established|related) + :offloaded transform; + endswitch + end group + group routing + :route lookup; + :set nexthop ip; + :lookup egress interface; + :rewrite src/dst mac; + :dec ttl; + end group + endswitch + + +end group + +group Group 3: Re-tag/encap + +switch (metadata lookup) +case () + $question:vlan+vxlan?; + $action:raw encap (vlan + vxlan); + $action:set vni based on meta; + $future:count; +case () + $question:push vlan?; + $action:push vlan; + $action:set vid based on meta; + $future:count; +endswitch +end group + +group Group 4: Egress +stop +end group + +@enduml +``` + +> How it's done + +
diff --git a/design-docs/src/mdbook/src/dataplane/offloading-plan.md b/design-docs/src/mdbook/src/dataplane/offloading-plan.md deleted file mode 100644 index 38652761..00000000 --- a/design-docs/src/mdbook/src/dataplane/offloading-plan.md +++ /dev/null @@ -1,1028 +0,0 @@ -# Offloading the dataplane - -These are just unordered design ideas for the moment. - - - -
- -## Some figure title - -```plantuml -@startuml -!pragma teoz true -!$sty = { - "question": "#gold", - "action": "#lightblue", - "future": "#lightgreen", - "attention": "#pink" -} -!$action = $sty.action -!$question = $sty.question -!$future = $sty.future - -group Group 0: Shared Ingress -start -$action:goto group 1; -end group - -group Group 1: Decap -switch (parse) -case () - $question:eth / vlan 2 / ipv4 / udp dst == 4789 / vxlan; - $action:set metadata f(vni); - $action:pop vlan + vxlan decap (raw decap); -case () - $question:eth / vlan N != 2; - $action:set metadata f(vlan); - $action:pop vlan; -endswitch -end group -group Group 2: NAT -switch (parse) -case () -$question:eth/ip/(tcp|udp|icmp); -switch (and ct is) -case () - $question:new; - $future:count (per ingress meta?); - $future:rate limit? (per ingress meta?); - $action:raw encap (vxlan); - note left - We have already - stripped tags at - this point so we - need to re-encap - if we are going - to trap to the - kernel - end note - - $action:set vni based on meta; - $action:trap to kernel; - detach -case () - $question:established | related; - $future:count (per ingress meta?); - $future:rate limit? (per ingress meta?); - $action:NAT; - note left $sty.attention - HOT PATH - This action is the main - workload of the whole - program. - end note -case () - $question:invalid; - $action:count; - $action:drop; - detach -endswitch -case () -$question:eth/(arp|ipv6/icmpv6 nd); -$future:rate limit!; -note right $sty.attention - This is the most important - thing to rate limit. 
-end note -$action:raw encap (vxlan); -$action:set vni based on meta; -$action:trap to kernel; -detach -endswitch - -end group - -group Group 3: Re-tag/encap - -switch (metadata lookup) -case () - $question:vlan+vxlan?; - $action:raw encap (vlan + vxlan); - $action:set vni based on meta; - $future:count; -case () - $question:push vlan?; - $action:push vlan; - $action:set vid based on meta; - $future:count; -endswitch -end group - -group Group 4: Egress -stop -end group - -@enduml -``` - -```plantuml -@startuml -!pragma teoz true -!$sty = { -"port": { -"vtep": "#lightgreen", -"rep": "#lightpink", -"sriov": "#lightblue", -"physical": "#orange", -"veth": "#c962a9" -} -} - -cloud elsewhere - -rectangle host { -rectangle eswitch { -rectangle "physical port 1" as phys_port1 $sty.port.physical -rectangle "physical port 2" as phys_port2 $sty.port.physical -rectangle "user rep" as user_rep $sty.port.rep -rectangle "kernel rep" as kernel_rep $sty.port.rep -} -rectangle "user sriov" as user_sriov $sty.port.sriov - -rectangle netns { -rectangle "kernel sriov" as kernel_sriov $sty.port.sriov -rectangle bridge0 as bridge { -rectangle "vtep" as vtep $sty.port.vtep -rectangle "veth[0]" as veth_0_br $sty.port.veth -rectangle "veth[1]" as veth_1_br $sty.port.veth -rectangle "veth[2]" as veth_2_br $sty.port.veth -rectangle "veth[3]" as veth_3_br $sty.port.veth -} -} - -rectangle "veth[0]" as veth_0 $sty.port.veth -rectangle "veth[1]" as veth_1 $sty.port.veth -rectangle "veth[2]" as veth_2 $sty.port.veth -rectangle "veth[3]" as veth_3 $sty.port.veth - -} - -user_rep -- user_sriov -kernel_rep --- kernel_sriov -phys_port1 -[#hidden] phys_port2 -phys_port1 -[#hidden]- user_rep -phys_port2 -[#hidden]- kernel_rep - -elsewhere --- phys_port1 -elsewhere --- phys_port2 - -veth_0_br --- veth_0 -veth_1_br --- veth_1 -veth_2_br --- veth_2 -veth_3_br --- veth_3 -@enduml -``` - -
- - -```plantuml -@startuml -!pragma teoz true -!$sty = { - "port": { - "vtep": "#lightgreen", - "rep": "#lightpink", - "sriov": "#lightblue", - "physical": "#orange", - "veth": "#c962a9" - } -} - -cloud elsewhere - -rectangle host { - rectangle eswitch { - rectangle "physical port 1" as phys_port1 $sty.port.physical - rectangle "physical port 2" as phys_port2 $sty.port.physical - } - - rectangle bridge0 as bridge { - rectangle "vtep" as vtep $sty.port.vtep - rectangle "veth[0]" as veth_0_br $sty.port.veth - rectangle "veth[1]" as veth_1_br $sty.port.veth - rectangle "veth[2]" as veth_2_br $sty.port.veth - rectangle "veth[3]" as veth_3_br $sty.port.veth - rectangle "veth[4]" as veth_4_br $sty.port.veth - rectangle "veth[5]" as veth_5_br $sty.port.veth - rectangle "veth[6]" as veth_6_br $sty.port.veth - } - - note right of bridge - I only draw one here, - but we can have more vteps - and more bridges using - the **external**, and **vnifilter** - flags when you make bridges. - - Recent FRR supports this. - end note - - - rectangle "netns A" { - rectangle "veth[0]" as veth_0 $sty.port.veth - rectangle "veth[1]" as veth_1 $sty.port.veth - rectangle "some process" as some_process - } - rectangle "netns B" { - rectangle "veth[2]" as veth_2 $sty.port.veth - rectangle "veth[3]" as veth_3 $sty.port.veth - rectangle "some other process" as some_other_process - } - - rectangle "netns C" { - rectangle "veth[4]" as veth_4 $sty.port.veth - rectangle "veth[5]" as veth_5 $sty.port.veth - rectangle "veth[6]" as veth_6 $sty.port.veth - rectangle "yet another process" as yet_another_process - } - - rectangle "Kubernetes\n(present?)" as kubernetes - - note right of kubernetes - I need to make sure - I understand exactly what - the plan is regarding - kubernetes. - end note - - rectangle "FRR???\n(future)" as frr - note right of frr - I understand that we don't - need FRR now, but I think we - can all see that one coming. 
- - Let me know if I'm wrong **¯\_(ツ)_/¯** - end note -} - -phys_port1 -[#hidden] phys_port2 - -elsewhere -- phys_port1 -elsewhere --- phys_port2 - -veth_0_br -- veth_0 -veth_1_br -- veth_1 -veth_2_br -- veth_2 -veth_3_br -- veth_3 -veth_4_br -- veth_4 -veth_5_br -- veth_5 -veth_6_br -- veth_6 - -phys_port1 -[#hidden]- frr -phys_port2 -[#hidden]- frr - -veth_0 -[hidden]- some_process -veth_1 -[hidden]- some_process - -veth_2 -[hidden]- some_other_process -veth_3 -[hidden]- some_other_process - -veth_4 -[hidden]- yet_another_process -veth_5 -[hidden]- yet_another_process -veth_6 -[hidden]- yet_another_process - -@enduml -``` - -```puml -@startuml -!pragma teoz true - -title First pass - -!$sty = { - "question": "#gold", - "action": "#lightblue", - "future": "#lightgreen", - "attention": "#pink" -} -!$action = $sty.action -!$question = $sty.question - -group Group 0: Shared Ingress -start -$action:goto group 1; -end group - -group Group 1: Decap -switch (parse) -case () - $question:eth / vlan 2 / ipv4 / udp dst == 4789 / vxlan; - $action:set metadata f(vni); - $action:pop vlan + vxlan decap (raw decap); -case () - $question:eth / vlan N != 2; - $action:set metadata f(vlan); - $action:pop vlan; -endswitch -end group -group Group 2: NAT -switch (parse) -case () -$question:eth/ip/(tcp|udp|icmp); -switch (and ct is) -case () - $question:new; - $action:send to dpdk queue **N**; - detach -case () - $question:established | related; - $action:NAT; - note left $sty.attention - HOT PATH - This action is the main - workload of the whole - program. - end note -case () - $question:invalid; - $action:count; - $action:drop; - detach -endswitch -case () -$question:eth/(arp|ipv6/icmpv6 nd); -note right $sty.attention - This is the most important - thing to rate limit. 
-end note -$action:raw encap (vxlan); -$action:set vni based on meta; -$action:send to DPDK queue **N**; -detach -endswitch - -end group - -group Group 3: Re-tag/encap - -switch (metadata lookup) -case () - $question:vlan+vxlan?; - $action:raw encap (vlan + vxlan); - $action:set vni based on meta; -case () - $question:push vlan?; - $action:push vlan; - $action:set vid based on meta; -endswitch -end group - -group Group 4: Egress -stop -end group - -@enduml -``` - -```puml -@startuml -!pragma teoz true - -title First pass - -!$sty = { -"question": "#gold", -"action": "#lightblue", -"future": "#lightgreen", -"attention": "#pink" -} -!$action = $sty.action -!$question = $sty.question - -group Group 0: Shared Ingress -start -$action:goto group 1; -end group - -group Group 1: Decap -switch (parse) -case () -$question:eth / vlan 2 / ipv4 / udp dst == 4789 / vxlan; -$action:set metadata f(vni); -$action:pop vlan + vxlan decap (raw decap); -case () -$question:eth / vlan N != 2; -$action:set metadata f(vlan); -$action:pop vlan; -endswitch -end group -group Group 2: NAT -switch (parse) -case () -$question:eth/ip/(tcp|udp|icmp); -switch (and ct is) -case () -$question:new; -$action:send to dpdk queue **N**; -detach -case () -$question:established | related; -$action:NAT; -note left $sty.attention -HOT PATH -This action is the main -workload of the whole -program. -end note -case () -$question:invalid; -$action:count; -$action:drop; -detach -endswitch -case () -$question:eth/(arp|ipv6/icmpv6 nd); -note right $sty.attention -This is the most important -thing to rate limit. 
-end note -$action:raw encap (vxlan); -$action:set vni based on meta; -$action:send to DPDK queue **N**; -detach -endswitch - -end group - -group Group 3: Re-tag/encap - -switch (metadata lookup) -case () -$question:vlan+vxlan?; -$action:raw encap (vlan + vxlan); -$action:set vni based on meta; -case () -$question:push vlan?; -$action:push vlan; -$action:set vid based on meta; -endswitch -end group - -group Group 4: Egress -stop -end group - -@enduml -``` - -```puml -@startuml -!pragma teoz true -!$sty = { - "port": { - "vtep": "#lightgreen", - "rep": "#lightpink", - "sriov": "#lightblue", - "physical": "#orange", - "veth": "#c962a9" - } -} - -cloud elsewhere - -rectangle host { - rectangle dpdk_netns { - rectangle eswitch { - rectangle "physical port 1" as phys_port1 $sty.port.physical - rectangle "physical port 2" as phys_port2 $sty.port.physical - } - rectangle veth as veth.dpdk - } - - rectangle veth as veth.kernel - - rectangle bridge as bridge { - rectangle "vtep" as vtep $sty.port.vtep - rectangle "veth[0]" as veth_0_br $sty.port.veth - rectangle "veth[1]" as veth_1_br $sty.port.veth - rectangle "veth[2]" as veth_2_br $sty.port.veth - rectangle "veth[3]" as veth_3_br $sty.port.veth - rectangle "veth[4]" as veth_4_br $sty.port.veth - rectangle "veth[5]" as veth_5_br $sty.port.veth - rectangle "veth[6]" as veth_6_br $sty.port.veth - } - - note right of bridge - I only draw one here, - but we can have more vteps - and more bridges using - the **external**, and **vnifilter** - flags when you make bridges. - - Recent FRR supports this. 
- end note - - rectangle "netns A" { - rectangle "veth[0]" as veth_0 $sty.port.veth - rectangle "veth[1]" as veth_1 $sty.port.veth - rectangle "some process" as some_process - } - rectangle "netns B" { - rectangle "veth[2]" as veth_2 $sty.port.veth - rectangle "veth[3]" as veth_3 $sty.port.veth - rectangle "some other process" as some_other_process - } - rectangle "netns C" { - rectangle "veth[4]" as veth_4 $sty.port.veth - rectangle "veth[5]" as veth_5 $sty.port.veth - rectangle "veth[6]" as veth_6 $sty.port.veth - rectangle "yet another process" as yet_another_process - } - -} - -phys_port1 -[#hidden] phys_port2 - -elsewhere -- phys_port1 -elsewhere --- phys_port2 - -veth_0_br -- veth_0 -veth_1_br -- veth_1 -veth_2_br -- veth_2 -veth_3_br -- veth_3 -veth_4_br -- veth_4 -veth_5_br -- veth_5 -veth_6_br -- veth_6 - -veth_0 -[hidden]- some_process -veth_1 -[hidden]- some_process - -veth_2 -[hidden]- some_other_process -veth_3 -[hidden]- some_other_process - -veth_4 -[hidden]- yet_another_process -veth_5 -[hidden]- yet_another_process -veth_6 -[hidden]- yet_another_process - -veth.dpdk -- veth.kernel -veth.kernel -[hidden]- bridge - -@enduml -``` - -```plantuml -@startuml -!pragma teoz true -!$sty = { - "question": "#gold", - "action": "#lightblue", - "future": "#lightgreen", - "attention": "#pink" -} -!$action = $sty.action -!$question = $sty.question -!$future = $sty.future - -group Group 0: Shared Ingress -start -$action:goto group 1; -end group - -group Group 1: Decap -switch (parse) -case () - $question:eth / vlan 2 / ipv4 / udp dst == 4789 / vxlan; - $action:set metadata f(vni); - $action:pop vlan + vxlan decap (raw decap); -case () - $question:eth / vlan N != 2; - $action:set metadata f(vlan); - $action:pop vlan; -endswitch -end group -group Group 2: NAT -switch (parse) -case () -$question:eth/ip/(tcp|udp|icmp); -switch (and ct is) -case () - $question:new; - $future:count (per ingress meta?); - $future:rate limit? 
(per ingress meta?); - $action:raw encap (vxlan); - note left - We have already - stripped tags at - this point so we - need to re-encap - if we are going - to trap to the - kernel - end note - - $action:set vni based on meta; - $action:trap to kernel; - detach -case () - $question:established | related; - $future:count (per ingress meta?); - $future:rate limit? (per ingress meta?); - $action:NAT; - note left $sty.attention - HOT PATH - This action is the main - workload of the whole - program. - end note -case () - $question:invalid; - $action:count; - $action:drop; - detach -endswitch -case () -$question:eth/(arp|ipv6/icmpv6 nd); -$future:rate limit!; -note right $sty.attention - This is the most important - thing to rate limit. -end note -$action:raw encap (vxlan); -$action:set vni based on meta; -$action:trap to kernel; -detach -endswitch - -end group - -group Group 3: Re-tag/encap - -switch (metadata lookup) -case () - $question:vlan+vxlan?; - $action:raw encap (vlan + vxlan); - $action:set vni based on meta; - $future:count; -case () - $question:push vlan?; - $action:push vlan; - $action:set vid based on meta; - $future:count; -endswitch -end group - -group Group 4: Egress -stop -end group - -@enduml -``` - -```plantuml -@startuml -!pragma teoz true -!$sty = { - "port": { - "vtep": "#lightgreen", - "rep": "#lightpink", - "sriov": "#lightblue", - "physical": "#orange", - "veth": "#c962a9" - } -} - -cloud elsewhere - -rectangle host { - rectangle eswitch { - rectangle "physical port 1" as phys_port1 $sty.port.physical - rectangle "physical port 2" as phys_port2 $sty.port.physical - rectangle "user rep" as user_rep $sty.port.rep - rectangle "kernel rep" as kernel_rep $sty.port.rep - } - rectangle "user sriov" as user_sriov $sty.port.sriov - - rectangle netns { - rectangle "kernel sriov" as kernel_sriov $sty.port.sriov - rectangle bridge0 as bridge { - rectangle "vtep" as vtep $sty.port.vtep - rectangle "veth[0]" as veth_0_br $sty.port.veth - rectangle "veth[1]" 
as veth_1_br $sty.port.veth - rectangle "veth[2]" as veth_2_br $sty.port.veth - rectangle "veth[3]" as veth_3_br $sty.port.veth - } - } - - rectangle "veth[0]" as veth_0 $sty.port.veth - rectangle "veth[1]" as veth_1 $sty.port.veth - rectangle "veth[2]" as veth_2 $sty.port.veth - rectangle "veth[3]" as veth_3 $sty.port.veth - -} - -user_rep -- user_sriov -kernel_rep --- kernel_sriov -phys_port1 -[#hidden] phys_port2 -phys_port1 -[#hidden]- user_rep -phys_port2 -[#hidden]- kernel_rep - -elsewhere --- phys_port1 -elsewhere --- phys_port2 - -veth_0_br --- veth_0 -veth_1_br --- veth_1 -veth_2_br --- veth_2 -veth_3_br --- veth_3 -@enduml -``` - - - -```plantuml -@startuml -!pragma teoz true -!$sty = { - "port": { - "vtep": "#lightgreen", - "rep": "#lightpink", - "sriov": "#lightblue", - "physical": "#orange", - "veth": "#c962a9" - } -} - -cloud elsewhere - -rectangle host { - rectangle eswitch { - rectangle "physical port 1" as phys_port1 $sty.port.physical - rectangle "physical port 2" as phys_port2 $sty.port.physical - } - - rectangle bridge0 as bridge { - rectangle "vtep" as vtep $sty.port.vtep - rectangle "veth[0]" as veth_0_br $sty.port.veth - rectangle "veth[1]" as veth_1_br $sty.port.veth - rectangle "veth[2]" as veth_2_br $sty.port.veth - rectangle "veth[3]" as veth_3_br $sty.port.veth - rectangle "veth[4]" as veth_4_br $sty.port.veth - rectangle "veth[5]" as veth_5_br $sty.port.veth - rectangle "veth[6]" as veth_6_br $sty.port.veth - } - - note right of bridge - I only draw one here, - but we can have more vteps - and more bridges using - the **external**, and **vnifilter** - flags when you make bridges. - - Recent FRR supports this. 
- end note - - - rectangle "netns A" { - rectangle "veth[0]" as veth_0 $sty.port.veth - rectangle "veth[1]" as veth_1 $sty.port.veth - rectangle "some process" as some_process - } - rectangle "netns B" { - rectangle "veth[2]" as veth_2 $sty.port.veth - rectangle "veth[3]" as veth_3 $sty.port.veth - rectangle "some other process" as some_other_process - } - - rectangle "netns C" { - rectangle "veth[4]" as veth_4 $sty.port.veth - rectangle "veth[5]" as veth_5 $sty.port.veth - rectangle "veth[6]" as veth_6 $sty.port.veth - rectangle "yet another process" as yet_another_process - } - - rectangle "Kubernetes\n(present?)" as kubernetes - - note right of kubernetes - I need to make sure - I understand exactly what - the plan is regarding - kubernetes. - end note - - rectangle "FRR???\n(future)" as frr - note right of frr - I understand that we don't - need FRR now, but I think we - can all see that one coming. - - Let me know if I'm wrong **¯\_(ツ)_/¯** - end note -} - -phys_port1 -[#hidden] phys_port2 - -elsewhere -- phys_port1 -elsewhere --- phys_port2 - -veth_0_br -- veth_0 -veth_1_br -- veth_1 -veth_2_br -- veth_2 -veth_3_br -- veth_3 -veth_4_br -- veth_4 -veth_5_br -- veth_5 -veth_6_br -- veth_6 - -phys_port1 -[#hidden]- frr -phys_port2 -[#hidden]- frr - -veth_0 -[hidden]- some_process -veth_1 -[hidden]- some_process - -veth_2 -[hidden]- some_other_process -veth_3 -[hidden]- some_other_process - -veth_4 -[hidden]- yet_another_process -veth_5 -[hidden]- yet_another_process -veth_6 -[hidden]- yet_another_process - -@enduml -``` - -```plantuml -@startuml -!pragma teoz true - -title First pass - -!$sty = { - "question": "#gold", - "action": "#lightblue", - "future": "#lightgreen", - "attention": "#pink" -} -!$action = $sty.action -!$question = $sty.question - -group Group 0: Shared Ingress -start -$action:goto group 1; -end group - -group Group 1: Decap -switch (parse) -case () - $question:eth / vlan 2 / ipv4 / udp dst == 4789 / vxlan; - $action:set metadata f(vni); - 
$action:pop vlan + vxlan decap (raw decap); -case () - $question:eth / vlan N != 2; - $action:set metadata f(vlan); - $action:pop vlan; -endswitch -end group -group Group 2: NAT -switch (parse) -case () -$question:eth/ip/(tcp|udp|icmp); -switch (and ct is) -case () - $question:new; - $action:send to dpdk queue **N**; - detach -case () - $question:established | related; - $action:NAT; - note left $sty.attention - HOT PATH - This action is the main - workload of the whole - program. - end note -case () - $question:invalid; - $action:count; - $action:drop; - detach -endswitch -case () -$question:eth/(arp|ipv6/icmpv6 nd); -note right $sty.attention - This is the most important - thing to rate limit. -end note -$action:raw encap (vxlan); -$action:set vni based on meta; -$action:send to DPDK queue **N**; -detach -endswitch - -end group - -group Group 3: Re-tag/encap - -switch (metadata lookup) -case () - $question:vlan+vxlan?; - $action:raw encap (vlan + vxlan); - $action:set vni based on meta; -case () - $question:push vlan?; - $action:push vlan; - $action:set vid based on meta; -endswitch -end group - -group Group 4: Egress -stop -end group - -@enduml -``` - - -```plantuml -@startuml -!pragma teoz true -!$sty = { - "port": { - "vtep": "#lightgreen", - "rep": "#lightpink", - "sriov": "#lightblue", - "physical": "#orange", - "veth": "#c962a9" - } -} - -cloud elsewhere - -rectangle host { - rectangle dpdk_netns { - rectangle eswitch { - rectangle "physical port 1" as phys_port1 $sty.port.physical - rectangle "physical port 2" as phys_port2 $sty.port.physical - } - rectangle veth as veth.dpdk - } - - rectangle veth as veth.kernel - - rectangle bridge as bridge { - rectangle "vtep" as vtep $sty.port.vtep - rectangle "veth[0]" as veth_0_br $sty.port.veth - rectangle "veth[1]" as veth_1_br $sty.port.veth - rectangle "veth[2]" as veth_2_br $sty.port.veth - rectangle "veth[3]" as veth_3_br $sty.port.veth - rectangle "veth[4]" as veth_4_br $sty.port.veth - rectangle "veth[5]" as 
veth_5_br $sty.port.veth - rectangle "veth[6]" as veth_6_br $sty.port.veth - } - - note right of bridge - I only draw one here, - but we can have more vteps - and more bridges using - the **external**, and **vnifilter** - flags when you make bridges. - - Recent FRR supports this. - end note - - rectangle "netns A" { - rectangle "veth[0]" as veth_0 $sty.port.veth - rectangle "veth[1]" as veth_1 $sty.port.veth - rectangle "some process" as some_process - } - rectangle "netns B" { - rectangle "veth[2]" as veth_2 $sty.port.veth - rectangle "veth[3]" as veth_3 $sty.port.veth - rectangle "some other process" as some_other_process - } - rectangle "netns C" { - rectangle "veth[4]" as veth_4 $sty.port.veth - rectangle "veth[5]" as veth_5 $sty.port.veth - rectangle "veth[6]" as veth_6 $sty.port.veth - rectangle "yet another process" as yet_another_process - } - -} - -phys_port1 -[#hidden] phys_port2 - -elsewhere -- phys_port1 -elsewhere --- phys_port2 - -veth_0_br -- veth_0 -veth_1_br -- veth_1 -veth_2_br -- veth_2 -veth_3_br -- veth_3 -veth_4_br -- veth_4 -veth_5_br -- veth_5 -veth_6_br -- veth_6 - -veth_0 -[hidden]- some_process -veth_1 -[hidden]- some_process - -veth_2 -[hidden]- some_other_process -veth_3 -[hidden]- some_other_process - -veth_4 -[hidden]- yet_another_process -veth_5 -[hidden]- yet_another_process -veth_6 -[hidden]- yet_another_process - -veth.dpdk -- veth.kernel -veth.kernel -[hidden]- bridge - -@enduml -``` diff --git a/design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md b/design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md index 2b8c569b..f0e48090 100644 --- a/design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md +++ b/design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md @@ -12,3 +12,5 @@ Requirements: ## Likely dispatch - [@Fredi-raspall] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/links.md b/design-docs/src/mdbook/src/links.md index 0ba73213..8bce87d3 100644 --- 
a/design-docs/src/mdbook/src/links.md +++ b/design-docs/src/mdbook/src/links.md @@ -1,17 +1,17 @@ -[configuration store]: /dataplane/design.md#configuration-store -[control plane]: /dataplane/design.md#control-plane -[dataplane model]: /dataplane/design.md#dataplane-model -[dataplane worker]: /dataplane/design.md#dataplane-workers -[dataplane]: /dataplane/design.md#dataplane -[gateway agent]: /dataplane/design.md#gateway-agent -[hedgehog plugin]: /dataplane/design.md#hedgehog-plugin -[management plane interface]: /dataplane/design.md#management-plane-interface -[management plane]: /dataplane/design.md#management-plane -[nat manager]: /dataplane/design.md#nat-manager -[routing manager]: /dataplane/design.md#routing-manager -[state sync]: /dataplane/design.md#state-sync +[configuration store]: /dataplane/design/index.md#configuration-store +[control plane]: /dataplane/design/index.md#control-plane +[dataplane model]: /dataplane/design/index.md#dataplane-model +[dataplane worker]: /dataplane/design/index.md#dataplane-workers +[dataplane]: /dataplane/design/index.md#dataplane +[gateway agent]: /dataplane/design/index.md#gateway-agent +[hedgehog plugin]: /dataplane/design/index.md#hedgehog-plugin +[management plane interface]: /dataplane/design/index.md#management-plane-interface +[management plane]: /dataplane/design/index.md#management-plane +[nat manager]: /dataplane/design/index.md#nat-manager +[routing manager]: /dataplane/design/index.md#routing-manager +[state sync]: /dataplane/design/index.md#state-sync @@ -23,6 +23,7 @@ [IPv6 ND]: https://en.wikipedia.org/wiki/Neighbor_Discovery_Protocol [LACP]: https://en.wikipedia.org/wiki/Link_aggregation#Link_Aggregation_Control_Protocol [MySQL]: https://www.mysql.com/ +[NAT64]: https://en.wikipedia.org/wiki/NAT64 [NAT]: https://en.wikipedia.org/wiki/Network_address_translation [QoS]: https://en.wikipedia.org/wiki/Quality_of_service [TiDB]: https://www.pingcap.com/ diff --git a/dpdk/src/lib.rs b/dpdk/src/lib.rs index 
4db272ef..87ac7168 100644 --- a/dpdk/src/lib.rs +++ b/dpdk/src/lib.rs @@ -27,7 +27,7 @@ //! //! This crate uses lints to discourage casual use of `unwrap`, `expect`, and `panic` to help //! encourage this practice. -#![cfg_attr(not(test), no_std)] +// #![cfg_attr(not(test), no_std)] #![warn(clippy::all)] #![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)] #![allow(private_bounds)] @@ -40,3 +40,4 @@ pub mod flow; pub mod mem; pub mod queue; pub mod socket; +mod scratch; diff --git a/dpdk/src/scratch.rs b/dpdk/src/scratch.rs new file mode 100644 index 00000000..413cf130 --- /dev/null +++ b/dpdk/src/scratch.rs @@ -0,0 +1,131 @@ +use alloc::string::String; +use alloc::vec::Vec; +use core::net::Ipv4Addr; +use std::collections::{BTreeMap, HashMap, VecDeque}; +use std::marker::PhantomData; +use std::sync::{Arc, Weak}; + +/// A universally unique id +#[repr(transparent)] +struct Uuid(u128); + +enum Tcp {} +enum Udp {} + +trait L4Proto { + type Port; +} + +impl L4Proto for Tcp { + type Port = TcpPort; +} +impl L4Proto for Udp { + type Port = UdpPort; +} + +/// Marker trait for layer 4 ports +trait L4Port {} + +#[repr(transparent)] +struct Port { + pub val: u16, + phantom: PhantomData, +} + +impl Port { + fn new(val: u16) -> Self { + Self { + val, + phantom: PhantomData, + } + } +} + +impl From for Port { + fn from(val: u16) -> Self { + Self::new(val) + } +} + + +/// A tcp port +#[repr(transparent)] +struct TcpPort(u16); + +/// A udp port +#[repr(transparent)] +struct UdpPort(u16); + +impl L4Port for TcpPort {} +impl L4Port for UdpPort {} + +#[repr(transparent)] +struct SctpPort(u16); + +#[repr(transparent)] +struct ThreadId(u32); + +struct Peering { + pub client: Arc, + pub server: Arc, +} + +struct Vpc { + pub id: Uuid, + pub name: String, + pub interfaces: Vec, +} + +struct Cidr { + pub ip: Ipv4Addr, + pub prefix: u8, +} + +#[repr(transparent)] +pub struct Chunk

(VecDeque

); + +impl

Chunk

{ + fn pop(&mut self) -> Option

{ + self.0.pop_front() + } +} + +trait Merge { + fn merge(&mut self, other: Self); +} + +impl

Merge for Chunk

{ + fn merge(&mut self, other: Self) { + self.0.extend(other.0); + } +} + +struct InterfaceWorker { + interface: Weak, + used_tcp: HashMap>, + used_udp: HashMap>, + available_tcp: BTreeMap>, + available_udp: BTreeMap>, + nat: HashMap<(Ipv4Addr, u16), (Ipv4Addr, u16)>, + incoming_tcp: std::sync::mpsc::Receiver<(Ipv4Addr, Chunk)>, + incoming_udp: std::sync::mpsc::Receiver<(Ipv4Addr, Chunk)>, + outgoing_tcp: std::sync::mpsc::Sender<(Ipv4Addr, Chunk)>, + outgoing_udp: std::sync::mpsc::Sender<(Ipv4Addr, Chunk)>, +} + +struct InterfaceWorkerAllocation { + interface_worker: InterfaceWorker, + outgoing_tcp: std::sync::mpsc::Sender<(Ipv4Addr, Chunk)>, + outgoing_udp: std::sync::mpsc::Sender<(Ipv4Addr, Chunk)>, +} + +struct Interface { + vpc: Weak, + cidrs: Vec, + workers: BTreeMap, + available_tcp: BTreeMap>, + available_udp: BTreeMap>, + incoming_tcp: std::sync::mpsc::Receiver>, + incoming_udp: std::sync::mpsc::Receiver>, + // ... +}