From b41d85c10dd398549729e89b8e3d62d5a793cbb6 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 13 Dec 2024 17:48:25 +0000 Subject: [PATCH] [WIP] Add support for dedicated chaincode nodes See #228 Signed-off-by: James Taylor --- cmd/run.go | 129 ++++++++++++++++----- cmd/run/main_test.go | 22 ++++ docs/configuring/dedicated-nodes.md | 15 +++ docs/configuring/overview.md | 2 + go.mod | 1 + internal/builder/run.go | 2 + internal/util/env.go | 1 + internal/util/k8s.go | 37 +++++- mkdocs.yml | 1 + test/integration/main_test.go | 2 + test/integration/testdata/kind-config.yaml | 8 ++ 11 files changed, 188 insertions(+), 32 deletions(-) create mode 100644 docs/configuring/dedicated-nodes.md create mode 100644 test/integration/testdata/kind-config.yaml diff --git a/cmd/run.go b/cmd/run.go index cfdf528..07fb30b 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -5,75 +5,141 @@ package cmd import ( "context" "os" - "strconv" "github.com/hyperledger-labs/fabric-builder-k8s/internal/builder" "github.com/hyperledger-labs/fabric-builder-k8s/internal/log" "github.com/hyperledger-labs/fabric-builder-k8s/internal/util" - "k8s.io/apimachinery/pkg/api/validation" + apivalidation "k8s.io/apimachinery/pkg/api/validation" + "k8s.io/apimachinery/pkg/util/validation" ) -func Run() int { - const ( - expectedArgsLength = 3 - buildOutputDirectoryArg = 1 - runMetadataDirectoryArg = 2 - maximumKubeNamePrefixLength = 30 - ) - - debug, _ := strconv.ParseBool(util.GetOptionalEnv(util.DebugVariable, "false")) - ctx := log.NewCmdContext(context.Background(), debug) - logger := log.New(ctx) - - if len(os.Args) != expectedArgsLength { - logger.Println("Expected BUILD_OUTPUT_DIR and RUN_METADATA_DIR arguments") - - return 1 - } - - buildOutputDirectory := os.Args[buildOutputDirectoryArg] - runMetadataDirectory := os.Args[runMetadataDirectoryArg] - - logger.Debugf("Build output directory: %s", buildOutputDirectory) - logger.Debugf("Run metadata directory: %s", runMetadataDirectory) - +//nolint:nonamedreturns // using the ok bool convention to indicate errors +func getPeerID(logger *log.CmdLogger) (peerID string, ok bool) { peerID, err := util.GetRequiredEnv(util.PeerIDVariable) if err != nil { logger.Printf("Expected %s environment variable\n", util.PeerIDVariable) - return 1 + return peerID, false } logger.Debugf("%s=%s", util.PeerIDVariable, peerID) + return peerID, true +} + +func getKubeconfigPath(logger *log.CmdLogger) string { kubeconfigPath := util.GetOptionalEnv(util.KubeconfigPathVariable, "") logger.Debugf("%s=%s", util.KubeconfigPathVariable, kubeconfigPath) + return kubeconfigPath +} + +func getKubeNamespace(logger *log.CmdLogger) string { kubeNamespace := util.GetOptionalEnv(util.ChaincodeNamespaceVariable, "") logger.Debugf("%s=%s", util.ChaincodeNamespaceVariable, kubeNamespace) if kubeNamespace == "" { + var err error + kubeNamespace, err = util.GetKubeNamespace() if err != nil { + logger.Debugf("Error getting namespace: %+v\n", util.DefaultNamespace, err) kubeNamespace = util.DefaultNamespace } + + logger.Debugf("Using default namespace: %s\n", util.DefaultNamespace) } + return kubeNamespace +} + +//nolint:nonamedreturns // using the ok bool convention to indicate errors +func getKubeNodeRole(logger *log.CmdLogger) (kubeNodeRole string, ok bool) { + kubeNodeRole = util.GetOptionalEnv(util.ChaincodeNodeRoleVariable, "") + logger.Debugf("%s=%s", util.ChaincodeNodeRoleVariable, kubeNodeRole) + + // TODO: are valid taint values the same?! + if msgs := validation.IsValidLabelValue(kubeNodeRole); len(msgs) > 0 { + logger.Printf("The %s environment variable must be a valid Kubernetes label value: %s", util.ChaincodeNodeRoleVariable, msgs[0]) + + return kubeNodeRole, false + } + + return kubeNodeRole, true +} + +func getKubeServiceAccount(logger *log.CmdLogger) string { kubeServiceAccount := util.GetOptionalEnv(util.ChaincodeServiceAccountVariable, util.DefaultServiceAccountName) logger.Debugf("%s=%s", util.ChaincodeServiceAccountVariable, kubeServiceAccount) - kubeNamePrefix := util.GetOptionalEnv(util.ObjectNamePrefixVariable, util.DefaultObjectNamePrefix) + return kubeServiceAccount +} + +//nolint:nonamedreturns // using the ok bool convention to indicate errors +func getKubeNamePrefix(logger *log.CmdLogger) (kubeNamePrefix string, ok bool) { + const maximumKubeNamePrefixLength = 30 + + kubeNamePrefix = util.GetOptionalEnv(util.ObjectNamePrefixVariable, util.DefaultObjectNamePrefix) logger.Debugf("%s=%s", util.ObjectNamePrefixVariable, kubeNamePrefix) if len(kubeNamePrefix) > maximumKubeNamePrefixLength { - logger.Printf("The FABRIC_K8S_BUILDER_OBJECT_NAME_PREFIX environment variable must be a maximum of 30 characters") + logger.Printf("The %s environment variable must be a maximum of 30 characters", util.ObjectNamePrefixVariable) + + return kubeNamePrefix, false + } + + if msgs := apivalidation.NameIsDNS1035Label(kubeNamePrefix, true); len(msgs) > 0 { + logger.Printf("The %s environment variable must be a valid DNS-1035 label: %s", util.ObjectNamePrefixVariable, msgs[0]) + + return kubeNamePrefix, false + } + + return kubeNamePrefix, true +} + +func Run() int { + const ( + expectedArgsLength = 3 + buildOutputDirectoryArg = 1 + runMetadataDirectoryArg = 2 + ) + + debug := util.GetOptionalEnv(util.DebugVariable, "false") + ctx := log.NewCmdContext(context.Background(), debug == "true") + logger := log.New(ctx) + + if len(os.Args) != expectedArgsLength { + logger.Println("Expected BUILD_OUTPUT_DIR and RUN_METADATA_DIR arguments") + + return 1 + } + + buildOutputDirectory := os.Args[buildOutputDirectoryArg] + runMetadataDirectory := os.Args[runMetadataDirectoryArg] + + logger.Debugf("Build output directory: %s", buildOutputDirectory) + logger.Debugf("Run metadata directory: %s", runMetadataDirectory) + + //nolint:varnamelen // using the ok bool convention to indicate errors + var ok bool + + peerID, ok := getPeerID(logger) + if !ok { + return 1 + } + + kubeconfigPath := getKubeconfigPath(logger) + kubeNamespace := getKubeNamespace(logger) + kubeNodeRole, ok := getKubeNodeRole(logger) + if !ok { return 1 } - if msgs := validation.NameIsDNS1035Label(kubeNamePrefix, true); len(msgs) > 0 { - logger.Printf("The FABRIC_K8S_BUILDER_OBJECT_NAME_PREFIX environment variable must be a valid DNS-1035 label: %s", msgs[0]) + kubeServiceAccount := getKubeServiceAccount(logger) + kubeNamePrefix, ok := getKubeNamePrefix(logger) + if !ok { return 1 } @@ -83,6 +149,7 @@ func Run() int { PeerID: peerID, KubeconfigPath: kubeconfigPath, KubeNamespace: kubeNamespace, + KubeNodeRole: kubeNodeRole, KubeServiceAccount: kubeServiceAccount, KubeNamePrefix: kubeNamePrefix, } diff --git a/cmd/run/main_test.go b/cmd/run/main_test.go index a8166a4..3f0ed07 100644 --- a/cmd/run/main_test.go +++ b/cmd/run/main_test.go @@ -43,6 +43,28 @@ var _ = Describe("Main", func() { ), ) + DescribeTable("Running the run command produces the correct error for invalid FABRIC_K8S_BUILDER_NODE_ROLE environment variable values", + func(kubeNodeRoleValue, expectedErrorMessage string) { + args := []string{"BUILD_OUTPUT_DIR", "RUN_METADATA_DIR"} + command := exec.Command(runCmdPath, args...) + command.Env = append(os.Environ(), + "CORE_PEER_ID=core-peer-id-abcdefghijklmnopqrstuvwxyz-0123456789", + "FABRIC_K8S_BUILDER_NODE_ROLE="+kubeNodeRoleValue, + ) + session, err := gexec.Start(command, GinkgoWriter, GinkgoWriter) + Expect(err).NotTo(HaveOccurred()) + + Eventually(session).Should(gexec.Exit(1)) + Eventually( + session.Err, + ).Should(gbytes.Say(expectedErrorMessage)) + }, + Entry("When the FABRIC_K8S_BUILDER_NODE_ROLE is too long", "long-node-role-is-looooooooooooooooooooooooooooooooooooooooooong", `run \[\d+\]: The FABRIC_K8S_BUILDER_NODE_ROLE environment variable must be a valid Kubernetes label value: must be no more than 63 characters`), + Entry("When the FABRIC_K8S_BUILDER_NODE_ROLE contains invalid characters", "invalid*value", `run \[\d+\]: The FABRIC_K8S_BUILDER_NODE_ROLE environment variable must be a valid Kubernetes label value: a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '\.', and must start and end with an alphanumeric character`), + Entry("When the FABRIC_K8S_BUILDER_NODE_ROLE does not start with an alphanumeric character", ".role", `run \[\d+\]: The FABRIC_K8S_BUILDER_NODE_ROLE environment variable must be a valid Kubernetes label value: a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '\.', and must start and end with an alphanumeric character`), + Entry("When the FABRIC_K8S_BUILDER_NODE_ROLE does not end with an alphanumeric character", "role-", `run \[\d+\]: The FABRIC_K8S_BUILDER_NODE_ROLE environment variable must be a valid Kubernetes label value: a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '\.', and must start and end with an alphanumeric character`), + ) + DescribeTable("Running the run command produces the correct error for invalid FABRIC_K8S_BUILDER_OBJECT_NAME_PREFIX environment variable values", func(kubeNamePrefixValue, expectedErrorMessage string) { args := []string{"BUILD_OUTPUT_DIR", "RUN_METADATA_DIR"} diff --git a/docs/configuring/dedicated-nodes.md b/docs/configuring/dedicated-nodes.md new file mode 100644 index 0000000..7ae2f06 --- /dev/null +++ b/docs/configuring/dedicated-nodes.md @@ -0,0 +1,15 @@ +# Dedicated nodes + +TBC + +The `FABRIC_K8S_BUILDER_NODE_ROLE` environment variable can be used to... + +For example, if `FABRIC_K8S_BUILDER_NODE_ROLE` is set to `chaincode`, ... using the following command. + +```shell +kubectl label nodes node1 fabric-builder-k8s-role=chaincode +kubectl taint nodes node1 fabric-builder-k8s-role=chaincode:NoSchedule +``` + +More complex requirements should be handled with Dynamic Admission Control using a Mutating Webhook. +For example, it looks like the namespace-node-affinity webhook could be used to assign node affinity and tolerations to all pods in the FABRIC_K8S_BUILDER_NAMESPACE namespace. diff --git a/docs/configuring/overview.md b/docs/configuring/overview.md index ff447e6..0f04f21 100644 --- a/docs/configuring/overview.md +++ b/docs/configuring/overview.md @@ -14,6 +14,7 @@ External builders are configured in the `core.yaml` file, for example: - CORE_PEER_ID - FABRIC_K8S_BUILDER_DEBUG - FABRIC_K8S_BUILDER_NAMESPACE + - FABRIC_K8S_BUILDER_NODE_ROLE - FABRIC_K8S_BUILDER_OBJECT_NAME_PREFIX - FABRIC_K8S_BUILDER_SERVICE_ACCOUNT - KUBERNETES_SERVICE_HOST @@ -30,6 +31,7 @@ The k8s builder is configured using the following environment variables. | ------------------------------------- | -------------------------------- | ---------------------------------------------------- | | CORE_PEER_ID | | The Fabric peer ID (required) | | FABRIC_K8S_BUILDER_NAMESPACE | The peer namespace or `default` | The Kubernetes namespace to run chaincode with | +| FABRIC_K8S_BUILDER_NODE_ROLE | | TBC | | FABRIC_K8S_BUILDER_OBJECT_NAME_PREFIX | `hlfcc` | Eye-catcher prefix for Kubernetes object names | | FABRIC_K8S_BUILDER_SERVICE_ACCOUNT | `default` | The Kubernetes service account to run chaincode with | | FABRIC_K8S_BUILDER_DEBUG | `false` | Set to `true` to enable k8s builder debug messages | diff --git a/go.mod b/go.mod index 9ff42ba..d2b9760 100644 --- a/go.mod +++ b/go.mod @@ -49,6 +49,7 @@ require ( k8s.io/component-base v0.31.1 // indirect mvdan.cc/sh/v3 v3.7.0 // indirect sigs.k8s.io/controller-runtime v0.19.0 // indirect + sigs.k8s.io/e2e-framework v0.5.0 // indirect ) require ( diff --git a/internal/builder/run.go b/internal/builder/run.go index 3e10128..24d4b11 100644 --- a/internal/builder/run.go +++ b/internal/builder/run.go @@ -16,6 +16,7 @@ type Run struct { PeerID string KubeconfigPath string KubeNamespace string + KubeNodeRole string KubeServiceAccount string KubeNamePrefix string } @@ -73,6 +74,7 @@ func (r *Run) Run(ctx context.Context) error { kubeObjectName, r.KubeNamespace, r.KubeServiceAccount, + r.KubeNodeRole, r.PeerID, chaincodeData, imageData, diff --git a/internal/util/env.go b/internal/util/env.go index f56eb88..7ecf24d 100644 --- a/internal/util/env.go +++ b/internal/util/env.go @@ -10,6 +10,7 @@ import ( const ( builderVariablePrefix = "FABRIC_K8S_BUILDER_" ChaincodeNamespaceVariable = builderVariablePrefix + "NAMESPACE" + ChaincodeNodeRoleVariable = builderVariablePrefix + "NODE_ROLE" ObjectNamePrefixVariable = builderVariablePrefix + "OBJECT_NAME_PREFIX" ChaincodeServiceAccountVariable = builderVariablePrefix + "SERVICE_ACCOUNT" DebugVariable = builderVariablePrefix + "DEBUG" diff --git a/internal/util/k8s.go b/internal/util/k8s.go index aed324b..77513d3 100644 --- a/internal/util/k8s.go +++ b/internal/util/k8s.go @@ -365,7 +365,7 @@ func CreateChaincodeJob( ctx context.Context, logger *log.CmdLogger, jobsClient typedBatchv1.JobInterface, - objectName, namespace, serviceAccount, peerID string, + objectName, namespace, serviceAccount, nodeRole, peerID string, chaincodeData *ChaincodeJSON, imageData *ImageJSON, ) (*batchv1.Job, error) { @@ -381,6 +381,41 @@ func CreateChaincodeJob( return nil, fmt.Errorf("error getting chaincode job definition for chaincode ID %s: %w", chaincodeData.ChaincodeID, err) } + if nodeRole != "" { + logger.Debugf( + "Adding node affinity and toleration to job definition for chaincode ID %s: %s", + chaincodeData.ChaincodeID, + nodeRole, + ) + + jobDefinition.Spec.Template.Spec.Affinity = &apiv1.Affinity{ + NodeAffinity: &apiv1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &apiv1.NodeSelector{ + NodeSelectorTerms: []apiv1.NodeSelectorTerm{ + { + MatchExpressions: []apiv1.NodeSelectorRequirement{ + { + Key: "fabric-builder-k8s-role", + Operator: apiv1.NodeSelectorOpIn, + Values: []string{nodeRole}, + }, + }, + }, + }, + }, + }, + } + + jobDefinition.Spec.Template.Spec.Tolerations = []apiv1.Toleration{ + { + Key: "fabric-builder-k8s-role", + Operator: apiv1.TolerationOpEqual, + Value: nodeRole, + Effect: apiv1.TaintEffectNoSchedule, + }, + } + } + jobName := jobDefinition.ObjectMeta.Name logger.Debugf( diff --git a/mkdocs.yml b/mkdocs.yml index 69be1d9..a26a220 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -106,6 +106,7 @@ nav: - Kubernetes permissions: configuring/kubernetes-permissions.md - Kubernetes namespace: configuring/kubernetes-namespace.md - Kubernetes service account: configuring/kubernetes-service-account.md + - Dedicated nodes: configuring/dedicated-nodes.md - Tutorials: - Developing and debuging chaincode: tutorials/develop-chaincode.md - Creating a chaincode package: tutorials/package-chaincode.md diff --git a/test/integration/main_test.go b/test/integration/main_test.go index f24fd50..76fd166 100644 --- a/test/integration/main_test.go +++ b/test/integration/main_test.go @@ -30,11 +30,13 @@ func TestMain(m *testing.M) { testenv.Setup( envfuncs.CreateCluster(kind.NewProvider(), clusterName), + // envfuncs.CreateClusterWithConfig(kind.NewProvider(), clusterName, "testdata/kind-config.yaml", kind.WithImage("kindest/node:v1.22.2")), envfuncs.CreateNamespace(envCfg.Namespace()), ) testenv.Finish( envfuncs.DeleteNamespace(envCfg.Namespace()), + // envfuncs.ExportClusterLogs(kindClusterName, "./logs"), envfuncs.DestroyCluster(clusterName), ) diff --git a/test/integration/testdata/kind-config.yaml b/test/integration/testdata/kind-config.yaml new file mode 100644 index 0000000..c51a943 --- /dev/null +++ b/test/integration/testdata/kind-config.yaml @@ -0,0 +1,8 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane +- role: worker +- role: worker + labels: + fabric-builder-k8s-role: chaincode