diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a3aab7a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file +# Ignore build and test binaries. +bin/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ada68ff --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib +bin/* +Dockerfile.cross + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Go workspace file +go.work + +# Kubernetes Generated files - skip generated files, except for vendored files +!vendor/**/zz_generated.* + +# editor and IDE paraphernalia +.idea +.vscode +*.swp +*.swo +*~ diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..aac8a13 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,47 @@ +run: + timeout: 5m + allow-parallel-runners: true + +issues: + # don't skip warning about doc comments + # don't exclude the default set of lint + exclude-use-default: false + # restore some of the defaults + # (fill in the rest as needed) + exclude-rules: + - path: "api/*" + linters: + - lll + - path: "internal/*" + linters: + - dupl + - lll +linters: + disable-all: true + enable: + - dupl + - errcheck + - exportloopref + - ginkgolinter + - goconst + - gocyclo + - gofmt + - goimports + - gosimple + - govet + - ineffassign + - lll + - misspell + - nakedret + - prealloc + - revive + - staticcheck + - typecheck + - unconvert + - unparam + - unused + +linters-settings: + revive: + rules: + - name: comment-spacings diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..d7b7eb5 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,15 @@ +Intellectual Property Notice +---------------------------- + +HPCIC DevTools is licensed under the MIT license (LICENSE). + +Copyrights and patents in this project are retained by +contributors. No copyright assignment is required to contribute to +HPCIC DevTools. + +SPDX usage +------------ + +Individual files contain SPDX tags instead of the full license text. +This enables machine processing of license information based on the SPDX +License Identifiers that are available here: https://spdx.org/licenses/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a48973e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +# Build the manager binary +FROM golang:1.22 AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +# Copy the Go Modules manifests +COPY go.mod go.mod +COPY go.sum go.sum +# cache deps before building and copying source so that we don't need to re-download as much +# and so that source changes don't invalidate our downloaded layer +RUN go mod download + +# Copy the go source +COPY cmd/main.go cmd/main.go +COPY api/ api/ +COPY internal/controller/ internal/controller/ + +# Build +# the GOARCH has not a default value to allow the binary be built according to the host where the command +# was called. For example, if we call make docker-build in a local env which has the Apple Silicon M1 SO +# the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore, +# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform. +RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go + +# Use distroless as minimal base image to package the manager binary +# Refer to https://github.com/GoogleContainerTools/distroless for more details +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/manager . +USER 65532:65532 + +ENTRYPOINT ["/manager"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1019cee --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022-2023 LLNS, LLC and other HPCIC DevTools Developers. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fef72bd --- /dev/null +++ b/Makefile @@ -0,0 +1,367 @@ +# VERSION defines the project version for the bundle. +# Update this value when you upgrade the version of your project. +# To re-generate a bundle for another specific version without changing the standard setup, you can: +# - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2) +# - use environment variables to overwrite this value (e.g export VERSION=0.0.2) +VERSION ?= 0.0.1 + +# CHANNELS define the bundle channels used in the bundle. +# Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable") +# To re-generate a bundle for other specific channels without changing the standard setup, you can: +# - use the CHANNELS as arg of the bundle target (e.g make bundle CHANNELS=candidate,fast,stable) +# - use environment variables to overwrite this value (e.g export CHANNELS="candidate,fast,stable") +ifneq ($(origin CHANNELS), undefined) +BUNDLE_CHANNELS := --channels=$(CHANNELS) +endif + +# DEFAULT_CHANNEL defines the default channel used in the bundle. +# Add a new line here if you would like to change its default config. (E.g DEFAULT_CHANNEL = "stable") +# To re-generate a bundle for any other default channel without changing the default setup, you can: +# - use the DEFAULT_CHANNEL as arg of the bundle target (e.g make bundle DEFAULT_CHANNEL=stable) +# - use environment variables to overwrite this value (e.g export DEFAULT_CHANNEL="stable") +ifneq ($(origin DEFAULT_CHANNEL), undefined) +BUNDLE_DEFAULT_CHANNEL := --default-channel=$(DEFAULT_CHANNEL) +endif +BUNDLE_METADATA_OPTS ?= $(BUNDLE_CHANNELS) $(BUNDLE_DEFAULT_CHANNEL) + +# IMAGE_TAG_BASE defines the docker.io namespace and part of the image name for remote images. +# This variable is used to construct full image tags for bundle and catalog images. +# +# For example, running 'make bundle-build bundle-push catalog-build catalog-push' will build and push both +# converged-computing.org/fluxqueue-bundle:$VERSION and converged-computing.org/fluxqueue-catalog:$VERSION. +IMAGE_TAG_BASE ?= converged-computing.org/fluxqueue + +# BUNDLE_IMG defines the image:tag used for the bundle. +# You can use it as an arg. (E.g make bundle-build BUNDLE_IMG=/:) +BUNDLE_IMG ?= $(IMAGE_TAG_BASE)-bundle:v$(VERSION) + +# BUNDLE_GEN_FLAGS are the flags passed to the operator-sdk generate bundle command +BUNDLE_GEN_FLAGS ?= -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS) + +# USE_IMAGE_DIGESTS defines if images are resolved via tags or digests +# You can enable this value if you would like to use SHA Based Digests +# To enable set flag to true +USE_IMAGE_DIGESTS ?= false +ifeq ($(USE_IMAGE_DIGESTS), true) + BUNDLE_GEN_FLAGS += --use-image-digests +endif + +# Set the Operator SDK version to use. By default, what is installed on the system is used. +# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit. +OPERATOR_SDK_VERSION ?= v1.38.0 + +# Image URL to use all building/pushing image targets +REGISTRY ?= ghcr.io/converged-computing +IMG ?= $(REGISTRY)/fluxqueue:latest +DEVIMG ?= $(REGISTRY)/fluxqueue:test +POSTGRES_IMAGE ?= $(REGISTRY)/fluxqueue-postgres:latest + +# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. +ENVTEST_K8S_VERSION = 1.30.0 + +# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) +ifeq (,$(shell go env GOBIN)) +GOBIN=$(shell go env GOPATH)/bin +else +GOBIN=$(shell go env GOBIN) +endif + +# CONTAINER_TOOL defines the container tool to be used for building images. +# Be aware that the target commands are only tested with Docker which is +# scaffolded by default. However, you might want to replace it to use other +# tools. (i.e. podman) +CONTAINER_TOOL ?= docker + +# Setting SHELL to bash allows bash commands to be executed by recipes. +# Options are set to exit when a recipe line exits non-zero or a piped command fails. +SHELL = /usr/bin/env bash -o pipefail +.SHELLFLAGS = -ec + +.PHONY: all +all: build + +##@ General + +# The help target prints out all targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. The awk command is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. +# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Development + +.PHONY: manifests +manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. + $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. + $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." + +.PHONY: fmt +fmt: ## Run go fmt against code. + go fmt ./... + +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: test +test: manifests generate fmt vet envtest ## Run tests. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out + +# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. +.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. +test-e2e: + go test ./test/e2e/ -v -ginkgo.v + +.PHONY: lint +lint: golangci-lint ## Run golangci-lint linter + $(GOLANGCI_LINT) run + +.PHONY: lint-fix +lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes + $(GOLANGCI_LINT) run --fix + +##@ Build + +.PHONY: build +build: manifests generate fmt vet ## Build manager binary. + go build -o bin/manager cmd/main.go + +.PHONY: run +run: manifests generate fmt vet ## Run a controller from your host. + go run ./cmd/main.go + +# If you wish to build the manager image targeting other platforms you can use the --platform flag. +# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it. +# More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +.PHONY: docker-build +docker-build: ## Build docker image with the manager. + $(CONTAINER_TOOL) build -t ${IMG} . + + +.PHONY: build-all +build-all: docker-build build-postgres + +.PHONY: docker-push +docker-push: ## Push docker image with the manager. + $(CONTAINER_TOOL) push ${IMG} + +# PLATFORMS defines the target platforms for the manager image be built to provide support to multiple +# architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to: +# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/ +# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=> then the export will fail) +# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option. +PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le +.PHONY: docker-buildx +docker-buildx: ## Build and push docker image for the manager for cross-platform support + # copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile + sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross + - $(CONTAINER_TOOL) buildx create --name fluxqueue-builder + $(CONTAINER_TOOL) buildx use fluxqueue-builder + - $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross . + - $(CONTAINER_TOOL) buildx rm fluxqueue-builder + rm Dockerfile.cross + +.PHONY: build-installer +build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment. + mkdir -p dist + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default > dist/install.yaml + +##@ Deployment + +ifndef ignore-not-found + ignore-not-found = false +endif + +.PHONY: install +install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. + $(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f - + +.PHONY: uninstall +uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +.PHONY: deploy +deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - + +.PHONY: build-config +build-config: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default > examples/dist/fluxqueue.yaml + +.PHONY build-postgres: + docker build -f build/postgres/Dockerfile -t ${POSTGRES_IMAGE} . + +# Build a test image, push to the registry at test, and apply the build-config +.PHONY: test-deploy +test-deploy: manifests kustomize build-postgres + docker build --no-cache -t ${DEVIMG} . + docker push ${DEVIMG} + cd config/manager && $(KUSTOMIZE) edit set image controller=${DEVIMG} + $(KUSTOMIZE) build config/default > examples/dist/fluxqueue-dev.yaml + +.PHONY: test-deploy-recreate +test-deploy-recreate: test-deploy + kubectl delete -f ./examples/dist/fluxqueue-dev.yaml || echo "Already deleted" + kubectl apply -f ./examples/dist/fluxqueue-dev.yaml + +.PHONY: undeploy +undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +##@ Dependencies + +## Location to install dependencies to +LOCALBIN ?= $(shell pwd)/bin +$(LOCALBIN): + mkdir -p $(LOCALBIN) + +## Tool Binaries +KUBECTL ?= kubectl +KUSTOMIZE ?= $(LOCALBIN)/kustomize +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen +ENVTEST ?= $(LOCALBIN)/setup-envtest +GOLANGCI_LINT = $(LOCALBIN)/golangci-lint +HELMIFY ?= $(LOCALBIN)/helmify + +## Tool Versions +KUSTOMIZE_VERSION ?= v5.4.2 +CONTROLLER_TOOLS_VERSION ?= v0.15.0 +ENVTEST_VERSION ?= release-0.18 +GOLANGCI_LINT_VERSION ?= v1.59.1 + +HELMIFY ?= $(LOCALBIN)/helmify +.PHONY: kustomize +kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. +$(KUSTOMIZE): $(LOCALBIN) + $(call go-install-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v5,$(KUSTOMIZE_VERSION)) + +.PHONY: controller-gen +controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. +$(CONTROLLER_GEN): $(LOCALBIN) + $(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION)) + +.PHONY: envtest +envtest: $(ENVTEST) ## Download setup-envtest locally if necessary. +$(ENVTEST): $(LOCALBIN) + $(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION)) + +.PHONY: golangci-lint +golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary. +$(GOLANGCI_LINT): $(LOCALBIN) + $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION)) + +# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist +# $1 - target path with name of binary +# $2 - package url which can be installed +# $3 - specific version of package +define go-install-tool +@[ -f "$(1)-$(3)" ] || { \ +set -e; \ +package=$(2)@$(3) ;\ +echo "Downloading $${package}" ;\ +rm -f $(1) || true ;\ +GOBIN=$(LOCALBIN) go install $${package} ;\ +mv $(1) $(1)-$(3) ;\ +} ;\ +ln -sf $(1)-$(3) $(1) +endef + +.PHONY: operator-sdk +OPERATOR_SDK ?= $(LOCALBIN)/operator-sdk +operator-sdk: ## Download operator-sdk locally if necessary. +ifeq (,$(wildcard $(OPERATOR_SDK))) +ifeq (, $(shell which operator-sdk 2>/dev/null)) + @{ \ + set -e ;\ + mkdir -p $(dir $(OPERATOR_SDK)) ;\ + OS=$(shell go env GOOS) && ARCH=$(shell go env GOARCH) && \ + curl -sSLo $(OPERATOR_SDK) https://github.com/operator-framework/operator-sdk/releases/download/$(OPERATOR_SDK_VERSION)/operator-sdk_$${OS}_$${ARCH} ;\ + chmod +x $(OPERATOR_SDK) ;\ + } +else +OPERATOR_SDK = $(shell which operator-sdk) +endif +endif + +.PHONY: bundle +bundle: manifests kustomize operator-sdk ## Generate bundle manifests and metadata, then validate generated files. + $(OPERATOR_SDK) generate kustomize manifests -q + cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG) + $(KUSTOMIZE) build config/manifests | $(OPERATOR_SDK) generate bundle $(BUNDLE_GEN_FLAGS) + $(OPERATOR_SDK) bundle validate ./bundle + +.PHONY: bundle-build +bundle-build: ## Build the bundle image. + docker build -f bundle.Dockerfile -t $(BUNDLE_IMG) . + +.PHONY: bundle-push +bundle-push: ## Push the bundle image. + $(MAKE) docker-push IMG=$(BUNDLE_IMG) + +.PHONY: opm +OPM = $(LOCALBIN)/opm +opm: ## Download opm locally if necessary. +ifeq (,$(wildcard $(OPM))) +ifeq (,$(shell which opm 2>/dev/null)) + @{ \ + set -e ;\ + mkdir -p $(dir $(OPM)) ;\ + OS=$(shell go env GOOS) && ARCH=$(shell go env GOARCH) && \ + curl -sSLo $(OPM) https://github.com/operator-framework/operator-registry/releases/download/v1.23.0/$${OS}-$${ARCH}-opm ;\ + chmod +x $(OPM) ;\ + } +else +OPM = $(shell which opm) +endif +endif + +# A comma-separated list of bundle images (e.g. make catalog-build BUNDLE_IMGS=example.com/operator-bundle:v0.1.0,example.com/operator-bundle:v0.2.0). +# These images MUST exist in a registry and be pull-able. +BUNDLE_IMGS ?= $(BUNDLE_IMG) + +# The image tag given to the resulting catalog image (e.g. make catalog-build CATALOG_IMG=example.com/operator-catalog:v0.2.0). +CATALOG_IMG ?= $(IMAGE_TAG_BASE)-catalog:v$(VERSION) + +# Set CATALOG_BASE_IMG to an existing catalog image tag to add $BUNDLE_IMGS to that image. +ifneq ($(origin CATALOG_BASE_IMG), undefined) +FROM_INDEX_OPT := --from-index $(CATALOG_BASE_IMG) +endif + +.PHONY: helmify +helmify: $(HELMIFY) ## Download helmify locally if necessary. +$(HELMIFY): $(LOCALBIN) + test -s $(LOCALBIN)/helmify || GOBIN=$(LOCALBIN) go install github.com/arttor/helmify/cmd/helmify@latest + +helm: manifests kustomize helmify + $(KUSTOMIZE) build config/default | $(HELMIFY) + +.PHONY: pre-push +pre-push: generate api build-config helm + +# Build a catalog image by adding bundle images to an empty catalog using the operator package manager tool, 'opm'. +# This recipe invokes 'opm' in 'semver' bundle add mode. For more information on add modes, see: +# https://github.com/operator-framework/community-operators/blob/7f1438c/docs/packaging-operator.md#updating-your-existing-operator +.PHONY: catalog-build +catalog-build: opm ## Build a catalog image. + $(OPM) index add --container-tool docker --mode semver --tag $(CATALOG_IMG) --bundles $(BUNDLE_IMGS) $(FROM_INDEX_OPT) + +# Push the catalog image. +.PHONY: catalog-push +catalog-push: ## Push a catalog image. + $(MAKE) docker-push IMG=$(CATALOG_IMG) diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..3737d5a --- /dev/null +++ b/NOTICE @@ -0,0 +1,21 @@ +This work was produced under the auspices of the U.S. Department of +Energy by Lawrence Livermore National Laboratory under Contract +DE-AC52-07NA27344. + +This work was prepared as an account of work sponsored by an agency of +the United States Government. Neither the United States Government nor +Lawrence Livermore National Security, LLC, nor any of their employees +makes any warranty, expressed or implied, or assumes any legal liability +or responsibility for the accuracy, completeness, or usefulness of any +information, apparatus, product, or process disclosed, or represents that +its use would not infringe privately owned rights. + +Reference herein to any specific commercial product, process, or service +by trade name, trademark, manufacturer, or otherwise does not necessarily +constitute or imply its endorsement, recommendation, or favoring by the +United States Government or Lawrence Livermore National Security, LLC. + +The views and opinions of authors expressed herein do not necessarily +state or reflect those of the United States Government or Lawrence +Livermore National Security, LLC, and shall not be used for advertising +or product endorsement purposes. diff --git a/PROJECT b/PROJECT new file mode 100644 index 0000000..1ecbd18 --- /dev/null +++ b/PROJECT @@ -0,0 +1,26 @@ +# Code generated by tool. DO NOT EDIT. +# This file is used to track the info used to scaffold your project +# and allow the plugins properly work. +# More info: https://book.kubebuilder.io/reference/project-config.html +domain: converged-computing.org +layout: +- go.kubebuilder.io/v4 +plugins: + manifests.sdk.operatorframework.io/v2: {} + scorecard.sdk.operatorframework.io/v2: {} +projectName: fluxqueue +repo: github.com/converged-computing/fluxqueue +resources: +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: converged-computing.org + group: jobs + kind: FluxJob + path: github.com/converged-computing/fluxqueue/api/v1alpha1 + version: v1alpha1 + webhooks: + defaulting: true + webhookVersion: v1 +version: "3" diff --git a/README.md b/README.md new file mode 100644 index 0000000..a115dcf --- /dev/null +++ b/README.md @@ -0,0 +1,118 @@ +# Fluxqueue + +> Under development! + +![img/fluxqueue.png](img/fluxqueue.png) + +I'm still thinking over improvements to fluxnetes, fluence, and related projects, and this is the direction I'm currently taking. I've been thinking of a design where Flux works as a controller, as follows: + +1. The controller has an admission webhook that intercepts jobs and pods being submit. For jobs, they are suspended. For all other abstractions, [scheduling gates](https://kubernetes.io/blog/2022/12/26/pod-scheduling-readiness-alpha/) are used. +2. The jobs are wrapped as `FluxJob` and parsed into Flux Job specs and passed to a part of the controller, the Flux Queue. +3. The Flux Queue, which runs in a loop, moves through the queue and interacts with a Fluxion service to schedule work. +4. When a job is scheduled, it is unsuspended and/or targeted for the fluxqueue custom scheduler plugin that will assign exactly to the nodes it has been intended for. +5. We will need an equivalent cleanup process to receive when pods are done, and tell fluxion and update the queue. Likely those will be done in the same operation. + +This project comes out of [fluxqueue](https://github.com/converged-computing/fluxqueue), which was similar in design, but did the implementation entirely inside of Kubernetes. fluxqueue was a combination of Kubernetes and [Fluence](https://github.com/flux-framework/flux-k8s), both of which use the HPC-grade pod scheduling [Fluxion scheduler](https://github.com/flux-framework/flux-sched) to schedule pod groups to nodes. For our queue, we use [river](https://riverqueue.com/docs) backed by a Postgres database. The database is deployed alongside fluence and could be customized to use an operator instead. + +**Important** This is an experiment, and is under development. I will change this design a million times - it's how I tend to learn and work. I'll share updates when there is something to share. It deploys but does not work yet! +See the [docs](docs) for some detail on design choices. + +## Design + +Fluxqueue builds three primary containers: + + - `ghcr.io/converged-computing/fluxqueue`: contains a custom kube-scheduler build with fluxqueue as the primary scheduler. + - `ghcr.io/converged-computing/fluxqueue-sidecar`: provides the fluxion service, queue for pods and groups, and a second service that will expose a kubectl command for inspection of state. + - `ghcr.io/converged-computing/fluxqueue-postgres`: holds the worker queue and provisional queue tables + +Instead of doing an out of tree scheduler plugin, for this project I am adding directly to Kubernetes and building. I'm curious to see if this will be easier or harder to maintain than an out of tree plugin, which seems to break frequently as the upstream for scheduler-plugins changes. + +## Deploy + +Create a kind cluster. You need more than a control plane. + +```bash +kind create cluster --config ./examples/kind-config.yaml +``` + +Install the certificate manager: + +```bash +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.1/cert-manager.yaml +``` + +Then you can deploy as follows: + +```bash +./hack/quick-build-kind.sh +``` + +You'll then have the fluxqueue service running, a postgres database (for the job queue), along with (TBA) the scheduler plugins controller, which we +currently have to use PodGroup. + +```bash +$ kubectl get pods -n fluxqueue +NAME READY STATUS RESTARTS AGE +fluxqueue-chart-controller-manager-6dd6f95c6-z9qdk 0/1 Running 0 9s +postgres-5dc8c6b49d-llv2s 0/1 Running 0 9s +``` + +You can then create a job or a pod: + +```bash +kubectl apply -f test/job.yaml +kubectl apply -f test/pod.yaml +``` + +Which will currently each be suspended (job) or schedule gated (pod) to prevent scheduling. A FluxJob to wrap them is also created: + +```bash +$ kubectl get fluxjobs.jobs.converged-computing.org +NAME AGE +job-pod 4s +pod-pod 6s +``` + +Next I'm going to figure out how we can add a queue that receives these jobs and asks to schedule with fluxion. + +## Development + +### Debugging Postgres + +It is often helpful to shell into the postgres container to see the database directly: + +```bash +kubectl exec -n fluxqueue-system -it postgres-597db46977-9lb25 bash +psql -U postgres + +# Connect to database +\c + +# list databases +\l + +# show tables +\dt + +# test a query +SELECT group_name, group_size from pods_provisional; +``` + +### TODO + +- [ ] Figure out how to add queue +- [ ] Figure out how to add fluxion +- [ ] kubectl plugin to get fluxion state? + +## License + +HPCIC DevTools is distributed under the terms of the MIT license. +All new contributions must be made under this license. + +See [LICENSE](https://github.com/converged-computing/cloud-select/blob/main/LICENSE), +[COPYRIGHT](https://github.com/converged-computing/cloud-select/blob/main/COPYRIGHT), and +[NOTICE](https://github.com/converged-computing/cloud-select/blob/main/NOTICE) for details. + +SPDX-License-Identifier: (MIT) + +LLNL-CODE- 842614 diff --git a/api/v1alpha1/fluxjob_types.go b/api/v1alpha1/fluxjob_types.go new file mode 100644 index 0000000..e42d820 --- /dev/null +++ b/api/v1alpha1/fluxjob_types.go @@ -0,0 +1,129 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// "Enum" to determine what we are wrapping +type JobWrapped int + +const ( + JobWrappedPod JobWrapped = iota + JobWrappedJob +) + +// GetJobName creates a job name that appends the type +func GetJobName(jobType JobWrapped, name string) string { + + switch jobType { + case JobWrappedJob: + return fmt.Sprintf("%s-job", name) + case JobWrappedPod: + return fmt.Sprintf("%s-pod", name) + } + return fmt.Sprintf("%s-unknown", name) +} + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// FluxJobSpec defines the desired state of FluxJob +// A user is not expected to create these - +// 1. A job or similar will be submit +// 2. It will be intercepted by a webhook here +// 3. The webhook will generate the object +// 4. The job will go into the fluxqueue +// 5. When scheduled, it gets sent with an exact node assignment +// to the custom scheduler plugin. +// 6. Cleanup will need to be handled +// +// A FluxJob is a mapping of a Kubernetes abstraction (e.g., job) +// into a Flux JobSpec, one that Fluxion can digest. +type FluxJobSpec struct { + + // JobSpec is the Flux jobspec + // +optional + JobSpec string `json:"jobspec,omitempty"` + + // Type of object that is wrapped + // +optional + Type JobWrapped `json:"type"` + + // Object is the underlying pod/job/object specification + // This currently is assumed that one job has equivalent pods under it + // +optional + Object []byte `json:"object,omitempty"` + + // Duration is the maximum runtime of the job + // +optional + Duration int32 `json:"duration,omitempty"` + + // If true, we are allowed to ask fluxion for + // a reservation + // +optional + Reservation bool `json:"reservation,omitempty"` + + // Nodes needed for the job + // +optional + Nodes int32 `json:"nodes"` + + // Resources assigned + // +optional + Resources Resources `json:"resources"` +} + +type Resources struct { + + // Nodes assigned to the job + // +optional + Nodes []string `json:"nodes"` +} + +// FluxJobStatus defines the observed state of FluxJob +type FluxJobStatus struct { + // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster + // Important: Run "make" to regenerate code after modifying this file +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status + +// FluxJob is the Schema for the fluxjobs API +type FluxJob struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec FluxJobSpec `json:"spec,omitempty"` + Status FluxJobStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// FluxJobList contains a list of FluxJob +type FluxJobList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []FluxJob `json:"items"` +} + +func init() { + SchemeBuilder.Register(&FluxJob{}, &FluxJobList{}) +} diff --git a/api/v1alpha1/fluxjob_webhook.go b/api/v1alpha1/fluxjob_webhook.go new file mode 100644 index 0000000..01a754a --- /dev/null +++ b/api/v1alpha1/fluxjob_webhook.go @@ -0,0 +1,144 @@ +package v1alpha1 + +import ( + "context" + "encoding/json" + "net/http" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" +) + +var ( + logger = ctrl.Log.WithName("webhook") + schedulingGateName = "fluxqueue" + schedulerName = "fluxion" + mgr manager.Manager +) + +// IMPORTANT: if you use the controller-runtime builder, it will derive this name automatically from the gvk (kind, version, etc. so find the actual created path in the logs) +// kubectl describe mutatingwebhookconfigurations.admissionregistration.k8s.io +// It will also only allow you to describe one object type with For() +// This is disabled so we manually manage it - multiple types to a list did not work: config/webhook/manifests.yaml +////kubebuilder:webhook:path=/mutate-v1-sidecar,mutating=true,failurePolicy=fail,sideEffects=None,groups=core;batch,resources=pods;jobs,verbs=create,versions=v1,name=morascache.kb.io,admissionReviewVersions=v1 + +// NewMutatingWebhook allows us to keep the jobReceiver private +// If it's public it's exported and kubebuilder tries to add to zz_generated_deepcopy +// and you get all kinds of terrible errors about admission.Decoder missing DeepCopyInto +func NewMutatingWebhook(mainManager manager.Manager) *jobReceiver { + mgr = mainManager + return &jobReceiver{decoder: admission.NewDecoder(mainManager.GetScheme())} +} + +type jobReceiver struct { + decoder admission.Decoder +} + +// Handle is the main function to receive the object (por or job to start) +// Pods: we use scheduling gates to prevent from scheduling +// Jobs: we suspend +func (a *jobReceiver) Handle(ctx context.Context, req admission.Request) admission.Response { + + // First try for job + job := &batchv1.Job{} + err := a.decoder.Decode(req, job) + if err != nil { + + // Try for a pod next + pod := &corev1.Pod{} + err := a.decoder.Decode(req, pod) + if err != nil { + logger.Error(err, "admission error") + return admission.Errored(http.StatusBadRequest, err) + } + + // If we get here, we decoded a pod, and want to enqueue as a FluxJob + err = a.EnqueuePod(ctx, pod) + if err != nil { + logger.Error(err, "inject pod") + return admission.Errored(http.StatusBadRequest, err) + } + + // Mutate the fields in pod + marshalledPod, err := json.Marshal(pod) + if err != nil { + logger.Error(err, "marshalling pod") + return admission.Errored(http.StatusInternalServerError, err) + } + logger.Info("Admission pod success.") + return admission.PatchResponseFromRaw(req.Object.Raw, marshalledPod) + } + + // If we get here, we found a job + err = a.EnqueueJob(ctx, job) + if err != nil { + logger.Error(err, "inject job") + return admission.Errored(http.StatusBadRequest, err) + } + marshalledJob, err := json.Marshal(job) + if err != nil { + logger.Error(err, "marshalling job") + return admission.Errored(http.StatusInternalServerError, err) + } + logger.Info("Admission job success.") + return admission.PatchResponseFromRaw(req.Object.Raw, marshalledJob) +} + +// InjectPod adds the sidecar container to a pod +func (a *jobReceiver) EnqueuePod(ctx context.Context, pod *corev1.Pod) error { + logger.Info("Enqueue pod", "Name", pod.Name, "Namespace", pod.Namespace) + + // Add scheduling gate to the pod + if pod.Spec.SchedulingGates == nil { + pod.Spec.SchedulingGates = []corev1.PodSchedulingGate{} + } + fluxqGate := corev1.PodSchedulingGate{Name: schedulingGateName} + pod.Spec.SchedulingGates = append(pod.Spec.SchedulingGates, fluxqGate) + + // Ensure the pod gets scheduled with fluxion scheduler + pod.Spec.SchedulerName = schedulerName + logger.Info("received pod", "Name", pod.Name) + + podBytes, err := json.Marshal(pod) + if err != nil { + return err + } + + return SubmitFluxJob( + ctx, + JobWrappedPod, + podBytes, + pod.Name, + pod.Namespace, + 1, + ) +} + +// EnqueueJob suspends the job and creates a FluxJob +func (a *jobReceiver) EnqueueJob(ctx context.Context, job *batchv1.Job) error { + + logger.Info("Enqueue job", "Name", job.Name, "Namespace", job.Namespace) + + // Suspend the job + suspended := true + job.Spec.Suspend = &suspended + + // Convert to bytes to store with the queue + jobBytes, err := json.Marshal(job) + if err != nil { + return err + } + + logger.Info("received job", "Name", job.Name) + return SubmitFluxJob( + ctx, + JobWrappedPod, + jobBytes, + job.Name, + job.Namespace, + *job.Spec.Parallelism, + ) +} diff --git a/api/v1alpha1/fluxjob_webhook_test.go b/api/v1alpha1/fluxjob_webhook_test.go new file mode 100644 index 0000000..8adea87 --- /dev/null +++ b/api/v1alpha1/fluxjob_webhook_test.go @@ -0,0 +1,33 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + . "github.com/onsi/ginkgo/v2" +) + +var _ = Describe("FluxJob Webhook", func() { + + Context("When creating FluxJob under Defaulting Webhook", func() { + It("Should fill in the default value if a required field is empty", func() { + + // TODO(user): Add your logic here + + }) + }) + +}) diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go new file mode 100644 index 0000000..c6c85e1 --- /dev/null +++ b/api/v1alpha1/groupversion_info.go @@ -0,0 +1,36 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1alpha1 contains API Schema definitions for the jobs v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=jobs.converged-computing.org +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "jobs.converged-computing.org", Version: "v1alpha1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/api/v1alpha1/submit.go b/api/v1alpha1/submit.go new file mode 100644 index 0000000..17973e8 --- /dev/null +++ b/api/v1alpha1/submit.go @@ -0,0 +1,72 @@ +package v1alpha1 + +import ( + "context" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var ( + cli client.Client + slog = ctrl.Log.WithName("submit") +) + +// SubmitFluxJob wraps a pod or job spec into a FluxJob +// We essentially create a CRD for a a FluxJob +func SubmitFluxJob( + ctx context.Context, + jobType JobWrapped, + spec []byte, + name string, + namespace string, + nodes int32, +) error { + + // Ensure we have a client that knows how to create FluxJob + if cli == nil { + cli = mgr.GetClient() + } + + // Check for existing Flux Job + jobName := GetJobName(jobType, name) + existing := &FluxJob{} + + // Don't allow a job (of same name, type, namespace) to be submit twice + err := cli.Get(ctx, types.NamespacedName{Name: jobName, Namespace: namespace}, existing) + if err == nil { + slog.Info("Job already exists, will not submit again", "Namespace", namespace, "Name", jobName) + return err + } + // If it's not an issue of not being found, this should not happen + if !errors.IsNotFound(err) { + slog.Error(err, "Issue with getting job", "Namespace", namespace, "Name", jobName) + return err + } + + // If we get here, create! + slog.Info("Creating flux job ", "Namespace", namespace, "Name", jobName) + + // Define the Flux Job + fluxjob := &FluxJob{ + ObjectMeta: metav1.ObjectMeta{Name: jobName, Namespace: namespace}, + Spec: FluxJobSpec{ + JobSpec: "", + Object: spec, + Nodes: nodes, + Type: jobType, + }, + } + err = cli.Create(ctx, fluxjob) + if err != nil { + slog.Error(err, "Issue with creating job", "Namespace", namespace, "Name", jobName) + return err + } + slog.Info("Created flux job", "Namespace", namespace, "Name", jobName) + return nil +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 0000000..a95db3b --- /dev/null +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,140 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FluxJob) DeepCopyInto(out *FluxJob) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + out.Status = in.Status +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FluxJob. +func (in *FluxJob) DeepCopy() *FluxJob { + if in == nil { + return nil + } + out := new(FluxJob) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *FluxJob) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FluxJobList) DeepCopyInto(out *FluxJobList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]FluxJob, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FluxJobList. +func (in *FluxJobList) DeepCopy() *FluxJobList { + if in == nil { + return nil + } + out := new(FluxJobList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *FluxJobList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FluxJobSpec) DeepCopyInto(out *FluxJobSpec) { + *out = *in + if in.Object != nil { + in, out := &in.Object, &out.Object + *out = make([]byte, len(*in)) + copy(*out, *in) + } + in.Resources.DeepCopyInto(&out.Resources) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FluxJobSpec. +func (in *FluxJobSpec) DeepCopy() *FluxJobSpec { + if in == nil { + return nil + } + out := new(FluxJobSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FluxJobStatus) DeepCopyInto(out *FluxJobStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FluxJobStatus. +func (in *FluxJobStatus) DeepCopy() *FluxJobStatus { + if in == nil { + return nil + } + out := new(FluxJobStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Resources) DeepCopyInto(out *Resources) { + *out = *in + if in.Nodes != nil { + in, out := &in.Nodes, &out.Nodes + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources. +func (in *Resources) DeepCopy() *Resources { + if in == nil { + return nil + } + out := new(Resources) + in.DeepCopyInto(out) + return out +} diff --git a/build/postgres/Dockerfile b/build/postgres/Dockerfile new file mode 100644 index 0000000..176e4eb --- /dev/null +++ b/build/postgres/Dockerfile @@ -0,0 +1,16 @@ +FROM postgres:15.5-bookworm + +ENV DEBIAN_FRONTEND=noninteractive +ENV GO_VERSION=1.22.5 + +RUN apt-get update && apt-get install -y wget && apt clean -y && apt -y autoremove + +# Install go +RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${GO_VERSION}.linux-amd64.tar.gz && \ + mv go /usr/local && rm go${GO_VERSION}.linux-amd64.tar.gz +ENV PATH=/usr/local/go/bin:/root/go/bin:$PATH +RUN go install github.com/riverqueue/river/cmd/river@latest && mv /root/go/bin/river /usr/local/bin +# On start need to run: +# runuser -l postgres -c '/usr/local/bin/river migrate-up --database-url postgres://localhost:5432/postgres' +# this is relative to the Makefile that calls build +COPY ./build/postgres/create-tables.sql /docker-entrypoint-initdb.d/ diff --git a/build/postgres/create-tables.sql b/build/postgres/create-tables.sql new file mode 100644 index 0000000..2a4f42c --- /dev/null +++ b/build/postgres/create-tables.sql @@ -0,0 +1,37 @@ +CREATE TABLE pods_provisional ( + podspec TEXT NOT NULL, + namespace TEXT NOT NULL, + name TEXT NOT NULL, + duration INTEGER NOT NULL, + created_at timestamptz NOT NULL default NOW(), + group_name TEXT NOT NULL +); +CREATE UNIQUE INDEX group_name_index ON pods_provisional (group_name, namespace, name); + +-- A single row for each group +CREATE TABLE groups_provisional ( + podspec TEXT NOT NULL, + namespace TEXT NOT NULL, + duration INTEGER NOT NULL, + created_at timestamptz NOT NULL default NOW(), + group_name TEXT NOT NULL, + group_size INTEGER NOT NULL, + current_size INTEGER NOT NULL +); +CREATE UNIQUE INDEX groups_provisional_index ON groups_provisional (group_name, namespace); + +-- We only need the fluxid for a reservation +CREATE TABLE reservations ( + group_name TEXT NOT NULL, + flux_id INTEGER NOT NULL +); +-- Pods get moved from provisional to pending as group objects +-- The pending queue includes states pending (still waiting to run), +CREATE TABLE pending_queue ( + group_name TEXT NOT NULL, + namespace TEXT NOT NULL, + group_size INTEGER NOT NULL, + flux_id INTEGER +); + -- Don't allow inserting the same group name / namespace stwice +CREATE UNIQUE INDEX pending_key ON pending_queue(group_name, namespace); \ No newline at end of file diff --git a/chart/.helmignore b/chart/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/chart/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/chart/Chart.yaml b/chart/Chart.yaml new file mode 100644 index 0000000..200d973 --- /dev/null +++ b/chart/Chart.yaml @@ -0,0 +1,21 @@ +apiVersion: v2 +name: chart +description: A Helm chart for Kubernetes +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.1.0" diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl new file mode 100644 index 0000000..7ba5edc --- /dev/null +++ b/chart/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "chart.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "chart.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "chart.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "chart.labels" -}} +helm.sh/chart: {{ include "chart.chart" . }} +{{ include "chart.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "chart.selectorLabels" -}} +app.kubernetes.io/name: {{ include "chart.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "chart.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "chart.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml new file mode 100644 index 0000000..60e04f1 --- /dev/null +++ b/chart/templates/deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "chart.fullname" . }}-controller-manager + labels: + control-plane: controller-manager + {{- include "chart.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.controllerManager.replicas }} + selector: + matchLabels: + control-plane: controller-manager + {{- include "chart.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + control-plane: controller-manager + {{- include "chart.selectorLabels" . | nindent 8 }} + annotations: + kubectl.kubernetes.io/default-container: manager + spec: + containers: + - args: {{- toYaml .Values.controllerManager.manager.args | nindent 8 }} + command: + - /manager + env: + - name: KUBERNETES_CLUSTER_DOMAIN + value: {{ quote .Values.kubernetesClusterDomain }} + image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag + | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.controllerManager.manager.imagePullPolicy }} + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: {{- toYaml .Values.controllerManager.manager.resources | nindent 10 + }} + securityContext: {{- toYaml .Values.controllerManager.manager.containerSecurityContext + | nindent 10 }} + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + securityContext: {{- toYaml .Values.controllerManager.podSecurityContext | nindent + 8 }} + serviceAccountName: {{ include "chart.fullname" . }}-controller-manager + terminationGracePeriodSeconds: 10 + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: webhook-server-cert \ No newline at end of file diff --git a/chart/templates/fluxjob-crd.yaml b/chart/templates/fluxjob-crd.yaml new file mode 100644 index 0000000..2e774b2 --- /dev/null +++ b/chart/templates/fluxjob-crd.yaml @@ -0,0 +1,116 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: fluxjobs.jobs.converged-computing.org + annotations: + cert-manager.io/inject-ca-from: '{{ .Release.Namespace }}/{{ include "chart.fullname" + . }}-serving-cert' + controller-gen.kubebuilder.io/version: v0.15.0 + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: '{{ include "chart.fullname" . }}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /convert + conversionReviewVersions: + - v1 + group: jobs.converged-computing.org + names: + kind: FluxJob + listKind: FluxJobList + plural: fluxjobs + singular: fluxjob + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: FluxJob is the Schema for the fluxjobs API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + FluxJobSpec defines the desired state of FluxJob + A user is not expected to create these - + 1. A job or similar will be submit + 2. It will be intercepted by a webhook here + 3. The webhook will generate the object + 4. The job will go into the fluxqueue + 5. When scheduled, it gets sent with an exact node assignment + to the custom scheduler plugin. + 6. Cleanup will need to be handled + + + A FluxJob is a mapping of a Kubernetes abstraction (e.g., job) + into a Flux JobSpec, one that Fluxion can digest. + properties: + duration: + description: Duration is the maximum runtime of the job + format: int32 + type: integer + jobspec: + description: JobSpec is the Flux jobspec + type: string + nodes: + description: Nodes needed for the job + format: int32 + type: integer + object: + description: |- + Object is the underlying pod/job/object specification + This currently is assumed that one job has equivalent pods under it + format: byte + type: string + reservation: + description: |- + If true, we are allowed to ask fluxion for + a reservation + type: boolean + resources: + description: Resources assigned + properties: + nodes: + description: Nodes assigned to the job + items: + type: string + type: array + type: object + type: + description: Type of object that is wrapped + type: integer + type: object + status: + description: FluxJobStatus defines the observed state of FluxJob + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] \ No newline at end of file diff --git a/chart/templates/fluxjob-editor-rbac.yaml b/chart/templates/fluxjob-editor-rbac.yaml new file mode 100644 index 0000000..e248439 --- /dev/null +++ b/chart/templates/fluxjob-editor-rbac.yaml @@ -0,0 +1,25 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "chart.fullname" . }}-fluxjob-editor-role + labels: + {{- include "chart.labels" . | nindent 4 }} +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get \ No newline at end of file diff --git a/chart/templates/fluxjob-viewer-rbac.yaml b/chart/templates/fluxjob-viewer-rbac.yaml new file mode 100644 index 0000000..3c74713 --- /dev/null +++ b/chart/templates/fluxjob-viewer-rbac.yaml @@ -0,0 +1,21 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "chart.fullname" . }}-fluxjob-viewer-role + labels: + {{- include "chart.labels" . | nindent 4 }} +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - get + - list + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get \ No newline at end of file diff --git a/chart/templates/leader-election-rbac.yaml b/chart/templates/leader-election-rbac.yaml new file mode 100644 index 0000000..822b5fe --- /dev/null +++ b/chart/templates/leader-election-rbac.yaml @@ -0,0 +1,53 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "chart.fullname" . }}-leader-election-role + labels: + {{- include "chart.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "chart.fullname" . }}-leader-election-rolebinding + labels: + {{- include "chart.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: '{{ include "chart.fullname" . }}-leader-election-role' +subjects: +- kind: ServiceAccount + name: '{{ include "chart.fullname" . }}-controller-manager' + namespace: '{{ .Release.Namespace }}' \ No newline at end of file diff --git a/chart/templates/manager-rbac.yaml b/chart/templates/manager-rbac.yaml new file mode 100644 index 0000000..5cfb233 --- /dev/null +++ b/chart/templates/manager-rbac.yaml @@ -0,0 +1,48 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "chart.fullname" . }}-manager-role + labels: + {{- include "chart.labels" . | nindent 4 }} +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/finalizers + verbs: + - update +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get + - patch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "chart.fullname" . }}-manager-rolebinding + labels: + {{- include "chart.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: '{{ include "chart.fullname" . }}-manager-role' +subjects: +- kind: ServiceAccount + name: '{{ include "chart.fullname" . }}-controller-manager' + namespace: '{{ .Release.Namespace }}' \ No newline at end of file diff --git a/chart/templates/metrics-auth-rbac.yaml b/chart/templates/metrics-auth-rbac.yaml new file mode 100644 index 0000000..ad7035b --- /dev/null +++ b/chart/templates/metrics-auth-rbac.yaml @@ -0,0 +1,34 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "chart.fullname" . }}-metrics-auth-role + labels: + {{- include "chart.labels" . | nindent 4 }} +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "chart.fullname" . }}-metrics-auth-rolebinding + labels: + {{- include "chart.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: '{{ include "chart.fullname" . }}-metrics-auth-role' +subjects: +- kind: ServiceAccount + name: '{{ include "chart.fullname" . }}-controller-manager' + namespace: '{{ .Release.Namespace }}' \ No newline at end of file diff --git a/chart/templates/metrics-reader-rbac.yaml b/chart/templates/metrics-reader-rbac.yaml new file mode 100644 index 0000000..8ebd620 --- /dev/null +++ b/chart/templates/metrics-reader-rbac.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "chart.fullname" . }}-metrics-reader + labels: + {{- include "chart.labels" . | nindent 4 }} +rules: +- nonResourceURLs: + - /metrics + verbs: + - get \ No newline at end of file diff --git a/chart/templates/metrics-service.yaml b/chart/templates/metrics-service.yaml new file mode 100644 index 0000000..85ec688 --- /dev/null +++ b/chart/templates/metrics-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "chart.fullname" . }}-controller-manager-metrics-service + labels: + control-plane: controller-manager + {{- include "chart.labels" . | nindent 4 }} +spec: + type: {{ .Values.metricsService.type }} + selector: + control-plane: controller-manager + {{- include "chart.selectorLabels" . | nindent 4 }} + ports: + {{- .Values.metricsService.ports | toYaml | nindent 2 }} \ No newline at end of file diff --git a/chart/templates/mutating-webhook-configuration.yaml b/chart/templates/mutating-webhook-configuration.yaml new file mode 100644 index 0000000..acd1b7e --- /dev/null +++ b/chart/templates/mutating-webhook-configuration.yaml @@ -0,0 +1,32 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: {{ include "chart.fullname" . }}-mutating-webhook-configuration + annotations: + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "chart.fullname" . }}-serving-cert + labels: + {{- include "chart.labels" . | nindent 4 }} +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "chart.fullname" . }}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /mutate-v1-sidecar + failurePolicy: Fail + name: mfluxjob.kb.io + rules: + - apiGroups: + - "" + - core + - batch + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pods + - jobs + sideEffects: None \ No newline at end of file diff --git a/chart/templates/postgres.yaml b/chart/templates/postgres.yaml new file mode 100644 index 0000000..5ef6f11 --- /dev/null +++ b/chart/templates/postgres.yaml @@ -0,0 +1,63 @@ +# Note: This is intended for development/test deployments +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: {{ .Values.postgres.image }} + imagePullPolicy: {{ .Values.postgres.pullPolicy }} + ports: + - name: postgres-port + containerPort: 5432 + env: + - name: POSTGRES_USER + value: postgres + - name: POSTGRES_PASSWORD + value: postgres + - name: POSTGRES_DB + value: postgres + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "1Gi" + cpu: "2" + readinessProbe: + exec: + command: + - "sh" + - "-c" + - > + pg_isready -q -d postgres -U postgres; + runuser -l postgres -c '/usr/local/bin/river migrate-up --database-url postgres://localhost:5432/postgres > /tmp/post-start.log' + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: {{ .Release.Namespace }} +spec: + selector: + app: postgres + ports: + - protocol: TCP + port: 5432 + targetPort: 5432 \ No newline at end of file diff --git a/chart/templates/selfsigned-issuer.yaml b/chart/templates/selfsigned-issuer.yaml new file mode 100644 index 0000000..e2a3fea --- /dev/null +++ b/chart/templates/selfsigned-issuer.yaml @@ -0,0 +1,8 @@ +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ include "chart.fullname" . }}-selfsigned-issuer + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + selfSigned: {} \ No newline at end of file diff --git a/chart/templates/serviceaccount.yaml b/chart/templates/serviceaccount.yaml new file mode 100644 index 0000000..d94867d --- /dev/null +++ b/chart/templates/serviceaccount.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "chart.fullname" . }}-controller-manager + labels: + {{- include "chart.labels" . | nindent 4 }} + annotations: + {{- toYaml .Values.controllerManager.serviceAccount.annotations | nindent 4 }} \ No newline at end of file diff --git a/chart/templates/serving-cert.yaml b/chart/templates/serving-cert.yaml new file mode 100644 index 0000000..5e87f6d --- /dev/null +++ b/chart/templates/serving-cert.yaml @@ -0,0 +1,15 @@ +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "chart.fullname" . }}-serving-cert + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + dnsNames: + - '{{ include "chart.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc' + - '{{ include "chart.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc.{{ + .Values.kubernetesClusterDomain }}' + issuerRef: + kind: Issuer + name: '{{ include "chart.fullname" . }}-selfsigned-issuer' + secretName: webhook-server-cert \ No newline at end of file diff --git a/chart/templates/webhook-service.yaml b/chart/templates/webhook-service.yaml new file mode 100644 index 0000000..3118ea2 --- /dev/null +++ b/chart/templates/webhook-service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "chart.fullname" . }}-webhook-service + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + type: {{ .Values.webhookService.type }} + selector: + control-plane: controller-manager + {{- include "chart.selectorLabels" . | nindent 4 }} + ports: + {{- .Values.webhookService.ports | toYaml | nindent 2 }} \ No newline at end of file diff --git a/chart/values.yaml b/chart/values.yaml new file mode 100644 index 0000000..d6a59b7 --- /dev/null +++ b/chart/values.yaml @@ -0,0 +1,41 @@ +controllerManager: + manager: + args: + - --metrics-bind-address=:8443 + - --leader-elect + - --health-probe-bind-address=:8081 + containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + image: + repository: ghcr.io/converged-computing/fluxqueue + tag: test + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + podSecurityContext: + runAsNonRoot: true + replicas: 1 + serviceAccount: + annotations: {} +kubernetesClusterDomain: cluster.local +metricsService: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + type: ClusterIP +webhookService: + ports: + - port: 443 + protocol: TCP + targetPort: 9443 + type: ClusterIP diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 0000000..3e769df --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,198 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "crypto/tls" + "flag" + "os" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them. + _ "k8s.io/client-go/plugin/pkg/client/auth" + "k8s.io/client-go/rest" + + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + api "github.com/converged-computing/fluxqueue/api/v1alpha1" + "github.com/converged-computing/fluxqueue/internal/controller" + // +kubebuilder:scaffold:imports +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + + utilruntime.Must(api.AddToScheme(scheme)) + // +kubebuilder:scaffold:scheme +} + +func main() { + var metricsAddr string + var enableLeaderElection bool + var probeAddr string + var secureMetrics bool + var enableHTTP2 bool + var tlsOpts []func(*tls.Config) + flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ + "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + flag.BoolVar(&secureMetrics, "metrics-secure", true, + "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") + flag.BoolVar(&enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers") + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + // if the enable-http2 flag is false (the default), http/2 should be disabled + // due to its vulnerabilities. More specifically, disabling http/2 will + // prevent from being vulnerable to the HTTP/2 Stream Cancellation and + // Rapid Reset CVEs. For more information see: + // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 + // - https://github.com/advisories/GHSA-4374-p667-p6c8 + disableHTTP2 := func(c *tls.Config) { + setupLog.Info("disabling http/2") + c.NextProtos = []string{"http/1.1"} + } + + if !enableHTTP2 { + tlsOpts = append(tlsOpts, disableHTTP2) + } + + webhookServer := webhook.NewServer(webhook.Options{ + TLSOpts: tlsOpts, + }) + + // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. + // More info: + // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/metrics/server + // - https://book.kubebuilder.io/reference/metrics.html + metricsServerOptions := metricsserver.Options{ + BindAddress: metricsAddr, + SecureServing: secureMetrics, + // TODO(user): TLSOpts is used to allow configuring the TLS config used for the server. If certificates are + // not provided, self-signed certificates will be generated by default. This option is not recommended for + // production environments as self-signed certificates do not offer the same level of trust and security + // as certificates issued by a trusted Certificate Authority (CA). The primary risk is potentially allowing + // unauthorized access to sensitive metrics data. Consider replacing with CertDir, CertName, and KeyName + // to provide certificates, ensuring the server communicates using trusted and secure certificates. + TLSOpts: tlsOpts, + } + + if secureMetrics { + // FilterProvider is used to protect the metrics endpoint with authn/authz. + // These configurations ensure that only authorized users and service accounts + // can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info: + // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/metrics/filters#WithAuthenticationAndAuthorization + metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization + } + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + Metrics: metricsServerOptions, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: "b321c34b.converged-computing.org", + // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily + // when the Manager ends. This requires the binary to immediately end when the + // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly + // speeds up voluntary leader transitions as the new leader don't have to wait + // LeaseDuration time first. + // + // In the default scaffold provided, the program ends immediately after + // the manager stops, so would be fine to enable this option. However, + // if you are doing or is intended to do any operation such as perform cleanups + // after the manager stops then its usage might be unsafe. + // LeaderElectionReleaseOnCancel: true, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + // Create a RESTful client to interact with the cluster + gvk := schema.GroupVersionKind{ + Group: "", + Version: "v1", + Kind: "Pod", + } + + c, err := rest.HTTPClientFor(mgr.GetConfig()) + if err != nil { + setupLog.Error(err, "unable to create REST HTTP client", "controller", c) + } + restClient, err := apiutil.RESTClientForGVK(gvk, false, mgr.GetConfig(), serializer.NewCodecFactory(mgr.GetScheme()), c) + if err != nil { + setupLog.Error(err, "unable to create REST client", "controller", restClient) + } + reconciler := controller.NewFluxJobReconciler( + mgr.GetClient(), + mgr.GetScheme(), + mgr.GetConfig(), + restClient, + ) + if err = reconciler.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "MiniMummi") + os.Exit(1) + } + if os.Getenv("ENABLE_WEBHOOKS") != "false" { + mgr.GetWebhookServer().Register("/mutate-v1-sidecar", &webhook.Admission{ + Handler: api.NewMutatingWebhook(mgr), + }) + } + + // +kubebuilder:scaffold:builder + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml new file mode 100644 index 0000000..2340bd0 --- /dev/null +++ b/config/certmanager/certificate.yaml @@ -0,0 +1,35 @@ +# The following manifests contain a self-signed issuer CR and a certificate CR. +# More document can be found at https://docs.cert-manager.io +# WARNING: Targets CertManager v1.0. Check https://cert-manager.io/docs/installation/upgrading/ for breaking changes. +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: selfsigned-issuer + namespace: system +spec: + selfSigned: {} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + labels: + app.kubernetes.io/name: certificate + app.kubernetes.io/instance: serving-cert + app.kubernetes.io/component: certificate + app.kubernetes.io/created-by: fluxqueue + app.kubernetes.io/part-of: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml + namespace: system +spec: + # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize + dnsNames: + - SERVICE_NAME.SERVICE_NAMESPACE.svc + - SERVICE_NAME.SERVICE_NAMESPACE.svc.cluster.local + issuerRef: + kind: Issuer + name: selfsigned-issuer + secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize diff --git a/config/certmanager/kustomization.yaml b/config/certmanager/kustomization.yaml new file mode 100644 index 0000000..bebea5a --- /dev/null +++ b/config/certmanager/kustomization.yaml @@ -0,0 +1,5 @@ +resources: +- certificate.yaml + +configurations: +- kustomizeconfig.yaml diff --git a/config/certmanager/kustomizeconfig.yaml b/config/certmanager/kustomizeconfig.yaml new file mode 100644 index 0000000..cf6f89e --- /dev/null +++ b/config/certmanager/kustomizeconfig.yaml @@ -0,0 +1,8 @@ +# This configuration is for teaching kustomize how to update name ref substitution +nameReference: +- kind: Issuer + group: cert-manager.io + fieldSpecs: + - kind: Certificate + group: cert-manager.io + path: spec/issuerRef/name diff --git a/config/crd/bases/jobs.converged-computing.org_fluxjobs.yaml b/config/crd/bases/jobs.converged-computing.org_fluxjobs.yaml new file mode 100644 index 0000000..855ce78 --- /dev/null +++ b/config/crd/bases/jobs.converged-computing.org_fluxjobs.yaml @@ -0,0 +1,97 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: fluxjobs.jobs.converged-computing.org +spec: + group: jobs.converged-computing.org + names: + kind: FluxJob + listKind: FluxJobList + plural: fluxjobs + singular: fluxjob + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: FluxJob is the Schema for the fluxjobs API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + FluxJobSpec defines the desired state of FluxJob + A user is not expected to create these - + 1. A job or similar will be submit + 2. It will be intercepted by a webhook here + 3. The webhook will generate the object + 4. The job will go into the fluxqueue + 5. When scheduled, it gets sent with an exact node assignment + to the custom scheduler plugin. + 6. Cleanup will need to be handled + + + A FluxJob is a mapping of a Kubernetes abstraction (e.g., job) + into a Flux JobSpec, one that Fluxion can digest. + properties: + duration: + description: Duration is the maximum runtime of the job + format: int32 + type: integer + jobspec: + description: JobSpec is the Flux jobspec + type: string + nodes: + description: Nodes needed for the job + format: int32 + type: integer + object: + description: |- + Object is the underlying pod/job/object specification + This currently is assumed that one job has equivalent pods under it + format: byte + type: string + reservation: + description: |- + If true, we are allowed to ask fluxion for + a reservation + type: boolean + resources: + description: Resources assigned + properties: + nodes: + description: Nodes assigned to the job + items: + type: string + type: array + type: object + type: + description: Type of object that is wrapped + type: integer + type: object + status: + description: FluxJobStatus defines the observed state of FluxJob + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml new file mode 100644 index 0000000..d5d7a3a --- /dev/null +++ b/config/crd/kustomization.yaml @@ -0,0 +1,23 @@ +# This kustomization.yaml is not intended to be run by itself, +# since it depends on service name and namespace that are out of this kustomize package. +# It should be run by config/default +resources: +- bases/jobs.converged-computing.org_fluxjobs.yaml +# +kubebuilder:scaffold:crdkustomizeresource + +patches: +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. +# patches here are for enabling the conversion webhook for each CRD +- path: patches/webhook_in_fluxjobs.yaml +# +kubebuilder:scaffold:crdkustomizewebhookpatch + +# [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. +# patches here are for enabling the CA injection for each CRD +#- path: patches/cainjection_in_fluxjobs.yaml +# +kubebuilder:scaffold:crdkustomizecainjectionpatch + +# [WEBHOOK] To enable webhook, uncomment the following section +# the following config is for teaching kustomize how to do kustomization for CRDs. + +configurations: +- kustomizeconfig.yaml diff --git a/config/crd/kustomizeconfig.yaml b/config/crd/kustomizeconfig.yaml new file mode 100644 index 0000000..ec5c150 --- /dev/null +++ b/config/crd/kustomizeconfig.yaml @@ -0,0 +1,19 @@ +# This file is for teaching kustomize how to substitute name and namespace reference in CRD +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/name + +namespace: +- kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/namespace + create: false + +varReference: +- path: metadata/annotations diff --git a/config/crd/patches/cainjection_in_fluxjobs.yaml b/config/crd/patches/cainjection_in_fluxjobs.yaml new file mode 100644 index 0000000..7ae56c1 --- /dev/null +++ b/config/crd/patches/cainjection_in_fluxjobs.yaml @@ -0,0 +1,7 @@ +# The following patch adds a directive for certmanager to inject CA into the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: CERTIFICATE_NAMESPACE/CERTIFICATE_NAME + name: fluxjobs.jobs.converged-computing.org diff --git a/config/crd/patches/webhook_in_fluxjobs.yaml b/config/crd/patches/webhook_in_fluxjobs.yaml new file mode 100644 index 0000000..bdc4997 --- /dev/null +++ b/config/crd/patches/webhook_in_fluxjobs.yaml @@ -0,0 +1,16 @@ +# The following patch enables a conversion webhook for the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: fluxjobs.jobs.converged-computing.org +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + namespace: system + name: webhook-service + path: /convert + conversionReviewVersions: + - v1 diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml new file mode 100644 index 0000000..6f6f9db --- /dev/null +++ b/config/default/kustomization.yaml @@ -0,0 +1,146 @@ +# Adds namespace to all resources. +namespace: fluxqueue-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: fluxqueue- + +# Labels to add to all resources and selectors. +#labels: +#- includeSelectors: true +# pairs: +# someName: someValue + +resources: +- ../crd +- ../rbac +- ../manager +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +- ../webhook +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. +- ../certmanager +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +#- ../prometheus +# [METRICS] Expose the controller manager metrics service. +- metrics_service.yaml + +# Uncomment the patches line if you enable Metrics, and/or are using webhooks and cert-manager +patches: +# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. +# More info: https://book.kubebuilder.io/reference/metrics +- path: manager_metrics_patch.yaml + target: + kind: Deployment + +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +- path: manager_webhook_patch.yaml + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. +# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. +# 'CERTMANAGER' needs to be enabled to use ca injection +- path: webhookcainjection_patch.yaml + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. +# Uncomment the following replacements to add the cert-manager CA injection annotations +replacements: + - source: # Add cert-manager annotation to ValidatingWebhookConfiguration, MutatingWebhookConfiguration and CRDs + kind: Certificate + group: cert-manager.io + version: v1 + name: serving-cert # this name should match the one in certificate.yaml + fieldPath: .metadata.namespace # namespace of the certificate CR + targets: + - select: + kind: ValidatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 0 + create: true + - select: + kind: MutatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 0 + create: true + - select: + kind: CustomResourceDefinition + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 0 + create: true + - source: + kind: Certificate + group: cert-manager.io + version: v1 + name: serving-cert # this name should match the one in certificate.yaml + fieldPath: .metadata.name + targets: + - select: + kind: ValidatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 1 + create: true + - select: + kind: MutatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 1 + create: true + - select: + kind: CustomResourceDefinition + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 1 + create: true + - source: # Add cert-manager annotation to the webhook Service + kind: Service + version: v1 + name: webhook-service + fieldPath: .metadata.name # namespace of the service + targets: + - select: + kind: Certificate + group: cert-manager.io + version: v1 + fieldPaths: + - .spec.dnsNames.0 + - .spec.dnsNames.1 + options: + delimiter: '.' + index: 0 + create: true + - source: + kind: Service + version: v1 + name: webhook-service + fieldPath: .metadata.namespace # namespace of the service + targets: + - select: + kind: Certificate + group: cert-manager.io + version: v1 + fieldPaths: + - .spec.dnsNames.0 + - .spec.dnsNames.1 + options: + delimiter: '.' + index: 1 + create: true diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml new file mode 100644 index 0000000..2aaef65 --- /dev/null +++ b/config/default/manager_metrics_patch.yaml @@ -0,0 +1,4 @@ +# This patch adds the args to allow exposing the metrics endpoint using HTTPS +- op: add + path: /spec/template/spec/containers/0/args/0 + value: --metrics-bind-address=:8443 diff --git a/config/default/manager_webhook_patch.yaml b/config/default/manager_webhook_patch.yaml new file mode 100644 index 0000000..e34c4c5 --- /dev/null +++ b/config/default/manager_webhook_patch.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize +spec: + template: + spec: + containers: + - name: manager + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: webhook-server-cert diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml new file mode 100644 index 0000000..7b6191e --- /dev/null +++ b/config/default/metrics_service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml new file mode 100644 index 0000000..89e10af --- /dev/null +++ b/config/default/webhookcainjection_patch.yaml @@ -0,0 +1,25 @@ +# This patch add annotation to admission webhook config and +# CERTIFICATE_NAMESPACE and CERTIFICATE_NAME will be substituted by kustomize +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: mutating-webhook-configuration + annotations: + cert-manager.io/inject-ca-from: CERTIFICATE_NAMESPACE/CERTIFICATE_NAME +#--- +#apiVersion: admissionregistration.k8s.io/v1 +#kind: ValidatingWebhookConfiguration +#metadata: +# labels: +# app.kubernetes.io/name: validatingwebhookconfiguration +# app.kubernetes.io/instance: validating-webhook-configuration +# app.kubernetes.io/component: webhook +# app.kubernetes.io/created-by: fluxqueue +# app.kubernetes.io/part-of: fluxqueue +# app.kubernetes.io/managed-by: kustomize +# name: validating-webhook-configuration +# annotations: +# cert-manager.io/inject-ca-from: CERTIFICATE_NAMESPACE/CERTIFICATE_NAME diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml new file mode 100644 index 0000000..4f34c95 --- /dev/null +++ b/config/manager/kustomization.yaml @@ -0,0 +1,8 @@ +resources: +- manager.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: ghcr.io/converged-computing/fluxqueue + newTag: latest diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml new file mode 100644 index 0000000..7605259 --- /dev/null +++ b/config/manager/manager.yaml @@ -0,0 +1,96 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize +spec: + selector: + matchLabels: + control-plane: controller-manager + replicas: 1 + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + spec: + # TODO(user): Uncomment the following code to configure the nodeAffinity expression + # according to the platforms which are supported by your solution. + # It is considered best practice to support multiple architectures. You can + # build your manager image using the makefile target docker-buildx. + # affinity: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/arch + # operator: In + # values: + # - amd64 + # - arm64 + # - ppc64le + # - s390x + # - key: kubernetes.io/os + # operator: In + # values: + # - linux + securityContext: + runAsNonRoot: true + # TODO(user): For common cases that do not require escalating privileges + # it is recommended to ensure that all your Pods/Containers are restrictive. + # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted + # Please uncomment the following code if your project does NOT have to work on old Kubernetes + # versions < 1.19 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ). + # seccompProfile: + # type: RuntimeDefault + containers: + - command: + - /manager + args: + - --leader-elect + - --health-probe-bind-address=:8081 + image: controller:latest + imagePullPolicy: Always + name: manager + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + # TODO(user): Configure the resources accordingly based on the project requirements. + # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + serviceAccountName: controller-manager + terminationGracePeriodSeconds: 10 diff --git a/config/manifests/kustomization.yaml b/config/manifests/kustomization.yaml new file mode 100644 index 0000000..ccc9b69 --- /dev/null +++ b/config/manifests/kustomization.yaml @@ -0,0 +1,28 @@ +# These resources constitute the fully configured set of manifests +# used to generate the 'manifests/' directory in a bundle. +resources: +- bases/fluxqueue.clusterserviceversion.yaml +- ../default +- ../samples +- ../scorecard + +# [WEBHOOK] To enable webhooks, uncomment all the sections with [WEBHOOK] prefix. +# Do NOT uncomment sections with prefix [CERTMANAGER], as OLM does not support cert-manager. +# These patches remove the unnecessary "cert" volume and its manager container volumeMount. +#patches: +#- target: +# group: apps +# version: v1 +# kind: Deployment +# name: controller-manager +# namespace: system +# patch: |- +# # Remove the manager container's "cert" volumeMount, since OLM will create and mount a set of certs. +# # Update the indices in this path if adding or removing containers/volumeMounts in the manager's Deployment. +# - op: remove + +# path: /spec/template/spec/containers/0/volumeMounts/0 +# # Remove the "cert" volume, since OLM will create and mount a set of certs. +# # Update the indices in this path if adding or removing volumes in the manager's Deployment. +# - op: remove +# path: /spec/template/spec/volumes/0 diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml new file mode 100644 index 0000000..ed13716 --- /dev/null +++ b/config/prometheus/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- monitor.yaml diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml new file mode 100644 index 0000000..1656e15 --- /dev/null +++ b/config/prometheus/monitor.yaml @@ -0,0 +1,30 @@ +# Prometheus Monitor Service (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-monitor + namespace: system +spec: + endpoints: + - path: /metrics + port: https # Ensure this is the name of the port that exposes HTTPS metrics + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + # TODO(user): The option insecureSkipVerify: true is not recommended for production since it disables + # certificate verification. This poses a significant security risk by making the system vulnerable to + # man-in-the-middle attacks, where an attacker could intercept and manipulate the communication between + # Prometheus and the monitored services. This could lead to unauthorized access to sensitive metrics data, + # compromising the integrity and confidentiality of the information. + # Please use the following options for secure configurations: + # caFile: /etc/metrics-certs/ca.crt + # certFile: /etc/metrics-certs/tls.crt + # keyFile: /etc/metrics-certs/tls.key + insecureSkipVerify: true + selector: + matchLabels: + control-plane: controller-manager diff --git a/config/rbac/fluxjob_editor_role.yaml b/config/rbac/fluxjob_editor_role.yaml new file mode 100644 index 0000000..b8b4a42 --- /dev/null +++ b/config/rbac/fluxjob_editor_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to edit fluxjobs. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: fluxjob-editor-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get diff --git a/config/rbac/fluxjob_viewer_role.yaml b/config/rbac/fluxjob_viewer_role.yaml new file mode 100644 index 0000000..f1f3171 --- /dev/null +++ b/config/rbac/fluxjob_viewer_role.yaml @@ -0,0 +1,23 @@ +# permissions for end users to view fluxjobs. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: fluxjob-viewer-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - get + - list + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml new file mode 100644 index 0000000..c87e237 --- /dev/null +++ b/config/rbac/kustomization.yaml @@ -0,0 +1,27 @@ +resources: +# All RBAC will be applied under this service account in +# the deployment namespace. You may comment out this resource +# if your manager will use a service account that exists at +# runtime. Be sure to update RoleBinding and ClusterRoleBinding +# subjects if changing service account names. +- service_account.yaml +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# The following RBAC configurations are used to protect +# the metrics endpoint with authn/authz. These configurations +# ensure that only authorized users and service accounts +# can access the metrics endpoint. Comment the following +# permissions if you want to disable this protection. +# More info: https://book.kubebuilder.io/reference/metrics.html +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml +# For each CRD, "Editor" and "Viewer" roles are scaffolded by +# default, aiding admins in cluster management. Those roles are +# not used by the Project itself. You can comment the following lines +# if you do not want those helpers be installed with your Project. +- fluxjob_editor_role.yaml +- fluxjob_viewer_role.yaml + diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml new file mode 100644 index 0000000..a111ee7 --- /dev/null +++ b/config/rbac/leader_election_role.yaml @@ -0,0 +1,40 @@ +# permissions to do leader election. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml new file mode 100644 index 0000000..9b4e281 --- /dev/null +++ b/config/rbac/leader_election_role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: leader-election-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/metrics_auth_role.yaml b/config/rbac/metrics_auth_role.yaml new file mode 100644 index 0000000..32d2e4e --- /dev/null +++ b/config/rbac/metrics_auth_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/config/rbac/metrics_auth_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 0000000..e775d67 --- /dev/null +++ b/config/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metrics-auth-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/metrics_reader_role.yaml b/config/rbac/metrics_reader_role.yaml new file mode 100644 index 0000000..51a75db --- /dev/null +++ b/config/rbac/metrics_reader_role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml new file mode 100644 index 0000000..c0a38d7 --- /dev/null +++ b/config/rbac/role.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: manager-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/finalizers + verbs: + - update +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get + - patch + - update diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml new file mode 100644 index 0000000..fae5ce4 --- /dev/null +++ b/config/rbac/role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml new file mode 100644 index 0000000..703d853 --- /dev/null +++ b/config/rbac/service_account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: controller-manager + namespace: system diff --git a/config/samples/jobs_v1alpha1_fluxjob.yaml b/config/samples/jobs_v1alpha1_fluxjob.yaml new file mode 100644 index 0000000..9451aea --- /dev/null +++ b/config/samples/jobs_v1alpha1_fluxjob.yaml @@ -0,0 +1,9 @@ +apiVersion: jobs.converged-computing.org/v1alpha1 +kind: FluxJob +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: fluxjob-sample +spec: + # TODO(user): Add fields here diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml new file mode 100644 index 0000000..4f76428 --- /dev/null +++ b/config/samples/kustomization.yaml @@ -0,0 +1,4 @@ +## Append samples of your project ## +resources: +- jobs_v1alpha1_fluxjob.yaml +# +kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/scorecard/bases/config.yaml b/config/scorecard/bases/config.yaml new file mode 100644 index 0000000..c770478 --- /dev/null +++ b/config/scorecard/bases/config.yaml @@ -0,0 +1,7 @@ +apiVersion: scorecard.operatorframework.io/v1alpha3 +kind: Configuration +metadata: + name: config +stages: +- parallel: true + tests: [] diff --git a/config/scorecard/kustomization.yaml b/config/scorecard/kustomization.yaml new file mode 100644 index 0000000..54e8aa5 --- /dev/null +++ b/config/scorecard/kustomization.yaml @@ -0,0 +1,18 @@ +resources: +- bases/config.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +patches: +- path: patches/basic.config.yaml + target: + group: scorecard.operatorframework.io + kind: Configuration + name: config + version: v1alpha3 +- path: patches/olm.config.yaml + target: + group: scorecard.operatorframework.io + kind: Configuration + name: config + version: v1alpha3 +# +kubebuilder:scaffold:patches diff --git a/config/scorecard/patches/basic.config.yaml b/config/scorecard/patches/basic.config.yaml new file mode 100644 index 0000000..84683cf --- /dev/null +++ b/config/scorecard/patches/basic.config.yaml @@ -0,0 +1,10 @@ +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - basic-check-spec + image: quay.io/operator-framework/scorecard-test:v1.38.0 + labels: + suite: basic + test: basic-check-spec-test diff --git a/config/scorecard/patches/olm.config.yaml b/config/scorecard/patches/olm.config.yaml new file mode 100644 index 0000000..43f40a8 --- /dev/null +++ b/config/scorecard/patches/olm.config.yaml @@ -0,0 +1,50 @@ +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-bundle-validation + image: quay.io/operator-framework/scorecard-test:v1.38.0 + labels: + suite: olm + test: olm-bundle-validation-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-crds-have-validation + image: quay.io/operator-framework/scorecard-test:v1.38.0 + labels: + suite: olm + test: olm-crds-have-validation-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-crds-have-resources + image: quay.io/operator-framework/scorecard-test:v1.38.0 + labels: + suite: olm + test: olm-crds-have-resources-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-spec-descriptors + image: quay.io/operator-framework/scorecard-test:v1.38.0 + labels: + suite: olm + test: olm-spec-descriptors-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-status-descriptors + image: quay.io/operator-framework/scorecard-test:v1.38.0 + labels: + suite: olm + test: olm-status-descriptors-test diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml new file mode 100644 index 0000000..9cf2613 --- /dev/null +++ b/config/webhook/kustomization.yaml @@ -0,0 +1,6 @@ +resources: +- manifests.yaml +- service.yaml + +configurations: +- kustomizeconfig.yaml diff --git a/config/webhook/kustomizeconfig.yaml b/config/webhook/kustomizeconfig.yaml new file mode 100644 index 0000000..206316e --- /dev/null +++ b/config/webhook/kustomizeconfig.yaml @@ -0,0 +1,22 @@ +# the following config is for teaching kustomize where to look at when substituting nameReference. +# It requires kustomize v2.1.0 or newer to work properly. +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: MutatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + +namespace: +- kind: MutatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true +- kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml new file mode 100644 index 0000000..7a43f07 --- /dev/null +++ b/config/webhook/manifests.yaml @@ -0,0 +1,29 @@ +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /mutate-v1-sidecar + failurePolicy: Fail + name: mfluxjob.kb.io + rules: + - apiGroups: + - "" + - core + - batch + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pods + - jobs + sideEffects: None diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml new file mode 100644 index 0000000..dc0f282 --- /dev/null +++ b/config/webhook/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: fluxqueue + app.kubernetes.io/managed-by: kustomize + name: webhook-service + namespace: system +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 9443 + selector: + control-plane: controller-manager diff --git a/examples/dist/fluxqueue-dev.yaml b/examples/dist/fluxqueue-dev.yaml new file mode 100644 index 0000000..88749ab --- /dev/null +++ b/examples/dist/fluxqueue-dev.yaml @@ -0,0 +1,495 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + control-plane: controller-manager + name: fluxqueue-system +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: fluxqueue-system/fluxqueue-serving-cert + controller-gen.kubebuilder.io/version: v0.15.0 + name: fluxjobs.jobs.converged-computing.org +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: fluxqueue-webhook-service + namespace: fluxqueue-system + path: /convert + conversionReviewVersions: + - v1 + group: jobs.converged-computing.org + names: + kind: FluxJob + listKind: FluxJobList + plural: fluxjobs + singular: fluxjob + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: FluxJob is the Schema for the fluxjobs API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + FluxJobSpec defines the desired state of FluxJob + A user is not expected to create these - + 1. A job or similar will be submit + 2. It will be intercepted by a webhook here + 3. The webhook will generate the object + 4. The job will go into the fluxqueue + 5. When scheduled, it gets sent with an exact node assignment + to the custom scheduler plugin. + 6. Cleanup will need to be handled + + + A FluxJob is a mapping of a Kubernetes abstraction (e.g., job) + into a Flux JobSpec, one that Fluxion can digest. + properties: + duration: + description: Duration is the maximum runtime of the job + format: int32 + type: integer + jobspec: + description: JobSpec is the Flux jobspec + type: string + nodes: + description: Nodes needed for the job + format: int32 + type: integer + object: + description: |- + Object is the underlying pod/job/object specification + This currently is assumed that one job has equivalent pods under it + format: byte + type: string + reservation: + description: |- + If true, we are allowed to ask fluxion for + a reservation + type: boolean + resources: + description: Resources assigned + properties: + nodes: + description: Nodes assigned to the job + items: + type: string + type: array + type: object + type: + description: Type of object that is wrapped + type: integer + type: object + status: + description: FluxJobStatus defines the observed state of FluxJob + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-leader-election-role + namespace: fluxqueue-system +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-fluxjob-editor-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-fluxjob-viewer-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - get + - list + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluxqueue-manager-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/finalizers + verbs: + - update +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get + - patch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluxqueue-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluxqueue-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-leader-election-rolebinding + namespace: fluxqueue-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: fluxqueue-leader-election-role +subjects: +- kind: ServiceAccount + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: fluxqueue-manager-role +subjects: +- kind: ServiceAccount + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: fluxqueue-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: fluxqueue-metrics-auth-role +subjects: +- kind: ServiceAccount + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + control-plane: controller-manager + name: fluxqueue-controller-manager-metrics-service + namespace: fluxqueue-system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-webhook-service + namespace: fluxqueue-system +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 9443 + selector: + control-plane: controller-manager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + control-plane: controller-manager + name: fluxqueue-controller-manager + namespace: fluxqueue-system +spec: + replicas: 1 + selector: + matchLabels: + control-plane: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + spec: + containers: + - args: + - --metrics-bind-address=:8443 + - --leader-elect + - --health-probe-bind-address=:8081 + command: + - /manager + image: ghcr.io/converged-computing/fluxqueue:test + imagePullPolicy: Always + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + securityContext: + runAsNonRoot: true + serviceAccountName: fluxqueue-controller-manager + terminationGracePeriodSeconds: 10 + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: webhook-server-cert +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + labels: + app.kubernetes.io/component: certificate + app.kubernetes.io/created-by: fluxqueue + app.kubernetes.io/instance: serving-cert + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: certificate + app.kubernetes.io/part-of: fluxqueue + name: fluxqueue-serving-cert + namespace: fluxqueue-system +spec: + dnsNames: + - fluxqueue-webhook-service.fluxqueue-system.svc + - fluxqueue-webhook-service.fluxqueue-system.svc.cluster.local + issuerRef: + kind: Issuer + name: fluxqueue-selfsigned-issuer + secretName: webhook-server-cert +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-selfsigned-issuer + namespace: fluxqueue-system +spec: + selfSigned: {} +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + annotations: + cert-manager.io/inject-ca-from: fluxqueue-system/fluxqueue-serving-cert + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: fluxqueue-webhook-service + namespace: fluxqueue-system + path: /mutate-v1-sidecar + failurePolicy: Fail + name: mfluxjob.kb.io + rules: + - apiGroups: + - "" + - core + - batch + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pods + - jobs + sideEffects: None diff --git a/examples/dist/fluxqueue.yaml b/examples/dist/fluxqueue.yaml new file mode 100644 index 0000000..cab037c --- /dev/null +++ b/examples/dist/fluxqueue.yaml @@ -0,0 +1,495 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + control-plane: controller-manager + name: fluxqueue-system +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: fluxqueue-system/fluxqueue-serving-cert + controller-gen.kubebuilder.io/version: v0.15.0 + name: fluxjobs.jobs.converged-computing.org +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: fluxqueue-webhook-service + namespace: fluxqueue-system + path: /convert + conversionReviewVersions: + - v1 + group: jobs.converged-computing.org + names: + kind: FluxJob + listKind: FluxJobList + plural: fluxjobs + singular: fluxjob + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: FluxJob is the Schema for the fluxjobs API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + FluxJobSpec defines the desired state of FluxJob + A user is not expected to create these - + 1. A job or similar will be submit + 2. It will be intercepted by a webhook here + 3. The webhook will generate the object + 4. The job will go into the fluxqueue + 5. When scheduled, it gets sent with an exact node assignment + to the custom scheduler plugin. + 6. Cleanup will need to be handled + + + A FluxJob is a mapping of a Kubernetes abstraction (e.g., job) + into a Flux JobSpec, one that Fluxion can digest. + properties: + duration: + description: Duration is the maximum runtime of the job + format: int32 + type: integer + jobspec: + description: JobSpec is the Flux jobspec + type: string + nodes: + description: Nodes needed for the job + format: int32 + type: integer + object: + description: |- + Object is the underlying pod/job/object specification + This currently is assumed that one job has equivalent pods under it + format: byte + type: string + reservation: + description: |- + If true, we are allowed to ask fluxion for + a reservation + type: boolean + resources: + description: Resources assigned + properties: + nodes: + description: Nodes assigned to the job + items: + type: string + type: array + type: object + type: + description: Type of object that is wrapped + type: integer + type: object + status: + description: FluxJobStatus defines the observed state of FluxJob + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-leader-election-role + namespace: fluxqueue-system +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-fluxjob-editor-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-fluxjob-viewer-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - get + - list + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluxqueue-manager-role +rules: +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/finalizers + verbs: + - update +- apiGroups: + - jobs.converged-computing.org + resources: + - fluxjobs/status + verbs: + - get + - patch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluxqueue-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluxqueue-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-leader-election-rolebinding + namespace: fluxqueue-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: fluxqueue-leader-election-role +subjects: +- kind: ServiceAccount + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: fluxqueue-manager-role +subjects: +- kind: ServiceAccount + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: fluxqueue-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: fluxqueue-metrics-auth-role +subjects: +- kind: ServiceAccount + name: fluxqueue-controller-manager + namespace: fluxqueue-system +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + control-plane: controller-manager + name: fluxqueue-controller-manager-metrics-service + namespace: fluxqueue-system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-webhook-service + namespace: fluxqueue-system +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 9443 + selector: + control-plane: controller-manager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + control-plane: controller-manager + name: fluxqueue-controller-manager + namespace: fluxqueue-system +spec: + replicas: 1 + selector: + matchLabels: + control-plane: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + spec: + containers: + - args: + - --metrics-bind-address=:8443 + - --leader-elect + - --health-probe-bind-address=:8081 + command: + - /manager + image: ghcr.io/converged-computing/fluxqueue:latest + imagePullPolicy: Always + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + securityContext: + runAsNonRoot: true + serviceAccountName: fluxqueue-controller-manager + terminationGracePeriodSeconds: 10 + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: webhook-server-cert +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + labels: + app.kubernetes.io/component: certificate + app.kubernetes.io/created-by: fluxqueue + app.kubernetes.io/instance: serving-cert + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: certificate + app.kubernetes.io/part-of: fluxqueue + name: fluxqueue-serving-cert + namespace: fluxqueue-system +spec: + dnsNames: + - fluxqueue-webhook-service.fluxqueue-system.svc + - fluxqueue-webhook-service.fluxqueue-system.svc.cluster.local + issuerRef: + kind: Issuer + name: fluxqueue-selfsigned-issuer + secretName: webhook-server-cert +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-selfsigned-issuer + namespace: fluxqueue-system +spec: + selfSigned: {} +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + annotations: + cert-manager.io/inject-ca-from: fluxqueue-system/fluxqueue-serving-cert + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: fluxqueue + name: fluxqueue-mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: fluxqueue-webhook-service + namespace: fluxqueue-system + path: /mutate-v1-sidecar + failurePolicy: Fail + name: mfluxjob.kb.io + rules: + - apiGroups: + - "" + - core + - batch + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pods + - jobs + sideEffects: None diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..5a5ace3 --- /dev/null +++ b/go.mod @@ -0,0 +1,95 @@ +module github.com/converged-computing/fluxqueue + +go 1.22.0 + +require ( + github.com/onsi/ginkgo/v2 v2.17.1 + github.com/onsi/gomega v1.32.0 + k8s.io/api v0.30.1 + k8s.io/apimachinery v0.30.1 + k8s.io/client-go v0.30.1 + sigs.k8s.io/controller-runtime v0.18.4 +) + +require ( + github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.2.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/felixge/httpsnoop v1.0.3 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.3 // indirect + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/cel-go v0.17.8 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.16.0 // indirect + github.com/prometheus/client_model v0.4.0 // indirect + github.com/prometheus/common v0.44.0 // indirect + github.com/prometheus/procfs v0.12.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/stoewer/go-strcase v1.2.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 // indirect + go.opentelemetry.io/otel v1.19.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect + go.opentelemetry.io/otel/metric v1.19.0 // indirect + go.opentelemetry.io/otel/sdk v1.19.0 // indirect + go.opentelemetry.io/otel/trace v1.19.0 // indirect + go.opentelemetry.io/proto/otlp v1.0.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.26.0 // indirect + golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/oauth2 v0.12.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.3.0 // indirect + golang.org/x/tools v0.18.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/appengine v1.6.7 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/grpc v1.58.3 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apiextensions-apiserver v0.30.1 // indirect + k8s.io/apiserver v0.30.1 // indirect + k8s.io/component-base v0.30.1 // indirect + k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9ce9c0d --- /dev/null +++ b/go.sum @@ -0,0 +1,253 @@ +github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df h1:7RFfzj4SSt6nnvCPbCqijJi1nWCd+TqAT3bYCStRC18= +github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= +github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= +github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= +github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/cel-go v0.17.8 h1:j9m730pMZt1Fc4oKhCLUHfjj6527LuhYcYw0Rl8gqto= +github.com/google/cel-go v0.17.8/go.mod h1:HXZKzB0LXqer5lHHgfWAnlYwJaQBDKMjxjulNQzhwhY= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= +github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= +github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= +github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk= +github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= +github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= +github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= +github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= +github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU= +github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 h1:KfYpVmrjI7JuToy5k8XV3nkapjWx48k4E4JOtVstzQI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0/go.mod h1:SeQhzAEccGVZVEy7aH87Nh0km+utSpo1pTv6eMMop48= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 h1:3d+S281UTjM+AbF31XSOYn1qXn3BgIdWl8HNEpx08Jk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0/go.mod h1:0+KuTDyKL4gjKCF75pHOX4wuzYDUZYfAQdSu43o+Z2I= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o= +go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= +go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= +go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= +golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= +golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 h1:L6iMMGrtzgHsWofoFcihmDEMYeDR9KN/ThbPWGrh++g= +google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5/go.mod h1:oH/ZOT02u4kWEp7oYBGYFFkCdKS/uYR9Z7+0/xuuFp8= +google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e h1:z3vDksarJxsAKM5dmEGv0GHwE2hKJ096wZra71Vs4sw= +google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e/go.mod h1:rsr7RhLuwsDKL7RmgDDCUc6yaGr1iqceVb5Wv6f6YvQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= +google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.30.1 h1:kCm/6mADMdbAxmIh0LBjS54nQBE+U4KmbCfIkF5CpJY= +k8s.io/api v0.30.1/go.mod h1:ddbN2C0+0DIiPntan/bye3SW3PdwLa11/0yqwvuRrJM= +k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xarePws= +k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= +k8s.io/apimachinery v0.30.1 h1:ZQStsEfo4n65yAdlGTfP/uSHMQSoYzU/oeEbkmF7P2U= +k8s.io/apimachinery v0.30.1/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/apiserver v0.30.1 h1:BEWEe8bzS12nMtDKXzCF5Q5ovp6LjjYkSp8qOPk8LZ8= +k8s.io/apiserver v0.30.1/go.mod h1:i87ZnQ+/PGAmSbD/iEKM68bm1D5reX8fO4Ito4B01mo= +k8s.io/client-go v0.30.1 h1:uC/Ir6A3R46wdkgCV3vbLyNOYyCJ8oZnjtJGKfytl/Q= +k8s.io/client-go v0.30.1/go.mod h1:wrAqLNs2trwiCH/wxxmT/x3hKVH9PuV0GGW0oDoHVqc= +k8s.io/component-base v0.30.1 h1:bvAtlPh1UrdaZL20D9+sWxsJljMi0QZ3Lmw+kmZAaxQ= +k8s.io/component-base v0.30.1/go.mod h1:e/X9kDiOebwlI41AvBHuWdqFriSRrX50CdwA9TFaHLI= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 h1:/U5vjBbQn3RChhv7P11uhYvCSm5G2GaIi5AIGBS6r4c= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0/go.mod h1:z7+wmGM2dfIiLRfrC6jb5kV2Mq/sK1ZP303cxzkV5Y4= +sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHvm5BZw= +sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt new file mode 100644 index 0000000..221dcbe --- /dev/null +++ b/hack/boilerplate.go.txt @@ -0,0 +1,15 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ \ No newline at end of file diff --git a/hack/quick-build-kind.sh b/hack/quick-build-kind.sh new file mode 100755 index 0000000..f39697b --- /dev/null +++ b/hack/quick-build-kind.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +REGISTRY="${1:-ghcr.io/vsoch}" +HERE=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$(dirname ${HERE}) + +# Go to the script directory +cd ${ROOT} + +# These build each of the images. The sidecar is separate from the other two in src/ +make build-all REGISTRY=${REGISTRY} # SCHEDULER_IMAGE=fluxqueue # SIDECAR_IMAGE=fluxqueue-sidecar + +# This is what it might look like to push +# docker push ghcr.io/vsoch/fluxnetes-sidecar && docker push ghcr.io/vsoch/fluxnetes:latest + +# We load into kind so we don't need to push/pull and use up internet data ;) +# kind load docker-image ${REGISTRY}/fluxnetes-sidecar:latest +kind load docker-image ${REGISTRY}/fluxqueue:latest +kind load docker-image ${REGISTRY}/fluxqueue-postgres:latest + +# And then install using the charts. The pull policy ensures we use the loaded ones +# --set scheduler.image=${REGISTRY}/fluxnetes:latest \ +# --set sidecar.image=${REGISTRY}/fluxnetes-sidecar:latest \ +helm uninstall fluxqueue --namespace fluxqueue-system || true +helm install \ + --set postgres.image=${REGISTRY}/fluxqueue-postgres:latest \ + --namespace fluxqueue-system \ + --create-namespace \ + --set postgres.pullPolicy=Never \ + --set scheduler.pullPolicy=Never \ + --set sidecar.pullPolicy=Never \ + fluxqueue chart/ diff --git a/img/fluxqueue.png b/img/fluxqueue.png new file mode 100644 index 0000000..1c88248 Binary files /dev/null and b/img/fluxqueue.png differ diff --git a/internal/controller/fluxjob_controller.go b/internal/controller/fluxjob_controller.go new file mode 100644 index 0000000..e5c4886 --- /dev/null +++ b/internal/controller/fluxjob_controller.go @@ -0,0 +1,84 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + api "github.com/converged-computing/fluxqueue/api/v1alpha1" +) + +var ( + rlog = ctrl.Log.WithName("fluxqueue") +) + +// FluxJobReconciler reconciles a FluxJob object +type FluxJobReconciler struct { + client.Client + Scheme *runtime.Scheme + RESTClient rest.Interface + RESTConfig *rest.Config +} + +func NewFluxJobReconciler( + client client.Client, + scheme *runtime.Scheme, + restConfig *rest.Config, + restClient rest.Interface, +) *FluxJobReconciler { + return &FluxJobReconciler{ + Client: client, + Scheme: scheme, + RESTClient: restClient, + RESTConfig: restConfig, + } +} + +// +kubebuilder:rbac:groups=jobs.converged-computing.org,resources=fluxjobs,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=jobs.converged-computing.org,resources=fluxjobs/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=jobs.converged-computing.org,resources=fluxjobs/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the FluxJob object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile +func (r *FluxJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + // TODO(user): your logic here + logger.Info("Hello!") + + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *FluxJobReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&api.FluxJob{}). + Complete(r) +} diff --git a/internal/controller/fluxjob_controller_test.go b/internal/controller/fluxjob_controller_test.go new file mode 100644 index 0000000..d2c6691 --- /dev/null +++ b/internal/controller/fluxjob_controller_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + jobsv1alpha1 "github.com/converged-computing/fluxqueue/api/v1alpha1" +) + +var _ = Describe("FluxJob Controller", func() { + Context("When reconciling a resource", func() { + const resourceName = "test-resource" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{ + Name: resourceName, + Namespace: "default", // TODO(user):Modify as needed + } + fluxjob := &jobsv1alpha1.FluxJob{} + + BeforeEach(func() { + By("creating the custom resource for the Kind FluxJob") + err := k8sClient.Get(ctx, typeNamespacedName, fluxjob) + if err != nil && errors.IsNotFound(err) { + resource := &jobsv1alpha1.FluxJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceName, + Namespace: "default", + }, + // TODO(user): Specify other spec details if needed. + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + } + }) + + AfterEach(func() { + // TODO(user): Cleanup logic after each test, like removing the resource instance. + resource := &jobsv1alpha1.FluxJob{} + err := k8sClient.Get(ctx, typeNamespacedName, resource) + Expect(err).NotTo(HaveOccurred()) + + By("Cleanup the specific resource instance FluxJob") + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + }) + It("should successfully reconcile the resource", func() { + By("Reconciling the created resource") + controllerReconciler := &FluxJobReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + + _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: typeNamespacedName, + }) + Expect(err).NotTo(HaveOccurred()) + // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. + // Example: If you expect a certain status condition after reconciliation, verify it here. + }) + }) +}) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go new file mode 100644 index 0000000..463a2d8 --- /dev/null +++ b/internal/controller/suite_test.go @@ -0,0 +1,90 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + jobsv1alpha1 "github.com/converged-computing/fluxqueue/api/v1alpha1" + // +kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestControllers(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without call the makefile target test. If not informed it will look for the + // default path defined in controller-runtime which is /usr/local/kubebuilder/. + // Note that you must have the required binaries setup under the bin directory to perform + // the tests directly. When we run make test it will be setup and used automatically. + BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", + fmt.Sprintf("1.30.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = jobsv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + // +kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go new file mode 100644 index 0000000..74b5c52 --- /dev/null +++ b/test/e2e/e2e_suite_test.go @@ -0,0 +1,32 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// Run e2e tests using the Ginkgo runner. +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + _, _ = fmt.Fprintf(GinkgoWriter, "Starting fluxqueue suite\n") + RunSpecs(t, "e2e suite") +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go new file mode 100644 index 0000000..5547509 --- /dev/null +++ b/test/e2e/e2e_test.go @@ -0,0 +1,122 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "os/exec" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/converged-computing/fluxqueue/test/utils" +) + +const namespace = "fluxqueue-system" + +var _ = Describe("controller", Ordered, func() { + BeforeAll(func() { + By("installing prometheus operator") + Expect(utils.InstallPrometheusOperator()).To(Succeed()) + + By("installing the cert-manager") + Expect(utils.InstallCertManager()).To(Succeed()) + + By("creating manager namespace") + cmd := exec.Command("kubectl", "create", "ns", namespace) + _, _ = utils.Run(cmd) + }) + + AfterAll(func() { + By("uninstalling the Prometheus manager bundle") + utils.UninstallPrometheusOperator() + + By("uninstalling the cert-manager bundle") + utils.UninstallCertManager() + + By("removing manager namespace") + cmd := exec.Command("kubectl", "delete", "ns", namespace) + _, _ = utils.Run(cmd) + }) + + Context("Operator", func() { + It("should run successfully", func() { + var controllerPodName string + var err error + + // projectimage stores the name of the image used in the example + var projectimage = "example.com/fluxqueue:v0.0.1" + + By("building the manager(Operator) image") + cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage)) + _, err = utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("loading the the manager(Operator) image on Kind") + err = utils.LoadImageToKindClusterWithName(projectimage) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("installing CRDs") + cmd = exec.Command("make", "install") + _, err = utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("deploying the controller-manager") + cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectimage)) + _, err = utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("validating that the controller-manager pod is running as expected") + verifyControllerUp := func() error { + // Get pod name + + cmd = exec.Command("kubectl", "get", + "pods", "-l", "control-plane=controller-manager", + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ + "{{ \"\\n\" }}{{ end }}{{ end }}", + "-n", namespace, + ) + + podOutput, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + podNames := utils.GetNonEmptyLines(string(podOutput)) + if len(podNames) != 1 { + return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames)) + } + controllerPodName = podNames[0] + ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager")) + + // Validate pod status + cmd = exec.Command("kubectl", "get", + "pods", controllerPodName, "-o", "jsonpath={.status.phase}", + "-n", namespace, + ) + status, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + if string(status) != "Running" { + return fmt.Errorf("controller pod in %s status", status) + } + return nil + } + EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed()) + + }) + }) +}) diff --git a/test/job.yaml b/test/job.yaml new file mode 100644 index 0000000..8f38065 --- /dev/null +++ b/test/job.yaml @@ -0,0 +1,22 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: job +spec: + completions: 2 + parallelism: 2 + completionMode: Indexed + template: + metadata: + labels: + fluxqueue.pod-group: job-1 + fluxqueue.group-size: "5" + spec: + # add fluxqueue? + # schedulerName: fluence + containers: + - name: job + image: busybox + command: [sleep, "10"] + restartPolicy: Never + backoffLimit: 4 diff --git a/test/pod.yaml b/test/pod.yaml new file mode 100644 index 0000000..ee2d0b8 --- /dev/null +++ b/test/pod.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Pod +metadata: + name: pod +spec: + containers: + - name: pod + image: registry.k8s.io/pause:2.0 diff --git a/test/utils/utils.go b/test/utils/utils.go new file mode 100644 index 0000000..87db878 --- /dev/null +++ b/test/utils/utils.go @@ -0,0 +1,140 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "fmt" + "os" + "os/exec" + "strings" + + . "github.com/onsi/ginkgo/v2" //nolint:golint,revive +) + +const ( + prometheusOperatorVersion = "v0.72.0" + prometheusOperatorURL = "https://github.com/prometheus-operator/prometheus-operator/" + + "releases/download/%s/bundle.yaml" + + certmanagerVersion = "v1.14.4" + certmanagerURLTmpl = "https://github.com/jetstack/cert-manager/releases/download/%s/cert-manager.yaml" +) + +func warnError(err error) { + _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) +} + +// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics. +func InstallPrometheusOperator() error { + url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) + cmd := exec.Command("kubectl", "create", "-f", url) + _, err := Run(cmd) + return err +} + +// Run executes the provided command within this context +func Run(cmd *exec.Cmd) ([]byte, error) { + dir, _ := GetProjectDir() + cmd.Dir = dir + + if err := os.Chdir(cmd.Dir); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %s\n", err) + } + + cmd.Env = append(os.Environ(), "GO111MODULE=on") + command := strings.Join(cmd.Args, " ") + _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) + output, err := cmd.CombinedOutput() + if err != nil { + return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) + } + + return output, nil +} + +// UninstallPrometheusOperator uninstalls the prometheus +func UninstallPrometheusOperator() { + url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) + cmd := exec.Command("kubectl", "delete", "-f", url) + if _, err := Run(cmd); err != nil { + warnError(err) + } +} + +// UninstallCertManager uninstalls the cert manager +func UninstallCertManager() { + url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) + cmd := exec.Command("kubectl", "delete", "-f", url) + if _, err := Run(cmd); err != nil { + warnError(err) + } +} + +// InstallCertManager installs the cert manager bundle. +func InstallCertManager() error { + url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) + cmd := exec.Command("kubectl", "apply", "-f", url) + if _, err := Run(cmd); err != nil { + return err + } + // Wait for cert-manager-webhook to be ready, which can take time if cert-manager + // was re-installed after uninstalling on a cluster. + cmd = exec.Command("kubectl", "wait", "deployment.apps/cert-manager-webhook", + "--for", "condition=Available", + "--namespace", "cert-manager", + "--timeout", "5m", + ) + + _, err := Run(cmd) + return err +} + +// LoadImageToKindClusterWithName loads a local docker image to the kind cluster +func LoadImageToKindClusterWithName(name string) error { + cluster := "kind" + if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { + cluster = v + } + kindOptions := []string{"load", "docker-image", name, "--name", cluster} + cmd := exec.Command("kind", kindOptions...) + _, err := Run(cmd) + return err +} + +// GetNonEmptyLines converts given command output string into individual objects +// according to line breakers, and ignores the empty elements in it. +func GetNonEmptyLines(output string) []string { + var res []string + elements := strings.Split(output, "\n") + for _, element := range elements { + if element != "" { + res = append(res, element) + } + } + + return res +} + +// GetProjectDir will return the directory where the project is +func GetProjectDir() (string, error) { + wd, err := os.Getwd() + if err != nil { + return wd, err + } + wd = strings.Replace(wd, "/test/e2e", "", -1) + return wd, nil +}