-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: deployment support and ungate as jobs
Ungating a pod can be erroneous if the pod is not listed with the API listing (common for deployments) or there is some other ephemeral api error. To fix this, we can use a task that will retry if it does not work the first time. Signed-off-by: vsoch <vsoch@users.noreply.github.com>
Showing
15 changed files
with
299 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Example Deployment: a single replica running the pause image.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: deployment
  template:
    metadata:
      labels:
        app: deployment
    spec:
      containers:
      - name: container
        image: registry.k8s.io/pause:2.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Example Deployment: two replicas running the pause image.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deployment
spec:
  replicas: 2
  selector:
    matchLabels:
      app: deployment
  template:
    metadata:
      labels:
        app: deployment
    spec:
      containers:
      - name: container
        image: registry.k8s.io/pause:2.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Example indexed Job: one completion, one pod at a time, running the pause image.
apiVersion: batch/v1
kind: Job
metadata:
  name: job
spec:
  completions: 1
  parallelism: 1
  completionMode: Indexed
  template:
    metadata:
      labels:
        app: job
    spec:
      restartPolicy: Never
      containers:
      - name: job
        image: registry.k8s.io/pause:2.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
package workers | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"strings" | ||
|
||
api "github.com/converged-computing/fluxqueue/api/v1alpha1" | ||
"github.com/converged-computing/fluxqueue/pkg/defaults" | ||
"github.com/riverqueue/river" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
patchTypes "k8s.io/apimachinery/pkg/types" | ||
"k8s.io/client-go/kubernetes" | ||
"k8s.io/client-go/rest" | ||
) | ||
|
||
// Ungate workers explicitly ungate pods, and add node labels | ||
func (args UngateArgs) Kind() string { return "ungate" } | ||
|
||
type UngateWorker struct { | ||
river.WorkerDefaults[UngateArgs] | ||
RESTConfig rest.Config | ||
} | ||
|
||
// NewJobWorker returns a new job worker with a Fluxion client | ||
func NewUngateWorker(cfg rest.Config) (*UngateWorker, error) { | ||
worker := UngateWorker{RESTConfig: cfg} | ||
return &worker, nil | ||
} | ||
|
||
// UngateArgs are the arguments of an ungate job. The JSON tags define the
// field names used when the arguments are serialized.
type UngateArgs struct {
	// Name of the object to ungate (a pod name, or the deployment name
	// used to build the pod selector).
	Name string `json:"name"`
	// Type is the wrapped object type, compared against the api.JobWrapped* names.
	Type string `json:"type"`
	// Namespace the object lives in.
	Namespace string `json:"namespace"`
	// Nodes are the node assignments to write onto the pod label(s).
	Nodes []string `json:"nodes"`
	// JobID is the Flux job id written onto the pod label.
	JobID int64 `json:"jobid"`
}
|
||
// Ungate a specific pod for a group (e.g., deployment) | ||
// Right now we aren't using this for single pods (but can/will) | ||
func (w UngateWorker) Work(ctx context.Context, job *river.Job[UngateArgs]) error { | ||
|
||
var err error | ||
wlog.Info("Running ungate worker", "Namespace", job.Args.Namespace, "Name", job.Args.Name) | ||
jobid := fmt.Sprintf("%d", job.Args.JobID) | ||
|
||
client, err := kubernetes.NewForConfig(&w.RESTConfig) | ||
if err != nil { | ||
wlog.Info("Error getting Kubernetes client", "Namespace", job.Args.Namespace, "Name", job.Args.Name, "Error", err) | ||
return err | ||
} | ||
|
||
// Ungate single pod (should only be one) | ||
if job.Args.Type == api.JobWrappedPod.String() { | ||
nodesStr := strings.Join(job.Args.Nodes, "__") | ||
payload := `{"metadata": {"labels": {"` + defaults.NodesLabel + `": "` + nodesStr + `", "` + defaults.FluxJobIdLabel + `": "` + jobid + `"}}}` | ||
_, err = client.CoreV1().Pods(job.Args.Namespace).Patch(ctx, job.Args.Name, patchTypes.MergePatchType, []byte(payload), metav1.PatchOptions{}) | ||
if err != nil { | ||
return err | ||
} | ||
err = removeGate(ctx, client, job.Args.Namespace, job.Args.Name) | ||
if err != nil { | ||
wlog.Info("Error in removing single pod", "Error", err) | ||
return err | ||
} | ||
} | ||
|
||
// For a deployment, we need to get the pods based on a selector | ||
if job.Args.Type == api.JobWrappedDeployment.String() { | ||
selector := fmt.Sprintf("%s=deployment-%s-%s", defaults.SelectorLabel, job.Args.Name, job.Args.Namespace) | ||
|
||
// 4. Get pods in the default namespace | ||
pods, err := client.CoreV1().Pods(job.Args.Namespace).List(ctx, metav1.ListOptions{ | ||
LabelSelector: selector, | ||
}) | ||
wlog.Info("Selector returned pods for nodes", "Pods", len(pods.Items), "Nodes", len(job.Args.Nodes)) | ||
|
||
if err != nil { | ||
wlog.Info("Error listing pods in ungate worker", "Namespace", job.Args.Namespace, "Name", job.Args.Name, "Error", err) | ||
return err | ||
} | ||
// Ungate as many as we are able | ||
for i, pod := range pods.Items { | ||
|
||
// This shouldn't happen | ||
if i >= len(pods.Items) { | ||
wlog.Info("Warning - we have more pods than nodes") | ||
break | ||
} | ||
|
||
// We should not try to ungate (and assign a node) to a pod that | ||
// already has been ungated | ||
ungated := true | ||
if pod.Spec.SchedulingGates != nil { | ||
for _, gate := range pod.Spec.SchedulingGates { | ||
if gate.Name == defaults.SchedulingGateName { | ||
ungated = false | ||
break | ||
} | ||
} | ||
} | ||
if ungated { | ||
continue | ||
} | ||
payload := `{"metadata": {"labels": {"` + defaults.NodesLabel + `": "` + job.Args.Nodes[i] + `", "` + defaults.FluxJobIdLabel + `": "` + jobid + `"}}}` | ||
_, err = client.CoreV1().Pods(job.Args.Namespace).Patch(ctx, pod.ObjectMeta.Name, patchTypes.MergePatchType, []byte(payload), metav1.PatchOptions{}) | ||
if err != nil { | ||
wlog.Info("Error in patching deployment pod", "Error", err) | ||
return err | ||
} | ||
err = removeGate(ctx, client, pod.ObjectMeta.Namespace, pod.ObjectMeta.Name) | ||
if err != nil { | ||
wlog.Info("Error in removing deployment pod gate", "Error", err) | ||
return err | ||
} | ||
} | ||
|
||
// Kubernetes has not created the pod objects yet | ||
// Returning an error will have it run again, with a delay | ||
// https://riverqueue.com/docs/job-retries | ||
if len(pods.Items) < len(job.Args.Nodes) { | ||
return fmt.Errorf("ungate pods job did not have all pods") | ||
} | ||
} | ||
return err | ||
} |