Skip to content

Commit

Permalink
Merge branch 'kubeflow:master' into ocean-spark-v2
Browse files Browse the repository at this point in the history
  • Loading branch information
ImpSy authored Oct 23, 2024
2 parents f02afe1 + 735c7fc commit e763b91
Show file tree
Hide file tree
Showing 28 changed files with 437 additions and 126 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ jobs:
e2e-test:
runs-on: ubuntu-latest
strategy:
matrix:
k8s_version: [v1.28.13, v1.29.8, v1.30.4, v1.31.1]
steps:
- name: Checkout source code
uses: actions/checkout@v4
Expand All @@ -221,7 +224,7 @@ jobs:
go-version-file: go.mod

- name: Create a Kind cluster
run: make kind-create-cluster
run: make kind-create-cluster KIND_K8S_VERSION=${{ matrix.k8s_version }}

- name: Build and load image to Kind cluster
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/trivy-image-scanning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
run: make print-IMAGE >> $GITHUB_ENV

- name: trivy scan for github security tab
uses: aquasecurity/trivy-action@0.27.0
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: '${{ env.IMAGE }}'
format: 'sarif'
Expand Down
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ LOCALBIN ?= $(shell pwd)/bin
KUSTOMIZE_VERSION ?= v5.4.1
CONTROLLER_TOOLS_VERSION ?= v0.15.0
KIND_VERSION ?= v0.23.0
KIND_K8S_VERSION ?= v1.29.3
ENVTEST_VERSION ?= release-0.18
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION ?= 1.29.3
Expand Down Expand Up @@ -249,7 +250,12 @@ endif
.PHONY: kind-create-cluster
kind-create-cluster: kind ## Create a kind cluster for integration tests.
if ! $(KIND) get clusters 2>/dev/null | grep -q "^$(KIND_CLUSTER_NAME)$$"; then \
$(KIND) create cluster --name $(KIND_CLUSTER_NAME) --config $(KIND_CONFIG_FILE) --kubeconfig $(KIND_KUBE_CONFIG) --wait=1m; \
$(KIND) create cluster \
--name $(KIND_CLUSTER_NAME) \
--config $(KIND_CONFIG_FILE) \
--image kindest/node:$(KIND_K8S_VERSION) \
--kubeconfig $(KIND_KUBE_CONFIG) \
--wait=1m; \
fi

.PHONY: kind-load-image
Expand Down
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
# Kubeflow Spark Operator

[![Integration Test](https://github.com/kubeflow/spark-operator/actions/workflows/integration.yaml/badge.svg)](https://github.com/kubeflow/spark-operator/actions/workflows/integration.yaml)[![Go Report Card](https://goreportcard.com/badge/github.com/kubeflow/spark-operator)](https://goreportcard.com/report/github.com/kubeflow/spark-operator)
[![Integration Test](https://github.com/kubeflow/spark-operator/actions/workflows/integration.yaml/badge.svg)](https://github.com/kubeflow/spark-operator/actions/workflows/integration.yaml)
[![Go Report Card](https://goreportcard.com/badge/github.com/kubeflow/spark-operator)](https://goreportcard.com/report/github.com/kubeflow/spark-operator)
[![GitHub release](https://img.shields.io/github/v/release/kubeflow/spark-operator)](https://github.com/kubeflow/spark-operator/releases)

## What is Spark Operator?

The Kubernetes Operator for Apache Spark aims to make specifying and running [Spark](https://github.com/apache/spark) applications as easy and idiomatic as running other workloads on Kubernetes. It uses
[Kubernetes custom resources](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) for specifying, running, and surfacing the status of Spark applications.

## Quick Start

For a more detailed guide, please refer to the [Getting Started guide](https://www.kubeflow.org/docs/components/spark-operator/getting-started/).

```bash
# Add the Helm repository
helm repo add spark-operator https://kubeflow.github.io/spark-operator
helm repo update

# Install the operator into the spark-operator namespace and wait for deployments to be ready
helm install spark-operator spark-operator/spark-operator \
--namespace spark-operator --create-namespace --wait

# Create an example application in the default namespace
kubectl apply -f https://raw.githubusercontent.com/kubeflow/spark-operator/refs/heads/master/examples/spark-pi.yaml

# Get the status of the application
kubectl get sparkapp spark-pi
```

## Overview

For a complete reference of the custom resource definitions, please refer to the [API Definition](docs/api-docs.md). For details on its design, please refer to the [Architecture](https://www.kubeflow.org/docs/components/spark-operator/overview/#architecture). The operator requires Spark 2.3 or later, which supports Kubernetes as a native scheduler backend.
Expand Down
4 changes: 2 additions & 2 deletions api/v1beta2/scheduledsparkapplication_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ type ScheduledSparkApplicationStatus struct {
// ScheduledSparkApplication is the Schema for the scheduledsparkapplications API.
type ScheduledSparkApplication struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
metav1.ObjectMeta `json:"metadata"`

Spec ScheduledSparkApplicationSpec `json:"spec,omitempty"`
Spec ScheduledSparkApplicationSpec `json:"spec"`
Status ScheduledSparkApplicationStatus `json:"status,omitempty"`
}

Expand Down
13 changes: 6 additions & 7 deletions api/v1beta2/sparkapplication_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ type SparkApplicationSpec struct {
// +optional
MainClass *string `json:"mainClass,omitempty"`
// MainApplicationFile is the path to a bundled JAR, Python, or R file of the application.
// +optional
MainApplicationFile *string `json:"mainApplicationFile"`
// Arguments is a list of arguments to be passed to the application.
// +optional
Expand Down Expand Up @@ -187,9 +186,9 @@ type SparkApplicationStatus struct {
// SparkApplication is the Schema for the sparkapplications API
type SparkApplication struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
metav1.ObjectMeta `json:"metadata"`

Spec SparkApplicationSpec `json:"spec,omitempty"`
Spec SparkApplicationSpec `json:"spec"`
Status SparkApplicationStatus `json:"status,omitempty"`
}

Expand Down Expand Up @@ -280,15 +279,15 @@ type SparkUIConfiguration struct {
// ServicePort allows configuring the port at service level that might be different from the targetPort.
// TargetPort should be the same as the one defined in spark.ui.port
// +optional
ServicePort *int32 `json:"servicePort"`
ServicePort *int32 `json:"servicePort,omitempty"`
// ServicePortName allows configuring the name of the service port.
// This may be useful for sidecar proxies like Envoy injected by Istio which require specific ports names to treat traffic as proper HTTP.
// Defaults to spark-driver-ui-port.
// +optional
ServicePortName *string `json:"servicePortName"`
ServicePortName *string `json:"servicePortName,omitempty"`
// ServiceType allows configuring the type of the service. Defaults to ClusterIP.
// +optional
ServiceType *corev1.ServiceType `json:"serviceType"`
ServiceType *corev1.ServiceType `json:"serviceType,omitempty"`
// ServiceAnnotations is a map of key,value pairs of annotations that might be added to the service object.
// +optional
ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"`
Expand All @@ -312,7 +311,7 @@ type DriverIngressConfiguration struct {
ServicePortName *string `json:"servicePortName"`
// ServiceType allows configuring the type of the service. Defaults to ClusterIP.
// +optional
ServiceType *corev1.ServiceType `json:"serviceType"`
ServiceType *corev1.ServiceType `json:"serviceType,omitempty"`
// ServiceAnnotations is a map of key,value pairs of annotations that might be added to the service object.
// +optional
ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"`
Expand Down
1 change: 1 addition & 0 deletions charts/spark-operator-chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum
| controller.uiService.enable | bool | `true` | Specifies whether to create service for Spark web UI. |
| controller.uiIngress.enable | bool | `false` | Specifies whether to create ingress for Spark web UI. `controller.uiService.enable` must be `true` to enable ingress. |
| controller.uiIngress.urlFormat | string | `""` | Ingress URL format. Required if `controller.uiIngress.enable` is true. |
| controller.uiIngress.ingressClassName | string | `""` | Optionally set the ingressClassName. |
| controller.batchScheduler.enable | bool | `false` | Specifies whether to enable batch scheduler for spark jobs scheduling. If enabled, users can specify batch scheduler name in spark application. |
| controller.batchScheduler.kubeSchedulerNames | list | `[]` | Specifies a list of kube-scheduler names for scheduling Spark pods. |
| controller.batchScheduler.default | string | `""` | Default batch scheduler to be used if not specified by the user. If specified, this value must be either "volcano" or "yunikorn". Specifying any other value will cause the controller to error on startup. |
Expand Down
2 changes: 0 additions & 2 deletions charts/spark-operator-chart/ci/kind-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,4 @@ kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: kindest/node:v1.29.2
- role: worker
image: kindest/node:v1.29.2
Original file line number Diff line number Diff line change
Expand Up @@ -11573,6 +11573,7 @@ spec:
required:
- driver
- executor
- mainApplicationFile
- sparkVersion
- type
type: object
Expand Down Expand Up @@ -11621,6 +11622,9 @@ spec:
application.
type: string
type: object
required:
- metadata
- spec
type: object
served: true
storage: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11475,6 +11475,7 @@ spec:
required:
- driver
- executor
- mainApplicationFile
- sparkVersion
- type
type: object
Expand Down Expand Up @@ -11555,6 +11556,9 @@ spec:
required:
- driverInfo
type: object
required:
- metadata
- spec
type: object
served: true
storage: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,14 @@ spec:
{{- end }}
{{- end }}
- --controller-threads={{ .Values.controller.workers }}
{{- with .Values.controller.uiService.enable }}
- --enable-ui-service=true
{{- end }}
- --enable-ui-service={{ .Values.controller.uiService.enable }}
{{- if .Values.controller.uiIngress.enable }}
{{- with .Values.controller.uiIngress.urlFormat }}
- --ingress-url-format={{ . }}
{{- end }}
{{- with .Values.controller.uiIngress.ingressClassName }}
- --ingress-class-name={{ . }}
{{- end }}
{{- end }}
{{- if .Values.controller.batchScheduler.enable }}
- --enable-batch-scheduler=true
Expand Down
13 changes: 13 additions & 0 deletions charts/spark-operator-chart/tests/controller/deployment_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,19 @@ tests:
path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args
content: --ingress-url-format={{$appName}}.example.com/{{$appNamespace}}/{{$appName}}

- it: Should contain `--ingress-class-name` arg if `controller.uiIngress.enable` is set to `true` and `controller.uiIngress.ingressClassName` is set
set:
controller:
uiService:
enable: true
uiIngress:
enable: true
ingressClassName: nginx
asserts:
- contains:
path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args
content: --ingress-class-name=nginx

- it: Should contain `--enable-batch-scheduler` arg if `controller.batchScheduler.enable` is `true`
set:
controller:
Expand Down
2 changes: 2 additions & 0 deletions charts/spark-operator-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ controller:
# -- Ingress URL format.
# Required if `controller.uiIngress.enable` is true.
urlFormat: ""
# -- Optionally set the ingressClassName.
ingressClassName: ""

batchScheduler:
# -- Specifies whether to enable batch scheduler for spark jobs scheduling.
Expand Down
10 changes: 0 additions & 10 deletions codecov.yaml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11573,6 +11573,7 @@ spec:
required:
- driver
- executor
- mainApplicationFile
- sparkVersion
- type
type: object
Expand Down Expand Up @@ -11621,6 +11622,9 @@ spec:
application.
type: string
type: object
required:
- metadata
- spec
type: object
served: true
storage: true
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11475,6 +11475,7 @@ spec:
required:
- driver
- executor
- mainApplicationFile
- sparkVersion
- type
type: object
Expand Down Expand Up @@ -11555,6 +11556,9 @@ spec:
required:
- driverInfo
type: object
required:
- metadata
- spec
type: object
served: true
storage: true
Expand Down
2 changes: 0 additions & 2 deletions docs/api-docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -1913,7 +1913,6 @@ string
</em>
</td>
<td>
<em>(Optional)</em>
<p>MainFile is the path to a bundled JAR, Python, or R file of the application.</p>
</td>
</tr>
Expand Down Expand Up @@ -2355,7 +2354,6 @@ string
</em>
</td>
<td>
<em>(Optional)</em>
<p>MainFile is the path to a bundled JAR, Python, or R file of the application.</p>
</td>
</tr>
Expand Down
32 changes: 16 additions & 16 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@ module github.com/kubeflow/spark-operator
go 1.23.1

require (
cloud.google.com/go/storage v1.44.0
cloud.google.com/go/storage v1.45.0
github.com/aws/aws-sdk-go-v2 v1.32.2
github.com/aws/aws-sdk-go-v2/config v1.27.43
github.com/aws/aws-sdk-go-v2/service/s3 v1.63.3
github.com/aws/aws-sdk-go-v2/config v1.28.0
github.com/aws/aws-sdk-go-v2/service/s3 v1.66.0
github.com/golang/glog v1.2.2
github.com/google/uuid v1.6.0
github.com/olekukonko/tablewriter v0.0.5
github.com/onsi/ginkgo/v2 v2.20.2
github.com/onsi/gomega v1.34.2
github.com/prometheus/client_golang v1.20.4
github.com/prometheus/client_golang v1.20.5
github.com/robfig/cron/v3 v3.0.1
github.com/spf13/cobra v1.8.1
github.com/spf13/viper v1.19.0
Expand All @@ -21,10 +21,10 @@ require (
gocloud.dev v0.40.0
golang.org/x/net v0.30.0
golang.org/x/time v0.7.0
helm.sh/helm/v3 v3.16.1
k8s.io/api v0.31.0
k8s.io/apiextensions-apiserver v0.31.0
k8s.io/apimachinery v0.31.0
helm.sh/helm/v3 v3.16.2
k8s.io/api v0.31.1
k8s.io/apiextensions-apiserver v0.31.1
k8s.io/apimachinery v0.31.1
k8s.io/client-go v1.5.2
k8s.io/kubernetes v1.30.2
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
Expand Down Expand Up @@ -56,18 +56,18 @@ require (
github.com/Microsoft/hcsshim v0.12.4 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/aws/aws-sdk-go v1.55.5 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.5 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.6 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.17.41 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.17 // indirect
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.10 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.18 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.21 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.20 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.2 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.2 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.18 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.2 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.24.2 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.2 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.32.2 // indirect
Expand Down Expand Up @@ -218,12 +218,12 @@ require (
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiserver v0.31.0 // indirect
k8s.io/cli-runtime v0.31.0 // indirect
k8s.io/component-base v0.31.0 // indirect
k8s.io/apiserver v0.31.1 // indirect
k8s.io/cli-runtime v0.31.1 // indirect
k8s.io/component-base v0.31.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240709000822-3c01b740850f // indirect
k8s.io/kubectl v0.31.0 // indirect
k8s.io/kubectl v0.31.1 // indirect
oras.land/oras-go v1.2.5 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/kustomize/api v0.17.2 // indirect
Expand Down
Loading

0 comments on commit e763b91

Please sign in to comment.