Skip to content

Commit

Permalink
Add e2e for active-standby switching of spidercontroller
Browse files Browse the repository at this point in the history
Signed-off-by: ty-dc <[email protected]>
  • Loading branch information
ty-dc committed Jan 10, 2024
1 parent 7f5a0c6 commit 170fd48
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 9 deletions.
5 changes: 0 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -488,8 +488,3 @@ lint_chart_trivy:
aquasec/trivy:$(TRIVY_VERSION) config --exit-code 1 --severity $(LINT_TRIVY_SEVERITY_LEVEL) /tmp/src/charts ; \
(($$?==0)) || { echo "error, failed to check chart trivy" && exit 1 ; } ; \
echo "chart trivy check: pass"


.PHONY: build-chart
build-chart:
@ cd charts ; make
2 changes: 1 addition & 1 deletion test/doc/metric.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

| Case ID | Title | Priority | Smoke | Status | Other |
| ------- | ------------------------------------------------------------ | -------- | ----- | ------ | ----- |
| T00001 | The metric should work fine. | p1 | true | | |
| K00001 | The metric should work fine. | p1 | true | done | |
2 changes: 1 addition & 1 deletion test/doc/reliability.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
| R00004 | Successfully run a pod when the Spiderpool agent is restarting | p2 | | done | |
| R00005 | Successfully run a pod when the coreDns is restarting | p3 | | done | |
| R00006 | Successfully recover a pod whose original node is power-off | p2 | | done | |
| R00007 | Spiderpool Controller active/standby switching is normal | p2 | | | |
| R00007 | Spiderpool Controller active/standby switching is normal | p2 | | done | |
6 changes: 5 additions & 1 deletion test/e2e/common/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ var ForcedWaitingTime = time.Second
const (
SpiderPoolConfigmapName = "spiderpool-conf"
SpiderPoolConfigmapNameSpace = "kube-system"
SpiderPoolLeases = "spiderpool-controller-leases"
SpiderPoolLeasesNamespace = "kube-system"
)

// Kubeadm configurations
Expand Down Expand Up @@ -114,7 +116,9 @@ const (

// Webhook Port
const (
WebhookPort = "5722"
WebhookPort = "5722"
SpiderControllerMetricsPort = "5721"
SpiderAgentMetricsPort = "5711"
)

func init() {
Expand Down
3 changes: 2 additions & 1 deletion test/e2e/reliability/reliability_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
coordinationv1 "k8s.io/api/coordination/v1"
)

func TestReliability(t *testing.T) {
Expand All @@ -23,7 +24,7 @@ var frame *e2e.Framework
var _ = BeforeSuite(func() {
defer GinkgoRecover()
var e error
frame, e = e2e.NewFramework(GinkgoT(), []func(*runtime.Scheme) error{spiderpool.AddToScheme})
frame, e = e2e.NewFramework(GinkgoT(), []func(*runtime.Scheme) error{spiderpool.AddToScheme, coordinationv1.AddToScheme})
Expect(e).NotTo(HaveOccurred())

})
112 changes: 112 additions & 0 deletions test/e2e/reliability/reliability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package reliability_test

import (
"context"
"fmt"
"sync"
"time"

Expand All @@ -13,7 +14,9 @@ import (
"github.com/spidernet-io/spiderpool/pkg/constant"
spiderpool "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1"
"github.com/spidernet-io/spiderpool/test/e2e/common"
coordinationv1 "k8s.io/api/coordination/v1"
corev1 "k8s.io/api/core/v1"
apitypes "k8s.io/apimachinery/pkg/types"
)

var _ = Describe("test reliability", Label("reliability"), Serial, func() {
Expand Down Expand Up @@ -232,4 +235,113 @@ var _ = Describe("test reliability", Label("reliability"), Serial, func() {
},
PEntry("Successfully recovery a pod whose original node is power-off", Serial, Label("R00006"), int32(2)),
)

// R00007: delete the spiderpool-controller pod that currently holds the
// leader lease and verify that another known replica takes the lease over.
It("Spiderpool Controller active/standby switching is normal", Label("R00007"), func() {

	podList, err := frame.GetPodListByLabel(map[string]string{"app.kubernetes.io/component": constant.SpiderpoolController})
	Expect(err).NotTo(HaveOccurred())

	if len(podList.Items) <= 1 {
		Skip("There is only one replicas of spidercontroller, so there is no need to switch between primary and secondary.")
	}

	spiderControllerLeases, err := getLeases(common.SpiderPoolLeasesNamespace, common.SpiderPoolLeases)
	Expect(err).NotTo(HaveOccurred())
	// HolderIdentity is a pointer; fail with a readable message instead of
	// panicking on a nil dereference when the lease has no holder yet.
	Expect(spiderControllerLeases.Spec.HolderIdentity).NotTo(BeNil(), "lease %s/%s has no holder identity", common.SpiderPoolLeasesNamespace, common.SpiderPoolLeases)
	currentMaster := *spiderControllerLeases.Spec.HolderIdentity

	// leaseMap records every controller replica seen before the switchover
	// and whether it was the lease holder at that time.
	leaseMap := make(map[string]bool, len(podList.Items))
	masterFound := false
	for _, v := range podList.Items {
		isMaster := v.Name == currentMaster
		leaseMap[v.Name] = isMaster
		if isMaster {
			masterFound = true
			GinkgoWriter.Printf("the spiderpool-controller current master is: %v \n", currentMaster)
		}
	}
	GinkgoWriter.Printf("The master-slave information of spidercontroller is as follows: %v \n", leaseMap)

	if !masterFound {
		// NOTE(review): as before, nothing is deleted when the lease holder
		// does not match any listed pod name, so the case ends without
		// exercising a switchover — confirm whether this should fail instead.
		GinkgoWriter.Printf("lease holder %v does not match any spiderpool-controller pod, nothing to switch \n", currentMaster)
		return
	}

	// Delete the current master and wait until the expected number of
	// controller replicas is running again.
	Expect(frame.DeletePod(currentMaster, podList.Items[0].Namespace)).NotTo(HaveOccurred())
	ctx, cancel := context.WithTimeout(context.Background(), common.PodReStartTimeout)
	defer cancel()
	err = frame.WaitPodListRunning(podList.Items[0].Labels, len(podList.Items), ctx)
	Expect(err).NotTo(HaveOccurred())

	Eventually(func() bool {
		lease, err := getLeases(common.SpiderPoolLeasesNamespace, common.SpiderPoolLeases)
		if err != nil {
			return false
		}
		holder := lease.Spec.HolderIdentity
		if holder == nil {
			// The lease may be momentarily without a holder during
			// re-election; keep polling rather than dereferencing nil.
			return false
		}
		if *holder == currentMaster {
			// After the Pod is restarted, the master should be re-elected.
			return false
		}

		// When there are 3 or more replicas of a spidercontroller,
		// it is impossible to determine which replica is the master.
		// But they must be on the map.
		if _, ok := leaseMap[*holder]; !ok {
			GinkgoWriter.Printf("lease records a value:%v that does not exist \n", *holder)
			return false
		}

		GinkgoWriter.Printf("spiderpool-controller master-slave switchover is successful, the current master is: %v \n", *holder)
		return true
	}).WithTimeout(time.Minute).WithPolling(time.Second * 3).Should(BeTrue())
})

// K00001: the metrics endpoints of the spiderpool controller and agent must
// both answer within the pod restart timeout.
It("The metric should work fine.", Label("K00001"), func() {
	// Controller metrics endpoint.
	controllerCtx, controllerCancel := context.WithTimeout(context.Background(), common.PodReStartTimeout)
	defer controllerCancel()
	controllerErr := checkMetrics(controllerCtx, common.SpiderControllerMetricsPort)
	Expect(controllerErr).NotTo(HaveOccurred())
	GinkgoWriter.Println("spidercontroller metrics access successful.")

	// Agent metrics endpoint, checked with its own fresh timeout.
	agentCtx, agentCancel := context.WithTimeout(context.Background(), common.PodReStartTimeout)
	defer agentCancel()
	agentErr := checkMetrics(agentCtx, common.SpiderAgentMetricsPort)
	Expect(agentErr).NotTo(HaveOccurred())
	GinkgoWriter.Println("spiderAgent metrics access successful.")
})
})

// getLeases fetches the Lease named leaseName in namespace through the shared
// e2e framework client. It returns a nil Lease together with the lookup error
// when the resource cannot be retrieved.
func getLeases(namespace, leaseName string) (*coordinationv1.Lease, error) {
	lease := new(coordinationv1.Lease)
	key := apitypes.NamespacedName{Namespace: namespace, Name: leaseName}
	if err := frame.GetResource(key, lease); err != nil {
		return nil, err
	}
	return lease, nil
}

// checkMetrics repeatedly curls the "/metrics" route on metricsPort from
// inside the first node returned by the framework until the command succeeds
// or ctx is done.
//
// The target host is the first address reported in that node's status; the
// address is wrapped in brackets when the cluster is IPv6-only.
//
// It returns nil once the curl command succeeds, and an error when the node
// list cannot be fetched, when no usable node/address exists, or when ctx
// expires before a successful check.
func checkMetrics(ctx context.Context, metricsPort string) error {
	const metricsRoute = "/metrics"

	nodeList, err := frame.GetNodeList()
	if err != nil {
		return fmt.Errorf("failed to get node information: %w", err)
	}
	// Guard the index accesses below so an empty result is reported as an
	// error instead of an index-out-of-range panic.
	if len(nodeList.Items) == 0 {
		return fmt.Errorf("failed to get node information: no node found")
	}
	node := &nodeList.Items[0]
	if len(node.Status.Addresses) == 0 {
		return fmt.Errorf("failed to get node information: node %s has no address", node.Name)
	}
	nodeAddress := node.Status.Addresses[0].Address

	var metricsHealthyCheck string
	if frame.Info.IpV6Enabled && !frame.Info.IpV4Enabled {
		metricsHealthyCheck = fmt.Sprintf("curl -I -m 1 -g [%s]:%s%s --insecure", nodeAddress, metricsPort, metricsRoute)
	} else {
		metricsHealthyCheck = fmt.Sprintf("curl -I -m 1 %s:%s%s --insecure", nodeAddress, metricsPort, metricsRoute)
	}

	for {
		// Give up before attempting anything if the deadline already passed.
		select {
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for metrics Healthy Check to be ready")
		default:
		}

		out, err := frame.DockerExecCommand(ctx, node.Name, metricsHealthyCheck)
		if err == nil {
			return nil
		}
		frame.Log("failed to check metrics healthy, error: %v, output log is: %v ", err, string(out))

		// Back off between attempts, but wake up immediately on cancellation
		// instead of sleeping through it.
		select {
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for metrics Healthy Check to be ready")
		case <-time.After(common.ForcedWaitingTime):
		}
	}
}

0 comments on commit 170fd48

Please sign in to comment.