diff --git a/cce-network-v2/VERSION b/cce-network-v2/VERSION index 719c84e..e9548af 100644 --- a/cce-network-v2/VERSION +++ b/cce-network-v2/VERSION @@ -1 +1 @@ -2.12.14 +2.12.15 diff --git a/cce-network-v2/deploy/cce-network-v2-2.12.tar.gz b/cce-network-v2/deploy/cce-network-v2-2.12.tar.gz index a3eba7f..e6e1cf7 100644 Binary files a/cce-network-v2/deploy/cce-network-v2-2.12.tar.gz and b/cce-network-v2/deploy/cce-network-v2-2.12.tar.gz differ diff --git a/cce-network-v2/deploy/cce-network-v2/Chart.yaml b/cce-network-v2/deploy/cce-network-v2/Chart.yaml index cd31226..1050453 100644 --- a/cce-network-v2/deploy/cce-network-v2/Chart.yaml +++ b/cce-network-v2/deploy/cce-network-v2/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 2.12.14 +version: 2.12.15 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "2.12.14" +appVersion: "2.12.15" diff --git a/cce-network-v2/docs/release.md b/cce-network-v2/docs/release.md index 1b4f87d..de0c544 100644 --- a/cce-network-v2/docs/release.md +++ b/cce-network-v2/docs/release.md @@ -8,6 +8,12 @@ v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如 3. 增加节点网络配置集功能 NetResourceConfigSet,支持指定节点独立配置网络资源。 4. 增加对 HPAS 实例的支持 +#### 2.12.15 [20250227] +1. [Optimize] 优化 NetworkresourceSet.Spec.Addresses 的添加规则,避免重复添加节点 IP +2. [Bug] 修复 VPC-ENI 模式下,访问 ENI 接口失败时,误将 ENI 对象的 VPCStatus 标记为 None 的问题 +3. [Optimize] 创建/lib/systemd/network/98-default.link文件,监控并持续维持其macAddressPolicy为None,解决该参数被意外修改后导致 veth 的 mac 地址被非预期变更的问题 +4. 
[Bug] 修复 VPC-ENI 模式下,弹性网卡预挂载 eni-pre-allocate-num 配置在等于 eniQuota 时数量少一个的问题,增大重试次数避免 ENI 串行创建失败的问题 + #### 2.12.14 [20250213] 1. [Bug] 修复 VPC-ENI 模式下的 remove ENI finalizer 更新逻辑,解决因节点删除时 ENI finalizer 未清理导致 ENI 对象残留的问题 2. [Bug] 修复 VPC-ENI 模式下,因启动时序导致在 cce-network-operator 启动过程中, PSTS 固定 IP 跨节点分配时可能导致的 panic 问题 diff --git a/cce-network-v2/pkg/bce/bcesync/eni.go b/cce-network-v2/pkg/bce/bcesync/eni.go index 593021b..9b111ae 100644 --- a/cce-network-v2/pkg/bce/bcesync/eni.go +++ b/cce-network-v2/pkg/bce/bcesync/eni.go @@ -384,7 +384,7 @@ func (esm *eniStateMachine) start() error { esm.scopeLog.Info("update eni spec success") } else { _, err = esm.es.remoteSyncer.statENI(esm.ctx, esm.resource.Name) - if err != nil { + if cloud.IsErrorReasonNoSuchObject(err) { esm.scopeLog.Infof("eni state machine failed to get inuse eni(%s): %v", esm.resource.Name, err) (&esm.resource.Status).AppendVPCStatus(ccev2.VPCENIStatusNone) // update spec @@ -395,6 +395,8 @@ func (esm *eniStateMachine) start() error { } esm.scopeLog.Info("update eni spec success") return nil + } else if err != nil { + return fmt.Errorf("eni state machine failed to refresh eni(%s) status: %v", esm.resource.Name, err) } } } else if esm.resource.Status.VPCStatus != ccev2.VPCENIStatusDeleted { @@ -404,7 +406,6 @@ func (esm *eniStateMachine) start() error { // eni not found, will delete it which not inuse log.WithField("eniID", esm.resource.Name).Error("not inuse eni not found in vpc, will delete it") return esm.deleteENI() - } else if err != nil { return fmt.Errorf("eni state machine failed to refresh eni(%s) status: %v", esm.resource.Name, err) } diff --git a/cce-network-v2/pkg/bce/vpceni/node_super.go b/cce-network-v2/pkg/bce/vpceni/node_super.go index 85d1973..3241e40 100644 --- a/cce-network-v2/pkg/bce/vpceni/node_super.go +++ b/cce-network-v2/pkg/bce/vpceni/node_super.go @@ -212,6 +212,7 @@ func (n *bceNetworkResourceSet) getENIQuota() ENIQuotaManager { if n.k8sObj.Annotations != nil && 
n.k8sObj.Annotations[k8s.AnnotationIPResourceCapacitySynced] != "" { lastResyncTime := n.k8sObj.Annotations[k8s.AnnotationIPResourceCapacitySynced] t, err := time.Parse(time.RFC3339, lastResyncTime) + // if the last resync time is not set or expired one day ago, go to slow path if err != nil || t.Add(DayDuration).Before(time.Now()) { goto slowPath } @@ -549,13 +550,14 @@ func (n *bceNetworkResourceSet) CreateInterface(ctx context.Context, allocation inums, msg, err := n.real.createInterface(ctx, allocation, scopedLog) n.creatingEni.add(-1) - preAllocateENINum := math.IntMin(eniQuota.GetMaxENI()-1, n.k8sObj.Spec.ENI.PreAllocateENI) + preAllocateENINum := math.IntMin(eniQuota.GetMaxENI(), n.k8sObj.Spec.ENI.PreAllocateENI) preAllocateENINum = preAllocateENINum - availableENICount - 1 + retryTimes := preAllocateENINum * 2 for i := 0; i < preAllocateENINum; i++ { inums++ go func() { retry := 0 - for retry < preAllocateENINum { + for retry < retryTimes { n.creatingEni.add(1) _, _, e := n.real.createInterface(ctx, allocation, scopedLog) n.creatingEni.add(-1) @@ -563,7 +565,7 @@ func (n *bceNetworkResourceSet) CreateInterface(ctx context.Context, allocation scopedLog.Infof("create addition interface success") return } - scopedLog.WithError(e).Errorf("create addition interface failed, retry later for %ds(%d/%d)", retryDelay, retry+1, preAllocateENINum) + scopedLog.WithError(e).Warnf("create addition interface failed, retry later for %ds(%d/%d)", retryDelay, retry+1, retryTimes) retry++ // attaching ENI is need to wait serveral seconds (<15s) for the ENI to be attached, // so we can retry preAllocateENINum times for every retryDelay(15s) seconds later. @@ -824,6 +826,7 @@ func (n *bceNetworkResourceSet) AllocateIPs(ctx context.Context, allocation *ipa return err } else { // if partial success, we will continue to allocate + // DO NOT EDIT HERE!Because the actual allocation is indeed not this much, but it can guarantee exiting the loop. 
ipv4ToAllocate -= ipv4PaticalToAllocate ipv6ToAllocate -= ipv6PaticalToAllocate continue diff --git a/cce-network-v2/pkg/ipam/net_resource.go b/cce-network-v2/pkg/ipam/net_resource.go index de82be8..9314ee8 100644 --- a/cce-network-v2/pkg/ipam/net_resource.go +++ b/cce-network-v2/pkg/ipam/net_resource.go @@ -619,6 +619,7 @@ func (n *NetResource) determineMaintenanceAction() (*maintenanceAction, error) { // Validate that the node still requires addresses to be released, the // request may have been resolved in the meantime. // we will disable the release of excess IPs for burstable ENI mode. + // getMaxIPBurstableIPCount() == 0 means that we are not in burstable ENI mode. if n.manager.releaseExcessIPs && stats.ExcessIPs > 0 && n.getMaxIPBurstableIPCount() == 0 { a.release = n.ops.PrepareIPRelease(stats.ExcessIPs, scopedLog) return a, nil diff --git a/cce-network-v2/pkg/nodediscovery/nodediscovery.go b/cce-network-v2/pkg/nodediscovery/nodediscovery.go index 81eac1e..afa552c 100644 --- a/cce-network-v2/pkg/nodediscovery/nodediscovery.go +++ b/cce-network-v2/pkg/nodediscovery/nodediscovery.go @@ -227,10 +227,7 @@ func (n *NodeDiscovery) updateLocalNode() { if err != nil { log.WithError(err).Fatal("Unable to detect OS distribution") } - err = release.HostOS().DisableAndMonitorMacPersistant() - if err != nil { - log.WithError(err).Fatal("Unable to disable mac persist") - } + _ = release.HostOS().DisableAndMonitorMacPersistant() if k8s.IsEnabled() { // CRD IPAM endpoint restoration depends on the completion of this @@ -322,6 +319,9 @@ func (n *NodeDiscovery) UpdateNetResourceSetResource() { } func (n *NodeDiscovery) mutateNodeResource(nodeResource *ccev2.NetResourceSet) error { + // reset NetworkresourceSet.Spec.Addresses to avoid stale data + nodeResource.Spec.Addresses = []ccev2.NodeAddress{} + // If we are unable to fetch the K8s Node resource and the NetResourceSet does // not have an OwnerReference set, then somehow we are running in an // environment where only the 
NetResourceSet exists. Do not proceed as this is diff --git a/cce-network-v2/pkg/os/systemd_networkd.go b/cce-network-v2/pkg/os/systemd_networkd.go index 4b13183..9231006 100644 --- a/cce-network-v2/pkg/os/systemd_networkd.go +++ b/cce-network-v2/pkg/os/systemd_networkd.go @@ -3,6 +3,7 @@ package os import ( "fmt" "io" + "io/ioutil" "os" "os/exec" @@ -11,19 +12,19 @@ import ( const ( usrPath = "/usr-host" - defaultLinkPath = usrPath + "/lib/systemd/network/99-default.link" + defaultLinkPath = usrPath + "/lib/systemd/network/98-default.link" macAddressPolicyKey = "MACAddressPolicy" macAddressPolicyValueNone = "none" defaultLinkTemplate = ` - [Match] - OriginalName=* - - [Link] - NamePolicy=keep kernel database onboard slot path - AlternativeNamesPolicy=database onboard slot path - MACAddressPolicy=none - ` +[Match] +OriginalName=* + +[Link] +NamePolicy=keep kernel database onboard slot path +AlternativeNamesPolicy=database onboard slot path +MACAddressPolicy=none +` ) func UpdateSystemdConfigOption(linkPath, key, value string) error { @@ -94,3 +95,19 @@ func CheckIfLinkOptionConfigured(linkPath, key, value string) (bool, error) { return false, fmt.Errorf("failed to find %s in %s, the key is missing", key, linkPath) } + +// PrintFileContent reads the entire content of the specified file into a string, and then prints it +func PrintFileContent(filename string) error { + log.Info(filename, " file content:") + // Read the entire file + data, err := ioutil.ReadFile(filename) + if err != nil { + return fmt.Errorf("unable to read file: %v", err) + } + + // Convert the data to a string and print it + content := string(data) + log.Info(content) + + return nil +} diff --git a/cce-network-v2/pkg/os/systemd_networkd_test.go b/cce-network-v2/pkg/os/systemd_networkd_test.go index d05ba4c..fb0758e 100644 --- a/cce-network-v2/pkg/os/systemd_networkd_test.go +++ b/cce-network-v2/pkg/os/systemd_networkd_test.go @@ -1,6 +1,8 @@ package os import ( + "io/ioutil" + "os" "testing" 
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/test/testdata" @@ -32,3 +34,104 @@ func TestUpdateSystemdConfigOption(t *testing.T) { UpdateSystemdConfigOption(tt.args.linkPath, tt.args.key, tt.args.value) } } + +func TestPrintFileContent(t *testing.T) { + type args struct { + filename string + } + tests := []struct { + name string + args args + wantErr bool + }{ + { + name: "File exists and can be read", + args: args{ + filename: "testfile.txt", + }, + wantErr: false, + }, + { + name: "File does not exist", + args: args{ + filename: "nonexistent.txt", + }, + wantErr: true, + }, + } + + // Create a temporary valid file for the test + tempFileContent := "Hello, World!" + tempFile, err := ioutil.TempFile("", "testfile.txt") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tempFile.Name()) // Clean up the file afterwards + + if _, err := tempFile.WriteString(tempFileContent); err != nil { + t.Fatal(err) + } + tempFile.Close() + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var err error + if tt.args.filename == "testfile.txt" { + err = PrintFileContent(tempFile.Name()) + } else { + err = PrintFileContent(tt.args.filename) + } + + if (err != nil) != tt.wantErr { + t.Errorf("PrintFileContent() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func TestCheckIfLinkOptionConfigured(t *testing.T) { + type args struct { + linkPath string + key string + value string + } + tests := []struct { + name string + args args + want bool + wantErr bool + }{ + { + name: "check systemd config option", + args: args{ + linkPath: testdata.Path("os/ubuntu/systemd/default.link"), + key: macAddressPolicyKey, + value: "none", + }, + want: true, + wantErr: false, + }, + { + name: "file does not exist", + args: args{ + linkPath: testdata.Path(""), + key: macAddressPolicyKey, + value: "none", + }, + want: false, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := 
CheckIfLinkOptionConfigured(tt.args.linkPath, tt.args.key, tt.args.value) + if (err != nil) != tt.wantErr { + t.Errorf("CheckIfLinkOptionConfigured() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("CheckIfLinkOptionConfigured() got = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/cce-network-v2/pkg/os/ubuntu.go b/cce-network-v2/pkg/os/ubuntu.go index 739f885..b8beb32 100644 --- a/cce-network-v2/pkg/os/ubuntu.go +++ b/cce-network-v2/pkg/os/ubuntu.go @@ -42,11 +42,6 @@ func (o *ubuntuOS) DisableAndMonitorMacPersistant() error { log.Info("not ubuntu 22.04, skip disable mac persistent") return nil } - err := o.overrideSystemdDefaultLinkConfig() - if err != nil { - log.Errorf("failed to disable mac persistent, ignored os policy: %v", err) - } - go o.startWatchingDefaultLinkFile() return nil @@ -79,24 +74,29 @@ func (o *ubuntuOS) overrideSystemdDefaultLinkConfig() error { } } -// startWatchingDefaultLinkFile starts an goroutine that continues to watche the default link file, -// when the option "MACAddressPolicy" is changed from "none", -// which was configured by updateLocalNode() in StartDiscovery(), -// the condition "networkUnavailable" will be turned into "true" on node. +// startWatchingDefaultLinkFile starts watching the default link file for any changes. +// This function is specific to Ubuntu 22.04. +// It first creates a new file system watcher and adds the default link file to the watcher. +// If there are any write or remove operations on the file, appropriate actions are taken. +// On a write operation, it logs the event and checks and deals with the MAC address policy. +// On a remove operation, it removes the watcher for the deleted file, recreates the file if it does not exist, +// restarts the systemd-udevd, adds the new file to the watcher, and then checks and deals with the MAC address policy again. 
func (o *ubuntuOS) startWatchingDefaultLinkFile() { log.Info("ubuntu 22.04 detected, start to watch default link file") + // ensure the file was created correctly + o.checkAndDealMACAddressPolicy(defaultLinkPath, defaultLinkTemplate) + // Create a new file system watcher watcher, err := fsnotify.NewWatcher() if err != nil { - log.Error("Unable to create watcher for default link file") - return + log.Fatal("Unable to create watcher for default link file") } defer watcher.Close() + // Add the default link file to the watcher err = watcher.Add(defaultLinkPath) if err != nil { - log.Errorf("watcher failed to watch file at: %s", defaultLinkPath) - return + log.Fatalf("watcher failed to watch file at: %s", defaultLinkPath) } log.Infof("start to watch default link file: %s", defaultLinkPath) @@ -109,45 +109,53 @@ func (o *ubuntuOS) startWatchingDefaultLinkFile() { if event.Op&fsnotify.Write == fsnotify.Write { log.Infof("event: 'default link file overwritten' is watched") - o.checkAndDealMACAddressPolicy() + o.checkAndDealMACAddressPolicy(defaultLinkPath, defaultLinkTemplate) } if event.Op&fsnotify.Remove == fsnotify.Remove { log.Infof("event: 'default link file removed' is watched") - _, err := os.Open(defaultLinkPath) - if os.IsNotExist(err) { - err = os.WriteFile(defaultLinkPath, []byte(defaultLinkTemplate), 0644) - if err != nil { - log.Errorf("write default link file %s failed: %v", defaultLinkPath, err) - return - } - log.Infof("write default link file %s success", defaultLinkPath) - log.Infof("restart systemd-udevd") - // restart systemd-udevd - exec.Command("nsenter", "-m", "-u", "-t", "1", "systemctl", "restart", "systemd-udevd").Run() + + // Remove the watcher for the deleted file + watcher.Remove(defaultLinkPath) + + // ensure the file was created correctly + o.checkAndDealMACAddressPolicy(defaultLinkPath, defaultLinkTemplate) + // Add the new file to the watcher + err = watcher.Add(defaultLinkPath) + if err != nil { + log.Fatalf("watcher failed to re-watch 
file at: %s", defaultLinkPath) } + log.Infof("re-watching default link file: %s", defaultLinkPath) } case err, ok := <-watcher.Errors: if !ok { - return + log.Fatalf("Watcher error channel closed") } - log.WithError(err).Errorf("watcher error: %s", err) + log.WithError(err).Fatal("Watcher encountered an error") } } } -func (o *ubuntuOS) checkAndDealMACAddressPolicy() { - - isConfigured, err := CheckIfLinkOptionConfigured(defaultLinkPath, macAddressPolicyKey, macAddressPolicyValueNone) +// checkAndDealMACAddressPolicy ensures that the MAC address policy in the default link file is correctly set. +// If the policy is incorrect or the file does not exist, it attempts to correct it by writing the default configuration. +func (o *ubuntuOS) checkAndDealMACAddressPolicy(filePath, template string) { + if err := PrintFileContent(filePath); err != nil { + log.Warnf("PrintFileContent error: %v, try to recreate it", err) + // If printing fails, write the default template to the file. + err = os.WriteFile(filePath, []byte(template), 0644) + if err != nil { + log.Fatalf("write default link file %s failed: %v", filePath, err) + } + } + isConfigured, err := CheckIfLinkOptionConfigured(filePath, macAddressPolicyKey, macAddressPolicyValueNone) if isConfigured { log.Infof("default link file option %s is still as expected: %s", macAddressPolicyKey, macAddressPolicyValueNone) return } log.Warningf("default link file option %s go wrong, reason: %s, now try to set it back", macAddressPolicyKey, err) - err = UpdateSystemdConfigOption(defaultLinkPath, macAddressPolicyKey, macAddressPolicyValueNone) + err = UpdateSystemdConfigOption(filePath, macAddressPolicyKey, macAddressPolicyValueNone) if err != nil { - log.Errorf("update default link file %s failed: %v", defaultLinkPath, err) - return + log.Fatalf("update default link file %s failed: %v", filePath, err) } log.Infof("update default link file success,now option %s is set correctly to %s", macAddressPolicyKey, macAddressPolicyValueNone) } 
diff --git a/cce-network-v2/pkg/os/ubuntu_test.go b/cce-network-v2/pkg/os/ubuntu_test.go
new file mode 100644
index 0000000..710bbae
--- /dev/null
+++ b/cce-network-v2/pkg/os/ubuntu_test.go
@@ -0,0 +1,30 @@
+package os
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestCheckAndDealMACAddressPolicy_FileNotExists verifies that
+// checkAndDealMACAddressPolicy writes the supplied template when the target
+// link file is missing.
+func TestCheckAndDealMACAddressPolicy_FileNotExists(t *testing.T) {
+	// Work inside a per-test directory: it is unique to this run, starts out
+	// empty (so the link file is guaranteed to be missing), and is removed by
+	// the testing package even when an assertion below calls t.Fatalf.
+	linkPath := filepath.Join(t.TempDir(), "test_link.conf")
+	linkTemplate := "[Match]\nName=eth0\n\n[Link]\nMACAddressPolicy=none\n"
+
+	o := &ubuntuOS{}
+	o.checkAndDealMACAddressPolicy(linkPath, linkTemplate)
+
+	// The file must now exist and hold exactly the template.
+	content, err := os.ReadFile(linkPath)
+	if err != nil {
+		t.Fatalf("Failed to read file %s: %v", linkPath, err)
+	}
+	if string(content) != linkTemplate {
+		t.Errorf("Expected file content:\n%s\nBut got:\n%s", linkTemplate, string(content))
+	}
+}