diff --git a/cce-network-v2/VERSION b/cce-network-v2/VERSION index ae93586..c6ec532 100644 --- a/cce-network-v2/VERSION +++ b/cce-network-v2/VERSION @@ -1 +1 @@ -2.12.7 +2.12.8 diff --git a/cce-network-v2/docs/release.md b/cce-network-v2/docs/release.md index 023e4c7..0d3e6f6 100644 --- a/cce-network-v2/docs/release.md +++ b/cce-network-v2/docs/release.md @@ -7,6 +7,13 @@ v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如 2. 增加 eni 安全组同步功能, 保持CCE ENI 和节点安全组同步。 3. 增加节点网络配置集功能 NetResourceConfigSet,支持指定节点独立配置网络资源。 +#### 2.12.8 [20240924] +1. [Bug] 增加 eni 主 IP 获取流程,避免新节点缺少主 IP 无法就绪的问题 +2. [Bug] 增加 EBC 主网卡 IP 查询流程,避免新节点缺少主 IP 无法就绪的问题 +3. [Optimize] 增加 restore IP 流程的严格模式,严格模式下如果无法 restore IP 时,触发 agent 重启 +4. [Optimize] 优化 IP 借用规则,当主网卡已有辅助 IP 时,减少借用 IP 地址的数量 +5. [Bug] 修复 EBC 主网卡重复创建 ENI 的问题 + #### 2.12.7 [20240923] 1. [Optimize] psts 增加对 cep ttl 未过期时直接移除 Node 导致 cep 后续 ttl 过期后因无对应 eni 而无法正常删除时的清理逻辑 2. [Optimize] 增加 ENI 同步时不一致信息的差异对比日志,方便出现 ENI 数据不一致时排查问题 @@ -61,6 +68,9 @@ v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如 新特性功能: 1. 新特性:容器内支持分配 RDMA 子网卡及 RDMA 辅助IP。 +#### 2.11.6 [20240924] +1. [Bug] 修复 ENI 同步不支持 EHC 的问题 + #### 2.11.5 [20240920] 1. [Optimize] 增加 ENI 同步时不一致信息的差异对比日志,方便出现 ENI 数据不一致时排查问题 2. [Optimize] 去掉 ERI 的独立同步逻辑,复用 ERI 和 ENI 的同步流程 diff --git a/cce-network-v2/pkg/bce/bcesync/eni.go b/cce-network-v2/pkg/bce/bcesync/eni.go index eba48dc..8d96a62 100644 --- a/cce-network-v2/pkg/bce/bcesync/eni.go +++ b/cce-network-v2/pkg/bce/bcesync/eni.go @@ -12,6 +12,7 @@ import ( "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" + "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/api/v1/models" operatorOption "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/operator/option" "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/bce/api/cloud" "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/bce/api/eni" @@ -89,10 +90,11 @@ func (es *VPCENISyncerRouter) StartENISyncer(ctx context.Context, updater syncer // Create implements syncer.ENIEventHandler func (es *VPCENISyncerRouter) Create(resource *ccev2.ENI) error { types := resource.Spec.Type - if types == ccev2.ENIForBCC { - return es.eni.Create(resource) + if types == ccev2.ENIForBBC || types == ccev2.ENIForHPC || types == ccev2.ENIForERI { + return nil } - return nil + + return es.eni.Create(resource) } // Delete implements syncer.ENIEventHandler @@ -108,10 +110,11 @@ func (es *VPCENISyncerRouter) ResyncENI(ctx context.Context) time.Duration { // Update implements syncer.ENIEventHandler func (es *VPCENISyncerRouter) Update(resource *ccev2.ENI) error { types := resource.Spec.Type - if types == ccev2.ENIForBCC { - return es.eni.Update(resource) + if types == ccev2.ENIForBBC || types == ccev2.ENIForHPC || types == ccev2.ENIForERI { + return nil } - return nil + + return es.eni.Update(resource) } var ( @@ -216,6 +219,7 @@ func (es *eniSyncher) handleENIUpdate(resource *ccev2.ENI, scopeLog *logrus.Entr es: es, ctx: ctx, resource: newObj, + scopeLog: scopeLog, } err = machine.start() @@ -316,12 +320,39 @@ type eniStateMachine struct { ctx context.Context resource *ccev2.ENI vpceni *eni.Eni + scopeLog *logrus.Entry } // Start state machine flow func (esm *eniStateMachine) start() error { var err error - if esm.resource.Status.VPCStatus != ccev2.VPCENIStatusInuse && esm.resource.Status.VPCStatus != ccev2.VPCENIStatusDeleted { + if esm.resource.Status.VPCStatus == ccev2.VPCENIStatusInuse { + if len(esm.resource.Spec.PrivateIPSet) == 0 { + esm.vpceni, err = esm.es.remoteSyncer.statENI(esm.ctx, esm.resource.Name) + if err != nil { + return fmt.Errorf("eni state machine failed to get inuse eni(%s): %v", esm.resource.Name, err) + } + esm.resource.Spec.ENI.ID = esm.vpceni.EniId + esm.resource.Spec.ENI.Name = esm.vpceni.Name + esm.resource.Spec.ENI.MacAddress = esm.vpceni.MacAddress + esm.resource.Spec.ENI.SecurityGroupIds = esm.vpceni.SecurityGroupIds + esm.resource.Spec.ENI.EnterpriseSecurityGroupIds = esm.vpceni.EnterpriseSecurityGroupIds + esm.resource.Spec.ENI.Description = esm.vpceni.Description + esm.resource.Spec.ENI.VpcID = esm.vpceni.VpcId + esm.resource.Spec.ENI.ZoneName = esm.vpceni.ZoneName + esm.resource.Spec.ENI.SubnetID = esm.vpceni.SubnetId + esm.resource.Spec.ENI.PrivateIPSet = toModelPrivateIP(esm.vpceni.PrivateIpSet, esm.vpceni.VpcId, esm.vpceni.SubnetId) + esm.resource.Spec.ENI.IPV6PrivateIPSet = toModelPrivateIP(esm.vpceni.Ipv6PrivateIpSet, esm.vpceni.VpcId, esm.vpceni.SubnetId) + ElectENIIPv6PrimaryIP(esm.resource) + // update spec + _, updateError := esm.es.updater.Update(esm.resource) + if updateError != nil { + esm.scopeLog.WithError(updateError).Error("update eni spec failed") + return updateError + } + esm.scopeLog.Info("update eni spec success") + } + } else if esm.resource.Status.VPCStatus != ccev2.VPCENIStatusDeleted { // refresh status of ENI esm.vpceni, err = esm.es.remoteSyncer.statENI(esm.ctx, esm.resource.Name) if cloud.IsErrorReasonNoSuchObject(err) { @@ -456,3 +487,18 @@ func ElectENIIPv6PrimaryIP(newObj *ccev2.ENI) { } } } + +// toModelPrivateIP convert private ip to model +func toModelPrivateIP(ipset []enisdk.PrivateIp, vpcID, subnetID string) []*models.PrivateIP { + var pIPSet []*models.PrivateIP + for _, pip := range ipset { + newPIP := &models.PrivateIP{ + PublicIPAddress: pip.PublicIpAddress, + PrivateIPAddress: pip.PrivateIpAddress, + Primary: pip.Primary, + } + newPIP.SubnetID = SearchSubnetID(vpcID, subnetID, pip.PrivateIpAddress) + pIPSet = append(pIPSet, newPIP) + } + return pIPSet +} diff --git a/cce-network-v2/pkg/bce/vpceni/node_ebc.go b/cce-network-v2/pkg/bce/vpceni/node_ebc.go index effd2f1..c499382 100644 --- a/cce-network-v2/pkg/bce/vpceni/node_ebc.go +++ b/cce-network-v2/pkg/bce/vpceni/node_ebc.go @@ -146,6 +146,17 @@ func (n *ebcNetworkResourceSet) createPrimaryENIOnCluster(ctx context.Context, s Type: ccev2.ENIType(resource.Spec.ENI.InstanceType), }, } + + // use bcc api to get primary ips of primary ENI + for _, nicip := range bccInfo.NicInfo.Ips { + eni.Spec.ENI.PrivateIPSet = append(eni.Spec.ENI.PrivateIPSet, &models.PrivateIP{ + SubnetID: bccInfo.NicInfo.SubnetId, + Primary: nicip.Primary == "true", + PrivateIPAddress: nicip.PrivateIp, + PublicIPAddress: nicip.Eip, + }) + } + eni, err = k8s.CCEClient().CceV2().ENIs().Create(ctx, eni, metav1.CreateOptions{}) if err != nil { scopedLog.Errorf("failed to create primary ENI %s with secondary IP: %v", eni.Name, err) @@ -160,12 +171,6 @@ func (n *ebcNetworkResourceSet) createPrimaryENIOnCluster(ctx context.Context, s return err } - _, err = k8s.CCEClient().CceV2().ENIs().Create(ctx, eni, metav1.CreateOptions{}) - if err != nil { - scopedLog.Errorf("failed to create primary ENI %s with secondary IP: %v", eni.Name, err) - return fmt.Errorf("failed to create primary ENI %s on k8s", eni.Name) - } - if eni.Status.VPCStatus != ccev2.VPCENIStatusInuse { (&eni.Status).AppendVPCStatus(ccev2.VPCENIStatusInuse) _, err = k8s.CCEClient().CceV2().ENIs().UpdateStatus(ctx, eni, metav1.UpdateOptions{}) diff --git a/cce-network-v2/pkg/bce/vpceni/node_super.go b/cce-network-v2/pkg/bce/vpceni/node_super.go index 4cf4412..379a24a 100644 --- a/cce-network-v2/pkg/bce/vpceni/node_super.go +++ b/cce-network-v2/pkg/bce/vpceni/node_super.go @@ -1401,7 +1401,7 @@ func (n *bceNetworkResourceSet) tryBorrowIPs(newENI *ccev2.ENI) error { var ( maxAllocateIPs = n.GetMaximumAllocatableIPv4() - n.getAvailableIPv4() - quotaIPs = n.getENIQuota().GetMaxIP() + quotaIPs = n.getENIQuota().GetMaxIP() - len(newENI.Spec.PrivateIPSet) toBorrowIps = quotaIPs ) if maxAllocateIPs < quotaIPs { diff --git a/cce-network-v2/pkg/endpoint/agent_endpoint_allocator.go b/cce-network-v2/pkg/endpoint/agent_endpoint_allocator.go index ad6bf88..9d43c40 100644 --- a/cce-network-v2/pkg/endpoint/agent_endpoint_allocator.go +++ b/cce-network-v2/pkg/endpoint/agent_endpoint_allocator.go @@ -451,7 +451,39 @@ func (e *EndpointAllocator) Restore() { for _, ip := range ips { _, err = e.dynamicIPAM.AllocateIPWithoutSyncUpstream(net.ParseIP(ip), ep.Namespace+"/"+ep.Name) if err != nil { - epLog.WithError(err).Error("AllocateIPWithoutSyncUpstream error") + epLog.WithError(err).Warnf("failed to restore ip %s, strict inspection mode will be activated", ip) + pod, err := e.podClient.Get(ep.Namespace, ep.Name) + if err == nil { + if pod.Status.Phase != corev1.PodRunning && pod.Status.Phase != corev1.PodPending { + epLog.Infof("pod is not running or pending, try to delete expired endpoint") + e.tryDeleteEndpointAfterPodDeleted(ep, true, epLog) + continue + } + if ep.Spec.ExternalIdentifiers == nil || ep.Spec.ExternalIdentifiers.K8sObjectID != string(pod.UID) { + epLog.Infof("externalIdentifiers is not equal to pod uid, try to delete expired endpoint") + e.tryDeleteEndpointAfterPodDeleted(ep, true, epLog) + continue + } + err = wait.PollImmediate(time.Millisecond*200, time.Minute, func() (done bool, err error) { + _, err = e.dynamicIPAM.AllocateIPWithoutSyncUpstream(net.ParseIP(ip), ep.Namespace+"/"+ep.Name) + if err != nil { + epLog.WithError(err).Warnf("failed to restore ip %s, will retry later", ip) + return false, nil + } + epLog.Infof("restore ip %s success", ip) + return true, nil + }) + if err != nil { + epLog.WithError(err).Fatal("failed to restore ip in strict inspection mode after 1 minute") + } + } else { + if kerrors.IsNotFound(err) { + epLog.Infof("pod not found, try to delete expired endpoint") + e.tryDeleteEndpointAfterPodDeleted(ep, false, epLog) + continue + } + epLog.WithError(err).Fatal("failed to restore ip in strict inspection mode, failed to get pod") + } } } }