diff --git a/README.md b/README.md index f8158c2f3c..78c279426a 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ Type: Integer as a String Default: 9001 -Used to configure the MTU size for attached ENIs. The valid range is from `576` to `9001`. +Used to configure the MTU size for attached ENIs. The valid range for IPv4 is from `576` to `9001`, while the valid range for IPv6 is from `1280` to `9001`. #### `AWS_VPC_K8S_CNI_EXTERNALSNAT` @@ -267,14 +267,14 @@ Default: empty Specify a comma-separated list of IPv4 CIDRs to exclude from SNAT. For every item in the list an `iptables` rule and off\-VPC IP rule will be applied. If an item is not a valid ipv4 range it will be skipped. This should be used when `AWS_VPC_K8S_CNI_EXTERNALSNAT=false`. -#### `POD_MTU` (v1.x.x+) +#### `POD_MTU` (v1.16.4+) Type: Integer as a String -*Note*: The default value is set to AWS_VPC_ENI_MTU, which defaults to 9001 if unset. +*Note*: If unset, the default value is derived from `AWS_VPC_ENI_MTU`, which defaults to `9001`. Default: 9001 -Used to configure the MTU size for pod virtual interfaces. The valid range is from `576` to `9001`. +Used to configure the MTU size for pod virtual interfaces. The valid range for IPv4 is from `576` to `9001`, while the valid range for IPv6 is from `1280` to `9001`. #### `WARM_ENI_TARGET` @@ -598,7 +598,7 @@ Setting `ANNOTATE_POD_IP` to `true` will allow IPAMD to add an annotation `vpc.a There is a known [issue](https://github.com/kubernetes/kubernetes/issues/39113) with kubelet taking time to update `Pod.Status.PodIP` leading to calico being blocked on programming the policy. Setting `ANNOTATE_POD_IP` to `true` will enable AWS VPC CNI plugin to add Pod IP as an annotation to the pod spec to address this race condition. -To annotate the pod with pod IP, you will have to add "patch" permission for pods resource in aws-node clusterrole. You can use the below command - +To annotate the pod with pod IP, you will have to add `patch` permission for pods resource in aws-node clusterrole. You can use the below command - ``` cat << EOF > append.yaml @@ -615,6 +615,8 @@ EOF kubectl apply -f <(cat <(kubectl get clusterrole aws-node -o yaml) append.yaml) ``` +NOTE: Adding `patch` permissions to the `aws-node` Daemonset increases the security scope for the plugin, so add this permission only after performing a proper security assessment of the tradeoffs. + #### `ENABLE_IPv4` (v1.10.0+) Type: Boolean as a String diff --git a/cmd/aws-vpc-cni/main.go b/cmd/aws-vpc-cni/main.go index 7e6a3a7b77..063d766fdd 100644 --- a/cmd/aws-vpc-cni/main.go +++ b/cmd/aws-vpc-cni/main.go @@ -66,7 +66,9 @@ const ( defaultAWSconflistFile = "/app/10-aws.conflist" tmpAWSconflistFile = "/tmp/10-aws.conflist" defaultVethPrefix = "eni" - defaultMTU = "9001" + defaultMTU = 9001 + minMTUv4 = 576 + minMTUv6 = 1280 defaultEnablePodEni = false defaultPodSGEnforcingMode = "strict" defaultPluginLogFile = "/var/log/aws-routed-eni/plugin.log" @@ -279,8 +281,8 @@ func generateJSON(jsonFile string, outFile string, getPrimaryIP func(ipv4 bool) } } vethPrefix := utils.GetEnv(envVethPrefix, defaultVethPrefix) - // Derive pod MTU from ENI MTU by default - eniMTU := utils.GetEnv(envEniMTU, defaultMTU) + // Derive pod MTU from ENI MTU by default (note that values have already been validated) + eniMTU := utils.GetEnv(envEniMTU, strconv.Itoa(defaultMTU)) // If pod MTU environment variable is set, overwrite ENI MTU. podMTU := utils.GetEnv(envPodMTU, eniMTU) podSGEnforcingMode := utils.GetEnv(envPodSGEnforcingMode, defaultPodSGEnforcingMode) @@ -389,6 +391,11 @@ func validateEnvVars() bool { return false } + // Validate MTU value for ENIs and pods + if !validateMTU(envEniMTU) || !validateMTU(envPodMTU) { + return false + } + prefixDelegationEn := utils.GetBoolAsStringEnvVar(envEnPrefixDelegation, defaultEnPrefixDelegation) warmIPTarget := utils.GetEnv(envWarmIPTarget, "0") warmPrefixTarget := utils.GetEnv(envWarmPrefixTarget, "0") @@ -402,6 +409,29 @@ func validateEnvVars() bool { return true } +func validateMTU(envVar string) bool { + // Validate MTU range based on IP address family + enabledIPv6 := utils.GetBoolAsStringEnvVar(envEnIPv6, defaultEnableIPv6) + + mtu, err, input := utils.GetIntFromStringEnvVar(envVar, defaultMTU) + if err != nil { + log.Errorf("%s MUST be a valid integer. %s is invalid", envVar, input) + return false + } + if enabledIPv6 { + if mtu < minMTUv6 || mtu > defaultMTU { + log.Errorf("%s cannot be less than 1280 or greater than 9001 in IPv6. %s is invalid", envVar, input) + return false + } + } else { + if mtu < minMTUv4 || mtu > defaultMTU { + log.Errorf("%s cannot be less than 576 or greater than 9001 in IPv4. %s is invalid", envVar, input) + return false + } + } + return true +} + func main() { os.Exit(_main()) } diff --git a/cmd/aws-vpc-cni/main_test.go b/cmd/aws-vpc-cni/main_test.go index 477fa927b0..1b7387f3c9 100644 --- a/cmd/aws-vpc-cni/main_test.go +++ b/cmd/aws-vpc-cni/main_test.go @@ -47,3 +47,40 @@ func TestGenerateJSONPlusBandwidthAndTuning(t *testing.T) { err := generateJSON(awsConflist, devNull, getPrimaryIPMock) assert.NoError(t, err) } + +func TestMTUValidation(t *testing.T) { + // By default, ENI MTU and pod MTU should be valid + assert.True(t, validateMTU(envEniMTU)) + assert.True(t, validateMTU(envPodMTU)) + + // Non-integer values should fail + _ = os.Setenv(envEniMTU, "true") + _ = os.Setenv(envPodMTU, "abc") + assert.False(t, validateMTU(envEniMTU)) + assert.False(t, validateMTU(envPodMTU)) + + // Integer values within IPv4 range should succeed + _ = os.Setenv(envEniMTU, "5000") + _ = os.Setenv(envPodMTU, "3000") + assert.True(t, validateMTU(envEniMTU)) + assert.True(t, validateMTU(envPodMTU)) + + // Integer values outside IPv4 range should fail + _ = os.Setenv(envEniMTU, "10000") + _ = os.Setenv(envPodMTU, "500") + assert.False(t, validateMTU(envEniMTU)) + assert.False(t, validateMTU(envPodMTU)) + + // Integer values within IPv6 range should succeed + _ = os.Setenv(envEnIPv6, "true") + _ = os.Setenv(envEniMTU, "5000") + _ = os.Setenv(envPodMTU, "3000") + assert.True(t, validateMTU(envEniMTU)) + assert.True(t, validateMTU(envPodMTU)) + + // Integer values outside IPv6 range should fail + _ = os.Setenv(envEniMTU, "10000") + _ = os.Setenv(envPodMTU, "1200") + assert.False(t, validateMTU(envEniMTU)) + assert.False(t, validateMTU(envPodMTU)) +} diff --git a/cmd/routed-eni-cni-plugin/cni.go b/cmd/routed-eni-cni-plugin/cni.go index a0619920cb..0e38f65b23 100644 --- a/cmd/routed-eni-cni-plugin/cni.go +++ b/cmd/routed-eni-cni-plugin/cni.go @@ -144,7 +144,8 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap return errors.Wrap(err, "add cmd: failed to load k8s config from arg") } - mtu := networkutils.GetEthernetMTU(conf.MTU) + // Derive pod MTU. Note that the value has already been validated. + mtu := networkutils.GetPodMTU(conf.MTU) log.Debugf("MTU value set is %d:", mtu) // Set up a connection to the ipamD server. diff --git a/pkg/networkutils/network.go b/pkg/networkutils/network.go index 4189c9cbe0..00ff6af202 100644 --- a/pkg/networkutils/network.go +++ b/pkg/networkutils/network.go @@ -29,6 +29,7 @@ import ( "github.com/coreos/go-iptables/iptables" "github.com/aws/amazon-vpc-cni-k8s/pkg/sgpp" + "github.com/aws/amazon-vpc-cni-k8s/utils" "k8s.io/apimachinery/pkg/util/sets" @@ -115,7 +116,9 @@ const ( defaultConnmark = 0x80 // envMTU gives a way to configure the MTU size for new ENIs attached. Range is from 576 to 9001. - envMTU = "AWS_VPC_ENI_MTU" + envMTU = "AWS_VPC_ENI_MTU" + defaultMTU = 9001 + minMTUv4 = 576 // envVethPrefix is the environment variable to configure the prefix of the host side veth device names envVethPrefix = "AWS_VPC_K8S_CNI_VETHPREFIX" @@ -126,10 +129,6 @@ const ( // envEnIpv6Egress is the environment variable to enable IPv6 egress support on EKS v4 cluster envEnIpv6Egress = "ENABLE_V6_EGRESS" - // Range of MTU for each ENI and veth pair. Defaults to maximumMTU - minimumMTU = 576 - maximumMTU = 9001 - // number of retries to add a route maxRetryRouteAdd = 5 @@ -198,7 +197,7 @@ func New() NetworkAPIs { typeOfSNAT: typeOfSNAT(), nodePortSupportEnabled: nodePortSupportEnabled(), mainENIMark: getConnmark(), - mtu: GetEthernetMTU(""), + mtu: GetEthernetMTU(), vethPrefix: getVethPrefixName(), podSGEnforcingMode: sgpp.LoadEnforcingModeFromEnv(), @@ -849,7 +848,7 @@ func GetConfigForDebug() map[string]interface{} { envExcludeSNATCIDRs: parseCIDRString(envExcludeSNATCIDRs), envExternalSNAT: useExternalSNAT(), envExternalServiceCIDRs: parseCIDRString(envExternalServiceCIDRs), - envMTU: GetEthernetMTU(""), + envMTU: GetEthernetMTU(), envVethPrefix: getVethPrefixName(), envNodePortSupport: nodePortSupportEnabled(), envRandomizeSNAT: typeOfSNAT(), @@ -1293,32 +1292,25 @@ func (n *linuxNetwork) UpdateExternalServiceIpRules(ruleList []netlink.Rule, ext return nil } -// GetEthernetMTU gets the MTU setting from AWS_VPC_ENI_MTU if set, or takes the passed in string. Defaults to 9001 if not set. -func GetEthernetMTU(envMTUValue string) int { - inputStr, found := os.LookupEnv(envMTU) - if found { - envMTUValue = inputStr +// GetEthernetMTU returns the MTU value to program for ENIs. Note that the value was already validated during container initialization. +func GetEthernetMTU() int { + mtu, _, _ := utils.GetIntFromStringEnvVar(envMTU, defaultMTU) + return mtu +} + +// GetPodMTU validates the pod MTU value. If an invalid value is passed, the default is used. +func GetPodMTU(podMTU string) int { + mtu, err := strconv.Atoi(podMTU) + if err != nil { + log.Errorf("Failed to parse pod MTU %s: %v", mtu, err) + return defaultMTU } - if envMTUValue != "" { - mtu, err := strconv.Atoi(envMTUValue) - if err != nil { - log.Errorf("Failed to parse %s will use %d: %v", envMTU, maximumMTU, err.Error()) - return maximumMTU - } - // Restrict range between jumbo frame and the maximum required size to assemble. - // Details in https://tools.ietf.org/html/rfc879 and - // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/network_mtu.html - if mtu < minimumMTU { - log.Errorf("%s is too low: %d. Will use %d", envMTU, mtu, minimumMTU) - return minimumMTU - } - if mtu > maximumMTU { - log.Errorf("%s is too high: %d. Will use %d", envMTU, mtu, maximumMTU) - return maximumMTU - } - return mtu + + // Only IPv4 bounds can be enforced, but note that the conflist value is already validated during container initialization. + if mtu < minMTUv4 || mtu > defaultMTU { + return defaultMTU } - return maximumMTU + return mtu } // getVethPrefixName gets the name prefix of the veth devices based on the AWS_VPC_K8S_CNI_VETHPREFIX environment variable diff --git a/pkg/networkutils/network_test.go b/pkg/networkutils/network_test.go index 906b060a4b..fd5950ab84 100644 --- a/pkg/networkutils/network_test.go +++ b/pkg/networkutils/network_test.go @@ -419,19 +419,21 @@ func TestSetupHostNetworkNodePortDisabledAndSNATEnabled(t *testing.T) { }, mockIptables.(*mock_iptables.MockIptables).DataplaneState) } -func TestLoadMTUFromEnvTooLow(t *testing.T) { - os.Setenv(envMTU, "1") - assert.Equal(t, GetEthernetMTU(""), minimumMTU) -} +func TestGetEthernetMTU(t *testing.T) { + assert.Equal(t, GetEthernetMTU(), defaultMTU) -func TestLoadMTUFromEnv1500(t *testing.T) { - os.Setenv(envMTU, "1500") - assert.Equal(t, GetEthernetMTU(""), 1500) + os.Setenv(envMTU, "5000") + assert.Equal(t, GetEthernetMTU(), 5000) } -func TestLoadMTUFromEnvTooHigh(t *testing.T) { - os.Setenv(envMTU, "65536") - assert.Equal(t, GetEthernetMTU(""), maximumMTU) +func TestGetPodMTU(t *testing.T) { + // For any invalid value, return the default MTU + assert.Equal(t, GetPodMTU("abc"), defaultMTU) + assert.Equal(t, GetPodMTU("500"), defaultMTU) + assert.Equal(t, GetPodMTU("10000"), defaultMTU) + + // For any valid value, return the value + assert.Equal(t, GetPodMTU("8000"), 8000) } func TestLoadExcludeSNATCIDRsFromEnv(t *testing.T) {