Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify IPv6 Gateway Calculation #2703

Merged
merged 1 commit into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 1 addition & 12 deletions pkg/ipamd/ipamd.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,6 @@ type IPAMContext struct {
lastInsufficientCidrError time.Time
enableManageUntaggedMode bool
enablePodIPAnnotation bool
// For IPv6 Security Groups for Pods, the gateway address is cached in order to speedup CNI ADD
v6Gateway net.IP
}

// setUnmanagedENIs will rebuild the set of ENI IDs for ENIs tagged as "no_manage"
Expand Down Expand Up @@ -1059,14 +1057,13 @@ func (c *IPAMContext) setupENI(eni string, eniMetadata awsutils.ENIMetadata, isT
return errors.Wrapf(err, "Failed to allocate IPv6 Prefixes to Primary ENI")
}
} else {
var gw net.IP
// For other ENIs, set up the network
if eni != primaryENI {
subnetCidr := eniMetadata.SubnetIPv4CIDR
if c.enableIPv6 {
subnetCidr = eniMetadata.SubnetIPv6CIDR
}
gw, err = c.networkClient.SetupENINetwork(c.primaryIP[eni], eniMetadata.MAC, eniMetadata.DeviceNumber, subnetCidr)
err = c.networkClient.SetupENINetwork(c.primaryIP[eni], eniMetadata.MAC, eniMetadata.DeviceNumber, subnetCidr)
if err != nil {
// Failed to set up the ENI
errRemove := c.dataStore.RemoveENIFromDataStore(eni, true)
Expand All @@ -1083,14 +1080,6 @@ func (c *IPAMContext) setupENI(eni string, eniMetadata awsutils.ENIMetadata, isT
c.addENIsecondaryIPsToDataStore(eniMetadata.IPv4Addresses, eni)
c.addENIv4prefixesToDataStore(eniMetadata.IPv4Prefixes, eni)
} else {
// Cache the IPv6 gateway address to speed up CNI ADD. In IPv4, the gateway address is the .1 address for the subnet. In IPv6, the gateway
// address is a link-local address that is fixed for the lifetime of the instance and is consistent across instances in the same subnet.
c.v6Gateway = gw
// In strict mode, install gateway rule for ICMPv6 traffic. Note that this is a global rule, but installing/removing here is acceptable as
// IPv6 only supports attaching a trunk ENI, and trunk ENI is only attached once.
if err := c.networkClient.UpdateIPv6GatewayRule(&gw); err != nil {
return errors.Wrapf(err, "failed to install IPv6 gateway rule")
}
// This is a trunk ENI in IPv6 PD mode, so do not add IPs or prefixes to datastore
log.Infof("Found IPv6 trunk ENI having %d secondary IPs and %d Prefixes", len(eniMetadata.IPv6Addresses), len(eniMetadata.IPv6Prefixes))
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/ipamd/ipamd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1877,7 +1877,7 @@ func TestIPAMContext_setupENI(t *testing.T) {

newENIMetadata := getSecondaryENIMetadata()
m.awsutils.EXPECT().GetPrimaryENI().Return(primaryENIid)
m.network.EXPECT().SetupENINetwork(gomock.Any(), secMAC, secDevice, primarySubnet).Return(nil, errors.New("not able to set route 0.0.0.0/0 via 10.10.10.1 table 2"))
m.network.EXPECT().SetupENINetwork(gomock.Any(), secMAC, secDevice, primarySubnet).Return(errors.New("not able to set route 0.0.0.0/0 via 10.10.10.1 table 2"))

err = mockContext.setupENI(newENIMetadata.ENIID, newENIMetadata, false, false)
assert.Error(t, err)
Expand Down Expand Up @@ -1923,7 +1923,7 @@ func TestIPAMContext_setupENIwithPDenabled(t *testing.T) {

newENIMetadata := getSecondaryENIMetadata()
m.awsutils.EXPECT().GetPrimaryENI().Return(primaryENIid)
m.network.EXPECT().SetupENINetwork(gomock.Any(), secMAC, secDevice, primarySubnet).Return(nil, errors.New("not able to set route 0.0.0.0/0 via 10.10.10.1 table 2"))
m.network.EXPECT().SetupENINetwork(gomock.Any(), secMAC, secDevice, primarySubnet).Return(errors.New("not able to set route 0.0.0.0/0 via 10.10.10.1 table 2"))

err = mockContext.setupENI(newENIMetadata.ENIID, newENIMetadata, false, false)
assert.Error(t, err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/ipamd/rpc_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func (s *server) AddNetwork(ctx context.Context, in *rpc.AddNetworkRequest) (*rp
// For IPv6, the gateway is derived from the RA route on the primary ENI. The primary ENI is always in the same subnet as the trunk and branch ENI.
// For IPv4, the gateway is always the .1 address for the subnet CIDR.
if s.ipamContext.enableIPv6 {
gw = s.ipamContext.v6Gateway
gw = networkutils.GetIPv6Gateway()
} else {
gw = networkutils.GetIPv4Gateway(subnetCIDR)
}
Expand Down
21 changes: 3 additions & 18 deletions pkg/networkutils/mocks/network_mocks.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 32 additions & 41 deletions pkg/networkutils/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ type NetworkAPIs interface {
SetupHostNetwork(vpcCIDRs []string, primaryMAC string, primaryAddr *net.IP, enablePodENI bool,
v4Enabled bool, v6Enabled bool) error
// SetupENINetwork performs ENI level network configuration. Not needed on the primary ENI
SetupENINetwork(eniIP string, mac string, deviceNumber int, subnetCIDR string) (net.IP, error)
SetupENINetwork(eniIP string, mac string, deviceNumber int, subnetCIDR string) error
// UpdateHostIptablesRules updates the nat table iptables rules on the host
UpdateHostIptablesRules(vpcCIDRs []string, primaryMAC string, primaryAddr *net.IP, v4Enabled bool, v6Enabled bool) error
UseExternalSNAT() bool
Expand All @@ -160,7 +160,6 @@ type NetworkAPIs interface {
UpdateRuleListBySrc(ruleList []netlink.Rule, src net.IPNet) error
UpdateExternalServiceIpRules(ruleList []netlink.Rule, externalIPs []string) error
GetLinkByMac(mac string, retryInterval time.Duration) (netlink.Link, error)
UpdateIPv6GatewayRule(gw *net.IP) error
}

type linuxNetwork struct {
Expand Down Expand Up @@ -359,6 +358,13 @@ func (n *linuxNetwork) SetupHostNetwork(vpcv4CIDRs []string, primaryMAC string,
if err != nil && !containsNoSuchRule(err) {
return errors.Wrap(err, "ChangeLocalRulePriority: failed to delete priority 0 local rule")
}

// In IPv6 strict mode, ICMPv6 packets from the gateway must lookup in the local routing table so that branch interfaces can resolve their gateway.
if v6Enabled {
if err := n.createIPv6GatewayRule(); err != nil {
return errors.Wrapf(err, "failed to install IPv6 gateway rule")
}
}
}

return n.updateHostIptablesRules(vpcv4CIDRs, primaryMAC, primaryAddr, v4Enabled, v6Enabled)
Expand Down Expand Up @@ -954,23 +960,10 @@ func linkByMac(mac string, netLink netlinkwrapper.NetLink, retryInterval time.Du
}
}

// For IPv6, derive gateway address from primary ENI's RA route. Note that in IPv6, all ENIs must be in the same subnet.
func GetIPv6Gateway() (net.IP, error) {
primaryEni, err := netlink.LinkByName("eth0")
if err != nil {
return nil, errors.Wrapf(err, "GetIPv6Gateway failed to get primary ENI")
}
primaryEniRoutes, err := netlink.RouteList(primaryEni, unix.AF_INET6)
if err != nil {
return nil, errors.Wrapf(err, "GetIPv6Gateway failed to list primary ENI routes")
}
for _, route := range primaryEniRoutes {
if route.Table == mainRoutingTable && len(route.Gw) != 0 && route.Protocol.String() == "ra" {
log.Infof("Found IPv6 gateway: %s", route.Gw.String())
return route.Gw, nil
}
}
return nil, errors.Wrapf(err, "GetIPv6Gateway failed to find eth0 ra route")
// On AWS/VPC, the subnet gateway can always be reached at FE80:EC2::1
// https://aws.amazon.com/about-aws/whats-new/2022/11/ipv6-subnet-default-gateway-router-multiple-addresses/
func GetIPv6Gateway() net.IP {
return net.IP{0xfe, 0x80, 0x0e, 0xc2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
}

func GetIPv4Gateway(eniSubnetCIDR *net.IPNet) net.IP {
Expand All @@ -980,42 +973,40 @@ func GetIPv4Gateway(eniSubnetCIDR *net.IPNet) net.IP {
}

// SetupENINetwork adds default route to route table (eni-<eni_table>), so it does not need to be called on the primary ENI
func (n *linuxNetwork) SetupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCIDR string) (net.IP, error) {
func (n *linuxNetwork) SetupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCIDR string) error {
return setupENINetwork(eniIP, eniMAC, deviceNumber, eniSubnetCIDR, n.netLink, retryLinkByMacInterval, retryRouteAddInterval, n.mtu)
}

func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCIDR string, netLink netlinkwrapper.NetLink,
retryLinkByMacInterval time.Duration, retryRouteAddInterval time.Duration, mtu int) (net.IP, error) {
retryLinkByMacInterval time.Duration, retryRouteAddInterval time.Duration, mtu int) error {
if deviceNumber == 0 {
return nil, errors.New("setupENINetwork should never be called on the primary ENI")
return errors.New("setupENINetwork should never be called on the primary ENI")
}
tableNumber := deviceNumber + 1
log.Infof("Setting up network for an ENI with IP address %s, MAC address %s, CIDR %s and route table %d",
eniIP, eniMAC, eniSubnetCIDR, tableNumber)
link, err := linkByMac(eniMAC, netLink, retryLinkByMacInterval)
if err != nil {
return nil, errors.Wrapf(err, "setupENINetwork: failed to find the link which uses MAC address %s", eniMAC)
return errors.Wrapf(err, "setupENINetwork: failed to find the link which uses MAC address %s", eniMAC)
}

if err = netLink.LinkSetMTU(link, mtu); err != nil {
return nil, errors.Wrapf(err, "setupENINetwork: failed to set MTU to %d for %s", mtu, eniIP)
return errors.Wrapf(err, "setupENINetwork: failed to set MTU to %d for %s", mtu, eniIP)
}

if err = netLink.LinkSetUp(link); err != nil {
return nil, errors.Wrapf(err, "setupENINetwork: failed to bring up ENI %s", eniIP)
return errors.Wrapf(err, "setupENINetwork: failed to bring up ENI %s", eniIP)
}

isV6 := strings.Contains(eniSubnetCIDR, ":")
_, eniSubnetIPNet, err := net.ParseCIDR(eniSubnetCIDR)
if err != nil {
return nil, errors.Wrapf(err, "setupENINetwork: invalid IP CIDR block %s", eniSubnetCIDR)
return errors.Wrapf(err, "setupENINetwork: invalid IP CIDR block %s", eniSubnetCIDR)
}
// Get gateway IP address for ENI
var gw net.IP
if isV6 {
if gw, err = GetIPv6Gateway(); err != nil {
return nil, errors.Wrapf(err, "setupENINetwork: unable to get IPv6 gateway")
}
gw = GetIPv6Gateway()
} else {
gw = GetIPv4Gateway(eniSubnetIPNet)
}
Expand All @@ -1032,14 +1023,14 @@ func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCID
var addrs []netlink.Addr
addrs, err = netLink.AddrList(link, family)
if err != nil {
return nil, errors.Wrap(err, "setupENINetwork: failed to list IP address for ENI")
return errors.Wrap(err, "setupENINetwork: failed to list IP address for ENI")
}

for _, addr := range addrs {
if addr.IP.IsGlobalUnicast() {
log.Debugf("Deleting existing IP address %s", addr.String())
if err = netLink.AddrDel(link, &addr); err != nil {
return nil, errors.Wrap(err, "setupENINetwork: failed to delete IP addr from ENI")
return errors.Wrap(err, "setupENINetwork: failed to delete IP addr from ENI")
}
}
}
Expand All @@ -1051,7 +1042,7 @@ func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCID
}
log.Debugf("Adding IP address %s", eniAddr.String())
if err = netLink.AddrAdd(link, &netlink.Addr{IPNet: eniAddr}); err != nil {
return nil, errors.Wrap(err, "setupENINetwork: failed to add IP addr to ENI")
return errors.Wrap(err, "setupENINetwork: failed to add IP addr to ENI")
}

linkIndex := link.Attrs().Index
Expand Down Expand Up @@ -1082,7 +1073,7 @@ func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCID
for _, r := range routes {
err := netLink.RouteDel(&r)
if err != nil && !netlinkwrapper.IsNotExistsError(err) {
return nil, errors.Wrap(err, "setupENINetwork: failed to clean up old routes")
return errors.Wrap(err, "setupENINetwork: failed to clean up old routes")
}

err = retry.NWithBackoff(retry.NewSimpleBackoff(500*time.Millisecond, retryRouteAddInterval, 0.15, 2.0), maxRetryRouteAdd, func() error {
Expand All @@ -1094,7 +1085,7 @@ func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCID
return nil
})
if err != nil {
return gw, err
return err
}
}

Expand All @@ -1110,7 +1101,7 @@ func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCID
// eniSubnetIPNet was modified by GetIPv4Gateway, so the string must be parsed again
_, eniSubnetCIDRNet, err := net.ParseCIDR(eniSubnetCIDR)
if err != nil {
return gw, errors.Wrapf(err, "setupENINetwork: invalid IPv4 CIDR block: %s", eniSubnetCIDR)
return errors.Wrapf(err, "setupENINetwork: invalid IPv4 CIDR block: %s", eniSubnetCIDR)
}
defaultRoute = netlink.Route{
Dst: eniSubnetCIDRNet,
Expand All @@ -1121,30 +1112,30 @@ func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCID
}
if err := netLink.RouteDel(&defaultRoute); err != nil {
if !netlinkwrapper.IsNotExistsError(err) {
return gw, errors.Wrapf(err, "setupENINetwork: unable to delete default route %s for source IP %s", eniSubnetIPNet.String(), eniIP)
return errors.Wrapf(err, "setupENINetwork: unable to delete default route %s for source IP %s", eniSubnetIPNet.String(), eniIP)
}
}
return gw, nil
return nil
}

// For IPv6 strict mode, ICMPv6 packets from the gateway must lookup in the local routing table so that branch interfaces can resolve their gateway.
func (n *linuxNetwork) UpdateIPv6GatewayRule(gw *net.IP) error {
func (n *linuxNetwork) createIPv6GatewayRule() error {
gatewayRule := n.netLink.NewRule()
gatewayRule.Src = &net.IPNet{IP: *gw, Mask: net.CIDRMask(128, 128)}
gatewayRule.Src = &net.IPNet{IP: GetIPv6Gateway(), Mask: net.CIDRMask(128, 128)}
Copy link
Contributor

@anguslees anguslees Dec 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious: Was this sufficient and necessary, during your testing?

If I remember my AWS-internal discussion with the VPC folks leading up to this FE80:EC2::1 launch, the idea was that the gateway would respond to these additional addresses, but still use the original SLAAC address for traffic initiated from the gateway (eg router advertisements). This was an important detail, to avoid changing any user-visible behaviour.

I'm not sure what "can resolve their gateway" includes exactly. I think what you've done in this PR is break router-initiated RAs, but (continue to) allow explicit host-initiated NDs. If so, I think one of the following is true:

  1. sgpp needs ICMPv6, and this breaks it (RAs?), and we need to fix/revert this line. 👎
  2. sgpp needs ICMPv6, and works with this change (NDs? dhcp6?). Best answer if true, but something feels odd that we don't need all the other variants of gateway address too ($subnet::1, original SLAAC address). 👍
  3. sgpp no longer needs ICMPv6 at all now that we're hardcoding fe80:ec2::1 elsewhere, and ICMPv6 to/from gateway is broken but we don't care. 😐

If it's this third case, I think we should remove this createIPv6GatewayRule() function entirely.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this rule is required. The way that the route tables are setup for SGPP (same for IPv6 as is for IPv4), Neighbor Discovery initiated by the host needs this carve-out in order to receive the NA to its NS. We disable RA (accept_ra=0) on the VLAN interfaces and pod veth pair, so we need ND to succeed.

As for the address itself, I do see NA coming from the gateway using fe80:ec2::1. It may be safer to include the other variants, though.

gatewayRule.IPProto = unix.IPPROTO_ICMPV6
gatewayRule.Table = localRouteTable
gatewayRule.Priority = 0
gatewayRule.Family = unix.AF_INET6
if n.podSGEnforcingMode == sgpp.EnforcingModeStrict {
err := n.netLink.RuleAdd(gatewayRule)
if err != nil && !isRuleExistsError(err) {
return errors.Wrap(err, "UpdateIPv6GatewayRule: unable to create rule for IPv6 gateway")
return errors.Wrap(err, "createIPv6GatewayRule: unable to create rule for IPv6 gateway")
}
} else {
// Rule must be deleted when not in strict mode to support transitions.
err := n.netLink.RuleDel(gatewayRule)
if !netlinkwrapper.IsNotExistsError(err) {
return errors.Wrap(err, "UpdateIPv6GatewayRule: unable to delete rule for IPv6 gateway")
return errors.Wrap(err, "createIPv6GatewayRule: unable to delete rule for IPv6 gateway")
}
}
return nil
Expand Down
12 changes: 6 additions & 6 deletions pkg/networkutils/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func TestSetupENINetwork(t *testing.T) {

mockNetLink.EXPECT().RouteDel(gomock.Any()).Return(nil)

_, err = setupENINetwork(testEniIP, testMAC2, testTable, testEniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
err = setupENINetwork(testEniIP, testMAC2, testTable, testEniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
assert.NoError(t, err)
}

Expand Down Expand Up @@ -171,7 +171,7 @@ func TestSetupENIV6Network(t *testing.T) {
mockNetLink.EXPECT().RouteReplace(gomock.Any()).Return(nil)
mockNetLink.EXPECT().RouteDel(gomock.Any()).Return(nil)

_, err = setupENINetwork(testEniIP6, testMAC2, testTable, testEniV6Subnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
err = setupENINetwork(testEniIP6, testMAC2, testTable, testEniV6Subnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
assert.NoError(t, err)
}

Expand All @@ -185,15 +185,15 @@ func TestSetupENINetworkMACFail(t *testing.T) {
mockNetLink.EXPECT().LinkList().Return(nil, fmt.Errorf("simulated failure"))
}

_, err := setupENINetwork(testEniIP, testMAC2, testTable, testEniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
err := setupENINetwork(testEniIP, testMAC2, testTable, testEniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
assert.Errorf(t, err, "simulated failure")
}

func TestSetupENINetworkErrorOnPrimaryENI(t *testing.T) {
ctrl, mockNetLink, _, _, _ := setup(t)
defer ctrl.Finish()
deviceNumber := 0
_, err := setupENINetwork(testEniIP, testMAC2, deviceNumber, testEniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
err := setupENINetwork(testEniIP, testMAC2, deviceNumber, testEniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
assert.Error(t, err)
}

Expand All @@ -218,15 +218,15 @@ func TestUpdateIPv6GatewayRule(t *testing.T) {
mockNetLink.EXPECT().NewRule().Return(&icmpRule)
mockNetLink.EXPECT().RuleAdd(&icmpRule)

err := ln.UpdateIPv6GatewayRule(&testEniV6GatewayNet)
err := ln.createIPv6GatewayRule()
assert.NoError(t, err)

// Validate rule del in non-strict mode
ln.podSGEnforcingMode = sgpp.EnforcingModeStandard
mockNetLink.EXPECT().NewRule().Return(&icmpRule)
mockNetLink.EXPECT().RuleDel(&icmpRule)

err = ln.UpdateIPv6GatewayRule(&testEniV6GatewayNet)
err = ln.createIPv6GatewayRule()
assert.NoError(t, err)
}

Expand Down
Loading