thimslugga · August 6, 2024 03:20
diff --git a/lib.sh b/lib.sh
 #!/bin/bash
 #
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You may
 # not use this file except in compliance with the License. A copy of the
 # License is located at
 #
 #      http://aws.amazon.com/apache2.0/
 #
 # or in the "license" file accompanying this file. This file is distributed
 # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

 # These should be set by the calling program
 declare ether
 declare unitdir
 declare lockdir
 declare reload_flag

 declare -r imds_endpoints=("http://169.254.169.254/latest" "http://[fd00:ec2::254]/latest")
 declare -r imds_token_path="api/token"
 declare -r syslog_facility="user"
 declare -r syslog_tag="ec2net"
 declare -i -r rule_base=10000

 # Systemd installs routes with a metric of 1024 by default.  We
 # override to a lower metric to ensure that our fully configured
 # interfaces are preferred over those in the process of being
 # configured.
 declare -i -r metric_base=512
 declare imds_endpoint imds_token

 get_token() {
    # try getting a token early, using each endpoint in
    # turn. Whichever endpoint responds will be used for the rest of
    # the IMDS API calls.  On initial interface setup, we'll retry
    # this operation for up to 30 seconds, but on subsequent
    # invocations we avoid retrying
    local deadline
    deadline=$(date -d "now+30 seconds" +%s)
    local old_opts=$-
    while [ "$(date +%s)" -lt $deadline ]; do
        for ep in "${imds_endpoints[@]}"; do
            set +e
            imds_token=$(curl --max-time 5 --connect-timeout 0.15 -s --fail \
                              -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" ${ep}/${imds_token_path})
            [[ $old_opts = *e* ]] && set -e
            if [ -n "$imds_token" ]; then
                debug "Got IMDSv2 token from ${ep}"
                imds_endpoint=$ep
                return
            fi
        done
        if [ ! -v EC2_IF_INITIAL_SETUP ]; then
            break
        fi
        sleep 0.5
    done
 }

 log() {
    local priority
    priority=$1 ; shift
    logger --id=$$ --priority "${syslog_facility}.${priority}" --tag "$syslog_tag" "$@"
 }

 # debug-level messages
 debug() {
    log debug "$@"
 }

 # informational messages
 info() {
    log info "$@"
 }

 # warning conditions
 warn() {
    log warn "$@"
 }

 # error conditions
 error() {
    log err "$@"
 }

 get_meta() {
    local key=$1
    local max_tries=${2:-10}
    local -i attempts=0 ms_per_backoff=100 backoff=0
    debug "[get_meta] Querying IMDS for ${key}"

    get_token

    local url="${imds_endpoint}/meta-data/${key}"
    local meta rc
    while [ $attempts -lt $max_tries ]; do
        meta=$(curl -s --max-time 5 -H "X-aws-ec2-metadata-token:${imds_token}" -f "$url")
        rc=$?
        if [ $rc -eq 0 ]; then
            echo "$meta"
            return 0
        fi
        if [ ! -v EC2_IF_INITIAL_SETUP ]; then
            return 1
        else
            attempts+=1
            backoff=$((attempts*ms_per_backoff))
            sleep $((backoff/1000)).$((backoff%1000))
        fi
    done
    return 1
 }

 get_imds() {
    local key=$1
    local max_tries=${2:-10}
    get_meta $key $max_tries
 }

 get_iface_imds() {
    local mac=$1
    local key=$2
    local max_tries=${3:-10}
    get_imds network/interfaces/macs/${mac}/${key} $max_tries
 }

 _install_and_reload() {
    local src=$1
    local dest=$2
    if [ -e "$dest" ]; then
        if [ "$(md5sum < $dest)" = "$(md5sum < $src)" ]; then
            # The config is unchanged since last run. Nothing left to do:
            rm "$src"
            echo 0
        else
            # The file content has changed, we need to reload:
            mv "$src" "$dest"
            echo 1
        fi
        return
    fi

    # If we're here then we're creating a new config file
    if [ "$(stat --format=%s $src)" -gt 0 ]; then
        mv "$src" "$dest"
        echo 1
        return
    fi
    rm "$src"
    echo 0
 }

 ipv6_disabled() {
  # Linux kernel tunable for ipv6: 0 = false, 1 = true
  # returns: false = enabled, true = disabled
  if grep -Pqs '^\h*1\b' /sys/module/ipv6/parameters/{disable,disable_ipv6} \
  	/proc/sys/net/ipv6/conf/{default,all}/disable_ipv6;
  then
    true
  else
    false
  fi
 }

 create_ipv4_aliases() {
    local iface=$1
    local mac=$2
    local addresses
    subnet_supports_ipv4 "$iface" || return 0
    addresses=$(get_iface_imds $mac local-ipv4s | tail -n +2 | sort)
    local drop_in_dir="${unitdir}/70-${iface}.network.d"
    mkdir -p "$drop_in_dir"
    local file="$drop_in_dir/ec2net_alias.conf"
    local work="${file}.new"
    touch "$work"

    for a in $addresses; do
        cat <<EOF >> "$work"
 [Address]
 Address=${a}/32
 AddPrefixRoute=false

 EOF
    done
    _install_and_reload "$work" "$file"
 }

 subnet_supports_ipv4() {
    local iface=$1
    if [ -z "$iface" ]; then
        error "${FUNCNAME[0]} called without an interface"
        return 1
    fi
    ! ip -4 addr show dev "$iface" scope global | \
        sed -n -E 's,^.*inet (\S+).*,\1,p' | grep -E -q '^169\.254\.'
 }

 subnet_supports_ipv6() {
    local iface=$1
    if [ -z "$iface" ]; then
        error "${FUNCNAME[0]} called without an interface"
        return 1
    fi
    ip -6 addr show dev "$iface" scope global | grep -q inet6
 }

 subnet_prefixroutes() {
    local ether=$1
    local family=${2:-ipv4}
    if [ -z "$ether" ]; then
        err "${FUNCNAME[0]} called without an MAC address"
        return 1
    fi
    case "$family" in
        ipv4)
            get_iface_imds "$ether" "subnet-${family}-cidr-block"
            ;;
        ipv6)
            get_iface_imds "$ether" "subnet-${family}-cidr-blocks"
            ;;
    esac
 }

 create_rules() {
    local iface=$1
    local device_number=$2
    local network_card=$3
    local family=$4
    local addrs prefixes
    local local_addr_key subnet_pd_key
    local drop_in_dir="${unitdir}/70-${iface}.network.d"
    mkdir -p "$drop_in_dir"

    local -i ruleid=$((device_number+rule_base+100*network_card))

    case $family in
        4)
            if ! subnet_supports_ipv4 $iface; then
                return 0
            fi
            local_addr_key=local-ipv4s
            subnet_pd_key=ipv4-prefix
            ;;
        6)
            if ! subnet_supports_ipv6 $iface; then
                return 0
            fi
            local_addr_key=ipv6s
            subnet_pd_key=ipv6-prefix
            ;;
        *)
            error "unable to determine protocol"
            return 1
            ;;
    esac

    # We'd like to retry here, but we can't distinguish between an
    # IMDS failure, a propagation delay, or a legitimately empty
    # response.
    addrs=$(get_iface_imds ${ether} ${local_addr_key} || true)

    # don't fail or retry prefix retrieval. IMDS currently returns an
    # error, rather than an empty response, if no prefixes are
    # assigned, so we are unable to distinguish between a service
    # error and a successful but empty response
    prefixes=$(get_iface_imds ${ether} ${subnet_pd_key} 1 || true)

    local source
    local file="$drop_in_dir/ec2net_policy_${family}.conf"
    local work="${file}.new"
    touch "$work"

    for source in $addrs $prefixes; do
        cat <<EOF >> "$work"
 [RoutingPolicyRule]
 From=${source}
 Priority=${ruleid}
 Table=${ruleid}

 EOF
    done
    _install_and_reload "$work" "$file"
 }

 create_if_overrides() {
    local iface="$1"; test -n "$iface" || { echo "Invalid iface at $LINENO" >&2 ; exit 1; }
    local -i device_number="$2"; test -n "$device_number" || { echo "Invalid device_number at $LINENO" >&2 ; exit 1; }
    local -i network_card="$3"; test -n "$network_card" || { echo "Invalid network_card at $LINENO" >&2 ; exit 1; }
    local ether="$4"; test -n "$ether" || { echo "Invalid ether at $LINENO" >&2 ; exit 1; }
    local cfgfile="$5"; test -n "$cfgfile" || { echo "Invalid cfgfile at $LINENO" >&2 ; exit 1; }

    local cfgdir="${cfgfile}.d"
    local dropin="${cfgdir}/eni.conf"
    local -i metric=$((metric_base+100*network_card+device_number))
    local -i tableid=$((rule_base+100*network_card+device_number))

    mkdir -p "$cfgdir"
    cat <<EOF > "${dropin}.tmp"
 # Configuration for ${iface} generated by policy-routes@${iface}.service
 [Match]
 MACAddress=${ether}
 [Network]
 DHCP=yes

 [DHCPv4]
 RouteMetric=${metric}
 UseRoutes=true
 UseGateway=true

 [IPv6AcceptRA]
 RouteMetric=${metric}
 UseGateway=true

 EOF

    cat <<EOF >> "${dropin}.tmp"
 [Route]
 Table=${tableid}
 Gateway=_ipv6ra

 EOF

    if ! ipv6_disabled; then
        for dest in $(subnet_prefixroutes "$ether" ipv6); do
            cat <<EOF >> "${dropin}.tmp"
 [Route]
 Table=${tableid}
 Destination=${dest}

 EOF
        done
    fi

    if subnet_supports_ipv4 "$iface"; then
        # if not in a v6-only network, add IPv4 routes to the private table
        cat <<EOF >> "${dropin}.tmp"
 [Route]
 Gateway=_dhcp4
 Table=${tableid}

 EOF
        local dest
        for dest in $(subnet_prefixroutes "$ether" ipv4); do
            cat <<EOF >> "${dropin}.tmp"
 [Route]
 Table=${tableid}
 Destination=${dest}

 EOF
        done
    fi
    
    mv "${dropin}.tmp" "$dropin"
    echo 1
 }

 add_altnames() {
    local iface=$1
    local ether=$2
    local device_number=$3
    local network_card=$4
    local eni_id
    eni_id=$(get_iface_imds "$ether" interface-id)
    # Interface altnames can also be added using systemd .link files.
    # However, in order to use them, we need to wait until a
    # systemd-networkd reload operation completes and then trigger a
    # udev "move" event.  We avoid that overhead by adding the
    # altnames directly using ip(8).
    if [ -n "$eni_id" ] &&
           ! ip link show dev "$iface" | grep -q -E "altname\s+${eni_id}"; then
        ip link property add dev "$iface" altname "$eni_id" || true
    fi
    local device_number_alt="device-number-${device_number}"
    if [ -n "$network_card" ]; then
        # On instance types that don't support a network-card key, we
        # won't append a value here.  A value of zero would be
        # appropriate, but would be a visible change to the interface
        # configuration on these instance types and could disrupt
        # existing automation.
        device_number_alt="${device_number_alt}.${network_card}"
    fi
    if [ -n "$device_number" ] &&
           ! ip link show dev "$device_number_alt" > /dev/null 2>&1; then
        ip link property add dev "$iface" altname "${device_number_alt}" || true
    fi
 }

 create_interface_config() {
    local iface=$1
    local device_number=$2
    local network_card=$3
    local ether=$4

    local libdir=/usr/lib/systemd/network
    local defconfig="${libdir}/80-ec2.network"

    local -i retval=0

    local cfgfile="${unitdir}/70-${iface}.network"
    if [ -e "$cfgfile" ] &&
           [ ! -v EC2_IF_INITIAL_SETUP ]; then
        debug "Using existing cfgfile ${cfgfile}"
        echo $retval
        return
    fi

    debug "Linking $cfgfile to $defconfig"
    mkdir -p "$unitdir"
    ln -sf "$defconfig" "$cfgfile"
    retval+=$(create_if_overrides "$iface" "$device_number" "$network_card" "$ether" "$cfgfile")
    add_altnames "$iface" "$ether" "$device_number" "$network_card"
    echo $retval
 }

 # The primary interface is defined as the interface whose MAC address
 # is in the top-level `mac` key.  It will always have device-number 0
 # and network-card 0.  It gets unique treatment in a few areas.
 _is_primary_interface() {
    local ether default_mac
    ether="$1"
    default_mac=$(get_imds mac)
    [ "$ether" = "$default_mac" ]
 }

 # device-number, which represents the DeviceIndex field in an EC2
 # NetworkInterfaceAttachment object, is not guaranteed to have
 # propagated to IMDS by the time a hot-plugged interface is visible to
 # the instance.  Further complicating things, IMDS returns 0 for the
 # device-number before propagation is complete, which is a valid value
 # and represents the instance's primary interface.  We cope with this
 # by ensuring that the only interface for which we return 0 as the
 # device-number is the one whose MAC address matches the instance's
 # top-level "mac" field, which is static and guaranteed to be
 # available as soon as the instance launches.
 _get_device_number() {
    local iface ether network_card_index
    iface="$1"
    ether="$2"
    network_card_index=${3:-0}

    if _is_primary_interface "$ether"; then
        echo 0 ; return 0
    fi

    local -i maxtries=60 ntries=0
    for (( ntries = 0; ntries < maxtries; ntries++ )); do
        device_number=$(get_iface_imds "$ether" device-number 1)
        # if either the device number or the card index are nonzero,
        # then we treat the value returned as valid.  Zero values for
        # both is only valid for the primary interface, which we've
        # already concluded is not this one.
        if [ $device_number -ne 0 ] || [ $network_card_index -ne 0 ]; then
            echo "$device_number"
            return 0
        else
            sleep 0.1
        fi
    done
    error "Unable to identify device-number for $iface after $ntries attempts"
    echo -1
    return 1
 }

 # print the network-card IMDS value for the given interface
 # NOTE: On many instance types, this value is not defined.  This
 # function will print the empty string on those instances.  On
 # instances where it is defined, it will be a numeric value.
 _get_network_card() {
    local iface ether network_card
    iface="$1"
    ether="$2"

    if _is_primary_interface "$ether"; then
        echo 0 ; return 0
    fi
    network_card=$(get_iface_imds "$ether" network-card)
    echo ${network_card}
 }


 # Interfaces get configured with addresses and routes from
 # DHCP. Routes are inserted in the main table with metrics based on
 # their physical location (slot ID) to ensure deterministic route
 # ordering.  Interfaces also get policy routing rules based on source
 # address matching and ensuring that all egress traffic with one of
 # the interface's IPs (primary or secondary, IPv4 or IPv6, including
 # addresses from delegated prefixes) will be routing according to an
 # interface-specific routing table.
 setup_interface() {
    local iface ether
    local -i device_number network_card rc
    iface=$1
    ether=$2

    network_card=$(_get_network_card "$iface" "$ether")
    device_number=$(_get_device_number "$iface" "$ether" "$network_card")
    rc=$?
    if [ $rc -ne 0 ]; then
        error "Unable to identify device-number for $iface in IMDS"
        exit 1
    fi

    # Newly provisioned resources (new ENI attachments) take some
    # time to be fully reflected in IMDS. In that case, we poll
    # for a period of time to ensure we've captured all the
    # sources needed for policy routing.  When refreshing an
    # existing ENI attachment's configuration, we skip the
    # polling.
    local -i deadline
    deadline=$(date -d "now+30 seconds" +%s)
    while [ "$(date +%s)" -lt $deadline ]; do
        local -i changes=0

        changes+=$(create_interface_config "$iface" "$device_number" "$network_card" "$ether")
        for family in 4 6; do
            if ! _is_primary_interface "$ether"; then
                # We only create rules for secondary interfaces so
                # external tools that modify the main route table can
                # still communicate with the host's primary IPs.  For
                # example, considering a host with address 10.1.2.3 on
                # ens5 (device-number-0) and a container communicating
                # on a docker0 bridge interface, the expectation is
                # that the container can communicate with 10.1.2.3 in
                # both directions.  If we install policy rules,
                # they'll redirect the return traffic out ens5 rather
                # than docker0, effectively blackholing it.
                # https://github.com/amazonlinux/amazon-ec2-net-utils/issues/97
                changes+=$(create_rules "$iface" "$device_number" "$network_card" $family)
            fi
        done
        changes+=$(create_ipv4_aliases $iface $ether)

        if [ ! -v EC2_IF_INITIAL_SETUP ] ||
               [ "$changes" -gt 0 ]; then
            break
        fi
    done
    echo $changes
 }

 # All instances of this process that may reconfigure networkd register
 # themselves as such. When exiting, they'll reload networkd only if
 # they're the registered process running.
 maybe_reload_networkd() {
    rm -f "${lockdir}/${iface}"
    if rmdir "$lockdir" 2> /dev/null; then
        if [ -e "$reload_flag" ]; then
            rm -f "$reload_flag" 2> /dev/null
            networkctl reload
            info "Reloaded networkd"
        else
            debug "No networkd reload needed"
        fi
    else
        debug "Deferring networkd reload to another process"
    fi
 }


 register_networkd_reloader() {
    local -i registered=1 cnt=0
    local -i max=10000
    local -r lockfile="${lockdir}/${iface}"
    local old_opts=$-

    # Disable -o errexit in the following block so we can capture
    # nonzero exit codes from a redirect without considering them
    # fatal errors
    set +e
    while [ $cnt -lt $max ]; do
        cnt+=1
        mkdir -p "$lockdir"
        trap 'debug "Called trap" ; maybe_reload_networkd' EXIT
        # If the redirect fails, most likely because the target file
        # already exists and -o noclobber is in effect, $? will be set
        # nonzero.  If it succeeds, it is set to 0
        echo $$ > "${lockfile}"
 	# shellcheck disable=SC2320
        registered=$?
        [ $registered -eq 0 ] && break
        sleep 0.1
        if (( $cnt % 100 == 0 )); then
            warn "Unable to lock ${iface} after ${cnt} tries."
        fi
    done
    # re-enable -o errexit if it had originally been set
    [[ $old_opts = *e* ]] && set -e

    # If registered is still nonzero when we get here, we have failed
    # to create the lock.  Log this and exit.
    if [ $registered -ne 0 ]; then
        local msg="Unable to lock configuration for ${iface}."
        error "$(printf "%s Check pid %d", "$msg", "$(cat "${lockfile}")")"
        exit 1
    fi
 }
	#!/bin/bash
	#
	# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License"). You may
	# not use this file except in compliance with the License. A copy of the
	# License is located at
	#
	# http://aws.amazon.com/apache2.0/
	#
	# or in the "license" file accompanying this file. This file is distributed
	# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
	# express or implied. See the License for the specific language governing
	# permissions and limitations under the License.

	# These should be set by the calling program
	declare ether
	declare unitdir
	declare lockdir
	declare reload_flag

	declare -r imds_endpoints=("http://169.254.169.254/latest" "http://[fd00:ec2::254]/latest")
	declare -r imds_token_path="api/token"
	declare -r syslog_facility="user"
	declare -r syslog_tag="ec2net"
	declare -i -r rule_base=10000

	# Systemd installs routes with a metric of 1024 by default. We
	# override to a lower metric to ensure that our fully configured
	# interfaces are preferred over those in the process of being
	# configured.
	declare -i -r metric_base=512
	declare imds_endpoint imds_token

	get_token() {
	# try getting a token early, using each endpoint in
	# turn. Whichever endpoint responds will be used for the rest of
	# the IMDS API calls. On initial interface setup, we'll retry
	# this operation for up to 30 seconds, but on subsequent
	# invocations we avoid retrying
	local deadline
	deadline=$(date -d "now+30 seconds" +%s)
	local old_opts=$-
	while [ "$(date +%s)" -lt $deadline ]; do
	for ep in "${imds_endpoints[@]}"; do
	set +e
	imds_token=$(curl --max-time 5 --connect-timeout 0.15 -s --fail \
	-X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" ${ep}/${imds_token_path})
	[[ $old_opts = e ]] && set -e
	if [ -n "$imds_token" ]; then
	debug "Got IMDSv2 token from ${ep}"
	imds_endpoint=$ep
	return
	fi
	done
	if [ ! -v EC2_IF_INITIAL_SETUP ]; then
	break
	fi
	sleep 0.5
	done
	}

	log() {
	local priority
	priority=$1 ; shift
	logger --id=$$ --priority "${syslog_facility}.${priority}" --tag "$syslog_tag" "$@"
	}

	# debug-level messages
	debug() {
	log debug "$@"
	}

	# informational messages
	info() {
	log info "$@"
	}

	# warning conditions
	warn() {
	log warn "$@"
	}

	# error conditions
	error() {
	log err "$@"
	}

	get_meta() {
	local key=$1
	local max_tries=${2:-10}
	local -i attempts=0 ms_per_backoff=100 backoff=0
	debug "[get_meta] Querying IMDS for ${key}"

	get_token

	local url="${imds_endpoint}/meta-data/${key}"
	local meta rc
	while [ $attempts -lt $max_tries ]; do
	meta=$(curl -s --max-time 5 -H "X-aws-ec2-metadata-token:${imds_token}" -f "$url")
	rc=$?
	if [ $rc -eq 0 ]; then
	echo "$meta"
	return 0
	fi
	if [ ! -v EC2_IF_INITIAL_SETUP ]; then
	return 1
	else
	attempts+=1
	backoff=$((attempts*ms_per_backoff))
	sleep $((backoff/1000)).$((backoff%1000))
	fi
	done
	return 1
	}

	get_imds() {
	local key=$1
	local max_tries=${2:-10}
	get_meta $key $max_tries
	}

	get_iface_imds() {
	local mac=$1
	local key=$2
	local max_tries=${3:-10}
	get_imds network/interfaces/macs/${mac}/${key} $max_tries
	}

	_install_and_reload() {
	local src=$1
	local dest=$2
	if [ -e "$dest" ]; then
	if [ "$(md5sum < $dest)" = "$(md5sum < $src)" ]; then
	# The config is unchanged since last run. Nothing left to do:
	rm "$src"
	echo 0
	else
	# The file content has changed, we need to reload:
	mv "$src" "$dest"
	echo 1
	fi
	return
	fi

	# If we're here then we're creating a new config file
	if [ "$(stat --format=%s $src)" -gt 0 ]; then
	mv "$src" "$dest"
	echo 1
	return
	fi
	rm "$src"
	echo 0
	}

	ipv6_disabled() {
	# Linux kernel tunable for ipv6: 0 = false, 1 = true
	# returns: false = enabled, true = disabled
	if grep -Pqs '^\h*1\b' /sys/module/ipv6/parameters/{disable,disable_ipv6} \
	/proc/sys/net/ipv6/conf/{default,all}/disable_ipv6;
	then
	true
	else
	false
	fi
	}

	create_ipv4_aliases() {
	local iface=$1
	local mac=$2
	local addresses
	subnet_supports_ipv4 "$iface" \|\| return 0
	addresses=$(get_iface_imds $mac local-ipv4s \| tail -n +2 \| sort)
	local drop_in_dir="${unitdir}/70-${iface}.network.d"
	mkdir -p "$drop_in_dir"
	local file="$drop_in_dir/ec2net_alias.conf"
	local work="${file}.new"
	touch "$work"

	for a in $addresses; do
	cat <<EOF >> "$work"
	[Address]
	Address=${a}/32
	AddPrefixRoute=false

	EOF
	done
	_install_and_reload "$work" "$file"
	}

	subnet_supports_ipv4() {
	local iface=$1
	if [ -z "$iface" ]; then
	error "${FUNCNAME[0]} called without an interface"
	return 1
	fi
	! ip -4 addr show dev "$iface" scope global \| \
	sed -n -E 's,^.inet (\S+).,\1,p' \| grep -E -q '^169\.254\.'
	}

	subnet_supports_ipv6() {
	local iface=$1
	if [ -z "$iface" ]; then
	error "${FUNCNAME[0]} called without an interface"
	return 1
	fi
	ip -6 addr show dev "$iface" scope global \| grep -q inet6
	}

	subnet_prefixroutes() {
	local ether=$1
	local family=${2:-ipv4}
	if [ -z "$ether" ]; then
	err "${FUNCNAME[0]} called without an MAC address"
	return 1
	fi
	case "$family" in
	ipv4)
	get_iface_imds "$ether" "subnet-${family}-cidr-block"
	;;
	ipv6)
	get_iface_imds "$ether" "subnet-${family}-cidr-blocks"
	;;
	esac
	}

	create_rules() {
	local iface=$1
	local device_number=$2
	local network_card=$3
	local family=$4
	local addrs prefixes
	local local_addr_key subnet_pd_key
	local drop_in_dir="${unitdir}/70-${iface}.network.d"
	mkdir -p "$drop_in_dir"

	local -i ruleid=$((device_number+rule_base+100*network_card))

	case $family in
	4)
	if ! subnet_supports_ipv4 $iface; then
	return 0
	fi
	local_addr_key=local-ipv4s
	subnet_pd_key=ipv4-prefix
	;;
	6)
	if ! subnet_supports_ipv6 $iface; then
	return 0
	fi
	local_addr_key=ipv6s
	subnet_pd_key=ipv6-prefix
	;;
	*)
	error "unable to determine protocol"
	return 1
	;;
	esac

	# We'd like to retry here, but we can't distinguish between an
	# IMDS failure, a propagation delay, or a legitimately empty
	# response.
	addrs=$(get_iface_imds ${ether} ${local_addr_key} \|\| true)

	# don't fail or retry prefix retrieval. IMDS currently returns an
	# error, rather than an empty response, if no prefixes are
	# assigned, so we are unable to distinguish between a service
	# error and a successful but empty response
	prefixes=$(get_iface_imds ${ether} ${subnet_pd_key} 1 \|\| true)

	local source
	local file="$drop_in_dir/ec2net_policy_${family}.conf"
	local work="${file}.new"
	touch "$work"

	for source in $addrs $prefixes; do
	cat <<EOF >> "$work"
	[RoutingPolicyRule]
	From=${source}
	Priority=${ruleid}
	Table=${ruleid}

	EOF
	done
	_install_and_reload "$work" "$file"
	}

	create_if_overrides() {
	local iface="$1"; test -n "$iface" \|\| { echo "Invalid iface at $LINENO" >&2 ; exit 1; }
	local -i device_number="$2"; test -n "$device_number" \|\| { echo "Invalid device_number at $LINENO" >&2 ; exit 1; }
	local -i network_card="$3"; test -n "$network_card" \|\| { echo "Invalid network_card at $LINENO" >&2 ; exit 1; }
	local ether="$4"; test -n "$ether" \|\| { echo "Invalid ether at $LINENO" >&2 ; exit 1; }
	local cfgfile="$5"; test -n "$cfgfile" \|\| { echo "Invalid cfgfile at $LINENO" >&2 ; exit 1; }

	local cfgdir="${cfgfile}.d"
	local dropin="${cfgdir}/eni.conf"
	local -i metric=$((metric_base+100*network_card+device_number))
	local -i tableid=$((rule_base+100*network_card+device_number))

	mkdir -p "$cfgdir"
	cat <<EOF > "${dropin}.tmp"
	# Configuration for ${iface} generated by policy-routes@${iface}.service
	[Match]
	MACAddress=${ether}
	[Network]
	DHCP=yes

	[DHCPv4]
	RouteMetric=${metric}
	UseRoutes=true
	UseGateway=true

	[IPv6AcceptRA]
	RouteMetric=${metric}
	UseGateway=true

	EOF

	cat <<EOF >> "${dropin}.tmp"
	[Route]
	Table=${tableid}
	Gateway=_ipv6ra

	EOF

	if ! ipv6_disabled; then
	for dest in $(subnet_prefixroutes "$ether" ipv6); do
	cat <<EOF >> "${dropin}.tmp"
	[Route]
	Table=${tableid}
	Destination=${dest}

	EOF
	done
	fi

	if subnet_supports_ipv4 "$iface"; then
	# if not in a v6-only network, add IPv4 routes to the private table
	cat <<EOF >> "${dropin}.tmp"
	[Route]
	Gateway=_dhcp4
	Table=${tableid}

	EOF
	local dest
	for dest in $(subnet_prefixroutes "$ether" ipv4); do
	cat <<EOF >> "${dropin}.tmp"
	[Route]
	Table=${tableid}
	Destination=${dest}

	EOF
	done
	fi

	mv "${dropin}.tmp" "$dropin"
	echo 1
	}

	add_altnames() {
	local iface=$1
	local ether=$2
	local device_number=$3
	local network_card=$4
	local eni_id
	eni_id=$(get_iface_imds "$ether" interface-id)
	# Interface altnames can also be added using systemd .link files.
	# However, in order to use them, we need to wait until a
	# systemd-networkd reload operation completes and then trigger a
	# udev "move" event. We avoid that overhead by adding the
	# altnames directly using ip(8).
	if [ -n "$eni_id" ] &&
	! ip link show dev "$iface" \| grep -q -E "altname\s+${eni_id}"; then
	ip link property add dev "$iface" altname "$eni_id" \|\| true
	fi
	local device_number_alt="device-number-${device_number}"
	if [ -n "$network_card" ]; then
	# On instance types that don't support a network-card key, we
	# won't append a value here. A value of zero would be
	# appropriate, but would be a visible change to the interface
	# configuration on these instance types and could disrupt
	# existing automation.
	device_number_alt="${device_number_alt}.${network_card}"
	fi
	if [ -n "$device_number" ] &&
	! ip link show dev "$device_number_alt" > /dev/null 2>&1; then
	ip link property add dev "$iface" altname "${device_number_alt}" \|\| true
	fi
	}

	create_interface_config() {
	local iface=$1
	local device_number=$2
	local network_card=$3
	local ether=$4

	local libdir=/usr/lib/systemd/network
	local defconfig="${libdir}/80-ec2.network"

	local -i retval=0

	local cfgfile="${unitdir}/70-${iface}.network"
	if [ -e "$cfgfile" ] &&
	[ ! -v EC2_IF_INITIAL_SETUP ]; then
	debug "Using existing cfgfile ${cfgfile}"
	echo $retval
	return
	fi

	debug "Linking $cfgfile to $defconfig"
	mkdir -p "$unitdir"
	ln -sf "$defconfig" "$cfgfile"
	retval+=$(create_if_overrides "$iface" "$device_number" "$network_card" "$ether" "$cfgfile")
	add_altnames "$iface" "$ether" "$device_number" "$network_card"
	echo $retval
	}

	# The primary interface is defined as the interface whose MAC address
	# is in the top-level `mac` key. It will always have device-number 0
	# and network-card 0. It gets unique treatment in a few areas.
	_is_primary_interface() {
	local ether default_mac
	ether="$1"
	default_mac=$(get_imds mac)
	[ "$ether" = "$default_mac" ]
	}

	# device-number, which represents the DeviceIndex field in an EC2
	# NetworkInterfaceAttachment object, is not guaranteed to have
	# propagated to IMDS by the time a hot-plugged interface is visible to
	# the instance. Further complicating things, IMDS returns 0 for the
	# device-number before propagation is complete, which is a valid value
	# and represents the instance's primary interface. We cope with this
	# by ensuring that the only interface for which we return 0 as the
	# device-number is the one whose MAC address matches the instance's
	# top-level "mac" field, which is static and guaranteed to be
	# available as soon as the instance launches.
	_get_device_number() {
	local iface ether network_card_index
	iface="$1"
	ether="$2"
	network_card_index=${3:-0}

	if _is_primary_interface "$ether"; then
	echo 0 ; return 0
	fi

	local -i maxtries=60 ntries=0
	for (( ntries = 0; ntries < maxtries; ntries++ )); do
	device_number=$(get_iface_imds "$ether" device-number 1)
	# if either the device number or the card index are nonzero,
	# then we treat the value returned as valid. Zero values for
	# both is only valid for the primary interface, which we've
	# already concluded is not this one.
	if [ $device_number -ne 0 ] \|\| [ $network_card_index -ne 0 ]; then
	echo "$device_number"
	return 0
	else
	sleep 0.1
	fi
	done
	error "Unable to identify device-number for $iface after $ntries attempts"
	echo -1
	return 1
	}

	# print the network-card IMDS value for the given interface
	# NOTE: On many instance types, this value is not defined. This
	# function will print the empty string on those instances. On
	# instances where it is defined, it will be a numeric value.
	_get_network_card() {
	local iface ether network_card
	iface="$1"
	ether="$2"

	if _is_primary_interface "$ether"; then
	echo 0 ; return 0
	fi
	network_card=$(get_iface_imds "$ether" network-card)
	echo ${network_card}
	}


	# Interfaces get configured with addresses and routes from
	# DHCP. Routes are inserted in the main table with metrics based on
	# their physical location (slot ID) to ensure deterministic route
	# ordering. Interfaces also get policy routing rules based on source
	# address matching and ensuring that all egress traffic with one of
	# the interface's IPs (primary or secondary, IPv4 or IPv6, including
	# addresses from delegated prefixes) will be routing according to an
	# interface-specific routing table.
	setup_interface() {
	local iface ether
	local -i device_number network_card rc
	iface=$1
	ether=$2

	network_card=$(_get_network_card "$iface" "$ether")
	device_number=$(_get_device_number "$iface" "$ether" "$network_card")
	rc=$?
	if [ $rc -ne 0 ]; then
	error "Unable to identify device-number for $iface in IMDS"
	exit 1
	fi

	# Newly provisioned resources (new ENI attachments) take some
	# time to be fully reflected in IMDS. In that case, we poll
	# for a period of time to ensure we've captured all the
	# sources needed for policy routing. When refreshing an
	# existing ENI attachment's configuration, we skip the
	# polling.
	local -i deadline
	deadline=$(date -d "now+30 seconds" +%s)
	while [ "$(date +%s)" -lt $deadline ]; do
	local -i changes=0

	changes+=$(create_interface_config "$iface" "$device_number" "$network_card" "$ether")
	for family in 4 6; do
	if ! _is_primary_interface "$ether"; then
	# We only create rules for secondary interfaces so
	# external tools that modify the main route table can
	# still communicate with the host's primary IPs. For
	# example, considering a host with address 10.1.2.3 on
	# ens5 (device-number-0) and a container communicating
	# on a docker0 bridge interface, the expectation is
	# that the container can communicate with 10.1.2.3 in
	# both directions. If we install policy rules,
	# they'll redirect the return traffic out ens5 rather
	# than docker0, effectively blackholing it.
	# https://github.com/amazonlinux/amazon-ec2-net-utils/issues/97
	changes+=$(create_rules "$iface" "$device_number" "$network_card" $family)
	fi
	done
	changes+=$(create_ipv4_aliases $iface $ether)

	if [ ! -v EC2_IF_INITIAL_SETUP ] \|\|
	[ "$changes" -gt 0 ]; then
	break
	fi
	done
	echo $changes
	}

	# All instances of this process that may reconfigure networkd register
	# themselves as such. When exiting, they'll reload networkd only if
	# they're the registered process running.
	maybe_reload_networkd() {
	rm -f "${lockdir}/${iface}"
	if rmdir "$lockdir" 2> /dev/null; then
	if [ -e "$reload_flag" ]; then
	rm -f "$reload_flag" 2> /dev/null
	networkctl reload
	info "Reloaded networkd"
	else
	debug "No networkd reload needed"
	fi
	else
	debug "Deferring networkd reload to another process"
	fi
	}


	register_networkd_reloader() {
	local -i registered=1 cnt=0
	local -i max=10000
	local -r lockfile="${lockdir}/${iface}"
	local old_opts=$-

	# Disable -o errexit in the following block so we can capture
	# nonzero exit codes from a redirect without considering them
	# fatal errors
	set +e
	while [ $cnt -lt $max ]; do
	cnt+=1
	mkdir -p "$lockdir"
	trap 'debug "Called trap" ; maybe_reload_networkd' EXIT
	# If the redirect fails, most likely because the target file
	# already exists and -o noclobber is in effect, $? will be set
	# nonzero. If it succeeds, it is set to 0
	echo $$ > "${lockfile}"
	# shellcheck disable=SC2320
	registered=$?
	[ $registered -eq 0 ] && break
	sleep 0.1
	if (( $cnt % 100 == 0 )); then
	warn "Unable to lock ${iface} after ${cnt} tries."
	fi
	done
	# re-enable -o errexit if it had originally been set
	[[ $old_opts = e ]] && set -e

	# If registered is still nonzero when we get here, we have failed
	# to create the lock. Log this and exit.
	if [ $registered -ne 0 ]; then
	local msg="Unable to lock configuration for ${iface}."
	error "$(printf "%s Check pid %d", "$msg", "$(cat "${lockfile}")")"
	exit 1
	fi
	}
No results found