Skip to content

Instantly share code, notes, and snippets.

@sveniu
Created January 11, 2017 15:52
Show Gist options
  • Save sveniu/2dc755627f0290923ae4a309531a52a4 to your computer and use it in GitHub Desktop.
Save sveniu/2dc755627f0290923ae4a309531a52a4 to your computer and use it in GitHub Desktop.
#!/bin/sh
# Enumerate all interrupts used by either the default or a specified
# network interface, and map them to separate CPUs. Each CPU that gets
# assigned interrupts will also have its C-states deeper than C1
# disabled to reduce the probability of rx queue overflow.
#
# TODO: NUMA interrupt scheduling.
#
# Interfaces to tune, as a space-separated list taken from $IFACE.
iface=${IFACE}

# CPUs that must never be assigned interrupts, space-separated.
# Only CPU 0 is excluded unless $CPU_BLACKLIST says otherwise.
if test -n "${CPU_BLACKLIST}"
then
    cpu_blacklist=${CPU_BLACKLIST}
else
    cpu_blacklist=0
fi

# CPUs eligible for interrupts, space-separated. Without
# $CPU_WHITELIST every CPU number in [0, 1024] is a candidate.
# Blacklisted or offline CPUs will not be assigned any interrupts.
if test -n "${CPU_WHITELIST}"
then
    cpu_whitelist=${CPU_WHITELIST}
else
    cpu_whitelist=$(seq -s ' ' 0 1024)
fi

# Shorthand for the two sysfs trees this script reads.
sysfs_cpu="/sys/devices/system/cpu"
sysfs_net="/sys/class/net"
# Both sysfs trees are essential; give up right away if either one
# is not a directory.
for required in "$sysfs_net" "$sysfs_cpu"
do
    if test ! -d "$required"
    then
        echo >&2 "ERROR: $required not found."
        exit 1
    fi
done
# Return a list of interrupts for a given network interface.
#
# Arguments: $1 - interface name, as found under $sysfs_net.
# Globals:   sysfs_net (read); iface_irqs and f (written, scratch).
# Outputs:   space-separated IRQ numbers on stdout.
iface_irqs () {
    iface_irqs=""
    # Preferred source: the entries of the device's msi_irqs directory.
    for f in "${sysfs_net}/${1}"/device/msi_irqs/*
    do
        # When the directory is missing or empty the glob stays a
        # literal pattern; skip it rather than report "*" as an IRQ.
        test -e "$f" || continue
        # Strip the directory part without forking basename(1).
        iface_irqs="$iface_irqs ${f##*/}"
    done
    if test -n "$iface_irqs"
    then
        echo "$iface_irqs"
        return
    fi
    # Fall back to naive parsing of /proc/interrupts.
    grep "\<${1}\>" /proc/interrupts | cut -d: -f1 | tr '\n' ' '
}
# Return a list of all interface interrupts.
#
# Every entry of the space-separated $iface list is handled on its
# own, so a bonding master is replaced by its slaves even when it is
# just one of several listed interfaces.
#
# Globals: iface, sysfs_net (read); avail_irqs, members (written, scratch).
# Outputs: space-separated IRQ numbers on stdout.
avail_irqs () {
    avail_irqs=""
    for i in $iface
    do
        # Enumerate bonding slaves, if any.
        if test -f "${sysfs_net}/${i}/bonding/slaves"
        then
            members=$(cat "${sysfs_net}/${i}/bonding/slaves")
        else
            members="$i"
        fi
        for m in $members
        do
            avail_irqs="$avail_irqs $(iface_irqs "$m")"
        done
    done
    echo "$avail_irqs"
}
# Look a CPU up in the blacklist.
#
# Arguments: $1 - CPU number to check.
# Returns:   1 if $1 is numerically equal to an entry of
#            $cpu_blacklist, 0 otherwise.
bl_cpu () {
    for banned in $cpu_blacklist
    do
        if test "$1" -eq "$banned" 2>/dev/null
        then
            return 1
        fi
    done
    # Not listed, or test(1) could not compare the values: report 0.
    return 0
}
# Return a list of available CPUs.
#
# A whitelisted CPU is available when its sysfs directory exists, it
# is online, and bl_cpu does not reject it.
#
# Globals: cpu_whitelist, sysfs_cpu (read); avail_cpus, online (written).
# Outputs: space-separated CPU numbers on stdout, warnings on stderr.
avail_cpus () {
    avail_cpus=""
    # Iterate over all whitelisted CPUs.
    for cpu in $cpu_whitelist
    do
        # Skip CPUs that do not exist, but keep scanning: a custom
        # whitelist need not be sorted or contiguous.
        test -d "${sysfs_cpu}/cpu${cpu}" || continue
        # Treat a CPU without a readable "online" file as online;
        # the kernel is expected to omit the file for CPUs that
        # cannot be taken offline (commonly CPU 0).
        online=1
        if test -r "${sysfs_cpu}/cpu${cpu}/online"
        then
            online=$(cat "${sysfs_cpu}/cpu${cpu}/online")
        fi
        test "$online" -gt 0 || {
            echo >&2 "WARNING: Skipping offline CPU $cpu."
            continue
        }
        # Skip CPUs in blacklist.
        bl_cpu "$cpu" || {
            echo >&2 "WARNING: Skipping blacklisted CPU $cpu."
            continue
        }
        # Add CPU to available list.
        avail_cpus="$avail_cpus $cpu"
    done
    echo "$avail_cpus"
}
# Summary: Pair every interface IRQ with a CPU. The available CPUs
# are handed out round-robin, starting over from the first CPU
# whenever the list is exhausted, so all IRQs are handled no matter
# how few CPUs there are. For each pair the IRQ is mapped to the CPU
# and the CPU's C-states matching state[2-9] are disabled.
#
# NOTE: As written, the "echo TESTING ... \>" commands only print
# the writes they stand for; nothing is written to procfs or sysfs.
#
# Globals: sysfs_cpu, iface (read); cpus, irqs, nirqs, irq, cpu, f
#          (written, scratch).
# Returns: 1 if no CPU is available, 0 otherwise.
do_start () {
    cpus=$(avail_cpus)
    irqs=$(avail_irqs)

    # Load the CPU list into the function's positional parameters.
    # Without a single CPU there is nothing to assign interrupts to.
    set -- $cpus
    test $# -gt 0 || {
        echo >&2 "ERROR: No CPUs available for interrupt assignment."
        return 1
    }

    nirqs=0
    for irq in $irqs
    do
        # Start over once every CPU has been handed out.
        test $# -gt 0 || set -- $cpus
        cpu=$1
        shift
        nirqs=$(( $nirqs + 1 ))
        # Map interrupt to CPU.
        echo TESTING $cpu \> /proc/irq/${irq}/smp_affinity_list
        # Disable high-latency CPU C-states.
        for f in "${sysfs_cpu}/cpu${cpu}"/cpuidle/state[2-9]/disable
        do
            test -f "$f" && echo TESTING 1 \> "$f"
        done
    done

    test "$nirqs" -gt 0 ||
        echo >&2 "WARNING: No interrupts found for interface(s): $iface"
    # Warn if no C-state toggles are available (since 3a53396b in
    # v3.4-rc1; per-cpu since dc7fd275 in v3.6-rc1).
    test -f "${sysfs_cpu}"/cpu0/cpuidle/state0/disable ||
        echo >&2 "WARNING: No C-state toggles available. Old kernel?"
}
# Turn every C-state of all available CPUs back on. The interrupt
# mapping is left as it is, since it has no real ill effects on the
# system. As written, this only prints "TESTING 0 > <file>" for each
# existing toggle file instead of writing to it.
do_stop () {
    for n in $(avail_cpus)
    do
        for toggle in "${sysfs_cpu}/cpu${n}"/cpuidle/state[0-9]/disable
        do
            if test -f "$toggle"
            then
                echo "TESTING 0 > $toggle"
            fi
        done
    done
    # Always report success, whatever the last test(1) returned.
    return 0
}
# Verify that every interface in the space-separated $iface list
# exists. If the list is empty, or any entry is unknown, fall back to
# the interface(s) carrying the default route.
iface_valid=""
for i in $iface
do
    test -d "${sysfs_net}/${i}" || {
        echo >&2 "WARNING: Interface $i not found; using default route interface."
        iface_valid=""
        break
    }
    iface_valid="yes"
done
# sort -u: several default routes may share one interface.
test -n "$iface_valid" ||
    iface=$(route -n | grep "^0\.0\.0\.0" | awk '{print $NF}' | sort -u)
# Only warn: "stop" does not depend on an interface.
test -n "$iface" ||
    echo >&2 "WARNING: No network interface found."

# Dispatch on the requested action; "start" is the default.
case "$1" in
    start|"")
        do_start
        ;;
    stop)
        do_stop
        ;;
    *)
        echo >&2 "Usage: $0 [start|stop]"
        exit 1
        ;;
esac
@cosimo
Copy link

cosimo commented Feb 22, 2017

The TESTING and \> are intended as a precautionary measure to avoid accidentally breaking a production box, right?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment