Created
January 11, 2017 15:52
-
-
Save sveniu/2dc755627f0290923ae4a309531a52a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Enumerate all interrupts used by either the default or a specified
# network interface, and map them to separate CPUs. Each CPU that gets
# assigned interrupts will also have its C-states deeper than C1
# disabled to reduce the probability of rx queue overflow.
#
# Configuration is taken from the environment:
#   IFACE          Space-separated list of interfaces.
#   CPU_BLACKLIST  Space-separated list of CPUs to exclude (default: 0).
#   CPU_WHITELIST  Space-separated list of candidate CPUs (default: 0..1024).
#
# TODO: NUMA interrupt scheduling.
#

# Space-separated list of interfaces.
iface="$IFACE"

# Space-separated list of CPUs that should not be assigned interrupts.
cpu_blacklist="${CPU_BLACKLIST:-0}"

# Space-separated list of CPUs that should be assigned interrupts.
# Defaults to [0, 1024]. Blacklisted or offline CPUs will not be
# assigned any interrupts.
cpu_whitelist="$CPU_WHITELIST"
test -n "$cpu_whitelist" || cpu_whitelist=$(seq -s ' ' 0 1024)

# Convenience vars.
sysfs_net="/sys/class/net"
sysfs_cpu="/sys/devices/system/cpu"

# Bail out if we're missing essential sysfs directories.
for dir in "$sysfs_net" "$sysfs_cpu"
do
	if ! test -d "$dir"
	then
		echo >&2 "ERROR: $dir not found."
		exit 1
	fi
done
# Print a space-separated list of interrupts for a given network interface.
#
# Arguments: $1 - interface name (a directory under $sysfs_net).
# Globals:   sysfs_net (read).
# Outputs:   IRQ numbers on stdout, whitespace-separated.
#
# The entries of <iface>/device/msi_irqs are used when there are any;
# otherwise /proc/interrupts is searched for the interface name.
iface_irqs () {
	iface_irqs=""

	if test -d "${sysfs_net}/${1}/device/msi_irqs"
	then
		for f in "${sysfs_net}/${1}"/device/msi_irqs/*
		do
			# An empty directory leaves the pattern unexpanded;
			# never report the literal "*" as an interrupt.
			test -e "$f" || continue
			iface_irqs="$iface_irqs ${f##*/}"
		done

		if test -n "$iface_irqs"
		then
			printf '%s\n' "$iface_irqs"
			return 0
		fi
	fi

	# Fall back to naive parsing of /proc/interrupts.
	grep "\<${1}\>" /proc/interrupts | cut -d: -f1 | tr '\n' ' '
}
# Print a space-separated list of all interrupts of the configured
# interfaces.
#
# Globals: iface (read), sysfs_net (read).
# Outputs: IRQ numbers on stdout, whitespace-separated.
#
# Every entry of $iface is handled on its own: when it has a
# bonding/slaves file, the interfaces listed there are used in its place.
avail_irqs () {
	avail_irqs=""

	for i in $iface
	do
		# Enumerate bonding slaves, if any.
		members="$i"
		if test -f "${sysfs_net}/${i}/bonding/slaves"
		then
			members=$(cat "${sysfs_net}/${i}/bonding/slaves")
		fi

		for m in $members
		do
			avail_irqs="$avail_irqs $(iface_irqs "$m")"
		done
	done

	printf '%s\n' "$avail_irqs"
}
# Check a CPU number against the blacklist.
#
# Arguments: $1 - CPU number.
# Globals:   cpu_blacklist (read).
# Returns:   1 if the CPU is blacklisted, 0 otherwise. Note that this is
#            the inverse of the usual shell convention, so
#            "bl_cpu N || ..." runs the right-hand side for blacklisted CPUs.
bl_cpu () {
	for bl_entry in $cpu_blacklist
	do
		# Non-numeric operands make test(1) fail with an error; that
		# is silenced and treated as "no match".
		if test "$1" -eq "$bl_entry" 2>/dev/null
		then
			return 1
		fi
	done

	# Explicit zero return in case of test(1) error.
	return 0
}
# Print a space-separated list of available CPUs.
#
# Globals: cpu_whitelist (read), sysfs_cpu (read); calls bl_cpu.
# Outputs: CPU numbers on stdout; warnings about skipped offline or
#          blacklisted CPUs on stderr.
#
# A whitelisted CPU is reported when its sysfs directory exists, it is
# online and it is not blacklisted.
avail_cpus () {
	avail_cpus=""

	# Iterate over all whitelisted CPUs.
	for cpu in $cpu_whitelist
	do
		# Skip CPUs that do not exist. Keep scanning instead of
		# stopping so that sparse or unordered lists work.
		test -d "${sysfs_cpu}/cpu${cpu}" || continue

		# A CPU without an "online" file (cpu0 is the common case)
		# is taken to be online.
		online=1
		if test -f "${sysfs_cpu}/cpu${cpu}/online"
		then
			online=$(cat "${sysfs_cpu}/cpu${cpu}/online")
		fi
		if ! test "$online" -gt 0 2>/dev/null
		then
			echo >&2 "WARNING: Skipping offline CPU $cpu."
			continue
		fi

		# Skip CPUs in blacklist (bl_cpu returns 1 for those).
		if ! bl_cpu "$cpu"
		then
			echo >&2 "WARNING: Skipping blacklisted CPU $cpu."
			continue
		fi

		# Add CPU to available list.
		avail_cpus="$avail_cpus $cpu"
	done

	printf '%s\n' "$avail_cpus"
}
# Distribute the interface interrupts over the available CPUs.
#
# Globals: sysfs_cpu (read); calls avail_cpus and avail_irqs.
# Outputs: one line per intended write on stdout; diagnostics on stderr.
# Returns: 1 when no CPU is available, 0 otherwise.
#
# Summary: Walk the IRQ list and hand out CPUs round-robin, starting
# over at the beginning of the CPU list whenever it is exhausted, so
# that every IRQ is paired with exactly one CPU.
#
# NOTE: This is a dry run. The intended writes are only printed (prefixed
# with "TESTING", with the redirection escaped); nothing is written to
# /proc or /sys.
do_start () {
	start_cpus=$(avail_cpus)
	start_irqs=$(avail_irqs)

	# The positional parameters (local to this function call) serve
	# as the rotating CPU queue.
	set -- $start_irqs
	test $# -gt 0 || echo >&2 "WARNING: No interrupts found."

	set -- $start_cpus
	if test $# -eq 0
	then
		echo >&2 "ERROR: No available CPUs to assign interrupts to."
		return 1
	fi

	for irq in $start_irqs
	do
		# Refill the queue once every CPU has been handed out.
		test $# -gt 0 || set -- $start_cpus
		cpu=$1
		shift

		# Map interrupt to CPU.
		echo "TESTING $cpu > /proc/irq/${irq}/smp_affinity_list"

		# Disable high-latency CPU C-states.
		for f in "${sysfs_cpu}"/cpu${cpu}/cpuidle/state[2-9]/disable
		do
			test -f "$f" && echo "TESTING 1 > $f"
		done
	done

	# Warn if no C-state toggles are available (since 3a53396b in
	# v3.4-rc1; per-cpu since dc7fd275 in v3.6-rc1).
	test -f "${sysfs_cpu}"/cpu0/cpuidle/state0/disable ||
		echo >&2 "WARNING: No C-state toggles available. Old kernel?"
}
# Simply re-enable all CPU C-states. We'll leave the interrupt mapping
# untouched, as it doesn't really have any ill effects on the system.
#
# Globals: sysfs_cpu (read); calls avail_cpus.
# Outputs: one line per intended write on stdout.
# Returns: always 0.
#
# NOTE: This is a dry run. The intended writes are only printed (prefixed
# with "TESTING", with the redirection escaped); nothing is written to
# /sys.
do_stop () {
	for cpu in $(avail_cpus)
	do
		for f in "${sysfs_cpu}"/cpu${cpu}/cpuidle/state[0-9]/disable
		do
			# An unmatched pattern stays literal; test -f filters it.
			if test -f "$f"
			then
				echo "TESTING 0 > $f"
			fi
		done
	done

	# Explicit zero return in case of test(1) error.
	return 0
}
# Verify that we have valid interfaces. Each entry of the space-separated
# list is checked on its own; unknown entries are dropped with a warning.
valid_ifaces=""
for i in $iface
do
	if test -d "${sysfs_net}/${i}"
	then
		# Build the list without a leading space so that a single
		# interface stays a single clean word.
		valid_ifaces="${valid_ifaces:+$valid_ifaces }$i"
	else
		echo >&2 "WARNING: Skipping unknown interface $i."
	fi
done
iface="$valid_ifaces"

# Fall back to using the default route interface(s), each listed once.
if test -z "$iface"
then
	iface=$(route -n | awk '
		$1 == "0.0.0.0" && !seen[$NF]++ {
			printf "%s%s", sep, $NF
			sep = " "
		}')
fi
test -n "$iface" || echo >&2 "WARNING: No network interface found."

# Dispatch on the requested action; no argument means "start".
case "$1" in
	start|"")
		do_start
		;;
	stop)
		do_stop
		;;
	*)
		echo >&2 "Usage: $0 [start|stop]"
		exit 1
		;;
esac
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The `TESTING` and `\>` are intended as a precautionary measure to avoid accidentally a production box, right?