Last active
April 19, 2021 14:07
-
-
Save smarterclayton/03b50c8f9b6351b2d9903d7fb35b342f to your computer and use it in GitHub Desktop.
Detect CI infrastructure symptoms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Strict mode: -e aborts on an unhandled failure, -u rejects unset variables,
# -o pipefail makes a pipeline fail if any stage fails, and -x traces each
# command to stderr so stdout stays reserved for the generated report.
set -exuo pipefail

# This script searches the directories passed as arguments for known failure
# causes, using a set of symptom inputs.
# This is currently experimental and subject to change.
#######################################
# Escapes the five XML special characters so a string can be embedded in
# element content or in a double-quoted attribute value.
# Arguments:
#   $1 - text to escape
# Outputs:
#   The escaped text on stdout.
#######################################
xmlescape() {
  # printf instead of `echo -n` so values such as "-n" or "-e" are emitted
  # verbatim rather than being parsed as echo options.
  # '&' must be handled first, otherwise the ampersands introduced by the
  # later substitutions would be escaped a second time. In a sed replacement
  # a bare '&' means "the matched text", hence the backslash before each one.
  printf '%s' "$1" | sed \
    -e 's/&/\&amp;/g' \
    -e 's/</\&lt;/g' \
    -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g' \
    -e "s/'/\&#39;/g"
}
# Scratch files: ${tests} accumulates the generated <testcase> elements and
# ${input} holds the symptom list that drives the searches.
tests=$(mktemp -t result-XXXX)
input=$(mktemp -t search-XXXX)
# Remove the scratch files on every exit path instead of leaking them.
trap 'rm -f -- "${tests}" "${input}"' EXIT

# Hardcoded list of detections for now. In the future this would be generated
# elsewhere. One entry per line, fields separated by '=':
#   id=covers=name=file-glob=extended-regex
# The regex is the remainder of the line. An entry whose id equals the covers
# value of an entry that already matched is skipped, so a specific diagnosis
# suppresses the generic one that follows it.
# The delimiter is quoted so the list is written verbatim, with no parameter
# or command expansion.
cat <<'EOF' > "${input}"
==Undiagnosed panic detected in pod=pods/*=Observed a panic
==Undiagnosed panic detected in journal=nodes/*/journal*=Observed a panic
=segfault=Bug 1812261: iptables is segfaulting=nodes/*/journal*=kernel: .+: segfault .+ libnftnl
segfault==Node process segfaulted=nodes/*/journal*=kernel: .+: segfault
==Infrastructure - AWS simulate policy rate-limit=installer/.openshift_install.log*=simulating policy: Throttling: Rate exceeded
==Infrastructure - GCP quota exceeded (route to forum-gcp)=installer/.openshift_install.log*=Error waiting for Creating Subnetwork: Quota 'SUBNETWORKS' exceeded
EOF
# Totals reported in the final <testsuite> element.
searches=0
failures=0

#######################################
# Runs every symptom search listed in ${input} against one directory and
# appends one <testcase> element per evaluated search to ${tests}.
# Globals:
#   input    - read: symptom list, one "id=covers=name=glob=regex" per line
#   tests    - appended to: file collecting the <testcase> elements
#   searches - incremented once per <testcase> written
#   failures - incremented once per search that matched
# Arguments:
#   $1 - directory to scan; the file globs are resolved relative to it
# Outputs:
#   "Detected: <name>" on stderr for every search that matched.
#######################################
scan_directory() {
  local dir=$1
  local line rest id covers prefix files search out name
  # Suppression state is local, so a match in one directory never hides a
  # symptom in another directory.
  local -A covered=()

  # pushd/popd (rather than a bare cd) so that relative directory arguments
  # given after the first one still resolve from the original working
  # directory. Their directory-stack listing is kept off stdout.
  pushd "${dir}" > /dev/null

  while IFS= read -r line; do
    # Split on the first four '='; the regex is the remainder and may itself
    # contain '='.
    id=${line%%=*}
    rest=${line#*=}
    covers=${rest%%=*}
    rest=${rest#*=}
    prefix=${rest%%=*}
    rest=${rest#*=}
    files=${rest%%=*}
    search=${rest#*=}

    # Skip a generic symptom once a more specific one has claimed it.
    if [[ -n "${id}" && -n "${covered[${id}]-}" ]]; then
      continue
    fi

    # Counted only after the skip so that tests="..." equals the number of
    # <testcase> elements actually written.
    searches=$((searches + 1))
    name=$(xmlescape "${prefix}")

    # ${files} is deliberately unquoted: it is a glob that must expand here.
    # Failures (for example no matching file) are ignored but still logged
    # to stderr by zgrep.
    # shellcheck disable=SC2086
    out=$(zgrep -E "${search}" ${files} || true)

    if [[ -z "${out}" ]]; then
      printf '%s\n' "<testcase name=\"${name}\"></testcase>" >> "${tests}"
      continue
    fi

    printf 'Detected: %s\n' "${prefix}" >&2
    failures=$((failures + 1))
    if [[ -n "${covers}" ]]; then
      covered[${covers}]="1"
    fi
    printf '%s\n' "<testcase name=\"${name}\"><failure>$(xmlescape "${out}")</failure></testcase>" >> "${tests}"
  done < "${input}"

  popd > /dev/null
}

for i in "$@"; do
  scan_directory "${i}"
done
# Write the report to stdout: a single <testsuite> element wrapping every
# <testcase> collected in ${tests}. The heredoc delimiter is unquoted on
# purpose so the counters and the collected elements are expanded into it.
cat <<EOF
<testsuite name="Symptom Detection" tests="${searches}" errors="0" failures="${failures}" skipped="0" time="0" timestamp="0001-01-01T00:00:00Z" package="symptom">
$(cat "${tests}")
</testsuite>
EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment