Skip to content

Instantly share code, notes, and snippets.

@kwk
Created March 30, 2023 21:17
Show Gist options
  • Save kwk/2691397e8ee15b4787c1298f89fd3a72 to your computer and use it in GitHub Desktop.
Save kwk/2691397e8ee15b4787c1298f89fd3a72 to your computer and use it in GitHub Desktop.
llvm-profdata merge in the background
#!/bin/bash
# This script is supposed to show the usage of inotifywait when you want to
# process files in a directory in batches once the files have been created.
#
#
# Test this script with:
#
# $ this-script.sh
# $ for i in a b c ; do echo $i > /tmp/$i.profraw; done \
# && sleep 2 \
# && for i in d e f g; do echo $i > /tmp/$i.profraw; done \
# && sleep 2 \
# && echo "" > /tmp/shutdown
#
# You should see an output like this:
#
# Processing batch (size: 3) in 5 seconds:
# a.profraw
# b.profraw
# c.profraw
# DONE
# Processing batch (size: 4) in 5 seconds:
# d.profraw
# e.profraw
# f.profraw
# g.profraw
# DONE
#
# Notice that the first batch has the minimal size (3) and that the second batch
# has size (4) because the /tmp/g.profraw file is created while the main script
# is still processing the first batch. This proves that queuing up events like
# this does work.
# Trace every command to stderr as it is executed (debugging aid).
set -x
# Print the command line synopsis to stdout and terminate the script.
#
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits the shell with status 0
function show_usage()
{
    # The heredoc delimiter is unquoted on purpose so that $0 expands to the
    # name this script was invoked with.
    cat <<EOF
Usage:
  $0
    -d <observe_dir>
    -r <files_regex>
    -s <min_batch_size>
    -b <batch_file>
    -u <batch_file_in_process>
    -p <pid_file>
    -f <target_merge_file>
    -l <log_file>
    -h (show this help and exit)
EOF
    exit 0
}
# Parse the command line. Every option that takes an argument stores it in the
# global variable of the same name used in the usage text; -h prints the usage.
while getopts "d:r:s:b:u:p:f:l:h" flag; do
    case "$flag" in
        d) observe_dir="$OPTARG" ;;
        r) files_regex="$OPTARG" ;;
        s) min_batch_size="$OPTARG" ;;
        b) batch_file="$OPTARG" ;;
        u) batch_file_in_process="$OPTARG" ;;
        p) pid_file="$OPTARG" ;;
        f) target_merge_file="$OPTARG" ;;
        l) log_file="$OPTARG" ;;
        h) show_usage ;;
    esac
done
# Handle defaults: every setting keeps the value given on the command line and
# only falls back to the default below when it is unset or empty.

# Directory in which raw PGO profiles are stored
# NOTE: Normally PGO raw profiles are stored in the location where the
# instrumented binary is invoked. We make the assumption that all profiles are
# stored in the same directory.
# See %t here:
# https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#running-the-instrumented-program
observe_dir=${observe_dir:-$PWD}
# Regex for the files to look out for.
# (This must read files_regex, the variable set by -r; the previous spelling
# "file_regex" meant a user supplied regex was always discarded.)
files_regex=${files_regex:-'.*\.profraw$'}
# Number of files that have to exist before we're processing them.
min_batch_size=${min_batch_size:-10}
# This file acts as a queue of file paths that we want to work on.
batch_file=${batch_file:-$observe_dir/background-merge.batch.txt}
# Once the number of lines in the batch_file reaches the min_batch_size we move
# the content of batch_file over to this file and then processing can happen
# while batch_file can collect more files.
batch_file_in_process=${batch_file_in_process:-/tmp/background-merge.batch_in_process.txt}
# File to store the PID of this process. main removes it again when the
# script shuts down gracefully.
pid_file=${pid_file:-/tmp/background-merge.pid}
# Profile that every batch is merged into.
# (A single "-" after the colon: ":--" would make the default start with a
# dash, i.e. "-/tmp/background-merge.target".)
target_merge_file=${target_merge_file:-/tmp/background-merge.target}
# File that receives the output of main.
log_file=${log_file:-/tmp/background-merge.log}
# Once there's a write event to this file, the program exits gracefully.
shutdown_file=${shutdown_file:-$observe_dir/background-merge.shutdown}
# Print the effective configuration, one "name = value" line per setting,
# preceded by a banner line.
#
# Globals read: observe_dir, files_regex, min_batch_size, batch_file,
#               batch_file_in_process, pid_file, target_merge_file, log_file,
#               shutdown_file
# Outputs:      the configuration listing on stdout
function show_config()
{
    local setting
    printf '%s\n' 'PGO Background merge starting with this config:'
    for setting in \
        observe_dir \
        files_regex \
        min_batch_size \
        batch_file \
        batch_file_in_process \
        pid_file \
        target_merge_file \
        log_file \
        shutdown_file
    do
        # Indirect expansion: look up the variable whose name is in $setting.
        printf '%s = %s\n' "$setting" "${!setting}"
    done
}
# On SIGTERM, write to the shutdown file. The body is single-quoted so that
# $shutdown_file is expanded when the signal arrives, and the redirect target
# is double-quoted so that a path containing whitespace is not rejected as an
# "ambiguous redirect".
trap 'echo "" > "$shutdown_file"' SIGTERM
# Empty batch_file (if exists) or create batch file.
#
# Globals read: batch_file
# Returns:      exit status of truncate
function empty_batch_file()
{
    # Quoted (and guarded by --) so that a path containing whitespace or
    # starting with a dash is passed to truncate as one single operand.
    truncate -s 0 -- "$batch_file"
}
# Start with an empty queue.
empty_batch_file
# tag::process_batch[]
# Merge the raw profiles of the batch in process into $target_merge_file.
#
# The file names are read line by line from $batch_file_in_process and are
# resolved relative to $observe_dir. If $target_merge_file already exists it
# is passed as an additional input so that earlier batches are kept.
#
# Globals read:    observe_dir, batch_file_in_process, target_merge_file
# Globals written: TMPDIR, LLVM_PROFILE_FILE (both exported)
# Returns:         the exit status of "llvm-profdata merge"; 1 if $observe_dir
#                  cannot be entered; 0 if the batch lists no file at all
function process_batch()
{
    # tag::merge[]
    # llvm-profdata itself is instrumented as well so we need to
    # tell it where to write its own profile data.
    # TODO(kwk): Eventually use this in the final merge?
    export TMPDIR=/tmp
    export LLVM_PROFILE_FILE="%t/llvm-profdata.tmp"

    # Read one file name per line instead of word-splitting the file content,
    # so that names containing whitespace or glob characters stay intact.
    local -a profiles=()
    local profile
    while IFS= read -r profile || [[ -n "$profile" ]]; do
        if [[ -n "$profile" ]]; then
            profiles+=("$profile")
        fi
    done < "$batch_file_in_process"
    if (( ${#profiles[@]} == 0 )); then
        # Nothing to merge.
        return 0
    fi

    pushd "$observe_dir" || return 1

    # Existence is tested after entering $observe_dir so that a relative
    # $target_merge_file is looked up there.
    local -a inputs=()
    if [ -e "$target_merge_file" ]; then
        inputs+=("$target_merge_file")
    fi
    inputs+=("${profiles[@]}")

    llvm-profdata merge \
        --compress-all-sections \
        --sparse \
        "${inputs[@]}" \
        -o "$target_merge_file"
    local merge_status=$?

    popd
    # IMPORTANT: Free up disk space!
    rm -f -- "$TMPDIR/llvm-profdata.tmp"
    # end::merge[]
    return "$merge_status"
}
# end::process_batch[]
# Queue up finished profiles and merge them batch by batch until a shutdown
# is requested.
#
# Two inotifywait processes are used:
#   1. a background monitor that appends the name of every file in
#      $observe_dir matching $files_regex to $batch_file once it has been
#      written and closed;
#   2. a foreground watcher that reports modifications of $batch_file and
#      $shutdown_file and thereby drives the processing loop.
#
# Globals read:    observe_dir, files_regex, batch_file, batch_file_in_process,
#                  shutdown_file, pid_file, min_batch_size
# Globals written: background_monitor_pid (read by the EXIT trap)
# Outputs:         progress messages on stdout
function main()
{
    local batch_name shutdown_name filename batch_size profile
    local -a processed
    batch_name=$(basename -- "$batch_file")
    shutdown_name=$(basename -- "$shutdown_file")

    # On every *.profraw file written to in the observed directory,
    # write an event line to the list of files to process in a batch.
    nohup inotifywait -q -m -o "$batch_file" -e close_write \
        --format '%f' \
        --include "$files_regex" \
        "$observe_dir" > /dev/null 2>&1 &
    # Deliberately global: the EXIT trap runs after this function's locals
    # have gone out of scope.
    background_monitor_pid=$!
    trap 'kill -s KILL "$background_monitor_pid"' EXIT

    # Observe if a new profile was added to the list of the current batch.
    # If the shutdown file was modified, gracefully shutdown.
    # Only the file name is requested (--format '%f') so that a directory
    # name containing spaces cannot shift the fields that are read.
    #
    # NOTE(review): the loop is the last stage of a pipeline and thus runs in
    # a subshell, so "exit 0" below only leaves that subshell. main appears
    # to return only once the watcher on the left-hand side has terminated as
    # well - confirm that shutdown does not stall until the next event.
    inotifywait -q -m -e modify \
        --format '%f' \
        --include "(${batch_name}|${shutdown_name})" \
        "$observe_dir" \
        | while IFS= read -r filename
    do
        if [ "$filename" = "$shutdown_name" ]; then
            echo "Exiting gracefully..."
            rm -f -- "$pid_file"
            exit 0
        fi

        batch_size=$(wc -l < "$batch_file")
        if (( batch_size <= 0 )); then
            # This event happens when we empty the batch file
            continue
        fi
        if (( batch_size < min_batch_size )); then
            echo "Batch is still too small: $batch_size must be at least $min_batch_size"
            continue
        fi

        # Hand the queued names over to the in-process file so that the batch
        # file can collect new names while this batch is being merged.
        cat -- "$batch_file" > "$batch_file_in_process"
        empty_batch_file
        echo "Processing batch (size: $batch_size) in 5 seconds: "
        cat -- "$batch_file_in_process"
        process_batch \
            || echo "WARNING: merging the batch failed (exit status $?)"

        # IMPORTANT: Free up disk space!
        # The names are read line by line so that a file name containing
        # whitespace is removed as one file rather than as several words.
        processed=()
        while IFS= read -r profile || [[ -n "$profile" ]]; do
            if [[ -n "$profile" ]]; then
                processed+=("$profile")
            fi
        done < "$batch_file_in_process"
        if pushd "$observe_dir"; then
            rm -fv -- "${processed[@]}"
            popd
        fi
    done
}
# tag::setup[]
# Claim the PID file for this process and back up an existing log file.
#
# Globals read: pid_file, log_file
# Outputs:      status messages on stdout
# Returns:      exits the shell with status 0 if the PID file belongs to a
#               process that is still running (or cannot be verified)
function setup() {
    # Handle if PID file exists and whether process is still running or not.
    if [ -e "$pid_file" ]; then
        local recorded_pid
        recorded_pid=$(cat -- "$pid_file" 2>/dev/null)
        # Only a numeric PID whose process can no longer be signalled counts
        # as stale; anything that cannot be verified is treated as in use.
        # NOTE(review): kill -0 also fails for a live process owned by another
        # user - confirm that this cannot happen in your setup.
        if [[ "$recorded_pid" =~ ^[0-9]+$ ]] \
            && ! kill -0 "$recorded_pid" 2> /dev/null; then
            echo "Removing stale PID file (process $recorded_pid is not running): $pid_file"
            rm -f -- "$pid_file"
        else
            echo "ERROR: PID file already in use: $pid_file"
            echo "Once you're done, kill job with: kill -s TERM \$(cat $pid_file)"
            exit 0
        fi
    fi

    # Save this PID to a file
    echo $$ > "$pid_file"

    # Create log backup (cp -b additionally keeps a previous backup, if any)
    if [ -e "$log_file" ]; then
        echo "Backing up existing log file: $log_file"
        cp -bv -- "$log_file" "$log_file.bak"
    fi
}
# end::setup[]
# Entry point: report the configuration, claim the PID file, then run the
# processing loop with all of its output going to the log file.
show_config
setup
# Upon a SIGTERM event we modify the shutdown_file to gracefully exit.
# The trap body is single-quoted so that the path is expanded (and quoted)
# only when the signal arrives; a path containing whitespace stays intact.
trap 'echo "" > "$shutdown_file"' SIGTERM
main > "$log_file" 2>&1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment