Skip to content

Instantly share code, notes, and snippets.

@kwk
Last active March 30, 2023 19:03
Show Gist options
  • Save kwk/ac5a01ef9445b8a8f7a89a0f734e282f to your computer and use it in GitHub Desktop.
Save kwk/ac5a01ef9445b8a8f7a89a0f734e282f to your computer and use it in GitHub Desktop.
Inotifywait for doing batch file processing
#!/bin/bash
# This script demonstrates how to use inotifywait to process files in a
# directory in batches once the files have been created.
#
#
# Test this script with:
#
# $ this-script.sh
# $ for i in a b c ; do echo $i > /tmp/$i.profraw; done \
# && sleep 2 \
# && for i in d e f g; do echo $i > /tmp/$i.profraw; done \
# && sleep 2 \
# && echo "" > /tmp/shutdown
#
# You should see an output like this:
#
# Processing batch (size: 3) in 5 seconds:
# a.profraw
# b.profraw
# c.profraw
# DONE
# Processing batch (size: 4) in 5 seconds:
# d.profraw
# e.profraw
# f.profraw
# g.profraw
# DONE
#
# Notice that the first batch has the minimal size (3) and that the second batch
# has size (4) because the /tmp/g.profraw file is created while the main script
# is still processing the first batch. This proves that queuing up events like
# this does work.
# The directory in which to search for files. It is also the only directory
# that is watched for changes to the batch file and the shutdown file, so both
# of those paths are derived from it below instead of being hard-coded.
directory=/tmp
# Regex for the files to look out for
files_regex='.*\.profraw$'
# Number of files that have to exist before we're processing them.
min_batch_size=3
# This file acts as a queue of file names that we want to work on.
# It has to live inside $directory, otherwise modifications to it would never
# be noticed by the event loop.
batch_file="${directory}/batch.txt"
# Once the number of lines in the batch_file reaches the min_batch_size we copy
# the content of batch_file over to this file and then processing can happen
# while batch_file can collect more files.
batch_file_in_process="${directory}/batch_in_process.txt"
# Once there's a write event to this file, the program exits gracefully.
# Like batch_file, it has to live inside the watched $directory.
shutdown_file="${directory}/shutdown"
# Empty the batch file if it exists, or create it as an empty file otherwise.
# Globals:
#   batch_file (read) - path of the file to truncate to zero bytes.
# The path is quoted (and preceded by "--") so that a path containing
# whitespace, glob characters or a leading dash is treated as one file name.
empty_batch_file() {
  truncate -s 0 -- "$batch_file"
}
# Start with an empty queue.
empty_batch_file

# On every file matching $files_regex that is closed after writing in
# $directory, append the bare file name (format '%f') as one line to
# $batch_file. inotifywait writes the events itself via -o; its own
# stdout/stderr are discarded.
# All expansions are quoted: $files_regex contains '*' and would otherwise be
# subject to pathname expansion.
nohup inotifywait -q -m -o "$batch_file" -e close_write \
  --format '%f' \
  --include "$files_regex" \
  "$directory" > /dev/null 2>&1 &
background_monitor_pid=$!

# Stop the background monitor when this script exits. SIGTERM (the default
# signal of kill) is used instead of SIGKILL so the monitor can terminate
# normally. Errors are silenced on purpose: the monitor may already be gone.
trap 'kill "$background_monitor_pid" 2>/dev/null' EXIT
# Observe if a new profile was added to the list of the current batch.
# If the shutdown file was modified, gracefully shutdown.
#
# The watcher runs as a coprocess and the loop runs in the main shell (not on
# the right-hand side of a pipeline). That way "exit 0" really terminates the
# script instead of only leaving a pipeline subshell while inotifywait keeps
# blocking until its next event.
batch_name=${batch_file##*/}
shutdown_name=${shutdown_file##*/}

coproc event_watcher {
  exec inotifywait -q -m -e modify \
    --include "(${batch_name}|${shutdown_name})" \
    "$directory"
}
if [[ -z "${event_watcher_PID:-}" ]]; then
  echo "Failed to start inotifywait event watcher" >&2
  exit 1
fi
event_watcher_pid=$event_watcher_PID
# Duplicate the coprocess' output descriptor: bash unsets the coproc array as
# soon as the coprocess terminates, the duplicate simply reports end-of-file.
exec {events_fd}<&"${event_watcher[0]}"

# Each event line has the default inotifywait layout:
#   <watched dir> <event names> <file name>
# Only the file name is needed; the other fields are discarded so that the
# $directory setting is not overwritten.
while read -r -u "$events_fd" _ _ filename; do
  if [[ "$filename" == "$shutdown_name" ]]; then
    echo "Exiting gracefully..."
    # The watcher would otherwise keep running after the script is gone.
    kill "$event_watcher_pid" 2>/dev/null
    exit 0
  fi

  # The include pattern is an unanchored regex; ignore anything that is not
  # exactly the batch file.
  if [[ "$filename" != "$batch_name" ]]; then
    continue
  fi

  batch_size=$(wc -l < "$batch_file")
  if (( batch_size <= 0 )); then
    # This event happens when we empty the batch file
    continue
  fi
  if (( batch_size < min_batch_size )); then
    echo "Batch is still too small: $batch_size must be at least $min_batch_size"
    continue
  fi

  # Snapshot the queue, then reset it so new file names can be collected
  # while this batch is being processed.
  # NOTE: copy and truncate are two separate steps; a file name appended by
  # the background monitor in between is dropped from the queue.
  cat "$batch_file" > "$batch_file_in_process"
  empty_batch_file

  # Work on the snapshot only, so the reported size and the processed and
  # removed files always agree with each other.
  mapfile -t batch_entries < "$batch_file_in_process"
  batch_size=${#batch_entries[@]}

  echo "Processing batch (size: $batch_size) in 5 seconds: "
  cat "$batch_file_in_process"
  sleep 5 # <----- Do your actual work here
  echo "DONE"

  # The queue holds bare file names, so resolve them against $directory
  # instead of the current working directory, one argument per file name.
  for entry in "${batch_entries[@]}"; do
    rm -fv -- "${directory%/}/${entry}"
  done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment