Last active
March 30, 2023 19:03
-
-
Save kwk/ac5a01ef9445b8a8f7a89a0f734e282f to your computer and use it in GitHub Desktop.
Inotifywait for doing batch file processing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# This script is supposed to show the usage of inotifywait when you want to
# process files in a directory in batches once the files have been created.
#
#
# Test this script with:
#
# $ this-script.sh
# $ for i in a b c ; do echo $i > /tmp/$i.profraw; done \
# && sleep 2 \
# && for i in d e f g; do echo $i > /tmp/$i.profraw; done \
# && sleep 2 \
# && echo "" > /tmp/shutdown
#
# You should see an output like this:
#
# Processing batch (size: 3) in 5 seconds:
# a.profraw
# b.profraw
# c.profraw
# DONE
# Processing batch (size: 4) in 5 seconds:
# d.profraw
# e.profraw
# f.profraw
# g.profraw
# DONE
#
# Notice that the first batch has the minimal size (3) and that the second batch
# has size (4) because the /tmp/g.profraw file is created while the main script
# is still processing the first batch. This proves that queuing up events like
# this does work.

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# The directory that is watched for new files.
directory=/tmp

# Regular expression (handed to inotifywait --include) selecting the files to
# look out for.
files_regex='.*\.profraw$'

# Number of files that have to be queued before a batch is processed.
min_batch_size=3

# Queue of file names waiting to be processed, one name per line.
# It has to live inside $directory: the main loop watches that directory for
# modifications of a file with this base name.
batch_file=/tmp/batch.txt

# Once the number of lines in batch_file reaches min_batch_size, the content of
# batch_file is copied into this file. Processing then works on the copy while
# batch_file can collect more files.
batch_file_in_process=/tmp/batch_in_process.txt

# Writing to this file makes the program exit gracefully. Like batch_file, it
# has to live inside $directory so that the main loop sees the modification.
shutdown_file=/tmp/shutdown

#######################################
# Empty the batch file, creating it if it does not exist yet.
# Globals:   batch_file (read) - path of the queue file to truncate
# Arguments: none
# Returns:   exit status of truncate
#######################################
empty_batch_file() {
  # Quoted and guarded with "--" so that a path containing spaces or a leading
  # dash is passed to truncate as exactly one operand.
  truncate -s 0 -- "$batch_file"
}

# Start with an empty queue.
empty_batch_file

# Background monitor: every time a file inside $directory whose name matches
# files_regex is closed after writing (close_write), inotifywait appends the
# bare file name (--format '%f') to the batch file (-o).
# All expansions are quoted so that the regex is never glob-expanded or
# word-split by the shell before inotifywait sees it.
nohup inotifywait -q -m -o "$batch_file" -e close_write \
  --format '%f' \
  --include "$files_regex" \
  "$directory" > /dev/null 2>&1 &
background_monitor_pid=$!

# Stop the background monitor whenever this script exits. TERM is sent instead
# of KILL so the monitor is asked to terminate rather than being killed
# outright. Single quotes defer the expansion until the trap actually runs.
trap 'kill -s TERM "$background_monitor_pid"' EXIT

# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------
# Observe modifications of the batch file (a new file name was queued) and of
# the shutdown file (request to exit gracefully).
#
# inotifywait runs as a coprocess instead of on the left-hand side of a
# pipeline. In a pipeline the loop body runs in a subshell, so "exit" would
# only leave that subshell and the script would keep waiting for the still
# running "inotifywait -m". Here the loop runs in the main shell, "exit"
# really ends the script, and the event monitor can be stopped by PID.
batch_name=$(basename -- "$batch_file")
shutdown_name=$(basename -- "$shutdown_file")

coproc EVENT_MONITOR {
  exec inotifywait -q -m -e modify \
    --include "(${batch_name}|${shutdown_name})" \
    "$directory"
}
event_monitor_pid=$EVENT_MONITOR_PID

# Each event line is read as three fields: watched directory, event name(s)
# and the name of the modified file. The loop variables are deliberately NOT
# called "directory" so that the configured $directory stays intact.
while read -r _watched_dir _events changed_file; do
  if [[ "$changed_file" == "$shutdown_name" ]]; then
    echo "Exiting gracefully..."
    kill "$event_monitor_pid"
    exit 0
  fi

  batch_size=$(wc -l < "$batch_file")
  if (( batch_size <= 0 )); then
    # This event happens when we empty the batch file.
    continue
  fi
  if (( batch_size < min_batch_size )); then
    echo "Batch is still too small: $batch_size must be at least $min_batch_size"
    continue
  fi

  # Snapshot the queue, then empty it so it can collect the next batch while
  # this one is being processed.
  # NOTE(review): a name appended between the copy and the truncate is lost;
  # closing that window needs cooperation from the writer of batch_file.
  cat "$batch_file" > "$batch_file_in_process"
  empty_batch_file

  echo "Processing batch (size: $batch_size) in 5 seconds: "
  cat "$batch_file_in_process"
  sleep 5 # <----- Do your actual work here
  echo "DONE"

  # Remove the processed files. The queue holds bare file names, so each one
  # is resolved against $directory rather than the current working directory.
  # Reading line by line keeps names containing spaces in one piece.
  # rm -v prints one "removed ..." line per file that is actually deleted.
  while IFS= read -r processed_name; do
    [[ -n "$processed_name" ]] || continue
    rm -fv -- "$directory/$processed_name"
  done < "$batch_file_in_process"
done <&"${EVENT_MONITOR[0]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment