Created
April 25, 2019 03:27
-
-
Save larryv/26c4e8e0ada3e25f072f6e1346c934ed to your computer and use it in GitHub Desktop.
A script I used once to attempt to benchmark different methods of emulating `rm -fR` with find(1).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh

# Scratch directory that main() wipes and rebuilds on every run.
DIR=/tmp/traversal-test-dir

# Upper bound on the number of entries (subdirectories plus files)
# generated for any single directory of the test hierarchy.
MAX_DIR_ENTRIES=32

readonly DIR MAX_DIR_ENTRIES
# Print the sequence 0, 1, ..., N-1 to standard output, one element per
# line. Each number is zero-padded to the same number of digits as N-1.
#
# Arguments:
#   $1 - N, the number of elements to print. Nothing is printed unless
#        N is positive.
#   $2 - optional string prefixed to each number in the sequence.
padded_countup() {
    awk -v count="$1" -v prefix="$2" '
        BEGIN {
            # Build the format once. The prefix is passed through %s
            # instead of being spliced into the format string, so a
            # percent sign in it is printed literally rather than being
            # parsed as a conversion specification.
            fmt = "%s%0" length(count - 1) "d\n";
            for (i = 0; i < count; ++i) {
                printf fmt, prefix, i;
            }
        }
    '
}
# Fill a directory with numbered subdirectories and empty files.
#
# Reads one line holding two non-negative integers M and N from
# standard input, then creates
#   PATH/DIR[0],  PATH/DIR[1],  ..., PATH/DIR[M-1]   (directories)
#   PATH/FILE[0], PATH/FILE[1], ..., PATH/FILE[N-1]  (empty files)
# The sequence numbers are zero-padded to the same number of digits as
# M-1 and N-1 respectively.
#
# Arguments:
#   $1 - PATH, an existing directory to populate.
#   $2 - DIR, name stem for the subdirectories.
#   $3 - FILE, name stem for the files.
# Returns:
#   Non-zero if no line could be read or if mkdir/touch failed.
#
# NOTE: the names are handed to xargs(1), which splits its input on
# blanks and interprets quotes, so PATH and the stems must not contain
# whitespace or quote characters.
populate() {
    # Stop if the number stream has ended instead of carrying on with
    # empty counts.
    read -r dirs files || return

    # A count of zero yields no names. Some xargs implementations still
    # run the utility once on empty input, and mkdir/touch then fail
    # for lack of operands, so skip the pipeline entirely in that case.
    if [ "$dirs" -gt 0 ]; then
        padded_countup "$dirs" "$1"/"$2" | xargs mkdir || return
    fi
    if [ "$files" -gt 0 ]; then
        padded_countup "$files" "$1"/"$3" | xargs touch || return
    fi
}
# Delete the contents of a directory with one call to find(1), then
# print timing statistics (from `time -p`) to standard error. The
# directory itself is not deleted.
#
# Arguments:
#   $1 - path to the directory to empty.
purge1() {
    # The inner script changes into the directory and searches from ".",
    # excluding the starting point with `! -name .`. An exclusion
    # written as `! -path "$1"` treats the path as a pattern, so a
    # directory whose name contains *, ?, [ or a backslash would not be
    # excluded and would be removed along with its contents. CDPATH is
    # emptied so cd cannot resolve a relative path somewhere else.
    #
    # -depth visits a directory's entries before the directory itself,
    # so every directory is empty by the time rmdir sees it.
    # Directories are batched into rmdir calls (`+`); every other file
    # gets its own rm call (`\;`).
    time -p sh -c '
        CDPATH= cd -- "$1" \
            && find . -depth ! -name . \
                \( -type d -exec rmdir {} + -o -exec rm -f {} \; \)
    ' _ "$1"
}
# Delete the contents of a directory with two calls to find(1), then
# print timing statistics (from `time -p`) to standard error. The
# directory itself is not deleted.
#
# Arguments:
#   $1 - path to the directory to empty.
purge2() {
    # The inner script changes into the directory and searches from ".",
    # excluding the starting point with `! -name .`. An exclusion
    # written as `! -path "$1"` treats the path as a pattern, so a
    # directory whose name contains *, ?, [ or a backslash would not be
    # excluded and would be removed along with its contents. CDPATH is
    # emptied so cd cannot resolve a relative path somewhere else.
    #
    # Pass 1 removes every non-directory in batched rm calls. Pass 2
    # runs only if pass 1 succeeded and removes the directories that
    # are left, children before parents thanks to -depth.
    time -p sh -c '
        CDPATH= cd -- "$1" \
            && find . ! -type d -exec rm -f {} + \
            && find . -depth ! -name . -exec rmdir {} +
    ' _ "$1"
}
# Build two identical random directory trees under $DIR, empty one with
# purge1 and the other with purge2 in a randomly chosen order, and
# print both timing reports to standard output (purge1's first).
#
# Returns non-zero if the test directory cannot be set up, if the tree
# cannot be duplicated, or if the random ordering cannot be chosen.
main() {
    # Reset testing directory and prevent Spotlight from indexing it.
    # ${DIR:?} aborts rather than letting an empty DIR turn the paths
    # below into ones rooted at "/".
    rm -fR "${DIR:?}" && mkdir -p "$DIR"/foo || return
    touch "$DIR"/.metadata_never_index

    # Create four-level directory hierarchy. Use awk(1) to generate the
    # stream of random-ish numbers consumed by populate(). Each line
    # holds a directory count and a file count whose sum is at most
    # MAX_DIR_ENTRIES. The generator loops forever; it is terminated
    # when the consuming group exits and closes the pipe.
    readonly d0="$DIR"/foo
    awk -v max="$MAX_DIR_ENTRIES" '
        BEGIN {
            srand();
            while (1) {
                total = int((max + 1) * rand());
                dirs = int((total + 1) * rand());
                files = total - dirs;
                printf "%d %d\n", dirs, files;
            }
        }
    ' | {
        populate "$d0" dir file
        for d1 in "$d0"/dir*; do
            # An unmatched glob stays literal, hence the -d guard. A
            # directory that could not be populated is not descended
            # into.
            [ -d "$d1" ] || continue
            populate "$d1" dir file || continue
            for d2 in "$d1"/dir*; do
                [ -d "$d2" ] || continue
                populate "$d2" dir file || continue
                for d3 in "$d2"/dir*; do
                    [ -d "$d3" ] || continue
                    populate "$d3" dir file
                done
            done
        done
    }

    # Duplicate the directory hierarchy. Without the copy there is
    # nothing meaningful to compare, so give up if it fails.
    cp -pR "$DIR"/foo "$DIR"/bar || return

    # Since awk(1)'s srand() uses the system time as its default seed,
    # pause to ensure the system time advances. Attempt to mitigate
    # caching effects by randomly choosing the ordering of purge method
    # and target directory.
    sleep 1
    awk 'BEGIN { srand(); exit int(4 * rand()); }'
    order=$?
    case $order in
    0)
        f1=$(purge1 "$DIR"/foo 2>&1)
        f2=$(purge2 "$DIR"/bar 2>&1)
        ;;
    1)
        f1=$(purge1 "$DIR"/bar 2>&1)
        f2=$(purge2 "$DIR"/foo 2>&1)
        ;;
    2)
        f2=$(purge2 "$DIR"/foo 2>&1)
        f1=$(purge1 "$DIR"/bar 2>&1)
        ;;
    3)
        f2=$(purge2 "$DIR"/bar 2>&1)
        f1=$(purge1 "$DIR"/foo 2>&1)
        ;;
    *)
        # Any other status means awk itself failed; report it instead
        # of printing two empty results.
        printf 'main: cannot choose purge order (awk exit status %s)\n' \
            "$order" >&2
        return 1
        ;;
    esac

    printf '%s\n\n%s\n' "$f1" "$f2"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment