Skip to content

Instantly share code, notes, and snippets.

@deardooley
Last active April 25, 2019 08:39
Show Gist options
  • Save deardooley/712adf3b3fe3ca435d170173132bde9c to your computer and use it in GitHub Desktop.
Save deardooley/712adf3b3fe3ca435d170173132bde9c to your computer and use it in GitHub Desktop.
Shell script to generate a directory tree of randomized test data.
#!/usr/bin/env bash
# Generates a directory tree of randomized test data (see usage() for options).
# Reference:
# http://stackoverflow.com/questions/13400312/linux-create-random-directory-file-hierarchy
set -o errexit # abort as soon as any command fails
# Print the command-line help text on stdout.
# The defaults listed here mirror the fallback values assigned in parse_args.
usage() {
  cat <<EOF
${0##*/} [OPTIONS]
This script creates a directory tree of random depth, width and file size. All parameters are
configurable.
Options:
  -h, --help                     Show this help information.
  -o, --outdir <PATH>            Directory where the test data will be generated. It is
                                 removed first if it already exists. Default: ./rndpath
  -e, --file_extension <EXT>     File extension to apply to files. Default: .bin
  -u, --uniform_content          Reuse the same content for every file instead of generating
                                 random content per file. Currently not implemented: every
                                 file gets its own random content. Default: false
  -d, --depth <int>              Depth of the generated tree. Default: 8
  -w, --width <int>              Max width of first level directories. Default: 6
  -c, --min_dirs <int>           The min directories to be created in any folder. Default: 1
  -C, --max_dirs <int>           The max directories to be created in any folder. Default: 8
      --min_dirname_length <int> The min length of a directory name. Default: 3
      --max_dirname_length <int> The max length of a directory name. Default: 24
  -f, --min_files <int>          The min files to be created in any folder. Default: 1
  -F, --max_files <int>          The max files to be created in any folder. Default: 4
      --min_filename_length <int> The min length of a file name. Default: 3
      --max_filename_length <int> The max length of a file name. Default: 20
  -s, --min_size <int>           The min file size in bytes. Default: 2048
  -S, --max_size <int>           The max file size in bytes. Default: 2000000000
  -U, --utf8                     Build names from all printable ASCII characters (punctuation
                                 included) instead of only letters, digits and spaces.
                                 Default: off
  -i, --interactive              Interactive mode. Prompt for approval. Default: false
  -v, --verbose                  Verbose output
EOF
}
# Parse the command line into the script's global option variables.
#
# Arguments: the script's positional parameters ("$@").
# Globals written: AARR, aarrcount, ASCIIONLY, VERBOSE, RANDOM_CONTENT,
#   INTERACTIVE, FILEEXT, OUTDIR, DIRDEPTH, MAXFIRSTLEVELDIRS,
#   MINDIRCHILDREN, MAXDIRCHILDREN, MINDIRNAMELEN, MAXDIRNAMELEN,
#   MINFILECHILDREN, MAXFILECHILDREN, MINFILENAMELEN, MAXFILENAMELEN,
#   MINFILESIZE, MAXFILESIZE.
# Variables that are already set in the environment act as defaults and are
# only overridden by an explicit flag.
# Parsing stops silently at the first unrecognised argument, or at a valued
# option that has no (non-empty) value.
parse_args() {
  # Decimal ASCII codes (see man ascii) used for plain names:
  # space, 0-9, A-Z, a-z.
  AARR=( 32 {48..57} {65..90} {97..122} )
  aarrcount=${#AARR[@]}
  ASCIIONLY=1

  while (( $# > 0 )); do
    # Switches that take no value.
    case "$1" in
      -h|--help)
        usage >&2
        safe_exit
        ;;
      -v|--verbose)         VERBOSE=1;        shift; continue ;;
      -u|--uniform_content) RANDOM_CONTENT=1; shift; continue ;;
      -U|--utf8)            ASCIIONLY=0;      shift; continue ;;
      -i|--interactive)     INTERACTIVE=1;    shift; continue ;;
    esac

    # Every remaining option requires a non-empty value.
    [[ -n "${2-}" ]] || break
    case "$1" in
      -e|--file_extension)   FILEEXT=$2 ;;
      -o|--outdir)           OUTDIR=$2 ;;
      -d|--depth)            DIRDEPTH=$2 ;;
      -w|--width)            MAXFIRSTLEVELDIRS=$2 ;;
      -c|--min_dirs)         MINDIRCHILDREN=$2 ;;
      -C|--max_dirs)         MAXDIRCHILDREN=$2 ;;
      --min_dirname_length)  MINDIRNAMELEN=$2 ;;
      --max_dirname_length)  MAXDIRNAMELEN=$2 ;;
      -f|--min_files)        MINFILECHILDREN=$2 ;;
      -F|--max_files)        MAXFILECHILDREN=$2 ;;
      --min_filename_length) MINFILENAMELEN=$2 ;;
      --max_filename_length) MAXFILENAMELEN=$2 ;;
      # -s/-S as documented in the help text; these used to be matched as
      # -f/-F, which the min/max files options above already claim.
      -s|--min_size)         MINFILESIZE=$2 ;;
      -S|--max_size)         MAXFILESIZE=$2 ;;
      *) break ;;
    esac
    shift 2
  done

  # Fill in defaults for everything not supplied by a flag or the environment.
  FILEEXT=${FILEEXT:-".bin"}
  OUTDIR=${OUTDIR:-"./rndpath"}
  DIRDEPTH=${DIRDEPTH:-8}
  MAXFIRSTLEVELDIRS=${MAXFIRSTLEVELDIRS:-6}
  MINDIRCHILDREN=${MINDIRCHILDREN:-1}
  MAXDIRCHILDREN=${MAXDIRCHILDREN:-8}
  MINDIRNAMELEN=${MINDIRNAMELEN:-3}
  MAXDIRNAMELEN=${MAXDIRNAMELEN:-24}
  MINFILECHILDREN=${MINFILECHILDREN:-1}
  MAXFILECHILDREN=${MAXFILECHILDREN:-4}
  MINFILENAMELEN=${MINFILENAMELEN:-3}
  MAXFILENAMELEN=${MAXFILENAMELEN:-20}
  MINFILESIZE=${MINFILESIZE:-2048}
  MAXFILESIZE=${MAXFILESIZE:-2000000000}
}
# Print all arguments on one line, separated by single spaces.
# The text is emitted verbatim: no word splitting, no glob expansion and no
# interpretation of option-like arguments such as -n or -e.
out() {
  local IFS=' '
  printf '%s\n' "$*"
}
# Report the given message on stderr (via out), then terminate with status 1.
die() {
  out "$@" >&2
  exit 1
}
# Report the given message on stderr (via out) without terminating.
err() {
  out "$@" 1>&2
}
# Exit for the normal end of the script: any INT, TERM and EXIT handlers are
# reset to their defaults first, so none of them runs on the way out.
safe_exit() {
  local sig
  for sig in INT TERM EXIT; do
    trap - "$sig"
  done
  exit
}
# Print a random directory name on stdout (no trailing newline).
#
# Globals read: MINDIRNAMELEN, MAXDIRNAMELEN, ASCIIONLY, AARR, aarrcount.
# The length is drawn once and lies in [MINDIRNAMELEN, MAXDIRNAMELEN]; when
# the maximum is below the minimum, the minimum is used.
#  - ASCIIONLY == 1: characters are picked from the code points listed in AARR.
#  - otherwise: printable ASCII read from /dev/urandom, minus the characters
#    [ $ > < / ~ : ` \ ], with every double quote backslash-escaped (so the
#    printed text can be longer than the drawn length).
get_rand_dirname() {
  local span len i
  span=$(( MAXDIRNAMELEN - MINDIRNAMELEN + 1 ))
  if (( span < 1 )); then span=1; fi
  # Draw once: putting RANDOM in the loop test would re-roll the bound on
  # every iteration.
  len=$(( MINDIRNAMELEN + RANDOM % span ))

  if [[ "$ASCIIONLY" == "1" ]]; then
    for (( i = 0; i < len; i++ )); do
      # Inner printf renders the code point as 3-digit octal; outer printf
      # turns the resulting \NNN escape into the character itself.
      printf "\\$(printf '%03o' "${AARR[RANDOM % aarrcount]}")"
    done
  else
    # LC_ALL=C makes tr treat the random input as plain bytes.
    LC_ALL=C tr -dc '[ -~]' < /dev/urandom \
      | LC_ALL=C tr -d '[$></~:`\\]' \
      | head -c "$len" \
      | sed 's/\(["]\)/\\\1/g'
  fi
}
# Print a random file name, followed by FILEEXT, on stdout (no trailing
# newline).
#
# Globals read: MINFILENAMELEN, MAXFILENAMELEN, ASCIIONLY, AARR, aarrcount,
#   FILEEXT.
# The base-name length is drawn once and lies in
# [MINFILENAMELEN, MAXFILENAMELEN]; when the maximum is below the minimum,
# the minimum is used.
#  - ASCIIONLY == 1: characters are picked from the code points listed in AARR.
#  - otherwise: printable ASCII read from /dev/urandom, minus the characters
#    [ $ > < / ~ : ` \ ]. Double quotes are left unescaped here.
get_rand_filename() {
  local span len i
  span=$(( MAXFILENAMELEN - MINFILENAMELEN + 1 ))
  if (( span < 1 )); then span=1; fi
  # Draw once: putting RANDOM in the loop test would re-roll the bound on
  # every iteration.
  len=$(( MINFILENAMELEN + RANDOM % span ))

  if [[ "$ASCIIONLY" == "1" ]]; then
    for (( i = 0; i < len; i++ )); do
      # Inner printf renders the code point as 3-digit octal; outer printf
      # turns the resulting \NNN escape into the character itself.
      printf "\\$(printf '%03o' "${AARR[RANDOM % aarrcount]}")"
    done
  else
    # LC_ALL=C makes tr treat the random input as plain bytes.
    LC_ALL=C tr -dc '[ -~]' < /dev/urandom \
      | LC_ALL=C tr -d '[$></~:`\\]' \
      | head -c "$len"
  fi
  # Quoted so an extension containing spaces is kept intact.
  printf '%s' "$FILEEXT"
}
# Print a random integer in [lo, hi]; prints lo when hi < lo.
# Arguments: $1 - lower bound, $2 - upper bound.
# NOTE: RANDOM never exceeds 32767, so ranges wider than that are only
# covered up to lo + 32767.
_rand_between() {
  local lo=$1 hi=$2 span
  span=$(( hi - lo + 1 ))
  if (( span < 1 )); then span=1; fi
  printf '%d\n' "$(( lo + RANDOM % span ))"
}

# Entry point: parse the options, (re)create OUTDIR, build the random
# directory tree, drop random files into every directory and print a summary.
#
# Arguments: the script's command-line arguments.
# Globals read: everything set by parse_args.
# Returns 0 on success, also when VERBOSE is off.
main() {
  local utf8_flag answer num_first q_outdir
  local ifl ird idc ifc
  local fldir flchildren dirchildren level multiple nchildren cdir dircmd
  local dir nfiles cfile size

  parse_args "$@"
  echo "Creating random tree at: $OUTDIR"

  if [[ "$VERBOSE" == "1" ]]; then
    # Echo the effective settings in command-line form.
    utf8_flag=0
    if [[ "$ASCIIONLY" != "1" ]]; then utf8_flag=1; fi
    printf '%s \\\n' \
      "${0##*/}" \
      "--width=$MAXFIRSTLEVELDIRS" \
      "--utf8=$utf8_flag" \
      "--interactive=$INTERACTIVE" \
      "--verbose=$VERBOSE" \
      "--file_extension=$FILEEXT" \
      "--outdir=$OUTDIR" \
      "--depth=$DIRDEPTH" \
      "--min_dirs=$MINDIRCHILDREN" \
      "--max_dirs=$MAXDIRCHILDREN" \
      "--min_dirname_length=$MINDIRNAMELEN" \
      "--max_dirname_length=$MAXDIRNAMELEN" \
      "--min_files=$MINFILECHILDREN" \
      "--max_files=$MAXFILECHILDREN" \
      "--min_filename_length=$MINFILENAMELEN" \
      "--max_filename_length=$MAXFILENAMELEN" \
      "--min_size=$MINFILESIZE"
    printf '%s \n' "--max_size=$MAXFILESIZE"
  fi

  if (( INTERACTIVE )); then
    read -r -p "Proceed (y/n)? " answer
    if [[ "$answer" != "y" ]]; then
      die "Aborting at user request"
    fi
  fi

  # Start from an empty output directory.
  if [[ -d "$OUTDIR" ]]; then
    if [[ "$VERBOSE" == "1" ]]; then
      echo "Removing old outdir $OUTDIR"
    fi
    # ${OUTDIR:?} aborts instead of running rm -rf on an empty path.
    rm -rf -- "${OUTDIR:?}"
  fi
  mkdir -p -- "$OUTDIR"

  num_first=0
  if (( MAXFIRSTLEVELDIRS > 0 )); then
    num_first=$(( 1 + RANDOM % MAXFIRSTLEVELDIRS ))
  fi

  # Create the directories. For each first-level directory a single
  # "mkdir -p" command is assembled whose path contains one brace group per
  # tree level, e.g. out/"top"/{"a","b"}/{"c","d"}; brace expansion then
  # yields every combination. Brace expansion happens before variable
  # expansion, so the command text has to go through eval.
  # OUTDIR is shell-quoted for that; the generated names are wrapped in
  # double quotes and are expected to be safe inside them (get_rand_dirname
  # strips $, backtick and backslash and escapes double quotes).
  printf -v q_outdir '%q' "$OUTDIR"
  for (( ifl = 0; ifl < num_first; ifl++ )); do
    fldir=$(get_rand_dirname)
    flchildren=""
    for (( ird = 0; ird < DIRDEPTH - 1; ird++ )); do
      dirchildren=""
      multiple=0
      # Drawn once per level so the count stays within [min, max].
      nchildren=$(_rand_between "$MINDIRCHILDREN" "$MAXDIRCHILDREN")
      for (( idc = 0; idc < nchildren; idc++ )); do
        cdir=$(get_rand_dirname)
        if [[ -z "$dirchildren" ]]; then
          dirchildren="\"$cdir\""
        else
          dirchildren="$dirchildren,\"$cdir\""
          multiple=1
        fi
      done
      # Braces only around two or more names: {"x"} would not be expanded.
      if (( multiple )); then
        level="{$dirchildren}"
      else
        level="$dirchildren"
      fi
      if [[ -z "$flchildren" ]]; then
        flchildren="$level"
      else
        flchildren="$flchildren/$level"
      fi
    done
    dircmd="mkdir -p $q_outdir/\"$fldir\"/$flchildren"
    eval "$dircmd"
    if [[ "$VERBOSE" == "1" ]]; then
      echo "$dircmd"
    fi
  done

  # Visit every directory (OUTDIR included) and create random files in it.
  # NUL-delimited paths and read -r keep unusual characters intact.
  while IFS= read -r -d '' dir; do
    if [[ "$VERBOSE" == "1" ]]; then
      printf '%s\n' "$dir"
    fi
    nfiles=$(_rand_between "$MINFILECHILDREN" "$MAXFILECHILDREN")
    for (( ifc = 0; ifc < nfiles; ifc++ )); do
      cfile=$(get_rand_filename)
      if [[ "$VERBOSE" == "1" ]]; then
        printf '> %s\n' "$dir/$cfile"
      fi
      size=$(_rand_between "$MINFILESIZE" "$MAXFILESIZE")
      head -c "$size" /dev/urandom > "$dir/$cfile"
    done
  done < <(find "$OUTDIR" -type d -print0)

  echo
  if command -v tree >/dev/null 2>&1; then
    tree -a --dirsfirst -s "$OUTDIR"
  else
    echo "tree not found; skipping tree listing" >&2
  fi
  # A plain if (rather than "[ ... ] && echo") keeps the return status 0
  # when VERBOSE is off, so errexit does not fail an otherwise good run.
  if [[ "$VERBOSE" == "1" ]]; then
    echo "total bytes: $(du -bs -- "$OUTDIR")"
  fi
}
# set -x
# Forward the arguments verbatim; quoting keeps values that contain spaces
# (e.g. an --outdir path) as single arguments.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment