Last active
April 25, 2019 08:39
-
-
Save deardooley/712adf3b3fe3ca435d170173132bde9c to your computer and use it in GitHub Desktop.
Shell script to generate a directory tree of randomized test data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# http://stackoverflow.com/questions/13400312/linux-create-random-directory-file-hierarchy

# Abort as soon as any command fails.
# NOTE: pipefail must stay off. The "/dev/urandom | ... | head -c N" pipelines
# in this script finish with the producer being killed by SIGPIPE, which
# pipefail would report as a failure and errexit would turn into an abort.
set -o errexit
#######################################
# Print the command-line help text.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout (callers redirect it as needed)
#######################################
usage() {
  # The defaults listed here mirror the fallbacks assigned in parse_args.
  cat <<EOF
$(basename -- "$0") [OPTIONS]

This script creates a directory tree of random depth, width and file size.
All parameters are configurable.

Options:
  -h, --help                    Show this help information.
  -o, --outdir <PATH>           Directory where the test data will be generated. Default: ./rndpath
  -e, --file_extension <EXT>    File extension to apply to files. Default: .bin
  -u, --uniform_content         Intended to make every file share the same content.
                                Currently accepted but not acted on: every file
                                gets its own random content. Default: false
  -d, --depth <int>             Depth of the generated tree. Default: 8
  -w, --width <int>             Max number of first level directories. Default: 6
  -c, --min_dirs <int>          The min directories to be created in any folder. Default: 1
  -C, --max_dirs <int>          The max directories to be created in any folder. Default: 8
      --min_dirname_length <int>   The min length of a directory name. Default: 3
      --max_dirname_length <int>   The max length of a directory name. Default: 24
  -f, --min_files <int>         The min files to be created in any folder. Default: 1
  -F, --max_files <int>         The max files to be created in any folder. Default: 4
      --min_filename_length <int>  The min length of a file name. Default: 3
      --max_filename_length <int>  The max length of a file name. Default: 20
  -s, --min_size <int>          The min file size in bytes. Default: 2048
  -S, --max_size <int>          The max file size in bytes. Default: 2000000000
  -U, --utf8                    Build names from the wider printable-ASCII set
                                (punctuation included) instead of only letters,
                                digits and spaces. Default: off
  -i, --interactive             Interactive mode. Prompt for approval. Default: false
  -v, --verbose                 Verbose output
EOF
}
#######################################
# Parse command-line flags into the script's global settings and fill in
# defaults for anything not supplied via flag or environment.
# Globals:   writes AARR, aarrcount, ASCIIONLY, VERBOSE, RANDOM_CONTENT,
#            INTERACTIVE, FILEEXT, OUTDIR, DIRDEPTH, MAXFIRSTLEVELDIRS,
#            MIN/MAXDIRCHILDREN, MIN/MAXDIRNAMELEN, MIN/MAXFILECHILDREN,
#            MIN/MAXFILENAMELEN, MIN/MAXFILESIZE
# Arguments: the script's command-line arguments
# Outputs:   a warning on stderr when parsing stops early
# Returns:   0; -h/--help prints usage and leaves through safe_exit
#######################################
parse_args() {
  # Decimal ASCII codes (see man ascii): space, 0-9, A-Z, a-z.
  AARR=( 32 {48..57} {65..90} {97..122} )
  # Number of entries in AARR.
  aarrcount=${#AARR[@]}
  ASCIIONLY=1

  local target

  # If something is exposed as an environment variable, set/overwrite it
  # here. Otherwise, set/overwrite the internal variable instead.
  while (( $# > 0 )); do
    # Flags that take no value.
    case "$1" in
      -h|--help)
        usage >&2
        safe_exit
        ;;
      -v|--verbose)
        VERBOSE=1
        shift
        continue
        ;;
      -u|--uniform_content)
        RANDOM_CONTENT=1
        shift
        continue
        ;;
      -U|--utf8)
        ASCIIONLY=0
        shift
        continue
        ;;
      -i|--interactive)
        INTERACTIVE=1
        shift
        continue
        ;;
    esac

    # Options that take a value: map the flag to the global it sets.
    case "$1" in
      -e|--file_extension)   target=FILEEXT ;;
      -o|--outdir)           target=OUTDIR ;;
      -d|--depth)            target=DIRDEPTH ;;
      -w|--width)            target=MAXFIRSTLEVELDIRS ;;
      -c|--min_dirs)         target=MINDIRCHILDREN ;;
      -C|--max_dirs)         target=MAXDIRCHILDREN ;;
      --min_dirname_length)  target=MINDIRNAMELEN ;;
      --max_dirname_length)  target=MAXDIRNAMELEN ;;
      -f|--min_files)        target=MINFILECHILDREN ;;
      -F|--max_files)        target=MAXFILECHILDREN ;;
      --min_filename_length) target=MINFILENAMELEN ;;
      --max_filename_length) target=MAXFILENAMELEN ;;
      # -s/-S are the documented short forms for the size limits; the size
      # branches previously re-tested -f/-F and could never match.
      -s|--min_size)         target=MINFILESIZE ;;
      -S|--max_size)         target=MAXFILESIZE ;;
      *)
        # Parsing has always stopped at the first unknown argument; keep that
        # behaviour but say so instead of dropping the rest silently.
        printf '%s: unrecognized argument "%s"; ignoring it and everything after it\n' \
          "$(basename -- "$0")" "$1" >&2
        break
        ;;
    esac

    if [[ -z "${2:-}" ]]; then
      printf '%s: option "%s" requires a value; ignoring it and everything after it\n' \
        "$(basename -- "$0")" "$1" >&2
      break
    fi

    printf -v "$target" '%s' "$2"
    shift 2
  done

  # Set internal option vars from the environment and arg flags. All internal
  # vars should be declared here, with sane defaults if applicable.
  VERBOSE=${VERBOSE:-0}
  INTERACTIVE=${INTERACTIVE:-0}
  RANDOM_CONTENT=${RANDOM_CONTENT:-0}
  FILEEXT=${FILEEXT:-".bin"}
  OUTDIR=${OUTDIR:-"./rndpath"}
  DIRDEPTH=${DIRDEPTH:-8}
  MAXFIRSTLEVELDIRS=${MAXFIRSTLEVELDIRS:-6}
  MINDIRCHILDREN=${MINDIRCHILDREN:-1}
  MAXDIRCHILDREN=${MAXDIRCHILDREN:-8}
  MINDIRNAMELEN=${MINDIRNAMELEN:-3}
  MAXDIRNAMELEN=${MAXDIRNAMELEN:-24}
  MINFILECHILDREN=${MINFILECHILDREN:-1}
  MAXFILECHILDREN=${MAXFILECHILDREN:-4}
  MINFILENAMELEN=${MINFILENAMELEN:-3}
  MAXFILENAMELEN=${MAXFILENAMELEN:-20}
  MINFILESIZE=${MINFILESIZE:-2048}
  MAXFILESIZE=${MAXFILESIZE:-2000000000}
}
#######################################
# Print all arguments on one line, separated by single spaces.
# Arguments: message words
# Outputs:   the message on stdout
#######################################
out() {
  # printf with a quoted "$*" keeps inner whitespace, does not glob-expand the
  # message and does not treat a leading -n/-e as an echo option.
  printf '%s\n' "$*"
}
#######################################
# Print a message on stderr and terminate the script with status 1.
# Arguments: message words
# Outputs:   the message on stderr
#######################################
die() {
  # Written with printf directly so the message is emitted verbatim
  # (no whitespace collapsing, glob expansion or option parsing).
  printf '%s\n' "$*" >&2
  exit 1
}
#######################################
# Print a message on stderr without terminating the script.
# Arguments: message words
# Outputs:   the message on stderr
#######################################
err() {
  # Written with printf directly so the message is emitted verbatim
  # (no whitespace collapsing, glob expansion or option parsing).
  printf '%s\n' "$*" >&2
}
# A non-destructive exit for when the script exits naturally.
# Clears any INT/TERM/EXIT traps first so no handler runs on the way out,
# then leaves with a success status.
safe_exit() {
  trap - INT TERM EXIT
  exit 0
}
#######################################
# Emit one random directory name.
# Globals:   reads MINDIRNAMELEN, MAXDIRNAMELEN, ASCIIONLY, AARR, aarrcount
# Arguments: none
# Outputs:   the name on stdout, without a trailing newline. When ASCIIONLY
#            is not 1, double quotes are backslash-escaped.
#######################################
get_rand_dirname() {
  local min_len=$(( MINDIRNAMELEN ))
  local max_len=$(( MAXDIRNAMELEN ))
  # Choose the length once, inside [min_len, max_len]. Rolling RANDOM in the
  # loop condition re-drew the bound on every iteration, and "min + RANDOM %
  # max" could exceed the maximum (or divide by zero for a maximum of 0).
  local span=$(( max_len - min_len + 1 ))
  (( span >= 1 )) || span=1
  local name_len=$(( min_len + RANDOM % span ))
  local i

  if [[ "$ASCIIONLY" == "1" ]]; then
    for (( i = 0; i < name_len; i++ )); do
      # Turn a decimal code from AARR into its character via an octal escape.
      printf "\\$(printf '%03o' "${AARR[RANDOM % aarrcount]}")"
    done
  else
    # Keep printable ASCII, drop the characters listed in the second tr, and
    # escape double quotes in what remains. LC_ALL=C makes tr work on raw
    # bytes regardless of the user's locale.
    LC_ALL=C tr -dc '[ -~]' < /dev/urandom \
      | LC_ALL=C tr -d '[$></~:`\\]' \
      | head -c "$name_len" \
      | sed 's/\(["]\)/\\\1/g'
  fi
}
#######################################
# Emit one random file name followed by the configured extension.
# Globals:   reads MINFILENAMELEN, MAXFILENAMELEN, ASCIIONLY, AARR,
#            aarrcount, FILEEXT
# Arguments: none
# Outputs:   the name on stdout, without a trailing newline
#######################################
get_rand_filename() {
  local min_len=$(( MINFILENAMELEN ))
  local max_len=$(( MAXFILENAMELEN ))
  # Choose the length once, inside [min_len, max_len]. Rolling RANDOM in the
  # loop condition re-drew the bound on every iteration, and "min + RANDOM %
  # max" could exceed the maximum (or divide by zero for a maximum of 0).
  local span=$(( max_len - min_len + 1 ))
  (( span >= 1 )) || span=1
  local name_len=$(( min_len + RANDOM % span ))
  local i

  if [[ "$ASCIIONLY" == "1" ]]; then
    for (( i = 0; i < name_len; i++ )); do
      # Turn a decimal code from AARR into its character via an octal escape.
      printf "\\$(printf '%03o' "${AARR[RANDOM % aarrcount]}")"
    done
  else
    # No need to escape double quotes for a filename. LC_ALL=C makes tr work
    # on raw bytes regardless of the user's locale.
    LC_ALL=C tr -dc '[ -~]' < /dev/urandom \
      | LC_ALL=C tr -d '[$></~:`\\]' \
      | head -c "$name_len"
  fi
  printf '%s' "$FILEEXT"
}
# Store a pseudo-random integer from [$1, $2] in the variable named by $3.
# Yields $1 when $2 is smaller than $1. Assigns through printf -v rather than
# printing, so no subshell is involved.
_rand_between() {
  local _lo=$1 _hi=$2 _span
  _span=$(( _hi - _lo + 1 ))
  (( _span >= 1 )) || _span=1
  printf -v "$3" '%d' "$(( _lo + RANDOM % _span ))"
}

#######################################
# Entry point: parse the options, (re)create the output directory, build a
# random directory tree in it and drop random files into every directory.
# Globals:   reads the settings established by parse_args
# Arguments: the script's command-line arguments
# Outputs:   progress and a final listing on stdout; diagnostics on stderr
# Returns:   0 on success
#######################################
main() {
  parse_args "$@"

  local verbose=0
  if [[ "$VERBOSE" == "1" ]]; then
    verbose=1
  fi

  echo "Creating random tree at: $OUTDIR"
  if (( verbose )); then
    # Echo the effective settings. --utf8 is the inverse of ASCIIONLY.
    local -a summary=(
      "--utf8=$(( ASCIIONLY ? 0 : 1 ))"
      "--interactive=$INTERACTIVE"
      "--verbose=$VERBOSE"
      "--file_extension=$FILEEXT"
      "--outdir=$OUTDIR"
      "--depth=$DIRDEPTH"
      "--width=$MAXFIRSTLEVELDIRS"
      "--min_dirs=$MINDIRCHILDREN"
      "--max_dirs=$MAXDIRCHILDREN"
      "--min_dirname_length=$MINDIRNAMELEN"
      "--max_dirname_length=$MAXDIRNAMELEN"
      "--min_files=$MINFILECHILDREN"
      "--max_files=$MAXFILECHILDREN"
      "--min_filename_length=$MINFILENAMELEN"
      "--max_filename_length=$MAXFILENAMELEN"
      "--min_size=$MINFILESIZE"
      "--max_size=$MAXFILESIZE"
    )
    printf '%s' "$(basename -- "$0")"
    printf ' \\\n  %s' "${summary[@]}"
    printf '\n'
  fi

  if (( INTERACTIVE )); then
    local answer=""
    # A failed read (e.g. EOF on stdin) counts as "no" instead of tripping
    # errexit without any message.
    read -r -p "Proceed (y/n)? " answer || answer=""
    if [[ "$answer" != "y" ]]; then
      die "Aborting at user request"
    fi
  fi

  if [[ -d "$OUTDIR" ]]; then
    if (( verbose )); then
      echo "Removing old outdir $OUTDIR"
    fi
    # ${OUTDIR:?} aborts rather than running rm -rf on an empty path.
    rm -rf -- "${OUTDIR:?}"
  fi
  mkdir -p -- "$OUTDIR"

  local num_first_level=0
  if (( MAXFIRSTLEVELDIRS > 0 )); then
    num_first_level=$(( 1 + RANDOM % MAXFIRSTLEVELDIRS ))
  fi

  # Create directories. Each first-level directory gets one mkdir -p command
  # whose path uses brace expansion to fan out at every level, e.g.
  #   mkdir -p -- out/"top"/{"a","b"}/"c"/{"d","e","f"}
  # The command has to go through eval for the braces to expand. OUTDIR is
  # shell-quoted with %q so spaces or metacharacters in it survive the eval;
  # the generated names are placed inside double quotes.
  local quoted_outdir
  printf -v quoted_outdir '%q' "$OUTDIR"

  local ifl ird idc num_children
  local top_name child_name level_names level_expr subtree dir_cmd
  for (( ifl = 0; ifl < num_first_level; ifl++ )); do
    top_name="$(get_rand_dirname)"
    subtree=""
    for (( ird = 0; ird < DIRDEPTH - 1; ird++ )); do
      level_names=""
      # Pick the child count once per level, capped at MAXDIRCHILDREN.
      _rand_between "$MINDIRCHILDREN" "$MAXDIRCHILDREN" num_children
      for (( idc = 0; idc < num_children; idc++ )); do
        child_name="$(get_rand_dirname)"
        if [[ -z "$level_names" ]]; then
          level_names="\"$child_name\""
        else
          level_names="$level_names,\"$child_name\""
        fi
      done
      # Braces only expand with at least two alternatives; a single name is
      # used as a plain path component.
      if (( num_children > 1 )); then
        level_expr="{$level_names}"
      else
        level_expr="$level_names"
      fi
      if [[ -z "$subtree" ]]; then
        subtree="$level_expr"
      else
        subtree="$subtree/$level_expr"
      fi
    done
    dir_cmd="mkdir -p -- $quoted_outdir/\"$top_name\"/$subtree"
    eval "$dir_cmd"
    if (( verbose )); then
      echo "$dir_cmd"
    fi
  done

  # Now loop through all directories and create random files inside each.
  local dir ifc num_files file_name file_size
  while IFS= read -r dir; do
    if (( verbose )); then
      echo "$dir"
    fi
    _rand_between "$MINFILECHILDREN" "$MAXFILECHILDREN" num_files
    for (( ifc = 0; ifc < num_files; ifc++ )); do
      file_name="$(get_rand_filename)"
      if (( verbose )); then
        printf '> %s/%s\n' "$dir" "$file_name"
      fi
      # Size stays within [MINFILESIZE, MAXFILESIZE].
      # NOTE(review): RANDOM is only 15 bits wide (0..32767), so sizes never
      # exceed MINFILESIZE + 32767 no matter how large MAXFILESIZE is. That is
      # left as-is on purpose: honouring a multi-gigabyte maximum for every
      # file of a large tree could fill the disk. Confirm the intended default
      # maximum before widening the random source.
      _rand_between "$MINFILESIZE" "$MAXFILESIZE" file_size
      head -c "$file_size" /dev/urandom > "$dir/$file_name"
    done
  done < <(find "$OUTDIR" -type d)

  echo
  if command -v tree >/dev/null 2>&1; then
    tree -a --dirsfirst -s "$OUTDIR"
  else
    echo "tree is not installed; skipping the directory listing" >&2
  fi

  # A plain if (rather than "[ ... ] && echo") keeps main's exit status at 0
  # when verbose output is off; under errexit the old form made the whole
  # script exit with status 1.
  if (( verbose )); then
    echo "total bytes: $(du -bs -- "$OUTDIR")"
  fi
}
# set -x
# Quote "$@" so arguments containing spaces or glob characters reach main
# unchanged (e.g. --outdir "my data").
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment