Skip to content

Instantly share code, notes, and snippets.

@gartenfeld
Created February 20, 2016 00:17
Show Gist options
  • Save gartenfeld/0d1d76bfd6de6b48a95d to your computer and use it in GitHub Desktop.
Save gartenfeld/0d1d76bfd6de6b48a95d to your computer and use it in GitHub Desktop.
Generate a number of users and directories in HDFS
# --- Configuration -----------------------------------------------------------
# Accounts to create; each one also gets a /user/<name> home directory in HDFS.
USERNAMES=(
  "aada" "aaro" "aino" "aleksi" "anni" "eeli" "eemil"
  "eetu" "eevi" "eino" "elias" "ella" "elsa" "emil"
  "enni" "helmi" "joona" "kerttu" "lauri" "leevi" "lenni"
  "lilja" "lotta" "luka" "lumi" "matias" "mikael" "mila"
  "nella" "niilo" "niklas" "olivia" "onni" "oona" "peetu"
  "pihla" "siiri" "veera" "veeti" "venla" "vilho" "vilma"
)
# Upper bound of the random base that is handed to exp_scale.
RAND_BASE=8
# NOTE(review): presumably the upper bound of the random exponent used by
# exp_scale -- confirm that exp_scale actually reads it.
RAND_POWER=3
# Unix group every generated user is placed in.
GROUP_NAME="demo"
# Number of random bytes written into each dummy file.
DUMMY_FILE_SIZE=4096
# Local directory where dummy files are staged before upload to HDFS.
TMP_DUMMY_DIR="/tmp/dummy"
# Print the current time since the Unix epoch in milliseconds.
# Outputs: a decimal integer (13 digits for present-day dates) on stdout.
time_in_ms() {
  local now
  # %3N (nanoseconds truncated to three digits) is a GNU date extension.
  now=$(date +%s%3N)
  if [[ ! "$now" =~ ^[0-9]+$ ]]; then
    # A date without %N support echoes the directive literally, producing a
    # non-numeric string; fall back to whole seconds scaled to milliseconds.
    now="$(date +%s)000"
  fi
  printf '%s\n' "$now"
}
# Print a random 8-character lowercase hexadecimal string (4 random bytes).
# Outputs: the string followed by a newline on stdout.
suffix() {
  if command -v openssl >/dev/null 2>&1; then
    # 4 bytes -> 8 hex digits, matching the documented length.
    openssl rand -hex 4
  else
    # Same output shape without openssl: 4 bytes from /dev/urandom as hex.
    head -c 4 /dev/urandom | od -An -tx1 | tr -d ' \n'
    printf '\n'
  fi
}
# Build a randomized file or directory name of the form
# "<time_in_ms output>-<suffix output>" and print it on stdout.
rand_name() {
  local stamp tail
  stamp=$(time_in_ms)
  tail=$(suffix)
  printf '%s\n' "${stamp}-${tail}"
}
# Print a pseudo-random integer in the range [1, $1].
# Arguments: $1 - upper bound, a positive decimal integer. Bash's RANDOM only
#                 spans 0..32767, so results never exceed 32768.
# Outputs:   the integer on stdout.
# Returns:   1 (with a message on stderr) when the bound is missing or is not
#            a positive integer, instead of a raw division-by-zero error.
rand_num() {
  local max=${1-}
  if [[ ! "$max" =~ ^[1-9][0-9]*$ ]]; then
    printf 'rand_num: upper bound must be a positive integer, got "%s"\n' "$max" >&2
    return 1
  fi
  # $(( )) replaces the deprecated $[ ] arithmetic syntax.
  echo $(( (RANDOM % max) + 1 ))
}
# Print $1 raised to a random integer power.
# Globals:   RAND_POWER (read) - upper bound of the random exponent; falls
#            back to 3, the value that used to be hard-coded here.
# Arguments: $1 - integer base
# Outputs:   base ** exponent on stdout, exponent drawn from rand_num.
# Returns:   non-zero when rand_num fails to produce an exponent.
exp_scale() {
  local base=$1
  local exponent
  exponent=$(rand_num "${RAND_POWER:-3}") || return
  # Bash integer arithmetic covers the small values involved, so no external
  # bc process is needed.
  echo $(( base ** exponent ))
}
# Provision demo data: create the group and one account per entry in
# USERNAMES, give each account a /user/<name> directory in HDFS, and fill it
# with a random number of randomly named directories holding 1-4 files of
# DUMMY_FILE_SIZE random bytes each.

# Add group (skipped when it already exists so the script can be re-run)
getent group "$GROUP_NAME" >/dev/null || sudo groupadd "$GROUP_NAME"
# Local staging directory for the dummy files
sudo mkdir -p -- "$TMP_DUMMY_DIR"

for name in "${USERNAMES[@]}"; do
  # Create user unless the account is already present
  if ! id -u "$name" >/dev/null 2>&1; then
    sudo useradd -g "$GROUP_NAME" "$name"
  fi
  # Set the primary group; prints "usermod: no changes" for fresh accounts
  sudo usermod -g "$GROUP_NAME" "$name"

  user_dir="/user/$name"
  # Create user directory
  sudo -u hdfs hadoop fs -mkdir -p "$user_dir"
  # Make user the owner of user directory
  sudo -u hdfs hadoop fs -chown -R "$name:$GROUP_NAME" "$user_dir"

  # Directory count is <random base in 1..RAND_BASE> ^ <random exponent>
  number_of_dirs=$(exp_scale "$(rand_num "$RAND_BASE")")

  for ((i = 1; i <= number_of_dirs; i++)); do
    dummy_dir="$user_dir/$(rand_name)"
    sudo -u "$name" hadoop fs -mkdir -p "$dummy_dir"

    number_of_files=$(rand_num 4)
    for ((f = 1; f <= number_of_files; f++)); do
      # Generate tmp dummy file. The staging directory was created via sudo,
      # so write through "sudo tee": a plain ">" redirect would run as the
      # invoking user and be refused unless the script itself runs as root.
      dummy_file="$(rand_name).txt"
      dummy_local="$TMP_DUMMY_DIR/$dummy_file"
      head -c "$DUMMY_FILE_SIZE" /dev/urandom | sudo tee "$dummy_local" >/dev/null
      # Add file to HDFS
      sudo -u "$name" hadoop fs -put "$dummy_local" "$dummy_dir/$dummy_file"
      # Drop the local copy so the staging directory does not grow unbounded
      sudo rm -f -- "$dummy_local"
    done # Files
  done # Directories

  printf '%s dirs created for %s\n' "$number_of_dirs" "$name"
done # Users
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment