Skip to content

Instantly share code, notes, and snippets.

@lnaia
Created September 9, 2014 21:25
Show Gist options
  • Save lnaia/c275eb5ca77065ca2061 to your computer and use it in GitHub Desktop.
Save lnaia/c275eb5ca77065ca2061 to your computer and use it in GitHub Desktop.
Distribute tests that are spread unevenly across several files as equally as possible among N machines
# Example: ./file_path:number_of_tests_in_this_file
./file_path:50
./file_path:23
./file_path:7
#!/bin/bash
# Number of machines the test files are spread across.
NUMBER_OF_MACHINES=10
# Raw contents of dataset.txt, read from the current working directory.
DATASET=$(cat dataset.txt)
# Running test total per machine; slots are filled in by
# generate_machine_counters.
declare -a machine_counter=()
#######################################
# Reset the running test total of every machine to zero.
# Globals:
#   NUMBER_OF_MACHINES (read)  - how many machine slots to create
#   machine_counter (written)  - ends up with keys 1..NUMBER_OF_MACHINES,
#                                each set to 0
# Arguments:
#   None
#######################################
generate_machine_counters() {
  # Keep the loop counter local so a caller's own `i` is not clobbered.
  local i
  # Drop slots left over from a previous, larger machine count.
  machine_counter=()
  for ((i = 1; i <= NUMBER_OF_MACHINES; i++)); do
    machine_counter[i]=0
  done
}
#######################################
# Create one empty global array per machine: machine_1 .. machine_N.
# Globals:
#   NUMBER_OF_MACHINES (read)  - how many arrays to create
#   machine_<n> (written)      - reset to an empty array for each n
# Arguments:
#   None
#######################################
generate_machine_arrays() {
  local i
  for ((i = 1; i <= NUMBER_OF_MACHINES; i++)); do
    # `declare -a` inside a function would create a function-local array that
    # disappears on return, so assign at global scope instead. The eval is
    # safe: the evaluated text is built only from the loop-generated integer.
    eval "machine_${i}=()"
  done
}
#######################################
# Print the running test total of every machine, one line per machine.
# Globals:
#   machine_counter (read)
# Arguments:
#   None
# Outputs:
#   "machine <n> contains : <total> samples" on stdout for each slot
#######################################
show_machine_counter() {
  # Local loop variable: the caller's variables are left untouched.
  local machine
  for machine in "${!machine_counter[@]}"; do
    printf 'machine %s contains : %s samples\n' \
      "$machine" "${machine_counter[machine]}"
  done
}
#######################################
# Print, for every machine, the file paths assigned to it.
# Globals:
#   NUMBER_OF_MACHINES (read)
#   machine_<n> (read) - arrays of "path:test_count" entries
# Arguments:
#   None
# Outputs:
#   For each machine: a blank line, a "Files on machine_<n>" header, then
#   one path per line on stdout
#######################################
report_files_per_machine() {
  local i entry files_ref
  for ((i = 1; i <= NUMBER_OF_MACHINES; i++)); do
    # Indirect expansion reads the dynamically named array without eval.
    files_ref="machine_${i}[@]"
    echo
    echo "Files on machine_$i"
    for entry in "${!files_ref}"; do
      # Strip only the trailing ":count" so paths that themselves contain a
      # colon stay intact; printing quoted avoids glob expansion of the path.
      printf '%s\n' "${entry%:*}"
    done
  done
}
# Initialize dynamic variables
generate_machine_counters
generate_machine_arrays

# Without at least one machine slot there is nothing to distribute to.
if (( ${#machine_counter[@]} == 0 )); then
  printf 'No machines to distribute to (NUMBER_OF_MACHINES=%s)\n' \
    "$NUMBER_OF_MACHINES" >&2
  exit 1
fi

# A dataset entry is "<path>:<digits>"; the greedy first group keeps any
# colons that are part of the path itself.
entry_re='^(.*):([[:digit:]]+)$'

# Sum of all test counts seen; accumulated but not printed by this script.
total_numbers_sum=0
index=1

# Greedy distribution: the first NUMBER_OF_MACHINES entries go to machines
# 1..N in order; every later entry goes to the machine with the lowest
# running total.
while IFS= read -r line; do
  # Trim surrounding whitespace (also removes a trailing CR).
  line="${line#"${line%%[![:space:]]*}"}"
  line="${line%"${line##*[![:space:]]}"}"

  # Ignore blank lines and '#' comments such as the format example.
  if [[ -z "$line" || "$line" == '#'* ]]; then
    continue
  fi

  # Never distribute an entry that could not be parsed.
  if ! [[ "$line" =~ $entry_re ]]; then
    printf 'Skipping malformed dataset entry: %s\n' "$line" >&2
    continue
  fi
  file="${BASH_REMATCH[1]}"
  # Force base 10 so counts with leading zeros (e.g. "08") are not octal.
  number=$((10#${BASH_REMATCH[2]}))
  total_numbers_sum=$((total_numbers_sum + number))

  if (( index <= NUMBER_OF_MACHINES )); then
    # no need to search for lowest
    target_key=$index
  else
    # Pick the first machine holding the smallest running total.
    target_key=
    for key in "${!machine_counter[@]}"; do
      if [[ -z "$target_key" ]] \
        || (( machine_counter[key] < machine_counter[target_key] )); then
        target_key=$key
      fi
    done
  fi

  machine_counter[target_key]=$((machine_counter[target_key] + number))
  # \$line is expanded by eval itself, inside double quotes, so the entry's
  # content is appended as data and never parsed as shell code.
  eval "machine_${target_key}+=(\"\$line\")"

  # uncomment to show evolution of distribution
  #echo "******* index: $index *******"
  #show_machine_counter
  index=$((index + 1))
done <<< "$DATASET"

show_machine_counter
report_files_per_machine
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment