Hadoop benchmark 3. run testdfsio
#!/bin/bash
# TestDFSIO will be run with a total file size of 1TB, using different dfs.block.size values.
# Usage: TestDFSIO [genericOptions] -read | -write | -append | -clean [-nrFiles N] [-fileSize Size[B|KB|MB|GB|TB]] [-resFile resultFileName] [-bufferSize Bytes] [-rootDir]
#
# The test is designed around two variables:
# 1) file_sizes_mb: file size variation, 1GB file x 1,000 = 1TB and 100MB file x 10,000 = 1TB;
#    this tests the impact of large vs. small files on HDFS.
# 2) dfs.block.size (MB) variation: 512, 256, 128, 50, 10;
#    this tests the impact of different block sizes.
#
# This gives a broad spectrum of DFSIO performance compared to a set of runs with a single parameter value.
# The expected result is that all tests show smooth read and write throughput across both axes.
#
# Commands to run with nohup:
# nohup bash ./run_testdfsio.sh > testdfsio.out 2>&1 &
# sudo -u hdfs nohup bash /tmp/run_testdfsio.sh > /tmp/testdfsio.out 2>&1 &
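# Worked example of the run matrix the loops below produce (a sketch, derived from the
# values defined in this script):
#   file_size_mb=1000 -> num_files = 1000000 / 1000 = 1000   (1,000  x 1GB   = 1TB)
#   file_size_mb=100  -> num_files = 1000000 / 100  = 10000  (10,000 x 100MB = 1TB)
# Each file-size case is combined with the five block sizes, i.e. 2 x 5 = 10 write/read runs.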
# YARN (MRv2) benchmark jar:
#hadoop_jar=/opt/cloudera/parcels/CDH/lib/hadoop-0.20-mapreduce/hadoop-test.jar
# MRv1 benchmark jar:
hadoop_jar=/opt/cloudera/parcels/CDH/lib/hadoop-0.20-mapreduce/hadoop-test-mr1.jar
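# Optional sanity check (not in the original script): fail fast if the chosen benchmark
# jar is missing, e.g. on a non-CDH install the parcel path above will differ.
if [ ! -f "$hadoop_jar" ]; then
    echo "Benchmark jar not found: $hadoop_jar" >&2
    exit 1
fi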
total_size_mb=1000000              # 1TB
file_sizes_mb="1000 100"           # 1GB and 100MB
block_sizes_mb="512 256 128 50 10"
test_data_dir=/tmp/benchmarks/TestDFSIO
summary_file=all_results.tsv

# Remove leftovers from any previous run.
rm -f $summary_file
hadoop fs -rm -R -skipTrash $test_data_dir
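# Optional header row (an addition, not produced by the original script) so the columns
# in all_results.tsv are self-describing; the order matches the awk printf further below.
printf "result_file\tnum_files\ttotal_mb\twrite_throughput_mb_s\twrite_io_rate_avg\twrite_io_rate_std\twrite_exec_sec\tread_throughput_mb_s\tread_io_rate_avg\tread_io_rate_std\tread_exec_sec\n" >> $summary_file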
for file_size_mb in $file_sizes_mb; do
    for block_size_mb in $block_sizes_mb; do
        # num_files = total_size_mb / file_size_mb, i.e. 1,000 x 1GB files or 10,000 x 100MB files.
        num_files=$(( total_size_mb / file_size_mb ))
        block_size_bytes=$(( block_size_mb * 1024 * 1024 ))
        result_file="TestDFSIO_results_${num_files}_${file_size_mb}MB_blocksize_${block_size_mb}MB.log"
        echo "# $result_file"
        hadoop jar $hadoop_jar TestDFSIO \
            -Dtest.build.data=$test_data_dir \
            -Ddfs.block.size=${block_size_bytes} \
            -write \
            -nrFiles $num_files \
            -fileSize ${file_size_mb}MB \
            -resFile $result_file
        hadoop jar $hadoop_jar TestDFSIO \
            -Dtest.build.data=$test_data_dir \
            -Ddfs.block.size=${block_size_bytes} \
            -read \
            -nrFiles $num_files \
            -fileSize ${file_size_mb}MB \
            -resFile $result_file
        hadoop jar $hadoop_jar TestDFSIO \
            -Dtest.build.data=$test_data_dir \
            -clean
        # Parse the write and read metrics out of the TestDFSIO result file and append
        # one tab-separated line per run to the summary file.
        awk -v file_name=$result_file '
            /TestDFSIO/              {phase=$NF}
            /Number of files/        {num_files=$NF}
            /Total MBytes processed/ {total_mb=$NF}
            /Throughput mb.sec/      {throughput[phase]=$NF}
            /Average IO rate mb.sec/ {iorateavg[phase]=$NF}
            /IO rate std deviation/  {ioratestd[phase]=$NF}
            /Test exec time sec/     {exectime[phase]=$NF}
            END {printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
                file_name, num_files, total_mb,
                throughput["write"], iorateavg["write"], ioratestd["write"], exectime["write"],
                throughput["read"], iorateavg["read"], ioratestd["read"], exectime["read"]
            }' $result_file >> $summary_file
    done
done
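
# Optional (not part of the original script): render the collected summary as an
# aligned table for quick inspection once all runs have finished.
column -t -s$'\t' $summary_file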