To run the benchmarks, run:

```bash
./run-benchmarks.sh
```

See the `./run-benchmarks.sh` script for the parameters used for the benchmark runs.
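The benchmark itself is implemented in `benchmark.sh` (called from `run-benchmarks.sh`). It generates random bounding boxes within a fixed EPSG:28992 extent, fires WCS `GetCoverage` requests against the given base URL in parallel, and appends the response time of every successful (`image/tiff`) request to a CSV file per raster size: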
```bash
#!/usr/bin/env bash
set -euo pipefail

RASTER_SIZES="${1}"
OUTPUT_DIR="${2}"
COVERAGES="${3}"
BASE_URL="${4}"
NR_OF_REQUEST="${5:-50}"
NR_OF_PARALLEL_REQUEST="${6:-2}"

BBOXBOUND_MINX=140100
BBOXBOUND_MINY=450100
BBOXBOUND_MAXX=217900
BBOXBOUND_MAXY=497900

mkdir -p "$OUTPUT_DIR"

function get_output_filename() {
    OUTPUT_FILE="$1"
    output_filename="${OUTPUT_FILE%.*}"
    output_extension="${OUTPUT_FILE##*.}"
    i=0
    while true; do
        if [[ $i -eq 0 ]]; then
            test_filename=$OUTPUT_FILE
        else
            test_filename="${output_filename}_${i}.${output_extension}"
        fi
        i=$((i + 1))
        if [[ -f $test_filename ]]; then
            continue
        else
            break
        fi
    done
    echo "$test_filename"
}

function gen_random_bbox() {
    minx=$(shuf -i $BBOXBOUND_MINX-$BBOXBOUND_MAXX -n1)
    maxx=$((minx + MAX_SIZE))
    miny=$(shuf -i $BBOXBOUND_MINY-$BBOXBOUND_MAXY -n1)
    maxy=$((miny + MAX_SIZE))
    echo "${minx},${miny},${maxx},${maxy}"
}

function get_cov_request() {
    cov=$1
    bbox=$2
    min_x=$(echo "$bbox" | awk -F, '{print $1}')
    min_y=$(echo "$bbox" | awk -F, '{print $2}')
    max_x=$(echo "$bbox" | awk -F, '{print $3}')
    max_y=$(echo "$bbox" | awk -F, '{print $4}')
    width=$(((max_x - min_x) * 2))
    height=$(((max_y - min_y) * 2))
    url="${BASE_URL}?service=WCS&version=1.0.0&request=GetCoverage&coverage=${cov}&crs=EPSG:28992&response_crs=EPSG:28992&bbox=${min_x},${min_y},${max_x},${max_y}&width=${width}&height=${height}&format=image/tiff"
    echo "$url"
    start_time=$(date +%s.%3N)
    content_type=$(curl -w '%{content_type}' --max-time 60 -s "$url" -o /dev/null)
    end_time=$(date +%s.%3N)
    elapsed=$(echo "scale=3; $end_time - $start_time" | bc)
    if [[ $content_type == "image/tiff" ]]; then
        echo "${cov},${elapsed}" >> "$OUTPUT_FILE"
    fi
}

for RASTER_SIZE in $RASTER_SIZES; do
    MAX_SIZE=$((RASTER_SIZE / 2))
    OUTPUT_SUFFIX="${NR_OF_PARALLEL_REQUEST}_${RASTER_SIZE}"
    OUTPUT_FILE_TEMPLATE="${OUTPUT_DIR}/wcs-performance-par-${OUTPUT_SUFFIX}.csv"
    OUTPUT_FILE="$(get_output_filename $OUTPUT_FILE_TEMPLATE)"
    bboxes=$(for run in $(seq 1 $NR_OF_REQUEST); do gen_random_bbox; done)
    export -f get_cov_request
    export OUTPUT_FILE
    export BASE_URL
    for coverage in $COVERAGES; do
        echo "$bboxes" | parallel -j$NR_OF_PARALLEL_REQUEST "get_cov_request ${coverage} {}"
    done
done
```
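`benchmark.sh` can also be invoked on its own, following the positional parameters above. A minimal sketch (the output directory name is only illustrative; GNU `parallel`, `curl`, `shuf` and `bc` are assumed to be installed):

```bash
# raster sizes | output dir | coverages | WCS base URL | nr of requests | nr of parallel requests
./benchmark.sh "500 1000 2000" output-example "ahn3_05m_dsm" \
  https://geodata.nationaalgeoregister.nl/ahn3/wcs 50 2
```

`plot.sh` then turns the per-size CSV files in that output directory into `stats.csv` (mean and standard deviation per coverage, computed with Python's `statistics` module) and renders the result with gnuplot: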
```bash
#!/usr/bin/env bash
set -euo pipefail

OUTPUT_DIR="${1}"
COVS="${2}"
CSVFILE="${OUTPUT_DIR}/stats.csv"

# generate csv for gnuplot to use
# csv looks like:
# size,cov1-avg,cov1-stddev,cov2-avg,cov2-stddev,etc...
function process_stats() {
    header="size"
    for c in $COVS; do
        c=$(tr "_" "-" <<< $c)
        header="${header},${c}-avg,${c}-stddev"
    done
    csv=$(printf '%s\n\n' "${header}")
    for file in $(find "$OUTPUT_DIR" -regex ".*/.*_[0-9]+.csv$" | sort -V); do
        size=$(sed -En "s/.*_([0-9]+)\.csv$/\1/p" <<< "$file")
        line="${size}"
        for coverage in $COVS; do
            stats=$(grep < "$file" "$coverage" | cut -d, -f2 | python3 -c "import statistics,sys;vals=[float(x.rstrip()) for x in sys.stdin];print(f\"{statistics.mean(vals):.3f},{statistics.stdev(vals):.3f}\")")
            avg=$(cut <<< "$stats" -d, -f1)
            stddev=$(cut <<< "$stats" -d, -f2)
            line="${line},${avg},${stddev}"
        done
        csv=$(printf '%s\n%s' "$csv" "$line")
    done
    echo "$csv" > "$CSVFILE"
}

# shellcheck disable=SC2120
function plot() {
    input_csv="${1}"
    output="${2:-stats.png}"
    range="${3:-[0:4100]}"
    nr_columns=$(head -n1 "$input_csv" | tr ',' '\n' | wc -l)
    plot_command="plot "
    for i in $(seq 2 2 $nr_columns); do # start at 2 to skip the size column, step 2 since each coverage has an avg and a stddev column
        plot_command="${plot_command}'${input_csv}' using 1:${i} with lines, '${input_csv}' using 1:${i}:$((i+1)) with yerrorbars,"
    done
    rm -f "$output"
    gnuplot << EOF
set terminal png size 1000,800
set key left top
set xrange $range
set ylabel "Response time (in seconds)"
set xlabel "Raster size (nr of pixels)"
set output '$output'
set grid ytics mytics # draw lines for each ytics and mytics
set mytics 2          # set the spacing for the mytics
set grid              # enable the grid
set key autotitle columnhead
set datafile separator ","
${plot_command}
EOF
}

process_stats
plot "$CSVFILE" "${OUTPUT_DIR}/stats.png"
# plot "$CSVFILE" "${OUTPUT_DIR}/0-500-stats.png" "[0:600]"
# plot "$CSVFILE" "${OUTPUT_DIR}/0-1000-stats.png" "[0:1100]"
# plot "$CSVFILE" "${OUTPUT_DIR}/1000-2000-stats.png" "[900:2100]"
# plot "$CSVFILE" "${OUTPUT_DIR}/2000-4000-stats.png" "[1900:4100]"
```
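`plot.sh` takes the output directory and the coverage names as arguments, matching how it is called from `run-benchmarks.sh`:

```bash
./plot.sh output-cog-s3-150 "ahn3_05m_dsm_none ahn3_05m_dsm_deflate ahn3_05m_dsm_zstd"
```

The next helper script is not wired into `run-benchmarks.sh`: it collects pre-computed per-compression, per-size stats files (`output/ahn3_05m_dsm_<compression>_stats_2_<size>.txt`), combines them into one CSV, and transposes them with Miller (`mlr`) into `plot_stats.csv`: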
```bash
#!/usr/bin/env bash
tempfile=$(mktemp)
echo comp,size,min,max,median,avg,stddev >> $tempfile

# combine all stats in one csv
for c in none zstd deflate; do
    for s in 10 20 50 100 200 500 1000 1500 2000 3000 4000; do
        file=output/ahn3_05m_dsm_${c}_stats_2_${s}.txt
        min=$(grep < "$file" "x 50" | awk '{ print $3 }')
        max=$(grep < "$file" "x 50" | awk '{ print $4 }')
        median=$(grep < "$file" "x 50" | awk '{ print $5 }')
        avg=$(grep < "$file" "x 50" | awk '{ print $6 }')
        stddev=$(grep < "$file" "x 50" | awk '{ print $7 }')
        echo $c,$s,$min,$max,$median,$avg,$stddev >> $tempfile
    done
done

# transpose combined stats
csv=$(printf '%s\n\n' "size,none-avg,none-stddev,deflate-avg,deflate-stddev,zstd-avg,zstd-stddev")
for size in $(tail -n+2 $tempfile | cut -d, -f2 | sort -n -u); do
    size_output=$(mlr --csv filter '$size == '$size $tempfile)
    csv=$(printf '%s\n%s' "$csv" "${size},")
    for comp in none deflate zstd; do
        comp_size_output=$(mlr --csv filter "\$comp == \"${comp}\"" <<< "$size_output")
        avg=$(mlr --csv --headerless-csv-output cut -f avg <<< "$comp_size_output")
        stddev=$(mlr --csv --headerless-csv-output cut -f stddev <<< "$comp_size_output")
        csv="${csv}${avg},${stddev},"
    done
    csv=$(printf '%s\n' "$csv")
done

OUTPUT_DIR="${1:-output}"
output_file="${OUTPUT_DIR}/plot_stats.csv"
echo "$csv" > "$output_file"
```
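Finally, `run-benchmarks.sh` glues `benchmark.sh` and `plot.sh` together and runs them for the two configured setups (GeoTIFF served from NFS and Cloud Optimized GeoTIFF on S3, going by the output directory names):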
```bash
#!/usr/bin/env bash
# script to glue together benchmark.sh and plot.sh
set -euo pipefail

function run-benchmark() {
    OUTPUT_DIR="$1"
    COVERAGES="$2" # "ahn3_05m_dsm" | "ahn3_05m_dsm_none ahn3_05m_dsm_deflate ahn3_05m_dsm_zstd"
    URL="$3" # "https://geodata.nationaalgeoregister.nl/ahn3/wcs" | "https://service.pdok.nl/rws/ahn3/wcs/v1_0-preprod"
    RASTER_SIZES="${4:-10 20 50 100 200 500 1000 1500 2000 3000 4000}"
    NR_OF_REQS="${5:-50}"
    PARALLEL_REQS=${6:-2}
    rm -rf "$OUTPUT_DIR"
    ./benchmark.sh "$RASTER_SIZES" "$OUTPUT_DIR" "$COVERAGES" "$URL" "$NR_OF_REQS" "$PARALLEL_REQS"
    ./plot.sh "$OUTPUT_DIR" "$COVERAGES"
}

run-benchmark output-geotiff-nfs "ahn3_05m_dsm" https://geodata.nationaalgeoregister.nl/ahn3/wcs
run-benchmark output-cog-s3-150 "ahn3_05m_dsm_none ahn3_05m_dsm_deflate ahn3_05m_dsm_zstd" https://service.pdok.nl/rws/ahn3/wcs/v1_0-preprod
```