Skip to content

Instantly share code, notes, and snippets.

@arbakker
Last active June 28, 2022 09:52
Show Gist options
  • Save arbakker/92312d300945d2d94b1657507d16d0ef to your computer and use it in GitHub Desktop.
Save arbakker/92312d300945d2d94b1657507d16d0ef to your computer and use it in GitHub Desktop.
Benchmark script to test response time duration of a WCS service #wcs #bash #gnuparallel #pdok #geotiff #ogc #gnuplot #csv

README

To run the benchmarks, execute:

./run-benchmarks.sh

See the ./run-benchmarks.sh script for the parameters used in each benchmark run.

#!/usr/bin/env bash
# Benchmark the response time of WCS GetCoverage requests.
#
# Usage:
#   <script> RASTER_SIZES OUTPUT_DIR COVERAGES BASE_URL [NR_OF_REQUEST] [NR_OF_PARALLEL_REQUEST]
#
#   RASTER_SIZES            space-separated list of raster sizes to request
#   OUTPUT_DIR              directory that receives the result CSV files (created if missing)
#   COVERAGES               space-separated list of coverage names
#   BASE_URL                WCS endpoint, without query string
#   NR_OF_REQUEST           number of random bounding boxes per raster size (default: 50)
#   NR_OF_PARALLEL_REQUEST  number of concurrent requests (default: 2)
set -euo pipefail

# `:?` gives a readable error for a missing argument and also rejects an empty
# one, instead of the bare "unbound variable" message produced by `set -u`.
RASTER_SIZES="${1:?missing argument 1: RASTER_SIZES (space-separated list)}"
OUTPUT_DIR="${2:?missing argument 2: OUTPUT_DIR}"
COVERAGES="${3:?missing argument 3: COVERAGES (space-separated list)}"
BASE_URL="${4:?missing argument 4: BASE_URL}"
NR_OF_REQUEST="${5:-50}"
NR_OF_PARALLEL_REQUEST="${6:-2}"

# Range from which the lower-left corner of every random bounding box is drawn.
# The values are used as coordinates in the CRS of the requests (EPSG:28992).
readonly BBOXBOUND_MINX=140100
readonly BBOXBOUND_MINY=450100
readonly BBOXBOUND_MAXX=217900
readonly BBOXBOUND_MAXY=497900

mkdir -p -- "$OUTPUT_DIR"
#######################################
# Print a file name that does not exist yet, derived from the requested name.
# If the requested file does not exist it is printed unchanged; otherwise a
# counter is inserted before the extension (name_1.ext, name_2.ext, ...) until
# an unused name is found.
# Arguments: $1 - requested file path
# Outputs:   the first unused path, on stdout
#######################################
function get_output_filename() {
  local requested="$1"
  # Split on the last path component only, so a dot in a directory name
  # (e.g. /tmp/tmp.abc/result) is never mistaken for an extension.
  local base="${requested##*/}"
  local dir_prefix="${requested%"$base"}"
  local stem="$base"
  local ext_suffix=""
  # Only treat the part after the last dot as an extension when there is
  # something in front of that dot (so ".hidden" and "noext" keep their name).
  if [[ $base == ?*.* ]]; then
    stem="${base%.*}"
    ext_suffix=".${base##*.}"
  fi

  local candidate="$requested"
  local counter=0
  while [[ -f $candidate ]]; do
    counter=$((counter + 1))
    candidate="${dir_prefix}${stem}_${counter}${ext_suffix}"
  done
  printf '%s\n' "$candidate"
}
#######################################
# Print a random square bounding box as "minx,miny,maxx,maxy".
# The lower-left corner is drawn uniformly from the BBOXBOUND_* range; the
# upper-right corner lies MAX_SIZE further along both axes, so it may extend
# past BBOXBOUND_MAXX / BBOXBOUND_MAXY.
# Globals:   BBOXBOUND_MINX, BBOXBOUND_MAXX, BBOXBOUND_MINY, BBOXBOUND_MAXY,
#            MAX_SIZE (all read)
# Outputs:   the bounding box, on stdout
#######################################
function gen_random_bbox() {
  local minx miny
  minx=$(shuf -i "${BBOXBOUND_MINX}-${BBOXBOUND_MAXX}" -n1)
  miny=$(shuf -i "${BBOXBOUND_MINY}-${BBOXBOUND_MAXY}" -n1)
  printf '%s,%s,%s,%s\n' "$minx" "$miny" "$((minx + MAX_SIZE))" "$((miny + MAX_SIZE))"
}
#######################################
# Perform one timed WCS GetCoverage request and record the result.
# The requested raster is twice as many pixels wide/high as the bounding box
# extent (presumably because the coverages have a 0.5 m resolution - confirm).
# A result line "<coverage>,<seconds>" is appended to OUTPUT_FILE only when the
# server answers with content type image/tiff; anything else (error document,
# timeout) is reported on stderr and not recorded.
# Globals:   BASE_URL (read), OUTPUT_FILE (appended to)
# Arguments: $1 - coverage name
#            $2 - bounding box as "minx,miny,maxx,maxy"
# Outputs:   the request URL, on stdout
# Returns:   0, also when the request is not recorded, so that a failed
#            request does not make GNU parallel report a failed job.
#######################################
function get_cov_request() {
  local cov="$1"
  local bbox="$2"
  local min_x min_y max_x max_y _
  # Split the bbox in the shell instead of forking awk four times.
  IFS=, read -r min_x min_y max_x max_y _ <<< "$bbox"
  local width=$(((max_x - min_x) * 2))
  local height=$(((max_y - min_y) * 2))
  local url="${BASE_URL}?service=WCS&version=1.0.0&request=GetCoverage&coverage=${cov}&crs=EPSG:28992&response_crs=EPSG:28992&bbox=${min_x},${min_y},${max_x},${max_y}&width=${width}&height=${height}&format=image/tiff"
  printf '%s\n' "$url"

  # Wall-clock time in integer milliseconds (%N needs GNU date, as before);
  # integer arithmetic removes the dependency on bc.
  local start_ms end_ms content_type
  start_ms=$(date +%s%3N)
  content_type=$(curl -w '%{content_type}' --max-time 60 -s "$url" -o /dev/null)
  end_ms=$(date +%s%3N)
  local elapsed_ms=$((end_ms - start_ms))
  local elapsed
  # Always print a leading zero (bc printed ".123" for sub-second values).
  printf -v elapsed '%d.%03d' "$((elapsed_ms / 1000))" "$((elapsed_ms % 1000))"

  if [[ $content_type == "image/tiff" ]]; then
    printf '%s,%s\n' "$cov" "$elapsed" >> "$OUTPUT_FILE"
  else
    printf 'warning: request not recorded (content type "%s"): %s\n' "$content_type" "$url" >&2
  fi
}
# get_cov_request is executed by the shells that GNU parallel starts, so the
# function and every variable it reads have to be exported. The function and
# BASE_URL do not change between iterations; export them once.
export -f get_cov_request
export BASE_URL

# RASTER_SIZES and COVERAGES are space-separated lists: the unquoted expansion
# in the `for` headers is intentional.
for RASTER_SIZE in $RASTER_SIZES; do
  # get_cov_request asks for two pixels per coordinate unit, so a bounding box
  # of RASTER_SIZE / 2 units yields a raster of RASTER_SIZE pixels.
  MAX_SIZE=$((RASTER_SIZE / 2))
  OUTPUT_SUFFIX="${NR_OF_PARALLEL_REQUEST}_${RASTER_SIZE}"
  OUTPUT_FILE_TEMPLATE="${OUTPUT_DIR}/wcs-performance-par-${OUTPUT_SUFFIX}.csv"
  # Quoted so that an output directory containing whitespace stays one argument.
  OUTPUT_FILE="$(get_output_filename "$OUTPUT_FILE_TEMPLATE")"
  export OUTPUT_FILE

  # One set of random boxes per raster size, reused for every coverage so that
  # all coverages are measured on the same requests.
  bboxes=$(for ((run = 0; run < NR_OF_REQUEST; run++)); do gen_random_bbox; done)

  for coverage in $COVERAGES; do
    printf '%s\n' "$bboxes" \
      | parallel -j"$NR_OF_PARALLEL_REQUEST" "get_cov_request ${coverage} {}"
  done
done
#!/usr/bin/env bash
# Aggregate the benchmark result files in a directory into stats.csv and plot
# them with gnuplot.
#
# Usage: <script> OUTPUT_DIR COVS
#   OUTPUT_DIR  directory holding the benchmark CSV files; stats.csv and the
#               plot are written there as well
#   COVS        space-separated list of coverage names to report on
set -euo pipefail

# `:?` also rejects an empty value; an empty OUTPUT_DIR would otherwise turn
# CSVFILE into "/stats.csv".
OUTPUT_DIR="${1:?missing argument 1: OUTPUT_DIR}"
COVS="${2:?missing argument 2: COVS (space-separated list)}"
CSVFILE="${OUTPUT_DIR}/stats.csv"
# generate csv for gnuplot to use
# csv looks like:
# size,cov1-avg,cov1-stddev,cov2-avg,cov2-stddev,etc...
#######################################
# Build the statistics CSV from the benchmark result files.
# Every file below OUTPUT_DIR whose name ends in "_<digits>.csv" contributes
# one row: the digits (taken as the raster size) followed by the mean and the
# sample standard deviation of the timings of each coverage. A coverage with no
# samples gets "nan,nan" and a single sample gets "nan" as its stddev, instead
# of aborting the whole run.
# Globals:   OUTPUT_DIR (read), COVS (read, space-separated), CSVFILE (written)
# Outputs:   warnings on stderr for coverages with fewer than two samples
#######################################
function process_stats() {
  local cov
  local csv="size"
  for cov in $COVS; do
    # Header names use "-" instead of "_".
    csv+=",${cov//_/-}-avg,${cov//_/-}-stddev"
  done

  local file size row stats
  # Read the file list line by line so that paths containing spaces survive.
  while IFS= read -r file; do
    # The size is whatever sits between the last "_" and ".csv".
    size="${file##*_}"
    size="${size%.csv}"
    row="$size"
    for cov in $COVS; do
      # Compare the first column exactly: a substring match would also pick up
      # the samples of any coverage whose name merely contains this one.
      stats=$(
        awk -F, -v cov="$cov" '$1 == cov { print $2 }' "$file" \
          | python3 -c '
import statistics, sys
vals = [float(tok) for tok in sys.stdin.read().split()]
avg = f"{statistics.mean(vals):.3f}" if vals else "nan"
dev = f"{statistics.stdev(vals):.3f}" if len(vals) > 1 else "nan"
print(f"{avg},{dev}")
'
      )
      if [[ $stats == *nan* ]]; then
        printf 'warning: fewer than 2 samples for %s in %s\n' "$cov" "$file" >&2
      fi
      row+=",${stats}"
    done
    csv+=$'\n'"$row"
  done < <(find "$OUTPUT_DIR" -regex '.*/.*_[0-9]+\.csv' | sort -V)

  printf '%s\n' "$csv" > "$CSVFILE"
}
#######################################
# Render the statistics CSV as a PNG line chart with error bars using gnuplot.
# The first CSV column is the x value; every following pair of columns is
# plotted as one series (value, error bar size).
# Arguments: $1 - input CSV (header row required; it provides the series titles)
#            $2 - output PNG, default "stats.png"
#            $3 - gnuplot x range, default "[0:4100]"
# Returns:   1 when the CSV header contains no complete column pair
#######################################
# shellcheck disable=SC2120
function plot() {
  local input_csv="${1}"
  local output="${2:-stats.png}"
  local range="${3:-[0:4100]}"
  local nr_columns
  nr_columns=$(head -n1 -- "$input_csv" | tr ',' '\n' | wc -l)

  # Start at 2 to skip the size column, step 2 since every series has an avg
  # and a stddev column. Requiring col + 1 to exist avoids referencing a
  # missing stddev column when the header has an even number of fields.
  local plot_command=""
  local separator="plot "
  local col
  for ((col = 2; col + 1 <= nr_columns; col += 2)); do
    plot_command+="${separator}'${input_csv}' using 1:${col} with lines, '${input_csv}' using 1:${col}:$((col + 1)) with yerrorbars"
    # Join with ", " so the command does not end in a dangling comma.
    separator=", "
  done
  if [[ -z $plot_command ]]; then
    printf 'plot: no avg/stddev column pairs found in %s\n' "$input_csv" >&2
    return 1
  fi

  rm -f -- "$output"
  gnuplot << EOF
set terminal png size 1000,800
set key left top
set xrange $range
set ylabel "Response time (in seconds)"
set xlabel "Raster size (nr of pixels)"
set output '$output'
set grid ytics mytics # draw lines for each ytics and mytics
set mytics 2 # set the spacing for the mytics
set grid # enable the grid
set key autotitle columnhead
set datafile separator ","
${plot_command}
EOF
}
# Entry point: build the statistics CSV first, then draw the chart from it.
function main() {
  local chart="${OUTPUT_DIR}/stats.png"

  process_stats
  plot "$CSVFILE" "$chart"

  # Optional zoomed-in charts over a sub-range of raster sizes:
  # plot "$CSVFILE" "${OUTPUT_DIR}/0-500-stats.png" "[0:600]"
  # plot "$CSVFILE" "${OUTPUT_DIR}/0-1000-stats.png" "[0:1100]"
  # plot "$CSVFILE" "${OUTPUT_DIR}/1000-2000-stats.png" "[900:2100]"
  # plot "$CSVFILE" "${OUTPUT_DIR}/2000-4000-stats.png" "[1900:4100]"
}

main
#!/usr/bin/env bash
# Collect the average and standard deviation from the per-compression,
# per-size stats files and write them as one table:
#
#   size,none-avg,none-stddev,deflate-avg,deflate-stddev,zstd-avg,zstd-stddev
#
# Usage: <script> [OUTPUT_DIR]
#   OUTPUT_DIR  directory that receives plot_stats.csv (default: output)
#
# NOTE: the input files are always read from ./output/, whatever OUTPUT_DIR is.
#
# The table is built directly from the input files; the earlier intermediate
# CSV in a temp file (which was never removed) and the mlr round trip are not
# needed, since only avg and stddev end up in the result.

OUTPUT_DIR="${1:-output}"
output_file="${OUTPUT_DIR}/plot_stats.csv"

# Column order of the result; must stay in sync with the header line below.
compressions=(none deflate zstd)
sizes=(10 20 50 100 200 500 1000 1500 2000 3000 4000)

# Print "avg,stddev" for one stats file: whitespace-separated fields 6 and 7
# of the line containing "x 50". Both values are empty when the file cannot be
# read or has no such line, so a missing measurement leaves empty cells.
function summary_fields() {
  local stats_file="$1"
  local avg="" stddev="" _
  if [[ -r $stats_file ]]; then
    read -r _ _ _ _ _ avg stddev _ < <(grep -- "x 50" "$stats_file")
  else
    printf 'warning: cannot read %s\n' "$stats_file" >&2
  fi
  printf '%s,%s\n' "$avg" "$stddev"
}

csv="size,none-avg,none-stddev,deflate-avg,deflate-stddev,zstd-avg,zstd-stddev"
for size in "${sizes[@]}"; do
  row="$size"
  for comp in "${compressions[@]}"; do
    # No trailing comma is emitted, so every row has as many fields as the header.
    row+=",$(summary_fields "output/ahn3_05m_dsm_${comp}_stats_2_${size}.txt")"
  done
  csv+=$'\n'"$row"
done

printf '%s\n' "$csv" > "$output_file"
#!/usr/bin/env bash
# script to glue together benchmark.sh and plot.sh
set -euo pipefail
#######################################
# Run one complete benchmark: wipe the output directory, measure, then plot.
# Arguments: $1 - output directory (removed and recreated!)
#            $2 - space-separated coverage names,
#                 e.g. "ahn3_05m_dsm" or
#                 "ahn3_05m_dsm_none ahn3_05m_dsm_deflate ahn3_05m_dsm_zstd"
#            $3 - WCS endpoint,
#                 e.g. "https://geodata.nationaalgeoregister.nl/ahn3/wcs" or
#                 "https://service.pdok.nl/rws/ahn3/wcs/v1_0-preprod"
#            $4 - space-separated raster sizes (optional)
#            $5 - number of requests per raster size (optional, default 50)
#            $6 - number of parallel requests (optional, default 2)
# Returns:   non-zero when benchmark.sh or plot.sh fails
#######################################
function run-benchmark() {
  # `:?` aborts on a missing or empty argument, so `rm -rf` below can never be
  # handed an empty path.
  local output_dir="${1:?run-benchmark: output directory is required}"
  local coverages="${2:?run-benchmark: coverages are required}"
  local url="${3:?run-benchmark: WCS url is required}"
  local raster_sizes="${4:-10 20 50 100 200 500 1000 1500 2000 3000 4000}"
  local nr_of_reqs="${5:-50}"
  local parallel_reqs="${6:-2}"

  # Start from a clean directory so results of an earlier run are not mixed in.
  rm -rf -- "$output_dir"
  ./benchmark.sh "$raster_sizes" "$output_dir" "$coverages" "$url" "$nr_of_reqs" "$parallel_reqs" || return
  ./plot.sh "$output_dir" "$coverages"
}
# Run 1: a single coverage against the geodata.nationaalgeoregister.nl endpoint.
run-benchmark \
  output-geotiff-nfs \
  "ahn3_05m_dsm" \
  https://geodata.nationaalgeoregister.nl/ahn3/wcs

# Run 2: the three compression variants against the service.pdok.nl preprod endpoint.
run-benchmark \
  output-cog-s3-150 \
  "ahn3_05m_dsm_none ahn3_05m_dsm_deflate ahn3_05m_dsm_zstd" \
  https://service.pdok.nl/rws/ahn3/wcs/v1_0-preprod
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment