Skip to content

Instantly share code, notes, and snippets.

@grobbie
Last active November 10, 2021 17:16
Show Gist options
  • Save grobbie/d8190820f319201c6871d4775db77e7b to your computer and use it in GitHub Desktop.
Save grobbie/d8190820f319201c6871d4775db77e7b to your computer and use it in GitHub Desktop.
TPC-DS 100 performance testing on PostgreSQL 12 & Ubuntu 20.04 LTS
#dexter and build basics
sudo apt-get install gcc make flex bison byacc git ruby ruby-dev
#pg_bulkload
sudo apt install libcrypto++6 libssl-dev libkrb5-dev libselinux-dev libpam-dev libcrypto++-dev libgssapi-krb5-2 libz-dev libedit-dev
wget https://github.com/ossc-db/pg_bulkload/archive/refs/tags/VERSION3_1_19.tar.gz
tar xzf VERSION3_1_19.tar.gz
pushd pg_bulkload-VERSION3_1_19
make
make install
popd
git clone https://github.com/gregrahn/tpcds-kit.git
pushd tpcds-kit/tools
make OS=LINUX
tpcds_home=$PWD
#change this to some high performance large volume eg. /dev/nvme1n1
#mounted at /mnt/nvme with postgres as owner
pushd /mnt/nvme
sudo -u postgres cp $tpcds_home/tpcds.idx .
sudo -u postgres createdb tpcds
sudo -u postgres psql tpcds -f tpcds.sql
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 1 &
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 2 &
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 3 &
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 4 &
sudo -u postgres psql tpcds -c "CREATE EXTENSION pg_bulkload"
mkdir -p cleanup
sudo -u postgres rm -f cleanup/*.dat
for f in customer call_center catalog_page catalog_returns catalog_sales customer_address customer_demographics \
date_dim dbgen_version household_demographics income_band inventory item promotion reason ship_mode store \
store_returns store_sales time_dim warehouse web_page web_returns web_sales web_site
do
sudo -u postgres cat ${f}_[0-9]*_4.dat | sed -e "s/|$//" | sudo -u postgres dd of=./cleanup/$f.dat oflag=append conv=notrunc
sudo -u postgres rm -f ${f}_[0-9]*_4.dat
done
pushd cleanup
for i in `ls *.dat`; do
t=${i/.dat/}
cat <<-EOF | tee $t.ctl
OUTPUT = $t
INPUT = $PWD/$i
TYPE = CSV
QUOTE = "\""
DELIMITER = "|"
EOF
done
for i in `ls *.dat`; do
table=${i/.dat/}
echo "Loading $table..."
sudo -u postgres psql tpcds -q -c "TRUNCATE $table"
start_time=$(date +%s.%N)
sudo -u postgres /usr/lib/postgresql/12/bin/pg_bulkload -d tpcds $table.ctl
stop_time=$(date +%s.%N)
duration=$(echo "$stop_time - $start_time" | bc)
echo "Load time $table -- $duration"
done
popd #back to /mnt/nvme
popd #back to tcpds-kit/tools
# fix benchmark queries for PGSQL language idiosyncrasies
sed -i -e "s/from catalog_sales)/from catalog_sales) as a/" ../query_templates/query2.tpl
sed -i -e "s/14 days/INTERVAL '14 days'/" ../query_templates/query5.tpl
sed -i -e "s/decimal(/numeric(/" ../query_templates/query5.tpl
sed -i -e "s/decimal(/numeric(/" ../query_templates/query90.tpl
sed -i -e "s/decimal(/numeric(/" ../query_templates/query61.tpl
sed -i -e "s/decimal(/numeric(/" ../query_templates/query49.tpl
sed -i -e "s/decimal(/numeric(/" ../query_templates/query18.tpl
./dsqgen -DIRECTORY ../query_templates -INPUT ../query_templates/templates.lst \
-VERBOSE Y -QUALIFY Y -DIALECT netezza -QUALIFY Y -SCALE 100 -STREAMS 7
sudo -u postgres psql tpcds -c "ANALYZE VERBOSE"
sudo -u postgres wget https://raw.githubusercontent.com/RunningJon/TPC-DS/032b0cb9934bf7a8e4e4a59d80aa2ded108a15c2/03_ddl/051.postgresql.foreignkeys.sql
sudo -u postgres psql tpcds -f 051.postgresql.foreignkeys.sql
sudo -u postgres wget https://raw.githubusercontent.com/RunningJon/TPC-DS/032b0cb9934bf7a8e4e4a59d80aa2ded108a15c2/03_ddl/052.postgresql.indexes.sql
sudo -u postgres psql tpcds -f 052.postgresql.indexes.sql
sudo -u postgres psql tpcds -c "create index idx_catalog_sales_1 on catalog_sales (cs_sales_price, cs_net_profit)"
sudo -u postgres psql tpcds -c "create index idx_catalog_sales_2 on catalog_sales (cs_quantity, cs_ext_sales_price, cs_net_profit)"
sudo -u postgres psql tpcds -c "create index idx_web_sales_1 on web_sales (ws_sales_price, ws_net_profit)"
sudo -u postgres psql tpcds -c "create index idx_web_sales_2 on web_sales (ws_quantity, ws_ext_sales_price, ws_net_profit)"
# TPC-DS test requires 7 parallel query streams
for i in {0..6}; do
cat << EOF > query_engine_${i}.sh
#!/bin/bash
total_cost=0
echo "begin run Q${i}, query_$i.sql, \$(date)"
begin_time=\$(date +%s.%N)
sudo -u postgres psql tpcds < query_${i}.sql > /tmp/log_query_${i}.log
rc=\$?
end_time=\$(date +%s.%N)
cost=\$(echo "\$end_time-\$begin_time"|bc)
total_cost=\$(echo "\$total_cost+\$cost"|bc)
if [ \$rc -ne 0 ] ; then
echo "run Q\$i fail, cost: \$cost, totalCost: \$total_cost, \$(date)\n" > /tmp/query${i}.report
else
echo "run Q\$i succ, cost: \$cost, totalCost: \$total_cost, \$(date)\n" > /tmp/query${i}.report
fi
EOF
chmod +x query_engine_${i}.sh
./query_engine_${i}.sh &
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment