Last active
November 10, 2021 17:16
-
-
Save grobbie/d8190820f319201c6871d4775db77e7b to your computer and use it in GitHub Desktop.
TPC-DS 100 performance testing on PostgreSQL 12 & Ubuntu 20.04 LTS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#dexter and build basics | |
sudo apt-get install gcc make flex bison byacc git ruby ruby-dev | |
#pg_bulkload | |
sudo apt install libcrypto++6 libssl-dev libkrb5-dev libselinux-dev libpam-dev libcrypto++-dev libgssapi-krb5-2 libz-dev libedit-dev | |
wget https://github.com/ossc-db/pg_bulkload/archive/refs/tags/VERSION3_1_19.tar.gz | |
tar xzf VERSION3_1_19.tar.gz | |
pushd pg_bulkload-VERSION3_1_19 | |
make | |
make install | |
popd | |
git clone https://github.com/gregrahn/tpcds-kit.git | |
pushd tpcds-kit/tools | |
make OS=LINUX | |
tpcds_home=$PWD | |
#change this to some high performance large volume eg. /dev/nvme1n1 | |
#mounted at /mnt/nvme with postgres as owner | |
pushd /mnt/nvme | |
sudo -u postgres cp $tpcds_home/tpcds.idx . | |
sudo -u postgres createdb tpcds | |
sudo -u postgres psql tpcds -f tpcds.sql | |
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 1 & | |
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 2 & | |
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 3 & | |
$tpcds_home/dsdgen -FORCE -VERBOSE -SCALE 100 -PARALLEL 4 -CHILD 4 & | |
sudo -u postgres psql tpcds -c "CREATE EXTENSION pg_bulkload" | |
mkdir -p cleanup | |
sudo -u postgres rm -f cleanup/*.dat | |
for f in customer call_center catalog_page catalog_returns catalog_sales customer_address customer_demographics \ | |
date_dim dbgen_version household_demographics income_band inventory item promotion reason ship_mode store \ | |
store_returns store_sales time_dim warehouse web_page web_returns web_sales web_site | |
do | |
sudo -u postgres cat ${f}_[0-9]*_4.dat | sed -e "s/|$//" | sudo -u postgres dd of=./cleanup/$f.dat oflag=append conv=notrunc | |
sudo -u postgres rm -f ${f}_[0-9]*_4.dat | |
done | |
pushd cleanup | |
for i in `ls *.dat`; do | |
t=${i/.dat/} | |
cat <<-EOF | tee $t.ctl | |
OUTPUT = $t | |
INPUT = $PWD/$i | |
TYPE = CSV | |
QUOTE = "\"" | |
DELIMITER = "|" | |
EOF | |
done | |
for i in `ls *.dat`; do | |
table=${i/.dat/} | |
echo "Loading $table..." | |
sudo -u postgres psql tpcds -q -c "TRUNCATE $table" | |
start_time=$(date +%s.%N) | |
sudo -u postgres /usr/lib/postgresql/12/bin/pg_bulkload -d tpcds $table.ctl | |
stop_time=$(date +%s.%N) | |
duration=$(echo "$stop_time - $start_time" | bc) | |
echo "Load time $table -- $duration" | |
done | |
popd #back to /mnt/nvme | |
popd #back to tcpds-kit/tools | |
# fix benchmark queries for PGSQL language idiosyncrasies | |
sed -i -e "s/from catalog_sales)/from catalog_sales) as a/" ../query_templates/query2.tpl | |
sed -i -e "s/14 days/INTERVAL '14 days'/" ../query_templates/query5.tpl | |
sed -i -e "s/decimal(/numeric(/" ../query_templates/query5.tpl | |
sed -i -e "s/decimal(/numeric(/" ../query_templates/query90.tpl | |
sed -i -e "s/decimal(/numeric(/" ../query_templates/query61.tpl | |
sed -i -e "s/decimal(/numeric(/" ../query_templates/query49.tpl | |
sed -i -e "s/decimal(/numeric(/" ../query_templates/query18.tpl | |
./dsqgen -DIRECTORY ../query_templates -INPUT ../query_templates/templates.lst \ | |
-VERBOSE Y -QUALIFY Y -DIALECT netezza -QUALIFY Y -SCALE 100 -STREAMS 7 | |
sudo -u postgres psql tpcds -c "ANALYZE VERBOSE" | |
sudo -u postgres wget https://raw.githubusercontent.com/RunningJon/TPC-DS/032b0cb9934bf7a8e4e4a59d80aa2ded108a15c2/03_ddl/051.postgresql.foreignkeys.sql | |
sudo -u postgres psql tpcds -f 051.postgresql.foreignkeys.sql | |
sudo -u postgres wget https://raw.githubusercontent.com/RunningJon/TPC-DS/032b0cb9934bf7a8e4e4a59d80aa2ded108a15c2/03_ddl/052.postgresql.indexes.sql | |
sudo -u postgres psql tpcds -f 052.postgresql.indexes.sql | |
sudo -u postgres psql tpcds -c "create index idx_catalog_sales_1 on catalog_sales (cs_sales_price, cs_net_profit)" | |
sudo -u postgres psql tpcds -c "create index idx_catalog_sales_2 on catalog_sales (cs_quantity, cs_ext_sales_price, cs_net_profit)" | |
sudo -u postgres psql tpcds -c "create index idx_web_sales_1 on web_sales (ws_sales_price, ws_net_profit)" | |
sudo -u postgres psql tpcds -c "create index idx_web_sales_2 on web_sales (ws_quantity, ws_ext_sales_price, ws_net_profit)" | |
# TPC-DS test requires 7 parallel query streams | |
for i in {0..6}; do | |
cat << EOF > query_engine_${i}.sh | |
#!/bin/bash | |
total_cost=0 | |
echo "begin run Q${i}, query_$i.sql, \$(date)" | |
begin_time=\$(date +%s.%N) | |
sudo -u postgres psql tpcds < query_${i}.sql > /tmp/log_query_${i}.log | |
rc=\$? | |
end_time=\$(date +%s.%N) | |
cost=\$(echo "\$end_time-\$begin_time"|bc) | |
total_cost=\$(echo "\$total_cost+\$cost"|bc) | |
if [ \$rc -ne 0 ] ; then | |
echo "run Q\$i fail, cost: \$cost, totalCost: \$total_cost, \$(date)\n" > /tmp/query${i}.report | |
else | |
echo "run Q\$i succ, cost: \$cost, totalCost: \$total_cost, \$(date)\n" > /tmp/query${i}.report | |
fi | |
EOF | |
chmod +x query_engine_${i}.sh | |
./query_engine_${i}.sh & | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment