Skip to content

Instantly share code, notes, and snippets.

@sam
Created April 2, 2012 19:37
Show Gist options
  • Save sam/2286613 to your computer and use it in GitHub Desktop.
Save sam/2286613 to your computer and use it in GitHub Desktop.
dbcompare
#!/usr/bin/env bash
SOURCE=
TARGET=
DATABASE=
TABLE=
function usage() {
cat <<"EOS"
USAGE: dbcompare -h source target database [ table ]
This script compares the tables and sequences in the $database on $source and $target,
reports wether the schemas match, and if a table, wether the row counts match.
You can optionally pass a $table if you want to check just one table or sequence.
OPTIONS:
-h Help: This Message
EOS
}
while getopts "h" OPTION; do
case $OPTION in
h)
usage
exit 0
;;
?)
usage
exit 1
;;
esac
done
SOURCE="$1"; shift
TARGET="$1"; shift
DATABASE="$1"; shift
if [ -z "$SOURCE" ] || [ -z "$TARGET" ] || [ -z "$DATABASE" ]; then
usage
exit 1
fi
if [ -n "$1" ]; then
TABLE="$1"
fi
function compare_servers() {
local database="$1"
while read table; do
if [ -n "$table" ]; then
compare_table "$database" "$table"
fi
done < <(ssh $SOURCE "psql $database -t -c '\\d'" | awk '{print $3}')
}
function compare_table() {
local database="$1"
local table="$2"
# log_cnt is a feature of Sequences. It indicates how many writes are left before a WAL must be written.
# It's not expected that it would be in sync between two servers.
# We're not filtering all the triggers here. Just the bucardo trigger, and the line "Triggers:" that
# begins the description of the triggers since the source database is always going to contain triggers
# (Bucardo's triggers at least), so this line may not appear on the slave side if the table doesn't have
# any other triggers.
db1_table_description=$(ssh -n $SOURCE "psql $database -c '\\d $table'" | awk '!/Triggers:|bucardo|log_cnt/')
db2_table_description=$(ssh -n $TARGET "psql $database -c '\\d $table'" | awk '!/Triggers:|log_cnt/')
# Compare the descriptions of the tables/sequences:
if [ "$(md5 -qs "$db1_table_description")" = "$(md5 -qs "$db2_table_description")" ] || [ "$table" = "photos" ]; then
# If the output is the same, and this is a Sequence, then we're done here, this one is good to go.
if echo "$table" | grep '_seq$' > /dev/null; then
echo "OK: $table"
else
# Since this isn't a Sequence, it's a table. Better compare row counts:
db1_rows=$(ssh -n $SOURCE "psql $database -tc 'SELECT COUNT(*) FROM $table'")
db2_rows=$(ssh -n $TARGET "psql $database -tc 'SELECT COUNT(*) FROM $table'")
if [ "$db1_rows" = "$db2_rows" ]; then
echo "OK: $table"
else
echo "MISMATCHED ROWCOUNT: $table [$db1_rows/$db2_rows]"
fi
fi
else
echo "MISMATCHED SCHEMA: $table"
diff <(echo "$db1_table_description") <(echo "$db2_table_description")
fi
}
if [ -n "$TABLE" ]; then
compare_table "$DATABASE" "$TABLE"
else
compare_servers "$DATABASE"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment