Skip to content

Instantly share code, notes, and snippets.

@yvanzo
Last active July 10, 2019 17:20
Show Gist options
  • Save yvanzo/52f9f9038ad3c60631ada7b7c0d26d92 to your computer and use it in GitHub Desktop.
Save yvanzo/52f9f9038ad3c60631ada7b7c0d26d92 to your computer and use it in GitHub Desktop.
Script to check the status of search indexes using musicbrainz-docker
#!/bin/bash
#
# For each MusicBrainz Solr core, compare the number of documents existing
# in PostgreSQL with the number of documents indexed in Solr.
#
# Takes no arguments. Exits with status 1 on misuse.

# -e: stop at the first unhandled failure; -u: reject unset variables
set -e -u

# Usage text; $0 is expanded when the here-document is read
HELP=$(cat <<EOH
Usage: $0
For each of MusicBrainz Solr cores/collections,
compare the count of existing documents in PostgreSQL,
with the count of indexed documents in Solr.
EOH
)

# Diagnostics go to stderr so that they are never mistaken for (or piped
# along with) the report this script writes to stdout.
if [ $# -ne 0 ]; then
  echo "$0: wrong number of arguments" >&2
  echo "$HELP" >&2
  exit 1
fi
# Work from the parent of the directory holding this script, so that the
# docker-compose commands below are run from there.
# NOTE(review): presumably the root of the musicbrainz-docker checkout.
MB_DOCKER_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd)
cd "$MB_DOCKER_ROOT"

# Both services are queried with 'docker-compose exec' later on, which
# requires them to be up. Failures are reported on stderr to keep stdout
# reserved for the final report.
if ! docker-compose ps db | grep -qw 'Up'; then
  echo "$0: cannot count existing documents: 'db' is not a running docker-compose service" >&2
  exit 1
fi

if ! docker-compose ps indexer | grep -qw 'Up'; then
  echo "$0: cannot count indexed documents: 'indexer' is not a running docker-compose service" >&2
  exit 1
fi
# Count existing documents by core in PostgreSQL

# Maps each core name to the SQL query counting its existing documents.
declare -A queries

# Annotations are counted per annotated entity, across all entity types.
# UNION ALL is required here: a plain UNION removes duplicate rows, so two
# entity types having the same number of annotated entities would be
# summed only once, under-reporting the total.
queries['annotation']="$(cat <<EOSQL
SELECT SUM(annotation_count) FROM (
            SELECT COUNT(DISTINCT area         ) annotation_count FROM area_annotation AS one
  UNION ALL SELECT COUNT(DISTINCT artist       ) annotation_count FROM artist_annotation
  UNION ALL SELECT COUNT(DISTINCT event        ) annotation_count FROM event_annotation
  UNION ALL SELECT COUNT(DISTINCT instrument   ) annotation_count FROM instrument_annotation
  UNION ALL SELECT COUNT(DISTINCT label        ) annotation_count FROM label_annotation
  UNION ALL SELECT COUNT(DISTINCT place        ) annotation_count FROM place_annotation
  UNION ALL SELECT COUNT(DISTINCT recording    ) annotation_count FROM recording_annotation
  UNION ALL SELECT COUNT(DISTINCT release      ) annotation_count FROM release_annotation
  UNION ALL SELECT COUNT(DISTINCT release_group) annotation_count FROM release_group_annotation
  UNION ALL SELECT COUNT(DISTINCT series       ) annotation_count FROM series_annotation
  UNION ALL SELECT COUNT(DISTINCT work         ) annotation_count FROM work_annotation
) AS total
EOSQL
)"

# The 'cdstub' core is counted from the 'release_raw' table
queries['cdstub']="SELECT COUNT(id) FROM release_raw"

# Every other core is counted from the table of the same name, except that
# the key uses hyphens where the table uses underscores
# (e.g. table 'release_group' is stored under key 'release-group').
for table in area artist editor event instrument label place \
  recording release release_group series tag url work
do
  queries["${table//_/-}"]="SELECT COUNT(id) FROM $table"
done
# Maps each core name to the count returned by its query in PostgreSQL.
declare -A counts

POSTGRES_USER=musicbrainz
POSTGRES_DATABASE=musicbrainz_db

for core in "${!queries[@]}"
do
  # -T disables pseudo-TTY allocation: the output is captured rather than
  # displayed, and allocating a TTY fails when this script itself runs
  # without a terminal (cron, CI). COPY ... TO STDOUT prints the bare value
  # without psql's table decoration. Carriage returns are still stripped
  # as a safety net.
  counts["$core"]=$(docker-compose exec -T db \
    psql -U "$POSTGRES_USER" -d "$POSTGRES_DATABASE" \
      -c "COPY(${queries[$core]}) TO STDOUT" | tr -d '\r')
done
# Sort cores by ascending number of documents

# Core names, ordered from the smallest to the largest count in 'counts'.
declare -a ascending_cores
mapfile -t ascending_cores < <(
  for name in "${!counts[@]}"
  do
    # One "<core> <count>" line per core; a trailing space (empty count)
    # is dropped so that the line is then just "<core>"
    entry="$name ${counts[$name]}"
    printf '%s\n' "${entry% }"
  done | sort -n -k2 | cut -d ' ' -f 1
)
# Count indexed documents by core in Solr

# Maps each core name to its number of indexed documents ("numDocs"), as
# reported by Solr's cores status endpoint.
declare -A indexed_docs

# Each input line is "<core> <numDocs>". The order of lines is irrelevant
# since they are stored in an associative array.
while read -r core docs
do
  # Skip blank lines: an empty key is not a valid array subscript
  [[ -n "$core" ]] || continue
  indexed_docs["$core"]="${docs/$'\r'/}"
done < <(
  # -T disables pseudo-TTY allocation: it keeps error messages (stderr) out
  # of the parsed output and allows running without a terminal.
  # The status is fetched and decoded from within the 'indexer' container,
  # using the wget and python2 commands found there.
  docker-compose exec -T indexer bash -c "
    wget -q -O - http://search:8983/v2/cores | python2 -c '
import sys, json;
json_status = json.load(sys.stdin)[\"status\"];
for core in json_status:
    print core, json_status[core][\"index\"][\"numDocs\"]
'"
)
# Compare number of indexed docs with number of existing docs, by core
#
# Prints one row per core: name, status ('OK' if both counts are equal,
# '--' otherwise), indexed count, and existing count prefixed with '/'.
for core in "${ascending_cores[@]}"
do
  # A count may be missing (e.g. core absent from Solr, failed query).
  # Fall back to '?' rather than expanding an unset element, which under
  # 'set -u' would abort this loop and silently truncate the report.
  existing=${counts[$core]:-?}
  indexed=${indexed_docs[$core]:-?}

  # Only compare well-formed numbers; anything else is a mismatch
  if [[ $existing =~ ^[0-9]+$ && $indexed =~ ^[0-9]+$ ]] \
    && [ "$existing" -eq "$indexed" ]
  then
    status='OK'
  else
    status='--'
  fi

  printf '%s %s %s /%s\n' "$core" "$status" "$indexed" "$existing"
done | (
  # Use the long options (named columns, right-aligned INDEX) when this
  # 'column' accepts --version; otherwise fall back to a plain header
  # line and 'column -t'.
  if column --version &>/dev/null
  then
    column --table --table-columns CORE,STATUS,INDEX,DB --table-right INDEX
  else
    (echo CORE STATUS INDEX DB; cat) | column -t
  fi
)
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
CORE STATUS INDEX DB
cdstub OK 0 /0
event OK 16 /16
release OK 32 /32
release-group OK 32 /32
series OK 34 /34
label OK 74 /74
place OK 82 /82
url OK 283 /283
instrument OK 945 /945
annotation OK 1197 /1197
work OK 1862 /1862
editor OK 1898 /1898
area OK 2015 /2015
tag OK 2119 /2119
artist OK 8172 /8172
recording OK 38617 /38617
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment