Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save itudoben/0a212658add362aeef1a04c0e07e78c7 to your computer and use it in GitHub Desktop.
Save itudoben/0a212658add362aeef1a04c0e07e78c7 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Absolute path of the directory one level above the directory that contains
# this script. Quoted so that install paths with spaces or glob characters
# resolve correctly; cd output is discarded in case CDPATH makes it print.
BASEDIR=$(cd -- "$(dirname -- "$0")/.." >/dev/null && pwd) || {
  printf 'ERROR: cannot resolve base directory from %s\n' "$0" >&2
  exit 1
}
#######################################
# Create a counting semaphore on file descriptor 3.
#
# A FIFO is opened read-write on fd 3 and pre-loaded with one newline
# ("token") per allowed concurrent job. A worker acquires a slot by reading
# one line from fd 3 and releases it by writing one line back.
#
# Globals:
#   NUM_CONCURRENT_JOBS (written) - number of tokens placed in the FIFO
#   SEMAPHORE (written)           - path the FIFO was created at (already
#                                   unlinked when the function returns)
# Arguments:
#   $1 - optional number of concurrent jobs; defaults to 5
# Returns:
#   0 on success, 1 if the job count is invalid or the FIFO cannot be made
#######################################
create_semaphore() {
  # Number of concurrent jobs
  NUM_CONCURRENT_JOBS=${1:-5}
  if ! [[ $NUM_CONCURRENT_JOBS =~ ^[1-9][0-9]*$ ]]; then
    printf 'create_semaphore: invalid number of jobs: %s\n' \
      "$NUM_CONCURRENT_JOBS" >&2
    return 1
  fi

  # Build the FIFO inside a private mktemp directory: a fixed name in /tmp
  # collides with concurrent runs and with stale files left by other users.
  local fifo_dir
  if ! fifo_dir=$(mktemp -d "${TMPDIR:-/tmp}/semaphore.XXXXXX"); then
    printf 'create_semaphore: cannot create temporary directory\n' >&2
    return 1
  fi
  SEMAPHORE=$fifo_dir/semaphore

  if ! mkfifo -- "$SEMAPHORE"; then
    rm -rf -- "$fifo_dir"
    printf 'create_semaphore: cannot create FIFO %s\n' "$SEMAPHORE" >&2
    return 1
  fi

  # Opening read-write does not block waiting for a peer. Once fd 3 is open
  # the name on disk is no longer needed, so remove it right away.
  exec 3<>"$SEMAPHORE"
  rm -rf -- "$fifo_dir"

  # Load the semaphore with the requested number of tokens
  local i
  for ((i = 0; i < NUM_CONCURRENT_JOBS; i++)); do
    printf '\n' >&3
  done
}
#######################################
# Print the JSON body of a search request to stdout.
#
# The request asks for up to 10000 hits of a match_all query, returns only
# the requested field (no _source), tracks the total hit count and sorts
# ascending on "num_long".
#
# Arguments:
#   $1 - name of the field to return for every hit
#   $2 - text inserted verbatim on its own line just before the "sort"
#        clause (callers pass a '"search_after" : [...],' fragment, or an
#        empty string for the first page)
# Outputs:
#   the request body, one JSON token group per line
#######################################
get_query() {
  local -r field_name=$1
  local -r paging_clause=$2

  printf '%s\n' \
    '{' \
    '"size" : 10000,' \
    '"query" : {' \
    '"match_all" : {}' \
    '},' \
    '"track_total_hits" : true,' \
    '"_source" : false,' \
    '"fields" : [' \
    "\"${field_name}\"" \
    '],' \
    "${paging_clause}" \
    '"sort" : {' \
    '"num_long" : "asc"' \
    '}' \
    '}'
}
#######################################
# Build a search request with get_query and POST it to
# <coordinator_url>/<index>/_search.
#
# Arguments:
#   $1 - base URL of the coordinator node
#   $2 - index name
#   $3 - search_after fragment handed to get_query (may be empty)
#   $4 - field name handed to get_query
# Outputs:
#   stdout - the response body as printed by curl
#   stderr - diagnostics (invalid query, curl transfer errors)
# Returns:
#   curl's exit status. If jq cannot parse the generated query the function
#   calls `exit 1`; callers capture the output with $(...), so that ends the
#   substitution's subshell and surfaces as a non-zero status.
#######################################
execute_query() {
  local -r coordinator_url=$1
  local -r l_index=$2
  local -r search_after_values=$3
  local -r l_field=$4

  # Declared separately so the variable stays local and does not leak into
  # the caller's scope.
  local query
  query=$(get_query "$l_field" "$search_after_values")

  # Validate the query. The expansion is quoted so the text reaches jq
  # unchanged (no word splitting or globbing).
  if ! printf '%s\n' "$query" | jq . >/dev/null 2>&1; then
    # Diagnostics go to stderr: stdout is captured by callers as the
    # response body and must never contain an error message.
    {
      echo 'ERROR in query'
      printf '%s\n' "$query"
    } >&2
    exit 1
  fi

  # -S keeps curl quiet on success but reports transfer errors on stderr.
  curl -sS -X POST "$coordinator_url/$l_index/_search" \
    -H "Content-type: application/json" -d "$query"
}
#######################################
# Page through every document of an index and hand the values of one field,
# page by page, to groovy/check_distribution.groovy.
#
# Pages are fetched with execute_query; the "sort" value of the last hit of
# a page becomes the "search_after" clause of the next request. Each page is
# checked in a background job, throttled by the token semaphore on fd 3
# (one line read = acquire, one line written = release).
#
# Globals (read):
#   coordinator_url - base URL passed to execute_query
#   similar_item    - passed to the helper and shown in the log message as
#                     the number of items per field value
#   BASEDIR         - directory containing groovy/check_distribution.groovy
#   fd 3            - semaphore, must already be open
# Arguments:
#   $1 - index name
#   $2 - field name
# Returns:
#   0 when all pages were fetched and every background job exited 0;
#   1 if a request failed, a response had no usable hits array, the
#   semaphore could not be read, or any background job exited non-zero.
#   NOTE(review): this assumes check_distribution.groovy signals a failed
#   check through its exit status - confirm against the helper.
#######################################
check_count() {
  local -r l_index=$1
  local -r l_field=$2
  local search_after_values=''
  local response values value hit_count token pid
  local l_lower_limit l_upper_limit
  local -a sorted_values=() job_pids=()
  local status=0 job_status

  while :; do
    if ! response=$(execute_query "$coordinator_url" "$l_index" \
      "$search_after_values" "$l_field"); then
      printf "ERROR: search request for field '%s' failed\n" "$l_field" >&2
      status=1
      break
    fi

    # Only a real array counts. An empty body, invalid JSON or an error
    # object yields no number here, and is reported instead of being
    # mistaken for "more pages", which would loop forever.
    hit_count=$(jq -r '.hits.hits | select(type == "array") | length' \
      <<<"$response" 2>/dev/null)
    if ! [[ $hit_count =~ ^[0-9]+$ ]]; then
      printf "ERROR: unexpected search response for field '%s':\n%s\n" \
        "$l_field" "$response" >&2
      status=1
      break
    fi
    ((hit_count > 0)) || break

    # The field name is passed as a jq variable rather than spliced into
    # the jq program text.
    if ! values=$(jq -r --arg f "$l_field" '.hits.hits[].fields[$f][]' \
      <<<"$response") || [[ -z $values ]]; then
      printf "ERROR: cannot extract values of field '%s'\n" "$l_field" >&2
      status=1
      break
    fi

    sorted_values=()
    while IFS= read -r value; do
      sorted_values+=("$value")
    done < <(sort -n <<<"$values")

    # Smallest and largest value of this page, taken from the sorted list
    # so the logged range is correct even when the field's order differs
    # from the sort order of the hits.
    l_lower_limit=${sorted_values[0]}
    l_upper_limit=${sorted_values[${#sorted_values[@]} - 1]}

    # Acquire a semaphore token
    if ! read -r -u 3 token; then
      printf 'ERROR: cannot acquire a semaphore token from fd 3\n' >&2
      status=1
      break
    fi

    # Run the command in the background
    {
      echo "Checking field '$l_field' uniform distribution ($similar_item items per '${l_field}') with values from $l_lower_limit to $l_upper_limit"
      job_status=0
      # The helper receives the field name, the expected number of items
      # per value, and the sorted values as separate arguments.
      "$BASEDIR/groovy/check_distribution.groovy" "$l_field" \
        "$similar_item" "${sorted_values[@]}" || job_status=$?
      if ((job_status == 0)); then
        echo "Completed '$l_field' [$l_lower_limit to $l_upper_limit]"
      else
        printf "ERROR: check of '%s' [%s to %s] exited with status %s\n" \
          "$l_field" "$l_lower_limit" "$l_upper_limit" "$job_status" >&2
      fi
      # Release the semaphore token, whatever the outcome
      printf '\n' >&3
      exit "$job_status"
    } &
    job_pids+=("$!")

    search_after_values=$(jq -c '.hits.hits[-1].sort' <<<"$response")
    search_after_values="\"search_after\" : ${search_after_values},"
  done

  # Wait for each job individually: a bare `wait` discards exit statuses.
  for pid in "${job_pids[@]}"; do
    wait "$pid" || status=1
  done
  return "$status"
}
# Main flow: count the documents of the index, then verify the value
# distribution of each checked field in turn.
coordinator_url='http://es-discovery:9200'
# The original request used $l_index, which is only ever set inside
# functions; at top level it expanded to nothing and the request went to
# "<url>//_count" instead of the index that is actually checked.
declare -r index_name='joinchild'

# Assigned before being marked read-only so `declare` does not hide the
# pipeline's exit status.
size=$(curl -s -X POST "$coordinator_url/$index_name/_count" | jq -r '.count')
declare -r size

# jq prints "null" when the response has no .count, and "0" for an empty
# index; both mean there is nothing to check.
if [[ -z $size || $size == null || $size == 0 ]]; then
  echo Found no data in the cluster!
  exit 0
fi
echo "Checking $size documents of index $index_name"

create_semaphore || exit 1
declare -r similar_item=5

failed=0
for field in pid num_int num_long num_long_plus_one; do
  check_count "$index_name" "$field" || failed=1
done

# Close the open pipe
exec 3>&-

if ((failed)); then
  echo "ERROR: at least one field of index $index_name failed the check" >&2
  exit 1
fi
echo "All fields of index $index_name are valid"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment