Last active
June 27, 2016 22:08
-
-
Save divideby0/c329d29e5d0807a03fa77f24765e9b68 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Bulk-loads data files into an Elasticsearch instance reachable on port 9200
# of the currently active docker-machine host.
# Requires: docker-machine, httpie (`http`), tar, split.

set -o xtrace   # trace what gets executed
set -o errexit  # exit when a command fails.
set -o nounset  # exit when your script tries to use undeclared variables

# Absolute path of the directory containing this script.
__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DATA_DIR="${__dir}/containers/elasticsearch/data"

# Resolve the active machine in its own assignment: a failure inside a nested
# command substitution would not trip errexit, and the name must be passed
# quoted so it always reaches `docker-machine ip` as a single argument.
DOCKER_MACHINE_NAME="$(docker-machine active)"
DOCKER_MACHINE_IP="$(docker-machine ip "${DOCKER_MACHINE_NAME}")"

TMP_DATA_DIR="${DATA_DIR}/tmp"
ES_URL="http://${DOCKER_MACHINE_IP}:9200"

# Number of lines per chunk submitted to the bulk API.
MAX_SPLIT_LINES=10000

mkdir -p "${TMP_DATA_DIR}"
#######################################
# Drop and recreate an Elasticsearch index, then load it through the bulk API
# from a gzipped tarball, submitting the data in MAX_SPLIT_LINES-line chunks.
# Globals:
#   ES_URL          (read) base URL of the Elasticsearch HTTP endpoint
#   DATA_DIR        (read) archive is ${DATA_DIR}/bulk/<data_file>.json.bulk.tar.gz
#   TMP_DATA_DIR    (read) scratch directory for the extracted file and chunks
#   MAX_SPLIT_LINES (read) number of lines per bulk request
# Arguments:
#   $1 - name of the index to (re)create
#   $2 - base name of the data file; the archive is expected to unpack to
#        <data_file>.json.bulk at its top level
# Side effects:
#   Leaves the working directory set to TMP_DATA_DIR.
# Returns:
#   Non-zero if the scratch directory cannot be entered or if extracting or
#   splitting the data fails.
#######################################
create_index () {
  local index=${1}
  local data_file=${2}
  local bulk_file="${TMP_DATA_DIR}/${data_file}.json.bulk"
  local chunk

  echo "Creating ${index} index"

  # Clear and recreate the index
  http --ignore-stdin DELETE "${ES_URL}/${index}"
  http --ignore-stdin POST "${ES_URL}/${index}"

  # Slow the refresh interval to 120s while doing bulk updates.
  # The JSON body is supplied on stdin, so --ignore-stdin must NOT be given
  # here: with that flag httpie never reads the body and the setting is lost.
  http PUT "${ES_URL}/${index}/_settings" <<< '{"index": {"refresh_interval": "120s"}}'

  cd "${TMP_DATA_DIR}" || return
  tar -xzvf "${DATA_DIR}/bulk/${data_file}.json.bulk.tar.gz" || return

  # Remove chunks left over from a previous run so they are not re-submitted.
  # `split -a 5` names its output <prefix> + five lowercase letters.
  rm -f -- "${bulk_file}."[a-z][a-z][a-z][a-z][a-z]

  # Split the lines file into MAX_SPLIT_LINES-line pieces and submit them to
  # the bulk API. A glob (expanded in sorted order) replaces parsing `ls`, so
  # paths containing whitespace are handled correctly.
  split -a 5 -l "${MAX_SPLIT_LINES}" "${bulk_file}" "${bulk_file}." || return
  for chunk in "${bulk_file}."[a-z][a-z][a-z][a-z][a-z]; do
    [[ -e "${chunk}" ]] || continue  # unmatched glob stays literal; skip it
    http --ignore-stdin --timeout 60 -h POST "${ES_URL}/${index}/_bulk" "@${chunk}"
  done

  # Force a refresh and optimize (to merge immediately).
  # These requests carry no body, so stdin is ignored like the other calls.
  # NOTE(review): newer Elasticsearch releases expose this as _forcemerge;
  # confirm against the server version in use.
  http --ignore-stdin POST "${ES_URL}/${index}/_refresh"
  http --ignore-stdin POST "${ES_URL}/${index}/_optimize"

  # Restore the refresh settings after bulk indexing
  http PUT "${ES_URL}/${index}/_settings" <<< '{"index": {"refresh_interval": "1s"}}'
}
# Load the "films" bulk archive into the "freebase" index, then go back to the
# script's own directory (create_index changes the working directory).
create_index freebase films
cd "$__dir"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment