afiore · December 16, 2015 21:59
diff --git a/batch-index.sh b/batch-index.sh

 #!/usr/bin/env bash
 #
 # Post the lines of a file to an Elasticsearch _bulk endpoint in batches.
 #
 # Environment:
 #   DATASET_OUTPUT  path of the input file; defaults to
 #                   ../results.json/part-00000 when unset or empty.
 #                   (Presumably bulk-format JSON lines -- the script itself
 #                   never inspects the content.)
 #
 # Outputs:
 #   stdout      one progress line per batch
 #   import.log  curl's output for every request (recreated on each run)

 if [[ -z "$DATASET_OUTPUT" ]]; then
   export DATASET_OUTPUT="../results.json/part-00000"
 fi

 if [[ ! -r "$DATASET_OUTPUT" ]]; then
   printf 'error: cannot read input file: %s\n' "$DATASET_OUTPUT" >&2
   exit 1
 fi

 # Start every run with a fresh log.
 rm -f -- import.log

 elastic_search_endpoint="http://localhost:9201/_bulk"
 batch_size=1000

 # Redirecting into wc makes it print only the count, so no filename has to
 # be stripped; the arithmetic expansion drops the padding some wc
 # implementations emit. wc -l counts newline characters, so an unterminated
 # final line is not part of this total (it is still indexed below).
 file_size=$(wc -l < "$DATASET_OUTPUT") || exit 1
 file_size=$(( file_size ))

 # Round up so a trailing partial batch is included in the total.
 total_batches=$(( (file_size + batch_size - 1) / batch_size ))

 n_batch=0
 batch=()

 # Send the lines collected in `batch` as one bulk request, then empty it.
 # Does nothing when no lines are pending.
 flush_batch() {
   (( ${#batch[@]} > 0 )) || return 0
   n_batch=$(( n_batch + 1 ))
   echo "[${n_batch} of ${total_batches}] Indexing ${#batch[@]} documents"
   # printf '%s\n' emits every line verbatim and newline-terminated; unlike
   # an unquoted `echo -e` it never word-splits, globs, or expands backslash
   # escapes that occur inside a document.
   printf '%s\n' "${batch[@]}" \
     | curl -XPOST "$elastic_search_endpoint" --data-binary @- >> import.log \
     || printf 'warning: bulk request %s failed\n' "$n_batch" >&2
   batch=()
 }

 # IFS= and -r keep each line byte-for-byte; the `|| [[ -n ... ]]` clause
 # picks up a final line that lacks a trailing newline. Redirecting the file
 # into the loop (rather than piping cat) keeps the loop in the current
 # shell, so n_batch and batch stay visible to flush_batch afterwards.
 while IFS= read -r line || [[ -n "$line" ]]; do
   batch+=("$line")
   if (( ${#batch[@]} == batch_size )); then
     flush_batch
   fi
 done < "$DATASET_OUTPUT"

 # Send whatever is left: the last batch is usually smaller than batch_size.
 flush_batch

	#!/usr/bin/env bash
	#
	# Post the lines of a file to an Elasticsearch _bulk endpoint in batches.
	#
	# Environment:
	#   DATASET_OUTPUT  path of the input file; defaults to
	#                   ../results.json/part-00000 when unset or empty.
	#                   (Presumably bulk-format JSON lines -- the script itself
	#                   never inspects the content.)
	#
	# Outputs:
	#   stdout      one progress line per batch
	#   import.log  curl's output for every request (recreated on each run)

	if [[ -z "$DATASET_OUTPUT" ]]; then
	  export DATASET_OUTPUT="../results.json/part-00000"
	fi

	if [[ ! -r "$DATASET_OUTPUT" ]]; then
	  printf 'error: cannot read input file: %s\n' "$DATASET_OUTPUT" >&2
	  exit 1
	fi

	# Start every run with a fresh log.
	rm -f -- import.log

	elastic_search_endpoint="http://localhost:9201/_bulk"
	batch_size=1000

	# Redirecting into wc makes it print only the count, so no filename has to
	# be stripped; the arithmetic expansion drops the padding some wc
	# implementations emit. wc -l counts newline characters, so an unterminated
	# final line is not part of this total (it is still indexed below).
	file_size=$(wc -l < "$DATASET_OUTPUT") || exit 1
	file_size=$(( file_size ))

	# Round up so a trailing partial batch is included in the total.
	total_batches=$(( (file_size + batch_size - 1) / batch_size ))

	n_batch=0
	batch=()

	# Send the lines collected in `batch` as one bulk request, then empty it.
	# Does nothing when no lines are pending.
	flush_batch() {
	  (( ${#batch[@]} > 0 )) || return 0
	  n_batch=$(( n_batch + 1 ))
	  echo "[${n_batch} of ${total_batches}] Indexing ${#batch[@]} documents"
	  # The pipe must be a real, unescaped `|`: the batch goes to curl on stdin.
	  # printf '%s\n' emits every line verbatim and newline-terminated; unlike
	  # an unquoted `echo -e` it never word-splits, globs, or expands backslash
	  # escapes that occur inside a document.
	  printf '%s\n' "${batch[@]}" \
	    | curl -XPOST "$elastic_search_endpoint" --data-binary @- >> import.log \
	    || printf 'warning: bulk request %s failed\n' "$n_batch" >&2
	  batch=()
	}

	# IFS= and -r keep each line byte-for-byte; the `|| [[ -n ... ]]` clause
	# picks up a final line that lacks a trailing newline. Redirecting the file
	# into the loop (rather than piping cat) keeps the loop in the current
	# shell, so n_batch and batch stay visible to flush_batch afterwards.
	while IFS= read -r line || [[ -n "$line" ]]; do
	  batch+=("$line")
	  if (( ${#batch[@]} == batch_size )); then
	    flush_batch
	  fi
	done < "$DATASET_OUTPUT"

	# Send whatever is left: the last batch is usually smaller than batch_size.
	flush_batch