Skip to content

Instantly share code, notes, and snippets.

@mipearson
Created August 24, 2014 23:20
Show Gist options
  • Save mipearson/2d76284e2fa0febb6e3b to your computer and use it in GitHub Desktop.
Save mipearson/2d76284e2fa0febb6e3b to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# Throwaway script to dump an index's contents to disk, delete the index, then
# re-upload it. Used for testing index templates in Logstash.
#
# Installing the yajl gem is highly recommended for fast JSON, but not required.
require 'rubygems'
# Prefer yajl's drop-in JSON replacement when the gem is installed, otherwise
# fall back to the standard json library. A missing gem raises LoadError, which
# is a ScriptError and not a StandardError, so an inline `rescue` modifier
# would not catch it; the explicit rescue clause is required for the fallback.
begin
  require 'yajl/json_gem'
rescue LoadError
  require 'json'
end
require 'elasticsearch'
require 'active_support/core_ext/array/grouping'
# Name of the local dump file for the current index: the value of @index_name
# followed by a ".json" extension.
def index_file_name
  @index_name.to_s + '.json'
end
def get_index_data
hits = @client.search(index: @index_name, size: 0)['hits']['total']
puts "#{hits} documents in #{@index_name}"
results = @client.search(index: @index_name, size: hits)
data = results['hits']['hits'].map do |hit|
{ index: { _index: @index_name, _type: hit['_type'], _id: hit['_id'], data: hit['_source'] } }
end
File.open(index_file_name, 'w+') do |f|
f.write(data.to_json)
end
puts "wrote #{index_file_name} for later use"
data
end
# Script body: dump (or reload) the index named on the command line, delete
# the index, then bulk-insert the saved documents back into it.
@index_name = ARGV[0]
# An empty name is rejected as well as a missing one, so the script never
# runs search/delete calls against a blank index name.
abort 'Syntax: replay_es_index.rb INDEX_NAME' if @index_name.nil? || @index_name.empty?

@client = Elasticsearch::Client.new

# A dump file left by an earlier run takes precedence over querying the server,
# which lets the script be re-run after the index has already been deleted.
index_data =
  if File.exist?(index_file_name)
    JSON.parse(File.read(index_file_name))
  else
    get_index_data
  end

puts "Deleting #{@index_name} if it exists"
# Check for existence instead of rescuing everything, so that connection or
# permission failures stop the script rather than being silently ignored.
@client.indices.delete(index: @index_name) if @client.indices.exists(index: @index_name)

puts "Bulk inserting #{@index_name} (in groups of 10,000)"
groups = index_data.each_slice(10_000).to_a
groups.each_with_index do |group, idx|
  response = @client.bulk(body: group)
  # The bulk response is expected to carry a truthy 'errors' entry when any
  # item failed; report it because the original index has already been deleted.
  warn "group #{idx + 1} of #{groups.count} reported failed items" if response['errors']
  puts "inserted group #{idx + 1} of #{groups.count}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment