Skip to content

Instantly share code, notes, and snippets.

@hayduke19us
Last active March 14, 2018 21:04
Show Gist options
  • Save hayduke19us/726539721efd91933664697f028a0885 to your computer and use it in GitHub Desktop.
Save hayduke19us/726539721efd91933664697f028a0885 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'uri'
require 'net/http'
require 'json'
require 'bigdecimal'
require 'securerandom'
# Usage
#
# Options: option(default)
# Static:
# * LIMIT(5000) - The batch size: the number of records requested per page.
# * FULL(true) - If set to true, a single catalog file is created. If set to
# false, a directory is created holding one file per page of <= LIMIT records,
# like priceline-catalog-YYYY-MM-DD-<random hex>/page-(request_count).csv.
# Dynamic:
# * verbose - If set to true, progress dots, the total run time, and a
# retrospective overview of the run are printed.
#
# Single Use:
# From the directory in which you want the CSV catalog written, run the
# following:
#
# Running verbose:
# ruby priceline_catalog.rb api_key true
#
# Automated:
#
# Running not verbose:
# The catalog is written relative to the working directory the job runs in,
# which is not necessarily the directory the script lives in.
# `* * * * * /usr/bin/ruby /home/path_to_file/priceline_catalog.rb api_key`
# Downloads a catalog page by page from the endpoint in BASE and writes the CSV
# payload of every page to disk.
#
# Each request asks for LIMIT records. A successful response carries a
# `resume_key` which is sent with the next request to fetch the following page.
# With FULL set to true all pages are appended to one file; otherwise each page
# is written to its own file inside a dedicated directory.
#
# Output paths are relative, so files are created in the current working
# directory.
class PricelineCatalog
  REFID = '7459'.freeze
  FORMAT = 'json'.freeze
  BASE = 'https://api.rezserver.com/api/shared/getBOF2.Downloads.Hotel.Hotels'.freeze
  # Top-level key of the JSON response body.
  PRIMARY_KEY = 'getSharedBOF2.Downloads.Hotel.Hotels'.freeze
  LIMIT = 5000
  FULL = true
  # Number of back-to-back unusable responses after which `build` gives up
  # instead of re-requesting the same page forever.
  MAX_CONSECUTIVE_FAILURES = 5

  attr_reader :uri, :request_count, :resume_key, :resume_keys, :time, :file_key,
              :verbose, :consecutive_failures
  attr_accessor :catalog_size

  # Fetches every page of the catalog and writes it to disk.
  #
  # Aborts the process when the API reports an error or when
  # MAX_CONSECUTIVE_FAILURES requests in a row yield no usable page.
  #
  # @param api_key [String] key sent as the `api_key` query parameter
  # @param verbose [Boolean] print progress, timing and a summary when truthy
  # @return [nil]
  def self.build(api_key, verbose: false)
    catalog = new api_key, verbose
    catalog.say_with_time do
      loop do
        catalog.get
        catalog.record_progress
        break if catalog.done?

        # `done?` stays false until a first page has been received, so without
        # this bound a persistently failing endpoint would be hit forever.
        if catalog.consecutive_failures >= MAX_CONSECUTIVE_FAILURES
          abort "Giving up after #{catalog.consecutive_failures} consecutive failed requests"
        end
      end
      print "Done!\n" if verbose
    end
    puts catalog.info if verbose
  end

  # @param api_key [String] key sent as the `api_key` query parameter
  # @param verbose [Boolean] print progress output when truthy
  def initialize(api_key, verbose = false)
    @api_key = api_key
    @verbose = verbose
    @time = Time.now.strftime('%F')
    # Random suffix so that repeated runs on the same day do not share a file.
    @file_key = SecureRandom.hex
    @request_count = 0
    @consecutive_failures = 0
    @resume_key = nil
    @resume_keys = []
    @catalog_size = nil
    @uri = URI(BASE).tap { |u| u.query = query_params }
  end

  # @return [String] relative path of the file the current page is written to
  def filename
    if FULL
      "priceline-catalog-#{time}-#{file_key}.csv"
    else
      "#{dirname}/page-#{request_count}.csv"
    end
  end

  # @return [String] directory that holds the page files when FULL is false
  def dirname
    "priceline-catalog-#{time}-#{file_key}"
  end

  # @return [Hash] query parameters for the next request; `resume_key` is only
  #   included once one has been received
  def params
    base = {
      api_key: @api_key,
      refid: REFID,
      format: FORMAT,
      limit: LIMIT
    }
    resume_key ? base.merge(resume_key: resume_key) : base
  end

  # Used to paginate thru the remote hotel records. Stores the key, records it
  # in `resume_keys` and rebuilds the request URI so the next `get` asks for
  # the following page.
  def resume_key=(value)
    @resume_key = value
    resume_keys.push value
    uri.query = query_params
  end

  # @return [String] `params` encoded as a URL query string
  def query_params
    URI.encode_www_form params
  end

  # Requests the current page and hands a successful response to
  # `handle_response`; any other HTTP status is reported and counted as a
  # failure.
  def get
    response = Net::HTTP.get_response uri
    if response.is_a? Net::HTTPSuccess
      handle_response response
    else
      record_failure "Failed to retrieve catalog status_code: #{response.code}, request_index: #{request_count}"
    end
  end

  # Parses a response body. On `results` the total size is remembered (first
  # page only), the resume key is advanced and the CSV is written. On `error`
  # the error is printed and the process aborts. Anything else is counted as a
  # failed request.
  #
  # @param response [#body] object whose body is a JSON document
  def handle_response(response)
    body = JSON.parse response.body
    if (results = body.dig(PRIMARY_KEY, 'results'))
      @consecutive_failures = 0
      self.catalog_size ||= results['total_records'] # Just store it the first time
      self.resume_key = results['resume_key']
      write results['csv']
    elsif (error = body.dig(PRIMARY_KEY, 'error'))
      puts error
      abort
    else
      record_failure "Unexpected response body, request_index: #{request_count}"
    end
  end

  # Writes one page of CSV to `filename`.
  #
  # @param csv [String, nil] CSV text including its header line
  # @return [Integer, nil] bytes written, or nil when there is no CSV
  def write(csv)
    return unless csv

    Dir.mkdir dirname unless FULL || Dir.exist?(dirname)
    # Headers are included in every response, so only the first write to a file
    # keeps them; later writes drop the first line before appending.
    if File.exist? filename
      rows = csv.split("\n", 2)[1].to_s # empty when the page holds only a header
      File.open(filename, 'a') { |f| f.write rows }
    else
      File.write(filename, csv)
    end
  end

  # @return [String] summary of the run: duplicated resume keys (if any), the
  #   last resume key and the number of requests made
  def info
    dups = resume_keys.group_by(&:itself).select { |_, v| v.size > 1 }
    lines = []
    lines << "duplicate_resume_keys: #{dups}" if dups.any?
    lines << "last_resume_key: #{resume_keys.last}"
    lines << "request_count: #{request_count.to_i}"
    lines.join("\n")
  end

  # Counts a finished request and prints a progress dot in verbose mode.
  def record_progress
    @request_count += 1
    print '.' if verbose
  end

  # @return [Boolean] false until the catalog size is known (no page received
  #   yet); afterwards true when the resume key is nil or blank, or when more
  #   requests were made than the estimated page count plus one
  def done?
    return false unless catalog_size
    return true if resume_key.to_s.empty?

    # Rational keeps the division exact and rounds halves up.
    estimated_pages = Rational(catalog_size.to_i, LIMIT).round
    request_count > estimated_pages + 1
  end

  # Runs the given block; in verbose mode it is wrapped in start/finish
  # messages and the elapsed time in minutes is printed.
  def say_with_time
    unless verbose
      yield
      return
    end

    puts 'Building catalog'
    # Monotonic clock: unaffected by wall-clock adjustments during the run.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    yield
    puts 'Finished building catalog'
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
    puts "Total Time: #{elapsed / 60} minutes"
  end

  private

  # Prints the message and counts one more consecutive unusable response.
  def record_failure(message)
    @consecutive_failures += 1
    puts message
  end
end
# Command-line entry point: ruby priceline_catalog.rb API_KEY [VERBOSE]
api_key, verbose_arg = ARGV
abort 'Usage: ruby priceline_catalog.rb API_KEY [VERBOSE]' if api_key.nil? || api_key.empty?

# ARGV values are strings and every string (including "false") is truthy, so
# the raw argument cannot be passed through as a flag. Any second argument
# other than an explicit negative keeps enabling verbose output.
verbose = !verbose_arg.nil? && !%w[false 0 no off].include?(verbose_arg.strip.downcase)
PricelineCatalog.build api_key, verbose: verbose
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment