Last active
March 14, 2018 21:04
-
-
Save hayduke19us/726539721efd91933664697f028a0885 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'uri' | |
require 'net/http' | |
require 'json' | |
require 'bigdecimal' | |
require 'securerandom' | |
# Usage | |
# | |
# Options: option(default) | |
# Static: | |
# * LIMIT(5000) - Changes the batch size of records to request for. | |
# * FULL(true) - If true, a single catalog file is created. If false, a directory is
# created holding pages of <= LIMIT records each, named like
# priceline-catalog-YYYY-MM-DD-<random hex>/page-<request_count>.csv.
# Dynamic: | |
# * verbose - If set to true, a progress indicator, a timer, and a retrospective
# overview are printed.
# | |
# Single Use: | |
# From the directory in which you want the CSV catalog written, run the | |
# following: | |
# | |
# Running verbose: | |
# ruby priceline_catalog.rb api_key true
# | |
# Automated: | |
# | |
# Running non-verbose (e.g. from cron):
# Output paths are relative, so the catalog is written to the working directory the script is run from.
# `* * * * * /usr/bin/ruby /home/path_to_file/priceline_catalog.rb api_key` | |
# Downloads the Priceline hotel catalog page by page and stores it as CSV.
#
# Each request asks for up to LIMIT records. The API answers with a CSV chunk,
# the total record count and a resume key that is sent back to fetch the next
# page. Paging stops once the API stops handing out a resume key, or once more
# requests than the estimated page count have been made.
#
# With FULL set to true all pages are appended to one file; otherwise each page
# is written to its own file inside a per-run directory.
class PricelineCatalog
  REFID = '7459'.freeze
  FORMAT = 'json'.freeze
  BASE = 'https://api.rezserver.com/api/shared/getBOF2.Downloads.Hotel.Hotels'.freeze
  LIMIT = 5000
  FULL = true
  # Key under which the API nests its payload in the JSON body.
  PRIMARY_KEY = 'getSharedBOF2.Downloads.Hotel.Hotels'.freeze
  # Requests are retried without delay, so the retry budget has to be bounded;
  # otherwise a persistent failure would hammer the API forever.
  MAX_CONSECUTIVE_FAILURES = 3

  attr_reader :uri, :request_count, :resume_key, :resume_keys, :time, :file_key, :verbose
  attr_accessor :catalog_size

  # api_key - key sent with every request.
  # verbose - truthy to print progress, timing and a final summary.
  def initialize(api_key, verbose)
    @api_key = api_key
    @verbose = verbose
    @resume_key = nil
    @resume_keys = []
    @request_count = 0
    @consecutive_failures = 0
    @time = Time.now.strftime('%F')
    @file_key = SecureRandom.hex
    @uri = URI(BASE).tap { |u| u.query = query_params }
  end

  # Fetches the whole catalog and, when verbose, prints timing and a summary.
  def self.build(api_key, verbose: false)
    catalog = new api_key, verbose
    catalog.say_with_time do
      loop do
        catalog.get
        catalog.record_progress
        break if catalog.done?
      end
      print "Done!\n" if verbose
    end
    puts catalog.info if verbose
  end

  # Path the current page is written to. In FULL mode this is constant for the
  # run; otherwise it changes with every request.
  def filename
    if FULL
      "priceline-catalog-#{time}-#{file_key}.csv"
    else
      "#{dirname}/page-#{request_count}.csv"
    end
  end

  # Directory holding the individual pages when FULL is false.
  def dirname
    "priceline-catalog-#{time}-#{file_key}"
  end

  # Query parameters for the next request; the resume key is only included
  # once the API has handed one out.
  def params
    base = { api_key: @api_key, refid: REFID, format: FORMAT, limit: LIMIT }
    resume_key ? base.merge(resume_key: resume_key) : base
  end

  # Used to paginate thru the remote hotel records. Every key is remembered so
  # that #info can report duplicates, and the request URI is refreshed.
  def resume_key=(value)
    @resume_key = value
    resume_keys.push value
    uri.query = query_params
  end

  def query_params
    URI.encode_www_form params
  end

  # Requests the next page. A non-success HTTP status is reported and counted
  # against the retry budget.
  def get
    response = Net::HTTP.get_response uri
    if response.is_a? Net::HTTPSuccess
      handle_response response
    else
      puts "Failed to retrieve catalog status_code: #{response.code}, request_index: #{request_count}"
      register_failure
    end
  end

  # Stores the paging state and CSV chunk of a successful response. An error
  # reported by the API aborts the run; a body with neither results nor an
  # error counts against the retry budget.
  def handle_response(response)
    body = JSON.parse response.body
    if (results = body.dig(PRIMARY_KEY, 'results'))
      @consecutive_failures = 0
      self.catalog_size ||= results['total_records'] # Just store it the first time
      self.resume_key = results['resume_key']
      write results['csv']
    elsif (error = body.dig(PRIMARY_KEY, 'error'))
      puts error
      abort
    else
      register_failure
    end
  end

  # Writes one CSV chunk to #filename, creating the page directory on demand.
  def write(csv)
    return unless csv

    Dir.mkdir dirname unless FULL || Dir.exist?(dirname)
    # Headers are included in every response, so only the first chunk written
    # to a file keeps its header line.
    if File.exist? filename
      File.open(filename, 'a') { |f| f.write rows_without_header(csv) }
    else
      File.write(filename, csv)
    end
  end

  # Human-readable summary of the run.
  def info
    dups = resume_keys.group_by(&:itself).select { |_, seen| seen.size > 1 }
    lines = []
    lines << "duplicate_resume_keys: #{dups}" if dups.any?
    lines << "last_resume_key: #{resume_keys.last}"
    lines << "request_count: #{request_count.to_i}"
    lines.join("\n")
  end

  def record_progress
    @request_count += 1
    print '.' if verbose
  end

  # True once paging should stop.
  def done?
    # No page has been received yet, so a nil resume key means nothing.
    return false if resume_keys.empty?
    # Resume key is nil or a blank string: the API has no further pages.
    return true if resume_key.to_s.empty?
    return false unless catalog_size

    # Safety valve: stop once more requests were made than the estimated page
    # count plus one. Rational keeps the division exact and rounds half up.
    request_count > Rational(catalog_size.to_i, LIMIT).round + 1
  end

  # Runs the block; when verbose, also announces start/finish and the elapsed
  # time in minutes.
  def say_with_time
    return yield unless verbose

    puts "Building catalog"
    # The monotonic clock is immune to wall-clock adjustments during the run.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    yield
    puts "Finished building catalog"
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
    puts "Total Time: #{elapsed / 60} minutes"
  end

  private

  # Everything after the first line of the chunk; empty when the chunk holds
  # nothing but a header.
  def rows_without_header(csv)
    csv.split("\n", 2)[1].to_s
  end

  # Counts a failed request and aborts once the retry budget is used up.
  def register_failure
    @consecutive_failures += 1
    return if @consecutive_failures < MAX_CONSECUTIVE_FAILURES

    abort "Giving up after #{MAX_CONSECUTIVE_FAILURES} consecutive failed requests"
  end
end
# Command-line entry point: ruby priceline_catalog.rb API_KEY [VERBOSE]
api_key, verbose_arg = ARGV
abort 'Usage: ruby priceline_catalog.rb API_KEY [VERBOSE]' if api_key.nil? || api_key.empty?
# ARGV values are strings and every string (even "false") is truthy in Ruby,
# so explicit negatives have to be recognised by hand. Any other second
# argument keeps enabling verbose output, as before.
verbose = !verbose_arg.nil? && !%w[false 0 no].include?(verbose_arg.downcase)
PricelineCatalog.build api_key, verbose: verbose
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment