Created
September 26, 2014 12:21
-
-
Save johnholdun/31688a6824bf29bbb957 to your computer and use it in GitHub Desktop.
Scrape the Spirit Halloween store locator and return a sensible array of hashes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri' | |
require 'nokogiri' | |
require 'yaml' | |
# Normalizes scraped text: every run of whitespace becomes a single space and
# leading/trailing whitespace is removed.
#
# `[[:space:]]` is used instead of `\s` so that Unicode whitespace such as the
# non-breaking space produced by `&nbsp;` is collapsed too; `\s` and
# `String#strip` alone leave those characters in place.
#
# @param text [String, nil] raw text (nil is treated as an empty string)
# @return [String] the normalized text
def aggressive_strip(text)
  # Collapse first, then strip, so a leading/trailing NBSP (now a plain space)
  # is removed as well.
  text.to_s.gsub(/[[:space:]]+/, ' ').strip
end
# Scrape the Spirit Halloween store locator for one postal code and print the
# stores found as a YAML array of hashes.
postal_code = 10003
url = "http://checkout.spirithalloween.com/storelocation.aspx?zipPostalCode=#{ postal_code }"

# open-uri no longer patches Kernel#open on Ruby 3.0+, so a bare `open(url)`
# would try to open a local file. OpenURI::OpenRead#open on the parsed URI
# works on old and new Rubies; the block form closes the handle afterwards.
doc = URI.parse(url).open { |io| Nokogiri::HTML(io) }

results = []

doc.css('.vStoreLocatorMultipleLabelStyleLocationContainer').each do |node|
  result = {}
  result['location'] = aggressive_strip(node.css('.locationName').text)
  result['distance'] = aggressive_strip(node.css('.locationDistance').text)

  # Each <dl> holds one term/value pair describing the store.
  node.css('dl').each do |dl|
    term = aggressive_strip(dl.css('dt').text.downcase)
    value = aggressive_strip(dl.css('dd').text)

    # A <dl> without a <dt> must not abort the whole scrape.
    first_dt = dl.css('dt').first
    class_attr = first_dt && first_dt.attributes['class']

    if class_attr && class_attr.value
      # When the <dt> carries a class, the class is used as the key and the
      # <dt> text as the value.
      value = term
      term = class_attr.value
    elsif term == 'address'
      # The address <dd> is <br>-separated: the first part is stored as the
      # name, the last part as the map markup, and the rest as the address.
      parts = aggressive_strip(dl.css('dd').inner_html).split('<br>').map { |part| part.strip }
      result['name'] = parts.shift
      result['map'] = parts.pop
      value = parts
    elsif term == ''
      # An unlabeled entry mentioning "open" becomes a boolean 'open' flag
      # that is true only for "now open".
      if value =~ /(now )?open(ing soon)?/i
        term = 'open'
        value = !!(value =~ /now open/i)
      end
    end

    term = 'type' if term == 'GalleryStore'

    result[term] = value
  end

  if result['map']
    # The map part is an HTML fragment; the first anchor (when present)
    # supplies the directions URL and a display name.
    anchor = Nokogiri::HTML(result['map']).css('a').first
    names = [result['name']]

    if anchor
      href = anchor.attributes['href']
      result['map'] = href.value if href
      names.unshift aggressive_strip(anchor.text).sub(/get directions to spirit - /i, '')
    end

    result['name'] = names

    # Pull "lat,lng" out of a ".../place/<lat>,<lng>" URL. If the URL does not
    # have that shape, keep 'map' as-is instead of raising.
    place = result['map'].match(%r{/place/([0-9.,-]+)})
    coords = place ? place[1].split(',') : []

    if coords.length >= 2
      result['lat'] = coords[0].to_f
      result['lng'] = coords[1].to_f
      result.delete 'map'
    end
  end

  results << result
end

puts results.to_yaml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment