Created
July 28, 2017 13:33
-
-
Save LitvinenkoD89/c186d41f83959e1d011c25e9f269bb18 to your computer and use it in GitHub Desktop.
GREASE MONKEY AUTOMOTIVE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GREASE MONKEY AUTOMOTIVE {source_name: 'greasemonkeyintl', batch_number: 7, request_id: 7000000040, request_name: 'GREASE MONKEY AUTOMOTIVE'}
#
# Scraper for the Grease Monkey store locator. It runs in two phases:
#   1. Seed    - queue one store-locator search URL per US postal code.
#   2. Extract - for every <table> on a result page that looks like a store entry,
#                store a location document and queue the store's profile page;
#                then, on each profile page, add latitude/longitude to that document.
scraper_service.scrape do |_browser, scraper, init_vars|
  easy_seeder = Library.lib('EasySeeder')
  easy_extractor = Library.lib('EasyExtractor')
  # The returned object is not used below; the call itself is kept in case loading
  # the library has side effects. NOTE(review): remove if it does not.
  Library.lib('FetcherAgent')

  easy_seeder.seed(source_name: init_vars[:source_name]) do
    # NOTE(review): postal codes are stepped with radius 10 while the query asks for
    # dist=50 - confirm the mismatch is intentional (it only affects overlap).
    loop_postal_codes "us", radius: 10 do |zip, _lat, _long|
      queue_url "http://www.greasemonkeyintl.com/store-locator?zip=#{zip}&dist=50"
    end
  end

  easy_extractor.extract(
    source_name: init_vars[:source_name],
    scraper: scraper,
    batch_number: init_vars[:batch_number],
    request_id: init_vars[:request_id],
    request_name: init_vars[:request_name]
  ) do
    # Search result pages: one <table> per store (plus unrelated layout tables).
    find_pages page_format: :html do |_url, _parser_page, page|
      page.search('table').each do |store|
        font = store.at('font')
        next if font.nil?

        link = font.at('a')
        next if link.nil?

        location_url = link.attr('href')
        # The link text is split on '#'; the part after it is used as the store id.
        id = link.children.children.text.split('#')[1]

        # Everything after the grey (#575757) font tag holds the <br>-separated lines.
        segments = font.inner_html.delete("\t\r\n").split('"#575757">')
        # Skip tables that have a font/link but not the expected store markup.
        next if segments[1].nil?

        # Drop the first <br> segment and keep the following lines.
        # NOTE(review): the count comes from the outer split (segments.length), exactly
        # as the original code did; only the first two lines are used as the address.
        address_lines = segments[1].split('<br>').slice(1, segments.length) || []
        street1, city_and_st_zip = address_lines
        next if city_and_st_zip.nil?

        # Expected shape: "City, ST 12345".
        city, st_zip = city_and_st_zip.split(', ')
        next if st_zip.nil?

        state, zip_code = st_zip.split(' ')
        address_container = address_lines.join('<br>')

        doc_id = store_doc({
          store_id: id,
          brand: "GREASE MONKEY AUTOMOTIVE",
          type: "",
          property_id: id,
          name: '', # required
          address_1: street1,
          address_2: '',
          city: city,
          state: state,
          zipcode: zip_code,
          country: '',
          # lat: lat,
          # long: long,
          address_container_html: address_container,
          # map_link: store.parent.parent.at('.googlemap').attr('name'),
          location_url: location_url,
          flags: {
          }
        })

        # NOTE(review): `seeder` is not defined in this script; presumably it is
        # provided by the extractor DSL - confirm.
        seeder.queue_url location_url, {
          page_type: 'profile',
          doc_id: doc_id
        }
      end
    end

    # Find profile pages
    find_pages page_format: :html, page_type: 'profile' do |_url, parser_page, page|
      doc = find_location(parser_page[:doc_id])

      # The coordinates are read from the node two siblings after the
      # markerclusterer.js include; its first child's text is expected to contain
      # "'lat': <value>, 'lng': <value>".
      marker_text = page.at('[src="/inc/js/markerclusterer.js"]').next.next.children.first.text
      coordinates = marker_text.delete("\t\r\n").split("'lat': ")[1].gsub("'lng': ", '')
      lat, long = coordinates.split(',')

      doc[:lat] = lat.strip
      doc[:long] = long.strip
      store_doc doc
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment