-
-
Save SecureCloud-biz/442737b7862de01c9ac1bd47846f8350 to your computer and use it in GitHub Desktop.
GREASE MONKEY AUTOMOTIVE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GREASE MONKEY AUTOMOTIVE {source_name: 'greasemonkeyintl', batch_number: 7, request_id: 7000000040, request_name: 'GREASE MONKEY AUTOMOTIVE'}
#
# Scraper definition for the Grease Monkey store locator.
#
# Two phases run inside `scraper_service.scrape`:
#   1. Seeding    - queues one store-locator URL per value yielded by
#                   `loop_postal_codes "us"`.
#   2. Extraction - for every queued listing page, parses each <table> that
#                   contains a <font><a> pair into a store document and queues
#                   the store's own URL as a 'profile' page; for every profile
#                   page, reads the coordinates from an inline script and
#                   writes them onto the stored document.
#
# Tables / pages whose markup does not match what the parser expects are
# skipped (same policy as the pre-existing "no <font>/<a>" guard) instead of
# raising NoMethodError in the middle of a page.
scraper_service.scrape do |_browser, scraper, init_vars|
  easy_seeder = Library.lib('EasySeeder')
  easy_extractor = Library.lib('EasyExtractor')
  # NOTE(review): never referenced below. The lookup is kept in case
  # Library.lib has load-time side effects - confirm and remove if not.
  fetcher_agent = Library.lib('FetcherAgent')

  easy_seeder.seed(source_name: init_vars[:source_name]) do
    # NOTE(review): the postal-code loop is asked for radius 10 while the URL
    # requests dist=50 - confirm the mismatch is intentional.
    loop_postal_codes "us", radius: 10 do |zip, _lat, _long|
      queue_url "http://www.greasemonkeyintl.com/store-locator?zip=#{zip}&dist=50"
    end
  end

  easy_extractor.extract(
    source_name: init_vars[:source_name],
    scraper: scraper,
    batch_number: init_vars[:batch_number],
    request_id: init_vars[:request_id],
    request_name: init_vars[:request_name],
  ) do
    # Listing pages: one candidate store per <table>.
    find_pages page_format: :html do |_url, _parser_page, page|
      page.search('table').each do |store|
        # Only tables holding a <font> with a nested <a> are store entries.
        font = store.at('font')
        next if font.nil?

        link = font.at('a')
        next if link.nil?

        location_url = link.attr('href')
        # The link text is split on '#'; the part after it is used as the id
        # (nil when the text contains no '#').
        id = link.children.children.text.split('#')[1]

        # Strip tabs / CR / LF, then cut the markup at the grey-font marker;
        # the address lines are expected in the piece right after it.
        segments = font.inner_html.delete("\t\r\n").split('"#575757">')
        next if segments[1].nil?

        # Drop the first <br>-separated line and keep the following ones.
        # NOTE(review): the count passed to slice is the number of *marker*
        # segments (as in the original code), not the number of address
        # lines - with a single marker that keeps exactly two lines
        # (street, "City, ST ZIP"). Confirm this is the intended limit.
        address_lines = segments[1].split('<br>').slice(1, segments.length)
        next if address_lines.nil?

        address_container = address_lines.join('<br>')
        street1, city_and_st_zip = address_lines
        next if city_and_st_zip.nil?

        # Expected shape: "City, ST 12345".
        city, st_zip = city_and_st_zip.split(', ')
        next if st_zip.nil?

        state, zip_code = st_zip.split(' ')

        doc_id = store_doc({
          store_id: id,
          brand: "GREASE MONKEY AUTOMOTIVE",
          type: "",
          property_id: id,
          name: '', # required
          address_1: street1,
          address_2: '',
          city: city,
          state: state,
          zipcode: zip_code,
          country: '',
          # lat: lat,
          # long: long,
          address_container_html: address_container,
          # map_link: store.parent.parent.at('.googlemap').attr('name'),
          location_url: location_url,
          flags: {
          }
        })

        # Queue the store's own page so the profile handler below can add
        # coordinates to the document just stored.
        # NOTE(review): `seeder` is not defined in this file; presumably it is
        # supplied by the extractor DSL - confirm.
        seeder.queue_url location_url, {
          page_type: 'profile',
          doc_id: doc_id
        }
      end
    end

    # Find profile pages
    find_pages page_format: :html, page_type: 'profile' do |_url, parser_page, page|
      doc = find_location(parser_page[:doc_id])
      next if doc.nil?

      # The coordinates are read from the node two siblings after the
      # markerclusterer <script> tag.
      clusterer_tag = page.at('[src="/inc/js/markerclusterer.js"]')
      next if clusterer_tag.nil?

      script_node = clusterer_tag.next
      script_node = script_node.next unless script_node.nil?
      next if script_node.nil?

      script_body = script_node.children.first
      next if script_body.nil?

      # Everything after the first "'lat': " marker, with the "'lng': " label
      # removed, is split on commas: first piece = lat, second piece = long.
      after_lat = script_body.text.delete("\t\r\n").split("'lat': ")[1]
      next if after_lat.nil?

      lat, long = after_lat.gsub("'lng': ", '').split(',')
      next if lat.nil? || long.nil?

      doc[:lat] = lat.strip
      doc[:long] = long.strip
      store_doc doc
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment