Last active
August 2, 2017 09:04
-
-
Save LitvinenkoD89/d2a759d5a23819930b28684e2c4699ac to your computer and use it in GitHub Desktop.
chase
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CHASE BANK {source_name: 'chase', batch_number: 7, request_id: 7000000068, request_name: 'CHASE BANK'}
#
# Two-phase scrape of the Chase locator:
#   1. Seed one locator search URL per state yielded by `loop_states`.
#   2. Extract every result on each search page into a stored document, then
#      queue that location's profile page to enrich the document with
#      coordinates, ATM count, branch services and branch format.
#
# `init_vars` is read for :source_name, :batch_number, :request_id and
# :request_name. The first block argument is not used by this script.
scraper_service.scrape do |_browser, scraper, init_vars|
  easy_seeder = Library.lib('EasySeeder')
  easy_extractor = Library.lib('EasyExtractor')
  # The result is not referenced below; the lookup is kept in case resolving
  # the library has side effects. NOTE(review): confirm and drop if not needed.
  Library.lib('FetcherAgent')

  # Removes tab / carriage-return / newline characters from scraped text.
  strip_breaks = ->(text) { text.delete("\t\r\n") }

  easy_seeder.seed(source_name: init_vars[:source_name]) do
    loop_states 'us', radius: 10 do |short_code, full_name|
      # NOTE(review): full_name is interpolated unescaped; if it can contain
      # spaces and queue_url does not encode its argument, this needs escaping.
      queue_url "https://locator.chase.com/search/#{short_code}/?q=#{full_name}"
    end
  end

  easy_extractor.extract(
    source_name: init_vars[:source_name],
    scraper: scraper,
    batch_number: init_vars[:batch_number],
    request_id: init_vars[:request_id],
    request_name: init_vars[:request_name]
  ) do
    # Search result pages: one stored document per result entry.
    find_pages page_format: :html do |url, _parser_page, page|
      # Pagination is a property of the page, not of each result, so it is
      # handled once here. Follow-up pages are only queued from a URL that does
      # not already carry a page parameter, otherwise every paginated page would
      # queue "...&page=2&page=3" style URLs again.
      # Assumes the first page's pagination lists the last page number -- TODO confirm.
      pagination = page.at('.clearfix.pagination')
      if pagination && url.to_s !~ /[?&]page=\d+/
        # Non-numeric entries convert to 0 and therefore never win the max.
        max_page = pagination.search('li').map { |li| li.text.to_i }.max
        if max_page && max_page > 1
          (2..max_page).each do |page_number|
            seeder.queue_url "#{url}&page=#{page_number}"
          end
        end
      end

      page.search('.result.clearfix').each do |detail|
        name_node = detail.at('.resultName')
        next if name_node.nil?

        name = strip_breaks.call(name_node.text)
        href = detail.at('.titleLeft').at('a').attr('href')
        # hrefs may be relative; resolve them against the current page URL.
        location_url = URI.join(url, href).to_s

        address = detail.at('.address')
        last_line = address.at('.line.last')
        street1 = address.at('.line.street-address').text
        city = last_line.at('.locality').text
        state = last_line.at('.region').text
        zip_code = last_line.at('.postal-code').text

        loc_type = detail.at('.locationType').text
        type = loc_type =~ /ONLY/i ? 'CHASE ATM ONLY' : 'CHASE BRANCHES'

        doc_id = store_doc({
          brand: 'CHASE',
          type: type,
          name: name, # required
          address_1: street1,
          address_2: '',
          city: city,
          state: state,
          zipcode: zip_code,
          country: '',
          # :lat and :long are filled in by the profile-page handler below.
          location_url: location_url,
          flags: {}
        })

        seeder.queue_url(location_url, {
          page_type: 'profile',
          doc_id: doc_id
        })
      end
    end

    # Profile pages: enrich the document stored from the search result.
    find_pages page_format: :html, page_type: 'profile' do |_url, parser_page, page|
      doc = find_location(parser_page[:doc_id])
      next unless doc.present?

      # ATM count: leading token of the first info cell that parses as a number.
      # The token is stored as scraped (a String); 0 is the default when no cell
      # matches.
      atm_count = 0
      atm_info = page.at('.atmInformation')
      if atm_info
        atm_info.search('.halfMain.floatLeft').each do |info|
          token = info.text.split(' ').first
          numeric = begin
            Float(token)
            true
          rescue ArgumentError, TypeError
            # ArgumentError: not a number; TypeError: empty cell (token is nil).
            false
          end
          next unless numeric

          atm_count = token
          break
        end
      end

      # Branch services: first line of each class-less <div> that shares a
      # parent with the "Branch Services" section title, comma-joined. If
      # several titles match, the last one wins.
      branch_services = ''
      page.search('.sectionTitle').each do |title|
        next unless title.text == 'Branch Services'

        service_divs = title.parent.search('div:not([class])')
        branch_services = service_divs.map { |div| div.text.split("\n").first }.join(', ')
      end

      # Any of these nodes may be absent; store nil instead of raising.
      lat_node = page.at('[property="place:location:latitude"]')
      long_node = page.at('[property="place:location:longitude"]')
      format_node = page.at('.branchType')

      doc[:lat] = lat_node ? lat_node.attr('content') : nil
      doc[:long] = long_node ? long_node.attr('content') : nil
      doc[:flags]['ATM COUNT'] = atm_count
      doc[:flags]['BRANCH SERVICES'] = branch_services
      doc[:flags]['FORMAT'] = format_node ? strip_breaks.call(format_node.text) : nil
      store_doc doc
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment