Last active
January 17, 2023 21:56
-
-
Save senid231/97cd1a36bcaad6a039e6cb941730e4fd to your computer and use it in GitHub Desktop.
Parses the Ukrainian DTEK OEM shutdowns page and saves a screenshot.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env ruby
# frozen_string_literal: true

# rubocop:disable Performance/Casecmp

# Crawler for the DTEK OEM shutdowns page: opens the page in a browser driven
# by Ferrum, fills in the address form (city, street, house number) and saves
# a screenshot of the resulting schedule.

require 'bundler/inline'

# Do not buffer log lines, so progress shows up while the script is waiting.
$stdout.sync = true

# Inline Gemfile: dependencies are resolved when the script starts.
gemfile(true) do
  source 'https://rubygems.org'
  # https://github.com/rubycdp/ferrum
  gem 'ferrum', git: 'https://github.com/rubycdp/ferrum.git'
end

# Usage:
# $ ./crawler.rb Одеса "Дерибасівська вул." 12
puts "ARGV=#{ARGV.inspect}"

# Input params
city_value, street_value, house_num_value = ARGV

# Fail fast with a usage hint instead of starting the browser and crashing
# later on a nil value in the middle of the form interaction.
if [city_value, street_value, house_num_value].any? { |value| value.nil? || value.strip.empty? }
  abort "Usage: #{$PROGRAM_NAME} CITY STREET HOUSE_NUM"
end

# Code
require 'ferrum'

site_url = 'https://www.dtek-oem.com.ua/shutdowns'
browser = Ferrum::Browser.new window_size: [1440, 1080]
puts "Opens URL #{site_url} ..."
browser.go_to(site_url)
puts 'URL opened.'
# Seconds to sleep between two polling attempts.
POLL_INTERVAL = 2
# Upper bound, in seconds, for waiting on the schedule table or a form input.
PAGE_TIMEOUT = 120
# Upper bound, in seconds, for waiting on a matching autocomplete option.
OPTION_TIMEOUT = 20

# Calls the block repeatedly until it returns a truthy value or +timeout+
# seconds have passed.
#
# @param waiting_message [String] printed before every sleep between attempts
# @param timeout [Numeric] maximum number of seconds to keep polling
# @param interval [Numeric] seconds to sleep between attempts
# @return [Object, nil] the first truthy block result, or nil on timeout
def poll_until(waiting_message:, timeout:, interval: POLL_INTERVAL)
  # Monotonic clock, so wall-clock adjustments cannot stretch or cut the wait.
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
  loop do
    result = yield
    return result if result
    return nil if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline

    puts waiting_message
    sleep interval
  end
end

# Types +value+ into the input "#<field_id>" and clicks the entry of
# "#<field_id>autocomplete-list" whose text equals +value+ ignoring case.
#
# @param browser [Ferrum::Browser] browser with the page already opened
# @param field_id [String] DOM id of the input ("city", "street", "house_num")
# @param label [String] human readable field name used in log messages
# @param value [String] text to type and to look for in the dropdown
# @raise [RuntimeError] when the input stays missing/disabled or no option matches
def choose_autocomplete_option(browser, field_id, label, value)
  puts "Choosing #{label} #{value.inspect} ..."
  title = label.capitalize
  input_selector = "##{field_id}"
  options_selector = "##{field_id}autocomplete-list > div"

  # The node is looked up again on every attempt, as the original loop did.
  input = poll_until(
    waiting_message: "#{title} disabled, wait #{POLL_INTERVAL} sec ...",
    timeout: PAGE_TIMEOUT
  ) do
    node = browser.at_css(input_selector)
    node if node && node.attribute('disabled').nil?
  end
  raise "#{title} input #{input_selector} is missing or still disabled after #{PAGE_TIMEOUT} sec" unless input

  input.focus.type(value)

  # The dropdown may not be rendered right after typing, so poll for the
  # matching option instead of reading the list exactly once.
  wanted = value.downcase
  option = poll_until(
    waiting_message: "#{title} option not listed yet, wait #{POLL_INTERVAL} sec ...",
    timeout: OPTION_TIMEOUT
  ) do
    browser.css(options_selector).find { |node| node.text.downcase == wanted }
  end
  unless option
    listed = browser.css(options_selector).map(&:text)
    raise "No #{label} option equal to #{value.inspect}; listed options: #{listed.inspect}"
  end

  option.click
  puts "#{title} chosen."
end

begin
  table_selector = '.discon-schedule-table'

  puts 'Searching for table on a page ...'
  table = poll_until(
    waiting_message: "Table not found, wait #{POLL_INTERVAL} sec ...",
    timeout: PAGE_TIMEOUT
  ) { browser.at_css(table_selector) }
  raise "Table #{table_selector} did not appear within #{PAGE_TIMEOUT} sec" unless table

  puts 'Table found.'

  choose_autocomplete_option(browser, 'city', 'city', city_value)
  choose_autocomplete_option(browser, 'street', 'street', street_value)
  choose_autocomplete_option(browser, 'house_num', 'house num', house_num_value)

  browser.at_css(table_selector).scroll_into_view
  screenshot_path = "./dtek-oem-#{Time.now.strftime('%Y%m%d%H%M%S')}.png"
  browser.screenshot(path: screenshot_path, selector: '.discon-schedule-interact')
  puts "Success screenshot saved at #{File.absolute_path(screenshot_path)}."
rescue StandardError => e
  time = Time.now.strftime('%Y%m%d%H%M%S')
  error_screenshot_path = "./error-dtek-oem-#{time}.png"
  error_html_path = "./error-dtek-oem-#{time}.html"
  begin
    browser.screenshot(path: error_screenshot_path)
    File.write(error_html_path, browser.body)
    puts "Error screenshot saved at #{File.absolute_path(error_screenshot_path)}."
    puts "Error html saved at #{File.absolute_path(error_html_path)}."
  rescue StandardError => dump_error
    # Saving diagnostics is best effort; it must not hide the original failure.
    warn "Could not save error artifacts: #{dump_error.class}: #{dump_error.message}"
  end
  raise e
ensure
  # Shut the browser down explicitly instead of relying on process exit.
  browser.quit
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./crawler.rb Одеса 'Дерибасівська вул.' 12 | |
Fetching https://github.com/rubycdp/ferrum.git | |
Resolving dependencies...Fetching gem metadata from https://rubygems.org/.... | |
...... | |
Using bundler 2.3.26 | |
Using concurrent-ruby 1.1.10 | |
Using public_suffix 5.0.1 | |
Using webrick 1.7.0 | |
Using websocket-extensions 0.1.5 | |
Using addressable 2.8.1 | |
Using websocket-driver 0.7.5 | |
Using ferrum 0.13 from https://github.com/rubycdp/ferrum.git (at main@a71634d) | |
ARGV=["Одеса", "Дерибасівська вул.", "12"] | |
Opens URL https://www.dtek-oem.com.ua/shutdowns ... | |
URL opened. | |
Searching for table on a page ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table not found, wait 2 sec ... | |
Table found. | |
Choosing city "Одеса" ... | |
City chosen. | |
Choosing street "Дерибасівська вул." ... | |
Street chosen. | |
Choosing house num "12" ... | |
House num disabled, wait 2 sec ... | |
House num chosen. | |
Success screenshot saved at /home/senid/projects/my/dtek-oem-20230115212941.png. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
result screenshot example