Created
March 21, 2015 19:48
-
-
Save jikkujose/fb8d26ff8e420e07750e to your computer and use it in GitHub Desktop.
Quick and dirty implementation of HK flight data parsing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Quick and dirty scraper for the Hong Kong airport arrivals page.
#
# Downloads the arrivals listing for today's date, extracts one hash per row
# of the table with id "list", and prints the records grouped by date.

require 'curb'
require 'nokogiri'
require 'date'

# NOTE(review): the month is unpadded (%-m) while the day is zero-padded (%d);
# presumably this is the format the endpoint expects -- confirm.
CURRENT_DATE = Date.today.strftime("%Y-%-m-%d")
USER_AGENT = "Mac / Firefox 29: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:29.0) Gecko/20100101 Firefox/29.0"

# Single file used both to read previously stored cookies and to persist new ones.
COOKIE_FILE = "cookies-2015Mar.txt"

# Record key => XPath (relative to a table row) of the cell holding its text.
FIELD_XPATHS = {
  time: 'td[1]/text()',
  flight: 'td[2]/text()',
  origin: 'td[3]/text()',
  airline: 'td[4]/text()',
  hall: 'td[5]/text()',
  status: 'td[6]/text()'
}.freeze

# The query parameter must be exactly "fromDate"; a stray "<wbr>" tag had been
# pasted into the literal, which changed the parameter name sent to the server.
link = "http://www.hongkongairport.com/flightinfo/eng/real_arrinfo.do?fromDate=#{CURRENT_DATE}"

h = Curl::Easy.new(link) do |http|
  http.headers["User-Agent"] = USER_AGENT
  http.follow_location = true
  http.enable_cookies = true
  http.cookiefile = COOKIE_FILE
  http.cookiejar = COOKIE_FILE
  http.verbose = false
end
h.perform

html = Nokogiri::HTML(h.body_str) do |config|
  config.noent.noblanks.noerror
end

# Drop rows flagged mr="true" before reading the table.
# NOTE(review): the meaning of the "mr" attribute is not visible here -- confirm.
html.search("tr[@mr='true']").remove
rows = html.xpath('//table[@id="list"]/tr')

# Declared outside the block on purpose: a variable first assigned inside the
# block would be block-local and reset to nil on every iteration, so rows
# without their own "date" attribute would never inherit the date of the most
# recent row that carried one.
current_date = nil

flight_records = rows.map do |row|
  raw_date = row.attr("date")
  # The attribute may hold comma-separated values; the last one is parsed as
  # an ISO date (%F == %Y-%m-%d).
  current_date = Date.strptime(raw_date.split(",").last, "%F") if raw_date

  record = FIELD_XPATHS.each_with_object({}) do |(field, xpath), fields|
    # at_xpath returns nil for a missing cell; nil.to_s yields "" so the
    # record always has every key.
    fields[field] = row.at_xpath(xpath).to_s.strip
  end
  record[:date] = current_date
  record
end

puts flight_records.group_by { |record| record[:date] }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment