# Source: GitHub Gist lenagroeger/d4ed908482afebada178
# Author: @lenagroeger
# Created: May 21, 2014 21:15
require 'rubygems'
require 'restclient'
require 'nokogiri'
require 'logger'
require 'net/http'
require 'mechanize'
# page_num = 2
# item_num = 2
# Search form endpoint: receives the initial search POST and the GridViewIIR
# paging postbacks issued further down in this script.
SEARCH_URL = "http://cgmix.uscg.mil/IIR/IIRSearch.aspx".freeze
# Results endpoint: receives the per-row ReportButton postbacks that return a
# report's detail page.
RESULTS_URL = "http://cgmix.uscg.mil/IIR/IIRSearchResults.aspx".freeze
# Seed value sent as the __VIEWSTATE form field with the initial search POST.
# It is overwritten with the value parsed out of each response further down.
# NOTE(review): this literal spans two source lines, so the string contains an
# embedded newline -- confirm that is intended (it may be a copy/paste wrap).
viewState = "/wEPDwULLTE0MjgxNzk2ODUPZBYCAgMPZBYKAgEPZBYKAgEPD2QWBB4Lb25tb3VzZW92ZXIFK3VuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uQ0dNSVhIb21lJykeCm9ubW91c2VvdXQFLXVuZGVybGluZU5PKCdJSVJIZWFkZXIxX0xpbmtCdXR0b25DR01JWEhvbWUnKWQCAw8PZBYEHwAFKXVuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uSUlSSG9tZScpHwEFK3VuZGVybGluZU5PKCdJSVJIZWFkZXIxX0xpbmtCdXR0b25JSVJIb21lJylkAgUPD2QWBB8ABSt1bmRlcmxpbmUoJ0lJUkhlYWRlcjFfTGlua0J1dHRvbklJUlNlYXJjaCcpHwEFLXVuZGVybGluZU5PKCdJSVJIZWFkZXIxX0xpbmtCdXR0b25JSVJTZWFyY2gnKWQCBw8PZBYEHwAFLXVuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uRGVmaW5pdGlvbnMnKR8BBS91bmRlcmxpbmVOTygnSUlSSGVhZGVyMV9MaW5rQnV0dG9uRGVmaW5pdGlvbnMnKWQCCQ8PZBYEHwAFK3VuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uQ29udGFjdFVzJykfAQUtdW5kZXJsaW5lTk8oJ0lJUkhlYWRlcjFfTGlua0J1dHRvbkNvbnRhY3RVcycpZAIFD2QWAgIbDxAPFgIeC18hRGF0YUJvdW5kZ2QQFRwDQUxMGUNvbW1lcmNpYWwgRmlzaGluZyBWZXNzZWwWRmlzaCBQcm9jZXNzaW5nIFZlc3NlbA1GcmVpZ2h0IEJhcmdlDEZyZWlnaHQgU2hpcBFJbmR1c3RyaWFsIFZlc3NlbB1Nb2JpbGUgT2Zmc2hvcmUgRHJpbGxpbmcgVW5pdBZPZmZzaG9yZSBTdXBwbHkgVmVzc2VsDE9pbCBSZWNvdmVyeRZQYXNzZW5nZXIgKDYgb3IgRmV3ZXIpFVBhc3NlbmdlciAoSW5zcGVjdGVkKRdQYXNzZW5nZXIgKE1vcmUgVGhhbiA2KRdQYXNzZW5nZXIgKFVuaW5zcGVjdGVkKR1QYXNzZW5nZXIgQmFyZ2UgICg2IG9yIEZld2VyKRxQYXNzZW5nZXIgQmFyZ2UgIChJbnNwZWN0ZWQpHlBhc3NlbmdlciBCYXJnZSAgKE1vcmUgVGhhbiA2KR5QYXNzZW5nZXIgQmFyZ2UgIChVbmluc3BlY3RlZCkOUHVibGljIEZyZWlnaHQVUHVibGljIFRhbmtzaGlwL0JhcmdlG1B1YmxpYyBWZXNzZWwsIFVuY2xhc3NpZmllZAxSZWNyZWF0aW9uYWwPUmVzZWFyY2ggVmVzc2VsC1NjaG9vbCBTaGlwClRhbmsgQmFyZ2UJVGFuayBTaGlwDVRvd2luZyBWZXNzZWwMVW5jbGFzc2lmaWVkB1Vua25vd24VHANBTEwZQ29tbWVyY2lhbCBGaXNoaW5nIFZlc3NlbBZGaXNoIFByb2Nlc3NpbmcgVmVzc2VsDUZyZWlnaHQgQmFyZ2UMRnJlaWdodCBTaGlwEUluZHVzdHJpYWwgVmVzc2VsHU1vYmlsZSBPZmZzaG9yZSBEcmlsbGluZyBVbml0Fk9mZnNob3JlIFN1cHBseSBWZXNzZWwMT2lsIFJlY292ZXJ5FlBhc3NlbmdlciAoNiBvciBGZXdlcikVUGFzc2VuZ2VyIChJbnNwZWN0ZWQpF1Bhc3NlbmdlciAoTW9yZSBUaGFuIDYpF1Bhc3NlbmdlciAoVW5pbnNwZWN0ZWQpHVBhc3NlbmdlciBCYXJnZSAgKDYgb3IgRmV3ZXIpHFBhc3NlbmdlciBCYXJnZSAgKEluc3BlY3RlZCkeUGFzc2VuZ2VyIEJhcmdlICAoTW9yZSBUaGF
uIDYpHlBhc3NlbmdlciBCYXJnZSAgKFVuaW5zcGVjdGVkKQ5QdWJsaWMgRnJlaWdodBVQdWJsaWMgVGFua3NoaXAvQmFyZ2UbUHVibGljIFZlc3NlbCwgVW5jbGFzc2lmaWVkDFJlY3JlYXRpb25hbA9SZXNlYXJjaCBWZXNzZWwLU2Nob29sIFNoaXAKVGFuayBCYXJnZQlUYW5rIFNoaXANVG93aW5nIFZlc3NlbAxVbmNsYXNzaWZpZWQHVW5rbm93bhQrAxxnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZGQCCQ8PFgIeB1Zpc2libGVoZBYEAgEPDxYCHwNoZGQCCw88KwANAGQCCw8PFgIfA2hkZAIND2QWAgIDDw8WBB4EVGV4dAUUTW9uZGF5LCBNYXkgMTksIDIwMTQeB1Rvb2xUaXAFFE1vbmRheSwgTWF5IDE5LCAyMDE0ZGQYAQULR3JpZFZpZXdJSVIPZ2Te5HentmxZfU9uaNCFerLpCxGKsA=="
# Seed value sent as the __EVENTVALIDATION form field with the initial search
# POST. Like viewState, it is replaced from each response further down.
eventValidation = "/wEWKQK39qHFBQKogLOFBALJ8fjaBALC8ezcBwL+vrLWAQLZ9fCmDgLS9aTbDAKuw6qbDgKShLrBDALGyJezCgK+q+SvAQLQl5DIAQLymLHFDgKS1repAQLk68g5Avr6moEOAtSw57oNAoiXqYoGArPhnq0NAqeRvKAEAvr8yqYCAsz08PYKAsHBwOQJAtn8kqsJAo7smJINAoOtsPAPApvVyZAEAt+ImcoGAqrb1t0DApTwwZECAuukjZEHAuiU3dMDApa3rtoNArqyr+4HAoC80roKAqzErtUDAoaa7fcCAv6v2f4NAva9lMsEAr3t4tUJArrY8x24g9IcL1hd9oyWKKex/+sw2Mj4nA=="
# Main scrape flow:
#   1. POST the search form (classification "ALL", date fields 1/1/2013 and
#      1/1/2014) and pick up the postback tokens from the response.
#   2. Replay the GridViewIIR paging postback for result pages 2..23,
#      refreshing the tokens and the TSa4e033 cookie after every response.
#   3. For every page after page 2, request grid rows ctl02..ctl05 through
#      their ReportButton postback, cache each detail page under cruisefiles/
#      and print the MISLE activity number found in it.

# Reads a hidden form field by name. Selecting by name instead of by position
# among all <input> elements keeps the script working if the markup order
# changes. Raises if the field is absent, because no further postback can
# succeed without it.
hidden_field = lambda do |doc, name|
  node = doc.at_css("input[name='#{name}']")
  raise "hidden field #{name} missing from response" unless node
  node['value']
end

# NOTE(review): the session id and the initial TSa4e033 value are hard-coded;
# presumably they were captured from a browser session -- confirm they are
# still accepted by the server before running.
session_id = "evpbwo551hc3zzjeegcojy45"
cookie = "d08859029afb214d95d11d5b11950fb760123b2e01801ecd537bb47b36af3f5eb1469d24"

# Builds the RestClient header hash carrying both cookies.
request_headers = lambda do |ts_cookie|
  { :cookies => { 'ASP.NET_SessionId' => session_id, 'TSa4e033' => ts_cookie } }
end

# Detail pages are cached here; create the directory so the first write
# cannot fail on a fresh checkout.
cache_dir = "cruisefiles"
Dir.mkdir(cache_dir) unless File.directory?(cache_dir)

search_form = {
  "__EVENTTARGET" => "",
  "__EVENTARGUMENT" => "",
  "__VIEWSTATE" => viewState,
  "__VIEWSTATEGENERATOR" => "6E4D6470",
  "__EVENTVALIDATION" => eventValidation,
  "ActNum" => '',
  "TextBoxStDtMM" => 1,
  "TextBoxStDtDD" => 1,
  "TextBoxStDtYYYY" => '2013',
  "TextBoxEdDtMM" => 1,
  "TextBoxEdDtDD" => 1,
  "TextBoxEdDtYYYY" => 2014,
  "DropDownListClassification" => 'ALL',
  "Vessel" => '',
  "OrgName" => '',
  "Inv_Fac" => '',
  "KeyWord" => '',
  "ButtonSearch" => 'Search'
}

# The original wrapped everything in `if response = RestClient.post(...)`;
# the response object is never nil/false, so the conditional is dropped.
search_response = RestClient.post(SEARCH_URL, search_form)
search_doc = Nokogiri::HTML(search_response)
eventValidation = hidden_field.call(search_doc, "__EVENTVALIDATION")
viewState = hidden_field.call(search_doc, "__VIEWSTATE")

# LOOPS THROUGH THE SEARCH RESULTS PAGES
2.upto(23) do |page_num|
  paging_form = {
    "__EVENTTARGET" => "GridViewIIR",
    "__EVENTARGUMENT" => "Page$#{page_num}",
    "__VIEWSTATE" => viewState,
    "__VIEWSTATEGENERATOR" => "6E4D6470",
    "__EVENTVALIDATION" => eventValidation
  }
  page_response = RestClient.post(SEARCH_URL, paging_form, request_headers.call(cookie))
  page_doc = Nokogiri::HTML(page_response)
  eventValidation = hidden_field.call(page_doc, "__EVENTVALIDATION")
  viewState = hidden_field.call(page_doc, "__VIEWSTATE")

  # Pick up a rotated TSa4e033 value when the server sends one; keep the
  # current value otherwise instead of failing on a missing Set-Cookie header.
  rotated = Array(page_response.headers[:set_cookie]).map { |raw| raw[/\ATSa4e033=([^;]+)/, 1] }.compact.first
  cookie = rotated if rotated

  # GET TO THE VIEW DETAILS PAGE (only for pages after page 2, as before)
  next unless page_num > 2

  2.upto(5) do |item_num|
    puts "trying #{page_num}#{item_num}"
    file = "#{cache_dir}/#{page_num}#{item_num}.html"

    if File.exist?(file)
      # Block form closes the handle; the original left it open.
      page_html = File.open(file, 'rb') { |cached| cached.read }
    else
      # Row controls are zero-padded to two digits (ctl02, ctl03, ...).
      # format avoids the original `item_num < 9` off-by-one for row 9.
      event_target = "GridViewIIR$ctl#{format('%02d', item_num)}$ReportButton"
      puts event_target
      details_form = {
        "__EVENTTARGET" => event_target,
        "__EVENTARGUMENT" => "",
        "__VIEWSTATE" => viewState,
        "__VIEWSTATEGENERATOR" => "6E4D6470",
        "__EVENTVALIDATION" => eventValidation
      }
      details_response = RestClient.post(RESULTS_URL, details_form, request_headers.call(cookie))
      page_html = details_response.to_s
      File.open(file, 'wb') { |out| out.write(page_html) }
    end

    details_doc = Nokogiri::HTML(page_html)
    # The activity number is embedded in the title attribute of the report
    # label. (The original also looked up #LabelIncidentBrief but never used it.)
    report_label = details_doc.at_css("#LabelInvestigationReport")
    title = report_label && report_label['title']
    after_marker = title.to_s.split("MISLE Activity Number: ")[1]
    filename = after_marker && after_marker.split("Originating Unit:")[0]

    if filename
      puts filename
    else
      # A page without the expected label (e.g. an error page that got
      # cached) should not abort the whole run.
      warn "no MISLE activity number found in #{file}"
    end
  end
end
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment