Created
May 21, 2014 21:15
-
-
Save lenagroeger/d4ed908482afebada178 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'rubygems' | |
| require 'restclient' | |
| require 'nokogiri' | |
| require 'logger' | |
| require 'net/http' | |
| require 'mechanize' | |
| # page_num = 2 | |
| # item_num = 2 | |
# Endpoints on cgmix.uscg.mil: the IIR search form and the results grid it
# leads to.
SEARCH_URL = "http://cgmix.uscg.mil/IIR/IIRSearch.aspx".freeze
RESULTS_URL = "http://cgmix.uscg.mil/IIR/IIRSearchResults.aspx".freeze

# Seed values for the __VIEWSTATE and __EVENTVALIDATION fields sent with the
# first search POST; later requests replace them with the values read back
# from each response.
# NOTE(review): these look like they were captured from a live page and will
# presumably go stale -- confirm whether they should be fetched with a GET of
# SEARCH_URL instead.
#
# The payload is written as two adjacent literals joined by a line
# continuation so that no newline ends up inside the posted value.
viewState = "/wEPDwULLTE0MjgxNzk2ODUPZBYCAgMPZBYKAgEPZBYKAgEPD2QWBB4Lb25tb3VzZW92ZXIFK3VuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uQ0dNSVhIb21lJykeCm9ubW91c2VvdXQFLXVuZGVybGluZU5PKCdJSVJIZWFkZXIxX0xpbmtCdXR0b25DR01JWEhvbWUnKWQCAw8PZBYEHwAFKXVuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uSUlSSG9tZScpHwEFK3VuZGVybGluZU5PKCdJSVJIZWFkZXIxX0xpbmtCdXR0b25JSVJIb21lJylkAgUPD2QWBB8ABSt1bmRlcmxpbmUoJ0lJUkhlYWRlcjFfTGlua0J1dHRvbklJUlNlYXJjaCcpHwEFLXVuZGVybGluZU5PKCdJSVJIZWFkZXIxX0xpbmtCdXR0b25JSVJTZWFyY2gnKWQCBw8PZBYEHwAFLXVuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uRGVmaW5pdGlvbnMnKR8BBS91bmRlcmxpbmVOTygnSUlSSGVhZGVyMV9MaW5rQnV0dG9uRGVmaW5pdGlvbnMnKWQCCQ8PZBYEHwAFK3VuZGVybGluZSgnSUlSSGVhZGVyMV9MaW5rQnV0dG9uQ29udGFjdFVzJykfAQUtdW5kZXJsaW5lTk8oJ0lJUkhlYWRlcjFfTGlua0J1dHRvbkNvbnRhY3RVcycpZAIFD2QWAgIbDxAPFgIeC18hRGF0YUJvdW5kZ2QQFRwDQUxMGUNvbW1lcmNpYWwgRmlzaGluZyBWZXNzZWwWRmlzaCBQcm9jZXNzaW5nIFZlc3NlbA1GcmVpZ2h0IEJhcmdlDEZyZWlnaHQgU2hpcBFJbmR1c3RyaWFsIFZlc3NlbB1Nb2JpbGUgT2Zmc2hvcmUgRHJpbGxpbmcgVW5pdBZPZmZzaG9yZSBTdXBwbHkgVmVzc2VsDE9pbCBSZWNvdmVyeRZQYXNzZW5nZXIgKDYgb3IgRmV3ZXIpFVBhc3NlbmdlciAoSW5zcGVjdGVkKRdQYXNzZW5nZXIgKE1vcmUgVGhhbiA2KRdQYXNzZW5nZXIgKFVuaW5zcGVjdGVkKR1QYXNzZW5nZXIgQmFyZ2UgICg2IG9yIEZld2VyKRxQYXNzZW5nZXIgQmFyZ2UgIChJbnNwZWN0ZWQpHlBhc3NlbmdlciBCYXJnZSAgKE1vcmUgVGhhbiA2KR5QYXNzZW5nZXIgQmFyZ2UgIChVbmluc3BlY3RlZCkOUHVibGljIEZyZWlnaHQVUHVibGljIFRhbmtzaGlwL0JhcmdlG1B1YmxpYyBWZXNzZWwsIFVuY2xhc3NpZmllZAxSZWNyZWF0aW9uYWwPUmVzZWFyY2ggVmVzc2VsC1NjaG9vbCBTaGlwClRhbmsgQmFyZ2UJVGFuayBTaGlwDVRvd2luZyBWZXNzZWwMVW5jbGFzc2lmaWVkB1Vua25vd24VHANBTEwZQ29tbWVyY2lhbCBGaXNoaW5nIFZlc3NlbBZGaXNoIFByb2Nlc3NpbmcgVmVzc2VsDUZyZWlnaHQgQmFyZ2UMRnJlaWdodCBTaGlwEUluZHVzdHJpYWwgVmVzc2VsHU1vYmlsZSBPZmZzaG9yZSBEcmlsbGluZyBVbml0Fk9mZnNob3JlIFN1cHBseSBWZXNzZWwMT2lsIFJlY292ZXJ5FlBhc3NlbmdlciAoNiBvciBGZXdlcikVUGFzc2VuZ2VyIChJbnNwZWN0ZWQpF1Bhc3NlbmdlciAoTW9yZSBUaGFuIDYpF1Bhc3NlbmdlciAoVW5pbnNwZWN0ZWQpHVBhc3NlbmdlciBCYXJnZSAgKDYgb3IgRmV3ZXIpHFBhc3NlbmdlciBCYXJnZSAgKEluc3BlY3RlZCkeUGFzc2VuZ2VyIEJhcmdlICAoTW9yZSBUa" \
            "GFuIDYpHlBhc3NlbmdlciBCYXJnZSAgKFVuaW5zcGVjdGVkKQ5QdWJsaWMgRnJlaWdodBVQdWJsaWMgVGFua3NoaXAvQmFyZ2UbUHVibGljIFZlc3NlbCwgVW5jbGFzc2lmaWVkDFJlY3JlYXRpb25hbA9SZXNlYXJjaCBWZXNzZWwLU2Nob29sIFNoaXAKVGFuayBCYXJnZQlUYW5rIFNoaXANVG93aW5nIFZlc3NlbAxVbmNsYXNzaWZpZWQHVW5rbm93bhQrAxxnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZGQCCQ8PFgIeB1Zpc2libGVoZBYEAgEPDxYCHwNoZGQCCw88KwANAGQCCw8PFgIfA2hkZAIND2QWAgIDDw8WBB4EVGV4dAUUTW9uZGF5LCBNYXkgMTksIDIwMTQeB1Rvb2xUaXAFFE1vbmRheSwgTWF5IDE5LCAyMDE0ZGQYAQULR3JpZFZpZXdJSVIPZ2Te5HentmxZfU9uaNCFerLpCxGKsA=="
eventValidation = "/wEWKQK39qHFBQKogLOFBALJ8fjaBALC8ezcBwL+vrLWAQLZ9fCmDgLS9aTbDAKuw6qbDgKShLrBDALGyJezCgK+q+SvAQLQl5DIAQLymLHFDgKS1repAQLk68g5Avr6moEOAtSw57oNAoiXqYoGArPhnq0NAqeRvKAEAvr8yqYCAsz08PYKAsHBwOQJAtn8kqsJAo7smJINAoOtsPAPApvVyZAEAt+ImcoGAqrb1t0DApTwwZECAuukjZEHAuiU3dMDApa3rtoNArqyr+4HAoC80roKAqzErtUDAoaa7fcCAv6v2f4NAva9lMsEAr3t4tUJArrY8x24g9IcL1hd9oyWKKex/+sw2Mj4nA=="
# Submits the IIR search form, pages through the result grid, and for each
# visited page stores a handful of "view details" pages under cruisefiles/,
# printing the MISLE activity number found in each one. Detail pages already
# on disk are parsed from the cached copy instead of being requested again.

# Builds the form fields that every postback in this script sends.
#
# @param target [String] value for __EVENTTARGET
# @param argument [String] value for __EVENTARGUMENT
# @param view_state [String] value for __VIEWSTATE
# @param event_validation [String] value for __EVENTVALIDATION
# @return [Hash] form fields, in the order they are posted
def iir_postback_params(target, argument, view_state, event_validation)
  {
    "__EVENTTARGET" => target,
    "__EVENTARGUMENT" => argument,
    "__VIEWSTATE" => view_state,
    "__VIEWSTATEGENERATOR" => "6E4D6470",
    "__EVENTVALIDATION" => event_validation
  }
end

# Builds the RestClient header hash carrying the fixed session id and the
# current TSa4e033 cookie value.
#
# @param ts_cookie [String] current value of the TSa4e033 cookie
# @return [Hash] headers argument for RestClient.post
def iir_cookie_headers(ts_cookie)
  { :cookies => { 'ASP.NET_SessionId' => "evpbwo551hc3zzjeegcojy45", 'TSa4e033' => ts_cookie } }
end

# Reads the two form-state values back from a response body.
#
# NOTE(review): the fields are picked by position (first and third <input>);
# looking them up by name would be sturdier -- confirm against the live markup.
#
# @param response [#to_s] HTML of a search/results page
# @return [Array(String, String)] view state and event validation values
def iir_form_state(response)
  inputs = Nokogiri::HTML(response).css("input")
  [inputs[0]['value'], inputs[2]['value']]
end

search_form = iir_postback_params("", "", viewState, eventValidation).merge(
  "ActNum" => '',
  "TextBoxStDtMM" => 1,
  "TextBoxStDtDD" => 1,
  "TextBoxStDtYYYY" => '2013',
  "TextBoxEdDtMM" => 1,
  "TextBoxEdDtDD" => 1,
  "TextBoxEdDtYYYY" => 2014,
  "DropDownListClassification" => 'ALL',
  "Vessel" => '',
  "OrgName" => '',
  "Inv_Fac" => '',
  "KeyWord" => '',
  "ButtonSearch" => 'Search'
)

# The response was previously tested with `if`, but a returned response is
# always truthy, so the check is dropped.
searchResultsPage = RestClient.post(SEARCH_URL, search_form)
viewState, eventValidation = iir_form_state(searchResultsPage)
cookie = "d08859029afb214d95d11d5b11950fb760123b2e01801ecd537bb47b36af3f5eb1469d24"

# Make sure the cache directory exists before the first detail page is saved.
Dir.mkdir("cruisefiles") unless Dir.exist?("cruisefiles")

# LOOPS THROUGH THE SEARCH RESULTS PAGE
2.upto(23) do |page_num|
  # NOTE(review): paging posts to SEARCH_URL while detail requests post to
  # RESULTS_URL -- kept as written; confirm which endpoint serves the grid.
  results_page = RestClient.post(
    SEARCH_URL,
    iir_postback_params("GridViewIIR", "Page$#{page_num}", viewState, eventValidation),
    iir_cookie_headers(cookie)
  )
  # Each response carries fresh form state and a fresh TSa4e033 cookie that
  # the following request has to send back.
  viewState, eventValidation = iir_form_state(results_page)
  cookie = results_page.headers[:set_cookie][0].split(';')[0].split("TSa4e033=")[1]

  # GET TO THE VIEW DETAILS PAGE
  next unless page_num > 2

  2.upto(5) do |item_num|
    puts "trying #{page_num}#{item_num}"
    file = "cruisefiles/#{page_num}#{item_num}.html"

    if File.exist?(file)
      # Read the cached copy in one call so no file handle is left open.
      details_html = File.binread(file)
    else
      # Zero-pad the row index to two digits; the earlier `< 9` check would
      # have left item 9 unpadded.
      report_button = "GridViewIIR$ctl#{format('%02d', item_num)}$ReportButton"
      puts report_button
      details_html = RestClient.post(
        RESULTS_URL,
        iir_postback_params(report_button, "", viewState, eventValidation),
        iir_cookie_headers(cookie)
      )
      # Block form closes the file even if the write raises.
      File.open(file, 'wb') { |out| out.write(details_html) }
    end

    # The activity number is the part of the label's title attribute between
    # the two marker strings below.
    report_title = Nokogiri::HTML(details_html).css("#LabelInvestigationReport").attr('title').text
    filename = report_title.split("MISLE Activity Number: ")[1].split("Originating Unit:")[0]
    puts filename
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment