Skip to content

Instantly share code, notes, and snippets.

@dallasmarlow
Created January 24, 2014 18:16
Show Gist options
  • Save dallasmarlow/8602928 to your computer and use it in GitHub Desktop.
Save dallasmarlow/8602928 to your computer and use it in GitHub Desktop.
hadoop jobtracker machine list parser
require 'nokogiri'
require 'net/http'
require 'timeout'
require 'uri'
# Download the JobTracker's list of active machines. The whole exchange
# (connect, request, response) is bounded by a single 30-second deadline.
job_tracker_machine_list_uri = URI.parse('http://jobtracker:50030/machines.jsp?type=active')

response = Timeout.timeout(30) do
  host = job_tracker_machine_list_uri.host
  port = job_tracker_machine_list_uri.port

  # The block form of Net::HTTP.start returns the block's value, so
  # `response` ends up holding the Net::HTTPResponse for the GET.
  Net::HTTP.start(host, port) do |connection|
    get_request = Net::HTTP::Get.new(job_tracker_machine_list_uri.request_uri)
    connection.request(get_request)
  end
end
# Turn the machine-list page into one hash per table row, keyed by column
# name. Nothing is parsed unless the request succeeded, in which case
# `fields` and `task_trackers` are left as nil.
if response.is_a?(Net::HTTPSuccess)
  document = Nokogiri::HTML(response.body)

  # Column keys are taken from the bold labels in the table header. The
  # 'Task Trackers' label is skipped (presumably the table's title rather
  # than a column heading -- confirm against the page). Each remaining label
  # loses its first '# ' and becomes a snake_case symbol, e.g.
  # '# running tasks' => :running_tasks.
  fields = document.xpath('//table//thead//th//b').flat_map do |header_label|
    labels = header_label.children.map(&:text)
    labels
      .reject { |label| label == 'Task Trackers' }
      .map { |label| label.sub('# ', '').gsub(' ', '_').downcase.to_sym }
  end

  task_trackers = document.xpath('//table//tbody//tr').map do |table_row|
    # Produce exactly one value per cell element so values stay aligned with
    # `fields`: an empty cell yields '' instead of vanishing (which used to
    # shift every later column), and a cell with several child nodes yields
    # a single value instead of several.
    row_values = table_row.element_children.map do |row_cell|
      # A cell that directly contains a link is represented by the link
      # target; any other cell is represented by its text content.
      link = row_cell.at_xpath('./a[@href]')
      (link ? link['href'] : row_cell.text).chomp
    end

    Hash[fields.zip(row_values)]
  end
end
# example output:
# {:name=>"http://dn-9be23803:50060/",
# :host=>"dn-9be23803",
# :running_tasks=>"8",
# :max_map_tasks=>"8",
# :max_reduce_tasks=>"6",
# :task_failures=>"41",
# :directory_failures=>"0",
# :node_health_status=>"N/A",
# :seconds_since_node_last_healthy=>"0",
# :total_tasks_since_start=>"85628",
# :succeeded_tasks_since_start=>"83337",
# :total_tasks_last_day=>"0",
# :succeeded_tasks_last_day=>"0",
# :total_tasks_last_hour=>"0",
# :succeeded_tasks_last_hour=>"0",
# :seconds_since_heartbeat=>"0"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment