Skip to content

Instantly share code, notes, and snippets.

@avibryant
Created September 5, 2012 21:25
Show Gist options
  • Save avibryant/3645065 to your computer and use it in GitHub Desktop.
Save avibryant/3645065 to your computer and use it in GitHub Desktop.
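# Simple visualization of Hadoop job history files.
#
# Reads job history lines from the files named on the command line (or stdin)
# and prints a JavaScript `var data = [...]` array.
# Visualize the output with gg.js: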
# gg({layers: [{ geometry: 'line', mapping: { x: 'minutes', y: 'task', group: 'stage', color: 'type'}}]});
# Parses one line of a job history file into a Hash of strings.
#
# The first token of the line (everything before the first space or double
# quote) is stored under "TYPE". Every KEY="value" pair after it is stored
# under KEY, where KEY is a run of word characters preceded by a space or a
# quote. A value is everything between its two double quotes, so values
# containing spaces are kept whole. Escaped quotes are not interpreted: the
# value ends at the first double quote.
#
# @param line [String] one raw line of the history file
# @return [Hash{String => String, nil}] "TYPE" plus one entry per pair;
#   "TYPE" is nil for an empty line
def parse(line)
  # Limit of 2: peel off the record type, keep the rest of the line intact.
  type, rest = line.split(/[ "]/, 2)
  output = { "TYPE" => type }

  # The lookbehind rejects keys glued to a preceding non-separator character
  # (e.g. the "b" in a=b="x"), so only whole KEY= tokens are accepted.
  rest.to_s.scan(/(?<![^ "])(\w+)="([^"]*)"/) do |key, value|
    output[key] = value
  end

  output
end
# Timing and status data collected for a single task attempt, plus the
# formatting needed to emit it as rows of a JavaScript data array.
class TaskAttempt
  attr_accessor :start, :finish, :shuffle, :sort, :type, :status, :id

  # Formats one data point as a JavaScript object literal with a trailing comma.
  #
  # start  - origin timestamp (numeric) that `time` is measured against
  # time   - timestamp of this point; converted with to_i, so strings are fine
  # status - label appended to both the stage and the type of the row
  #
  # The elapsed time is divided by 60000.0 and reported as `minutes`.
  def json_row(start, time, status)
    elapsed = time.to_i - start
    minutes = elapsed / 60000.0
    "{task: #{@id}, stage: \"#{@id}-#{status}\", minutes: #{minutes}, type: \"#{@type}-#{status}\"},"
  end

  # Returns all rows for this attempt, joined by newlines.
  #
  # Without a shuffle timestamp the attempt is a single segment from start to
  # finish labelled with its status. With one, it is three segments:
  # SHUFFLE (start..shuffle), SORT (shuffle..sort) and status (sort..finish).
  # Each segment contributes a row for each of its two end points.
  def json_rows(start)
    points =
      if @shuffle.nil?
        [[@start, @status], [@finish, @status]]
      else
        [
          [@start, "SHUFFLE"], [@shuffle, "SHUFFLE"],
          [@shuffle, "SORT"], [@sort, "SORT"],
          [@sort, @status], [@finish, @status]
        ]
      end

    points.map { |time, label| json_row(start, time, label) }.join("\n")
  end
end
# Collect one TaskAttempt per TASK_ATTEMPT_ID. Several input lines may refer
# to the same attempt; each line only overwrites the fields it carries.
task_attempts = {}
ARGF.each do |line|
  l = parse(line)
  id = l["TASK_ATTEMPT_ID"]
  next unless id

  ta = task_attempts[id] ||= TaskAttempt.new
  ta.start = l["START_TIME"].to_i if l["START_TIME"]
  ta.finish = l["FINISH_TIME"].to_i if l["FINISH_TIME"]
  ta.status = l["TASK_STATUS"] if l["TASK_STATUS"]
  # Shuffle/sort timestamps stay as strings; TaskAttempt#json_row converts them.
  ta.shuffle = l["SHUFFLE_FINISHED"] if l["SHUFFLE_FINISHED"]
  ta.sort = l["SORT_FINISHED"] if l["SORT_FINISHED"]
  ta.type = l["TASK_TYPE"] if l["TASK_TYPE"]
end

# An attempt without a START_TIME cannot be ordered (nil <=> Integer makes
# sort raise), so such attempts are left out of the output.
plottable = task_attempts.values.select { |ta| ta.start }

# Number the attempts by start time; the number becomes the `task` value.
sorted = plottable.sort { |a, b| a.start <=> b.start }
sorted.each_with_index { |ta, i| ta.id = i }

puts "var data = ["
# With no attempts there is no origin timestamp; emit an empty array instead
# of failing on sorted.first.
unless sorted.empty?
  start = sorted.first.start
  # Rows are printed in the order the attempts first appeared in the input.
  plottable.each do |ta|
    puts ta.json_rows(start)
  end
end
puts "]"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment