Created
August 11, 2020 12:07
-
-
Save rahim/dee3ea9b7c93e6dc32b610c12842c591 to your computer and use it in GitHub Desktop.
Aggregate, analyze structured deprecation logs from s3 with lambda
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "aws-sdk-s3"
require "json"
require "shellwords"
require "tmpdir" # Dir.mktmpdir lives here; without this require the handler fails at runtime
# Lambda entry point, triggered by an S3 put notification.
#
# Downloads every *.log object sharing the triggering key's build-hash
# prefix, greps out the structured JSON deprecation warnings they contain,
# aggregates them into frequency tables, writes a plain-text report, and
# uploads the report back to the same bucket under the same prefix.
#
# @param event [Hash] S3 event notification payload
# @param context [Object] Lambda context (unused)
# @return [Hash] { status: 200, body: "ok" } on success,
#   { status: 400 } when the key prefix is not a 7-char commit hash
def handler(event:, context:)
  # Get the record that triggered this invocation
  first_record = event["Records"].first
  bucket_name = first_record["s3"]["bucket"]["name"]
  object_key = first_record["s3"]["object"]["key"]
  puts "object_key: #{object_key}"
  puts
  # Objects are namespaced as "<short-commit-hash>/<file>"
  build_hash = object_key.split("/").first
  puts "build_hash: #{build_hash}"
  # match? avoids allocating MatchData we never use
  unless build_hash.match?(/\A[a-z0-9]{7}\z/)
    puts "exiting, not a commit hash: #{build_hash}"
    return { status: 400 }
  end
  # Get reference to the file in S3
  client = Aws::S3::Client.new(region: 'eu-west-1')
  s3 = Aws::S3::Resource.new(client: client)
  puts "bucket_name: #{bucket_name}"
  bucket = s3.bucket(bucket_name)
  objects = bucket.objects(prefix: build_hash)
  # Block form of mktmpdir removes the directory when the block exits.
  # That matters on warm Lambda containers: /tmp persists between
  # invocations, so an unremoved dir would leak disk space every run.
  Dir.mktmpdir do |dir|
    objects.each do |object|
      next if object.key.end_with?("/") # skip any dir entries
      file_name = object.key.split("/").last
      next unless file_name.end_with?(".log")
      object.download_file("#{dir}/#{file_name}")
    end
    # Block form restores the previous working directory on exit, so a
    # warm container isn't left chdir'd into a deleted temp dir.
    Dir.chdir(dir) do
      # these are distinct json objects
      # at the time of writing that means distinct [message+location+spec_location]
      # we rely on the fact that only our JSON formatted deprecation warnings
      # include the string "deprecation_message"
      distinct_json_grep_result =
        %x{grep --no-filename "deprecation_message" *.log | sort | uniq}
      # This gives us an array of hashes
      #
      # [
      #   { "deprecation_message" => "x", "location" => "foo/bar/baz.rb:123", "spec_location" => "qux/blah_spec.rb" },
      #   { "deprecation_message" => "x", "location" => "foo/bar/baz.rb:123", "spec_location" => "baz/blah_spec.rb" },
      #   { "deprecation_message" => "x", "location" => "foo/bar/baz.rb:456", "spec_location" => "qux/blah_spec.rb" },
      #   { "deprecation_message" => "y", "location" => "foo/bar/baz.rb:123" }
      # ]
      #
      distinct_json =
        distinct_json_grep_result.lines.map(&:strip).filter_map do |entry|
          JSON.parse(entry)
        rescue JSON::ParserError
          nil # non-JSON grep hits are expected noise; skip them
        end
      # We're going to collapse down and concern ourselves with only location
      # not source_location, to get:
      #
      # {
      #   "x" => ["foo/bar/baz.rb:123", "foo/bar/baz.rb:123", "foo/bar/baz.rb:456"],
      #   "y" => ["foo/bar/baz.rb:123"]
      # }
      warnings_map = {}
      distinct_json.each do |entry|
        deprecation_message = entry["deprecation_message"].gsub("\n", " ")
        location = entry["location"]
        # We deliberately keep duplicates (rather than eg using a set), these will
        # occur where the spec_location differs, that's useful because it allows
        # us to see how many times one location is responsible for a repeated
        # warning in the suite
        warnings_map[deprecation_message] ||= []
        warnings_map[deprecation_message] << location
      end
      # {
      #   "x" => 3,
      #   "y" => 1
      # }
      frequency_map =
        warnings_map.transform_values { |locations| locations.length }
      # {
      #   "x" => { "foo/bar/baz.rb:123" => 2, "foo/bar/baz.rb:456" => 1 },
      #   "y" => { "foo/bar/baz.rb:123" => 1 }
      # }
      location_frequency_map =
        warnings_map.transform_values do |locations|
          locations.sort.group_by(&:itself).transform_values(&:length)
        end
      File.open("err-report.txt", "w") do |results_file|
        results_file.puts "Distinct warnings count: #{warnings_map.length}"
        results_file.puts
        results_file.puts "# Warnings with frequency"
        results_file.puts
        frequency_map.sort_by { |_message, count| -count }.each do |message, count|
          results_file.puts "#{count.to_s.ljust(5)} #{message}"
        end
        results_file.puts
        results_file.puts "# Warnings with location frequency"
        results_file.puts
        frequency_map.sort_by { |_message, count| -count }.each do |message, _count|
          results_file.puts message
          location_frequencies = location_frequency_map[message]
          location_frequencies.sort_by { |_location, count| -count }.each do |location, count|
            results_file.puts "  #{count.to_s.ljust(5)} #{location}"
          end
          results_file.puts
        end
      end
      puts `cat err-report.txt`
      # Publish the report alongside the logs it summarises
      report_object = bucket.object("#{build_hash}/err-report.txt")
      report_object.upload_file("err-report.txt")
      # Last expression of both blocks, so this propagates out as the
      # handler's return value.
      { status: 200, body: "ok" }
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment