@rahim
Created August 11, 2020 12:07
Aggregate and analyze structured deprecation logs from S3 with Lambda
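The handler below is triggered by an S3 event. Each build is expected to upload its log files to the bucket under a prefix named after the short (7 character) commit hash, and every structured deprecation warning in those .log files is written as a single JSON line. An individual input line presumably looks something like this (the message and paths are hypothetical):

{"deprecation_message": "x is deprecated, use y instead", "location": "app/models/foo.rb:12", "spec_location": "spec/models/foo_spec.rb:34"}

The handler collects those lines across all the log files for a build, aggregates them by message and by source location, and uploads a plain-text report back to the same prefix.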
require "aws-sdk-s3"
require "shellwords"
require "json"
def handler(event:, context:)
  # Get the record that triggered this invocation
  first_record = event["Records"].first
  bucket_name = first_record["s3"]["bucket"]["name"]
  object_key = first_record["s3"]["object"]["key"]
  puts "object_key: #{object_key}"
  puts

  build_hash = object_key.split("/").first
  puts "build_hash: #{build_hash}"
  unless build_hash.match(/\A[a-z0-9]{7}\z/)
    puts "exiting, not a commit hash: #{build_hash}"
    return { status: 400 }
  end

  # Get a reference to the bucket in S3
  client = Aws::S3::Client.new(region: 'eu-west-1')
  s3 = Aws::S3::Resource.new(client: client)
  puts "bucket_name: #{bucket_name}"
  bucket = s3.bucket(bucket_name)
  objects = bucket.objects(prefix: build_hash)

  # Download every .log object for this build into a temporary directory
  dir = Dir.mktmpdir
  objects.each do |object|
    next if object.key.end_with?("/") # skip any dir entries
    file_name = object.key.split("/").last
    next unless file_name.end_with?(".log")
    object.download_file("#{dir}/#{file_name}")
  end
  Dir.chdir dir

  # These are distinct JSON objects;
  # at the time of writing that means distinct [message+location+spec_location].
  # We rely on the fact that only our JSON formatted deprecation warnings
  # include the string "deprecation_message".
  distinct_json_grep_result =
    %x{grep --no-filename "deprecation_message" *.log | sort | uniq}

  # This gives us an array of hashes
  #
  # [
  #   { "deprecation_message" => "x", "location" => "foo/bar/baz.rb:123", "spec_location" => "qux/blah_spec.rb" },
  #   { "deprecation_message" => "x", "location" => "foo/bar/baz.rb:123", "spec_location" => "baz/blah_spec.rb" },
  #   { "deprecation_message" => "x", "location" => "foo/bar/baz.rb:456", "spec_location" => "qux/blah_spec.rb" },
  #   { "deprecation_message" => "y", "location" => "foo/bar/baz.rb:123" }
  # ]
  #
  distinct_json =
    distinct_json_grep_result.lines.map(&:strip).map do |entry|
      JSON.parse(entry) rescue nil
    end.compact
  # We're going to collapse down and concern ourselves with only location,
  # not spec_location, to get:
  #
  # {
  #   "x" => ["foo/bar/baz.rb:123", "foo/bar/baz.rb:123", "foo/bar/baz.rb:456"],
  #   "y" => ["foo/bar/baz.rb:123"]
  # }
  warnings_map = {}
  distinct_json.each do |entry|
    deprecation_message = entry["deprecation_message"].gsub("\n", " ")
    location = entry["location"]
    # We deliberately keep duplicates (rather than eg using a set); these will
    # occur where the spec_location differs, which is useful because it allows
    # us to see how many times one location is responsible for a repeated
    # warning in the suite.
    warnings_map[deprecation_message] ||= []
    warnings_map[deprecation_message] << location
  end

  # {
  #   "x" => 3,
  #   "y" => 1
  # }
  frequency_map =
    warnings_map.transform_values { |locations| locations.length }

  # {
  #   "x" => { "foo/bar/baz.rb:123" => 2, "foo/bar/baz.rb:456" => 1 },
  #   "y" => { "foo/bar/baz.rb:123" => 1 }
  # }
  location_frequency_map =
    warnings_map.transform_values do |locations|
      locations.sort.group_by(&:itself).transform_values(&:length)
    end
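  # (On Ruby 2.7+ the per-location counting above, sort.group_by(&:itself).transform_values(&:length),
  # could equally be written as locations.tally, key ordering aside.)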
File.open("err-report.txt", "w") do |results_file|
results_file.puts "Distinct warnings count: #{warnings_map.length}"
results_file.puts
results_file.puts "# Warnings with frequency"
results_file.puts
frequency_map.sort_by { |message, count| -count }.each do |message, count|
results_file.puts "#{count.to_s.ljust(5)} #{message}"
end
results_file.puts
results_file.puts "# Warnings with location frequency"
results_file.puts
frequency_map.sort_by { |message, count| -count }.each do |message, _count|
results_file.puts message
location_frequencies = location_frequency_map[message]
location_frequencies.sort_by { |location, count| -count }.each do |location, count|
results_file.puts " #{count.to_s.ljust(5)} #{location}"
end
results_file.puts
end
end
puts `cat err-report.txt`
report_object = bucket.object("#{build_hash}/err-report.txt")
report_object.upload_file("err-report.txt")
{ status: 200, body: "ok" }
end
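For a local smoke test, the handler can be driven with a hand-built S3 event along these lines; the bucket name and object key are hypothetical placeholders, and AWS credentials with access to the bucket are assumed to be available in the environment:

# Hypothetical local invocation, not part of the Lambda deployment.
if $PROGRAM_NAME == __FILE__
  sample_event = {
    "Records" => [
      {
        "s3" => {
          "bucket" => { "name" => "my-ci-logs-bucket" },   # hypothetical bucket
          "object" => { "key" => "abc1234/node-01.log" }   # hypothetical key
        }
      }
    ]
  }
  puts handler(event: sample_event, context: nil)
end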